<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (Vaguery)</title>
    <link>https://pinboard.in/u:Vaguery/public/</link>
    <description>recent bookmarks from Vaguery</description>
    <items>
      <rdf:Seq>	<rdf:li rdf:resource="https://ptgui.com/info/stitching_software_for_macos.html"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2504.07092"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2504.11406"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2305.11739"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2404.01775"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2309.14564"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2210.14132"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2204.11041"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2405.18029"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2212.06313"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2110.14968"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2205.09363"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2106.09259"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2202.09179"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2102.01690"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2105.07512"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1908.07846"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2009.04433"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1711.08995"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2011.02904"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2011.07048"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2109.07683"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1708.06899"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2104.14575"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1801.08267"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1611.10152"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1906.05856"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2010.01238"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2012.09444"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2012.11780"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2006.03829"/>
	<rdf:li rdf:resource="https://libvips.github.io/libvips/"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1909.03856"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1702.03505"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2005.02152"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1903.12003"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1912.11370"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1711.09670"/>
	<rdf:li rdf:resource="https://developer.apple.com/documentation/accelerate"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1802.10038"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1903.12363"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1904.05916"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1905.12536"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1904.08755"/>
	<rdf:li rdf:resource="https://openpreservation.org/blog/2019/11/07/jpeg-got-the-blues/"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1803.00407"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1803.08202"/>
	<rdf:li rdf:resource="http://www.vision.ee.ethz.ch/~timofter/publications/Gu-Chapter-2019.pdf"/>
	<rdf:li rdf:resource="https://dl.acm.org/citation.cfm?id=2601224"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1701.07396"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1711.04226"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1608.02153"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1812.07933"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1711.03247"/>
	<rdf:li rdf:resource="https://joel.franusic.com/krazy_kat/about/"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1904.01175"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1709.09683"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1709.09479"/>
	<rdf:li rdf:resource="http://library.msri.org/books/Book46/files/08li.pdf"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1711.06588"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1809.02786"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1710.02574"/>
	<rdf:li rdf:resource="https://www.jeremyjordan.me/semantic-segmentation/"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1804.01622"/>
	<rdf:li rdf:resource="https://bmcsystbiol.biomedcentral.com/articles/10.1186/s12918-017-0399-z"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1611.05896"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1801.06769"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1710.00194"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1710.06647"/>
	<rdf:li rdf:resource="https://design.google/library/ux-ai/"/>
      </rdf:Seq>
    </items>
  </channel><item rdf:about="https://ptgui.com/info/stitching_software_for_macos.html">
    <title>Stitching software for macOS</title>
    <dc:date>2026-06-25T20:01:53+00:00</dc:date>
    <link>https://ptgui.com/info/stitching_software_for_macos.html</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[PTGui panorama stitching software also runs on macOS. We offer a universal binary for Intel and Apple Silicon processors. PTGui is fully automatic stitching software for Windows and Mac. It will stich any panorama from any lens type. The PTGui free trial version is available for download here.
]]></description>
<dc:subject>image-processing gigapixel software photography to-try</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:fe14ba517613/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:gigapixel"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:software"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:photography"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-try"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2504.07092">
    <title>[2504.07092] Are We Done with Object-Centric Learning?</title>
    <dc:date>2026-05-25T12:13:19+00:00</dc:date>
    <link>https://arxiv.org/abs/2504.07092</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Object-centric learning (OCL) seeks to learn representations that only encode an object, isolated from other objects or background cues in a scene. This approach underpins various aims, including out-of-distribution (OOD) generalization, sample-efficient composition, and modeling of structured environments. Most research has focused on developing unsupervised mechanisms that separate objects into discrete slots in the representation space, evaluated using unsupervised object discovery. However, with recent sample-efficient segmentation models, we can separate objects in the pixel space and encode them independently. This achieves remarkable zero-shot performance on OOD object discovery benchmarks, is scalable to foundation models, and can handle a variable number of slots out-of-the-box. Hence, the goal of OCL methods to obtain object-centric representations has been largely achieved. Despite this progress, a key question remains: How does the ability to separate objects within a scene contribute to broader OCL objectives, such as OOD generalization? We address this by investigating the OOD generalization challenge caused by spurious background cues through the lens of OCL. We propose a novel, training-free probe called Object-Centric Classification with Applied Masks (OCCAM), demonstrating that segmentation-based encoding of individual objects significantly outperforms slot-based OCL methods. However, challenges in real-world applications remain. We provide the toolbox for the OCL community to use scalable object-centric representations, and focus on practical applications and fundamental questions, such as understanding object perception in human cognition. Our code is available here: this https URL.
]]></description>
<dc:subject>image-processing image-segmentation machine-learning rather-interesting algorithms neural-networks image-analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:090aa6203061/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-segmentation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2504.11406">
    <title>[2504.11406] Multi-level Cellular Automata for FLIM networks</title>
    <dc:date>2026-05-25T12:01:55+00:00</dc:date>
    <link>https://arxiv.org/abs/2504.11406</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[The necessity of abundant annotated data and complex network architectures presents a significant challenge in deep-learning Salient Object Detection (deep SOD) and across the broader deep-learning landscape. This challenge is particularly acute in medical applications in developing countries with limited computational resources. Combining modern and classical techniques offers a path to maintaining competitive performance while enabling practical applications. Feature Learning from Image Markers (FLIM) methodology empowers experts to design convolutional encoders through user-drawn markers, with filters learned directly from these annotations. Recent findings demonstrate that coupling a FLIM encoder with an adaptive decoder creates a flyweight network suitable for SOD, requiring significantly fewer parameters than lightweight models and eliminating the need for backpropagation. Cellular Automata (CA) methods have proven successful in data-scarce scenarios but require proper initialization -- typically through user input, priors, or randomness. We propose a practical intersection of these approaches: using FLIM networks to initialize CA states with expert knowledge without requiring user interaction for each image. By decoding features from each level of a FLIM network, we can initialize multiple CAs simultaneously, creating a multi-level framework. Our method leverages the hierarchical knowledge encoded across different network layers, merging multiple saliency maps into a high-quality final output that functions as a CA ensemble. Benchmarks across two challenging medical datasets demonstrate the competitiveness of our multi-level CA approach compared to established models in the deep SOD literature.
]]></description>
<dc:subject>cellular-automata image-processing rather-interesting to-understand to-simulate consider:representation consider:dynamics metaheuristics classification image-segmentation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:c02405c5a0f1/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:cellular-automata"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-understand"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-simulate"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:dynamics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:metaheuristics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:classification"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-segmentation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2305.11739">
    <title>[2305.11739] Survey of Automatic Plankton Image Recognition: Challenges, Existing Solutions and Future Perspectives</title>
    <dc:date>2025-04-05T23:42:06+00:00</dc:date>
    <link>https://arxiv.org/abs/2305.11739</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Planktonic organisms are key components of aquatic ecosystems and respond quickly to changes in the environment, therefore their monitoring is vital to understand the changes in the environment. Yet, monitoring plankton at appropriate scales still remains a challenge, limiting our understanding of functioning of aquatic systems and their response to changes. Modern plankton imaging instruments can be utilized to sample at high frequencies, enabling novel possibilities to study plankton populations. However, manual analysis of the data is costly, time consuming and expert based, making such approach unsuitable for large-scale application and urging for automatic solutions. The key problem related to the utilization of plankton datasets through image analysis is plankton recognition. Despite the large amount of research done, automatic methods have not been widely adopted for operational use. In this paper, a comprehensive survey on existing solutions for automatic plankton recognition is presented. First, we identify the most notable challenges that that make the development of plankton recognition systems difficult. Then, we provide a detailed description of solutions for these challenges proposed in plankton recognition literature. Finally, we propose a workflow to identify the specific challenges in new datasets and the recommended approaches to address them. For many of the challenges, applicable solutions exist. However, important challenges remain unsolved: 1) the domain shift between the datasets hindering the development of a general plankton recognition system that would work across different imaging instruments, 2) the difficulty to identify and process the images of previously unseen classes, and 3) the uncertainty in expert annotations that affects the training of the machine learning models for recognition. These challenges should be addressed in the future research.
]]></description>
<dc:subject>image-processing biology neural-networks rather-interesting classification annotation data-augmentation to-write-about nudge-targets</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:b6fa78292640/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:biology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:classification"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:annotation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:data-augmentation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2404.01775">
    <title>[2404.01775] A noisy elephant in the room: Is your out-of-distribution detector robust to label noise?</title>
    <dc:date>2024-12-21T14:53:30+00:00</dc:date>
    <link>https://arxiv.org/abs/2404.01775</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[The ability to detect unfamiliar or unexpected images is essential for safe deployment of computer vision systems. In the context of classification, the task of detecting images outside of a model's training domain is known as out-of-distribution (OOD) detection. While there has been a growing research interest in developing post-hoc OOD detection methods, there has been comparably little discussion around how these methods perform when the underlying classifier is not trained on a clean, carefully curated dataset. In this work, we take a closer look at 20 state-of-the-art OOD detection methods in the (more realistic) scenario where the labels used to train the underlying classifier are unreliable (e.g. crowd-sourced or web-scraped labels). Extensive experiments across different datasets, noise types & levels, architectures and checkpointing strategies provide insights into the effect of class label noise on OOD detection, and show that poor separation between incorrectly classified ID samples vs. OOD samples is an overlooked yet important limitation of existing methods. Code: this https URL
]]></description>
<dc:subject>image-processing computer-vision neural-networks your-lying-eyes anomaly-detection rather-interesting robustness noise to-write-about consider:ways-to-be-wrong</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:ade9c6cc5131/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:computer-vision"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:your-lying-eyes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:anomaly-detection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:robustness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:noise"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:ways-to-be-wrong"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2309.14564">
    <title>[2309.14564] Generative Escher Meshes</title>
    <dc:date>2024-10-27T23:25:16+00:00</dc:date>
    <link>https://arxiv.org/abs/2309.14564</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[This paper proposes a fully-automatic, text-guided generative method for producing perfectly-repeating, periodic, tile-able 2D imagery, such as the one seen on floors, mosaics, ceramics, and the work of M.C. Escher. In contrast to square texture images that are seamless when tiled, our method generates non-square tilings which comprise solely of repeating copies of the same object. It achieves this by optimizing both geometry and texture of a 2D mesh, yielding a non-square tile in the shape and appearance of the desired object, with close to no additional background details, that can tile the plane without gaps nor overlaps. We enable optimization of the tile's shape by an unconstrained, differentiable parameterization of the space of all valid tileable meshes for given boundary conditions stemming from a symmetry group. Namely, we construct a differentiable family of linear systems derived from a 2D mesh-mapping technique - Orbifold Tutte Embedding - by considering the mesh's Laplacian matrix as differentiable parameters. We prove that the solution space of these linear systems is exactly all possible valid tiling configurations, thereby providing an end-to-end differentiable representation for the entire space of valid tiles. We render the textured mesh via a differentiable renderer, and leverage a pre-trained image diffusion model to induce a loss on the resulting image, updating the mesh's parameters so as to make its appearance match the text prompt. We show our method is able to produce plausible, appealing results, with non-trivial tiles, for a variety of different periodic tiling patterns.
]]></description>
<dc:subject>tiling Escher generative-art rather-interesting purdy-pitchers something-a-little-bit-weird-tho tesselation natural-language-processing image-processing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:12c8743b860a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:tiling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:Escher"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:generative-art"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:purdy-pitchers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:something-a-little-bit-weird-tho"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:tesselation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:natural-language-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2210.14132">
    <title>[2210.14132] p-adic Cellular Neural Networks: Applications to Image Processing</title>
    <dc:date>2024-09-28T15:43:03+00:00</dc:date>
    <link>https://arxiv.org/abs/2210.14132</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[The p-adic cellular neural networks (CNNs) are mathematical generalizations of the neural networks introduced by Chua and Yang in the 80s. In this work we present two new types of CNNs that can perform computations with real data, and whose dynamics can be understood almost completely. The first type of networks are edge detectors for grayscale images. The stationary states of these networks are organized hierarchically in a lattice structure. The dynamics of any of these networks consists of transitions toward some minimal state in the lattice. The second type is a new class of reaction-diffusion networks. We investigate the stability of these networks and show that they can be used as filters to reduce noise, preserving the edges, in grayscale images polluted with additive Gaussian noise. The networks introduced here were found experimentally. They are abstract evolution equations on spaces of real-valued functions defined in the p-adic unit ball for some prime number p. In practical applications the prime p is determined by the size of image, and thus, only small primes are used. We provide several numerical simulations showing how these networks work.
]]></description>
<dc:subject>rather-interesting p-adic-numbers cellular-automata to-understand image-processing approximation algorithms representation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:32e459800586/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:p-adic-numbers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:cellular-automata"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-understand"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:approximation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:representation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2204.11041">
    <title>[2204.11041] Learning by Erasing: Conditional Entropy based Transferable Out-Of-Distribution Detection</title>
    <dc:date>2024-08-08T18:33:40+00:00</dc:date>
    <link>https://arxiv.org/abs/2204.11041</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Out-of-distribution (OOD) detection is essential to handle the distribution shifts between training and test scenarios. For a new in-distribution (ID) dataset, existing methods require retraining to capture the dataset-specific feature representation or data distribution. In this paper, we propose a deep generative models (DGM) based transferable OOD detection method, which is unnecessary to retrain on a new ID dataset. We design an image erasing strategy to equip exclusive conditional entropy distribution for each ID dataset, which determines the discrepancy of DGM's posteriori ucertainty distribution on different ID datasets. Owing to the powerful representation capacity of convolutional neural networks, the proposed model trained on complex dataset can capture the above discrepancy between ID datasets without retraining and thus achieve transferable OOD detection. We validate the proposed method on five datasets and verity that ours achieves comparable performance to the state-of-the-art group based OOD detection methods that need to be retrained to deploy on new ID datasets. Our code is available at this https URL.
]]></description>
<dc:subject>machine-learning multitask-learning rather-interesting neural-networks image-processing forgetting to-understand consider:symbolic-regression anomaly-detection noise</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:23f2cee91736/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:multitask-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:forgetting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-understand"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:symbolic-regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:anomaly-detection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:noise"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2405.18029">
    <title>[2405.18029] Are Image Distributions Indistinguishable to Humans Indistinguishable to Classifiers?</title>
    <dc:date>2024-08-01T11:52:56+00:00</dc:date>
    <link>https://arxiv.org/abs/2405.18029</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[The ultimate goal of generative models is to characterize the data distribution perfectly. For image generation, common metrics of visual quality (e.g., FID), and the truthlikeness of generated images to the human eyes seem to suggest that we are close to achieving it. However, through distribution classification tasks, we find that, in the eyes of classifiers parameterized by neural networks, the strongest diffusion models are still far from this goal. Specifically, classifiers consistently and effortlessly distinguish between real and generated images in various settings. Further, we observe an intriguing discrepancy: classifiers can identify differences between diffusion models with similar performance (e.g., U-ViT-H vs. DiT-XL), but struggle to differentiate between the smallest and largest models in the same family (e.g., EDM2-XS vs. EDM2-XXL), whereas humans exhibit the opposite tendency. As an explanation, our comprehensive empirical study suggests that, unlike humans, classifiers tend to classify images through edge and high-frequency components. We believe that our methodology can serve as a probe to understand how generative models work and inspire further thought on how existing models can be improved and how the abuse of such models can be prevented.
]]></description>
<dc:subject>generative-art machine-learning image-processing rather-interesting who-watches-the-watchbeings? feature-selection</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:17ff7e674344/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:generative-art"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:who-watches-the-watchbeings?"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:feature-selection"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2212.06313">
    <title>[2212.06313] Metaheuristic-based Energy-aware Image Compression for Mobile App Development</title>
    <dc:date>2023-12-30T13:41:26+00:00</dc:date>
    <link>https://arxiv.org/abs/2212.06313</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[The JPEG standard is widely used in different image processing applications. One of the main components of the JPEG standard is the quantisation table (QT) since it plays a vital role in the image properties such as image quality and file size. In recent years, several efforts based on population-based metaheuristic (PBMH) algorithms have been performed to find the proper QT(s) for a specific image, although they do not take into consideration the user opinion in advance. Take an android developer as an example, who prefers a small-size image, while the optimisation process results in a high-quality image, leading to a huge file size. Another pitfall of the current works is a lack of comprehensive coverage, meaning that the QT(s) can not provide all possible combinations of file size and quality. Therefore, this paper aims to propose three distinct contributions. First, to include the user opinion in the compression process, the file size of the output image can be controlled by a user in advance. To this end, we propose a novel objective function for population-based JPEG image compression. Second, to tackle the lack of comprehensive coverage, we suggest a novel representation. Our proposed representation can not only provide more comprehensive coverage but also find the proper value for the quality factor for a specific image without any background knowledge. Both changes in representation and objective function are independent of the search strategies and can be used with any type of population-based metaheuristic (PBMH) algorithm. Therefore, as the third contribution, we also provide a comprehensive benchmark on 22 state-of-the-art and recently-introduced PBMH algorithms. Our extensive experiments on different benchmark images and in terms of different criteria show that our novel formulation for JPEG image compression can work effectively.
]]></description>
<dc:subject>image-processing compression multiobjective-optimization rather-interesting metaheuristics to-write-about nudge-targets consider:animation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:e5e672974303/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:compression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:multiobjective-optimization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:metaheuristics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:animation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2110.14968">
    <title>[2110.14968] DocScanner: Robust Document Image Rectification with Progressive Learning</title>
    <dc:date>2023-09-17T18:35:38+00:00</dc:date>
    <link>https://arxiv.org/abs/2110.14968</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Compared with flatbed scanners, portable smartphones provide more convenience for physical document digitization. However, such digitized documents are often distorted due to uncontrolled physical deformations, camera positions, and illumination variations. To this end, we present DocScanner, a novel framework for document image rectification. Different from existing solutions, DocScanner addresses this issue by introducing a progressive learning mechanism. Specifically, DocScanner maintains a single estimate of the rectified image, which is progressively corrected with a recurrent architecture. The iterative refinements make DocScanner converge to a robust and superior rectification performance, while the lightweight recurrent architecture ensures the running efficiency. To further improve the rectification quality, based on the geometric priori between the distorted and the rectified images, a geometric regularization is introduced during training to further improve the performance. Extensive experiments are conducted on the Doc3D dataset and the DocUNet Benchmark dataset, and the quantitative and qualitative evaluation results verify the effectiveness of DocScanner, which outperforms previous methods on OCR accuracy, image similarity, and our proposed distortion metric by a considerable margin. Furthermore, our DocScanner shows superior efficiency in runtime latency and model size.
]]></description>
<dc:subject>OCR image-processing algorithms self-organization rather-interesting digitization digital-humanities to-understand to-write-about consider:stitching</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:50e950233055/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:OCR"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:self-organization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:digitization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:digital-humanities"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-understand"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:stitching"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2205.09363">
    <title>[2205.09363] Plane Geometry Diagram Parsing</title>
    <dc:date>2023-05-16T11:10:26+00:00</dc:date>
    <link>https://arxiv.org/abs/2205.09363</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Geometry diagram parsing plays a key role in geometry problem solving, wherein the primitive extraction and relation parsing remain challenging due to the complex layout and between-primitive relationship. In this paper, we propose a powerful diagram parser based on deep learning and graph reasoning. Specifically, a modified instance segmentation method is proposed to extract geometric primitives, and the graph neural network (GNN) is leveraged to realize relation parsing and primitive classification incorporating geometric features and prior knowledge. All the modules are integrated into an end-to-end model called PGDPNet to perform all the sub-tasks simultaneously. In addition, we build a new large-scale geometry diagram dataset named PGDP5K with primitive level annotations. Experiments on PGDP5K and an existing dataset IMP-Geometry3K show that our model outperforms state-of-the-art methods in four sub-tasks remarkably. Our code, dataset and appendix material are available at this https URL.
]]></description>
<dc:subject>image-processing neural-networks plane-geometry abstraction rather-interesting special-cases to-understand to-simulate consider:representation consider:ontology</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:7cbc6bcfd47c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:plane-geometry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:abstraction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:special-cases"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-understand"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-simulate"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:ontology"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2106.09259">
    <title>[2106.09259] A Random CNN Sees Objects: One Inductive Bias of CNN and Its Applications</title>
    <dc:date>2023-04-30T00:34:17+00:00</dc:date>
    <link>https://arxiv.org/abs/2106.09259</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[This paper starts by revealing a surprising finding: without any learning, a randomly initialized CNN can localize objects surprisingly well. That is, a CNN has an inductive bias to naturally focus on objects, named as Tobias ("The object is at sight") in this paper. This empirical inductive bias is further analyzed and successfully applied to self-supervised learning (SSL). A CNN is encouraged to learn representations that focus on the foreground object, by transforming every image into various versions with different backgrounds, where the foreground and background separation is guided by Tobias. Experimental results show that the proposed Tobias significantly improves downstream tasks, especially for object detection. This paper also shows that Tobias has consistent improvements on training sets of different sizes, and is more resilient to changes in image augmentations. Code is available at this https URL.
]]></description>
<dc:subject>image-processing neural-networks convolutional-networks rather-interesting self-organization guessing to-follow</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:a4f65f1e1feb/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:convolutional-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:self-organization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:guessing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-follow"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2202.09179">
    <title>[2202.09179] Incorporating Texture Information into Dimensionality Reduction for High-Dimensional Images</title>
    <dc:date>2023-02-09T12:39:53+00:00</dc:date>
    <link>https://arxiv.org/abs/2202.09179</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[High-dimensional imaging is becoming increasingly relevant in many fields from astronomy and cultural heritage to systems biology. Visual exploration of such high-dimensional data is commonly facilitated by dimensionality reduction. However, common dimensionality reduction methods do not include spatial information present in images, such as local texture features, into the construction of low-dimensional embeddings. Consequently, exploration of such data is typically split into a step focusing on the attribute space followed by a step focusing on spatial information, or vice versa. In this paper, we present a method for incorporating spatial neighborhood information into distance-based dimensionality reduction methods, such as t-Distributed Stochastic Neighbor Embedding (t-SNE). We achieve this by modifying the distance measure between high-dimensional attribute vectors associated with each pixel such that it takes the pixel's spatial neighborhood into account. Based on a classification of different methods for comparing image patches, we explore a number of different approaches. We compare these approaches from a theoretical and experimental point of view. Finally, we illustrate the value of the proposed methods by qualitative and quantitative evaluation on synthetic data and two real-world use cases.
]]></description>
<dc:subject>image-processing representation rather-interesting patterns machine-learning clustering to-write-about to-visualize consider:nonlocality</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:8c6ec7deec3b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:patterns"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-visualize"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:nonlocality"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2102.01690">
    <title>[2102.01690] From Culture to Clothing: Discovering the World Events Behind A Century of Fashion Images</title>
    <dc:date>2022-05-14T10:44:17+00:00</dc:date>
    <link>https://arxiv.org/abs/2102.01690</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Fashion is intertwined with external cultural factors, but identifying these links remains a manual process limited to only the most salient phenomena. We propose a data-driven approach to identify specific cultural factors affecting the clothes people wear. Using large-scale datasets of news articles and vintage photos spanning a century, we present a multi-modal statistical model to detect influence relationships between happenings in the world and people's choice of clothing. Furthermore, on two image datasets we apply our model to improve the concrete vision tasks of visual style forecasting and photo timestamping. Our work is a first step towards a computational, scalable, and easily refreshable approach to link culture to clothing.
]]></description>
<dc:subject>digital-humanities image-processing rather-interesting text-mining to-write-about</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:58157a36d37f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:digital-humanities"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:text-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2105.07512">
    <title>[2105.07512] Substitutional Neural Image Compression</title>
    <dc:date>2022-03-17T10:52:01+00:00</dc:date>
    <link>https://arxiv.org/abs/2105.07512</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We describe Substitutional Neural Image Compression (SNIC), a general approach for enhancing any neural image compression model, that requires no data or additional tuning of the trained model. It boosts compression performance toward a flexible distortion metric and enables bit-rate control using a single model instance. The key idea is to replace the image to be compressed with a substitutional one that outperforms the original one in a desired way. Finding such a substitute is inherently difficult for conventional codecs, yet surprisingly favorable for neural compression models thanks to their fully differentiable structures. With gradients of a particular loss backpropogated to the input, a desired substitute can be efficiently crafted iteratively. We demonstrate the effectiveness of SNIC, when combined with various neural compression models and target metrics, in improving compression quality and performing bit-rate control measured by rate-distortion curves. Empirical results of control precision and generation speed are also discussed.
]]></description>
<dc:subject>image-processing neural-networks generative-models approximation compression to-write-about to-visualize consider:performance-measures machine-learning computational-complexity</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:e52739abcbfa/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:generative-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:approximation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:compression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-visualize"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:performance-measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:computational-complexity"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1908.07846">
    <title>[1908.07846] Representing text as abstract images enables image classifiers to also simultaneously classify text</title>
    <dc:date>2022-03-11T11:37:01+00:00</dc:date>
    <link>https://arxiv.org/abs/1908.07846</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We introduce a novel method for converting text data into abstract image representations, which allows image-based processing techniques (e.g. image classification networks) to be applied to text-based comparison problems. We apply the technique to entity disambiguation of inventor names in US patents. The method involves converting text from each pairwise comparison between two inventor name records into a 2D RGB (stacked) image representation. We then train an image classification neural network to discriminate between such pairwise comparison images, and use the trained network to label each pair of records as either matched (same inventor) or non-matched (different inventors), obtaining highly accurate results. Our new text-to-image representation method could also be used more broadly for other NLP comparison problems, such as disambiguation of academic publications, or for problems that require simultaneous classification of both text and image datasets.
]]></description>
<dc:subject>neural-networks strings image-processing representation rather-interesting to-write-about to-visualize consider:genetic-programming consider:robustness</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:eac50b22a9e5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:strings"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-visualize"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:genetic-programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:robustness"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2009.04433">
    <title>[2009.04433] not-so-BigGAN: Generating High-Fidelity Images on Small Compute with Wavelet-based Super-Resolution</title>
    <dc:date>2022-03-09T17:34:00+00:00</dc:date>
    <link>https://arxiv.org/abs/2009.04433</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[State-of-the-art models for high-resolution image generation, such as BigGAN and VQVAE-2, require an incredible amount of compute resources and/or time (512 TPU-v3 cores) to train, putting them out of reach for the larger research community. On the other hand, GAN-based image super-resolution models, such as ESRGAN, can not only upscale images to high dimensions, but also are efficient to train. In this paper, we present not-so-big-GAN (nsb-GAN), a simple yet cost-effective two-step training framework for deep generative models (DGMs) of high-dimensional natural images. First, we generate images in low-frequency bands by training a sampler in the wavelet domain. Then, we super-resolve these images from the wavelet domain back to the pixel-space with our novel wavelet super-resolution decoder network. Wavelet-based down-sampling method preserves more structural information than pixel-based methods, leading to significantly better generative quality of the low-resolution sampler (e.g., 64x64). Since the sampler and decoder can be trained in parallel and operate on much lower dimensional spaces than end-to-end models, the training cost is substantially reduced. On ImageNet 512x512, our model achieves a Fréchet Inception Distance (FID) of 10.59 -- beating the baseline BigGAN model -- at half the compute (256 TPU-v3 cores).
]]></description>
<dc:subject>generative-models superresolution image-processing neural-networks rather-interesting representation wavelets</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:f735e9e3e5aa/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:generative-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:superresolution"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:wavelets"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1711.08995">
    <title>[1711.08995] Unsupervised Domain Adaptation with Similarity Learning</title>
    <dc:date>2022-02-17T11:27:37+00:00</dc:date>
    <link>https://arxiv.org/abs/1711.08995</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[The objective of unsupervised domain adaptation is to leverage features from a labeled source domain and learn a classifier for an unlabeled target domain, with a similar but different data distribution. Most deep learning approaches to domain adaptation consist of two steps: (i) learn features that preserve a low risk on labeled samples (source domain) and (ii) make the features from both domains to be as indistinguishable as possible, so that a classifier trained on the source can also be applied on the target domain. In general, the classifiers in step (i) consist of fully-connected layers applied directly on the indistinguishable features learned in (ii). In this paper, we propose a different way to do the classification, using similarity learning. The proposed method learns a pairwise similarity function in which classification can be performed by computing similarity between prototype representations of each category. The domain-invariant features and the categorical prototype representations are learned jointly and in an end-to-end fashion. At inference time, images from the target domain are compared to the prototypes and the label associated with the one that best matches the image is outputed. The approach is simple, scalable and effective. We show that our model achieves state-of-the-art performance in different unsupervised domain adaptation scenarios.
]]></description>
<dc:subject>performance-space-analysis neural-networks image-processing clustering</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:2631df62f5fe/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:performance-space-analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:clustering"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2011.02904">
    <title>[2011.02904] Hyperrealistic Image Inpainting with Hypergraphs</title>
    <dc:date>2022-02-17T11:17:29+00:00</dc:date>
    <link>https://arxiv.org/abs/2011.02904</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Image inpainting is a non-trivial task in computer vision due to multiple possibilities for filling the missing data, which may be dependent on the global information of the image. Most of the existing approaches use the attention mechanism to learn the global context of the image. This attention mechanism produces semantically plausible but blurry results because of incapability to capture the global context. In this paper, we introduce hypergraph convolution on spatial features to learn the complex relationship among the data. We introduce a trainable mechanism to connect nodes using hyperedges for hypergraph convolution. To the best of our knowledge, hypergraph convolution have never been used on spatial features for any image-to-image tasks in computer vision. Further, we introduce gated convolution in the discriminator to enforce local consistency in the predicted image. The experiments on Places2, CelebA-HQ, Paris Street View, and Facades datasets, show that our approach achieves state-of-the-art results.
]]></description>
<dc:subject>image-processing hypergraphs neural-networks representation to-understand generative-models hyperresolution image-synthesis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:7f4db679c359/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:hypergraphs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-understand"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:generative-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:hyperresolution"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-synthesis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2011.07048">
    <title>[2011.07048] Using Graph Neural Networks to Reconstruct Ancient Documents</title>
    <dc:date>2022-02-05T13:13:25+00:00</dc:date>
    <link>https://arxiv.org/abs/2011.07048</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[In recent years, machine learning and deep learning approaches such as artificial neural networks have gained in popularity for the resolution of automatic puzzle resolution problems. Indeed, these methods are able to extract high-level representations from images, and then can be trained to separate matching image pieces from non-matching ones. These applications have many similarities to the problem of ancient document reconstruction from partially recovered fragments. In this work we present a solution based on a Graph Neural Network, using pairwise patch information to assign labels to edges representing the spatial relationships between pairs. This network classifies the relationship between a source and a target patch as being one of Up, Down, Left, Right or None. By doing so for all edges, our model outputs a new graph representing a reconstruction proposal. Finally, we show that our model is not only able to provide correct classifications at the edge-level, but also to generate partial or full reconstruction graphs from a set of patches.
]]></description>
<dc:subject>neural-networks puzzles rather-interesting image-processing representation to-write-about consider:representation consider:other-metaheuristics consider:performance-measures</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:46d1bc8604f3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:puzzles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:other-metaheuristics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:performance-measures"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2109.07683">
    <title>[2109.07683] Intuitive and Efficient Roof Modeling for Reconstruction and Synthesis</title>
    <dc:date>2022-01-27T11:43:08+00:00</dc:date>
    <link>https://arxiv.org/abs/2109.07683</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We propose a novel and flexible roof modeling approach that can be used for constructing planar 3D polygon roof meshes. Our method uses a graph structure to encode roof topology and enforces the roof validity by optimizing a simple but effective planarity metric we propose. This approach is significantly more efficient than using general purpose 3D modeling tools such as 3ds Max or SketchUp, and more powerful and expressive than specialized tools such as the straight skeleton. Our optimization-based formulation is also flexible and can accommodate different styles and user preferences for roof modeling. We showcase two applications. The first application is an interactive roof editing framework that can be used for roof design or roof reconstruction from aerial images. We highlight the efficiency and generality of our approach by constructing a mesh-image paired dataset consisting of 2539 roofs. Our second application is a generative model to synthesize new roof meshes from scratch. We use our novel dataset to combine machine learning and our roof optimization techniques, by using transformers and graph convolutional networks to model roof topology, and our roof optimization methods to enforce the planarity constraint.
]]></description>
<dc:subject>generative-models architecture image-processing rather-interesting algorithms to-write-about consider:similar-for-typography</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:2ea2c5698a2a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:generative-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:similar-for-typography"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1708.06899">
    <title>[1708.06899] Human experts vs. machines in taxa recognition</title>
    <dc:date>2022-01-26T13:56:44+00:00</dc:date>
    <link>https://arxiv.org/abs/1708.06899</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[The step of expert taxa recognition currently slows down the response time of many bioassessments. Shifting to quicker and cheaper state-of-the-art machine learning approaches is still met with expert scepticism towards the ability and logic of machines. In our study, we investigate both the differences in accuracy and in the identification logic of taxonomic experts and machines. We propose a systematic approach utilizing deep Convolutional Neural Nets with the transfer learning paradigm and extensively evaluate it over a multi-pose taxonomic dataset with hierarchical labels specifically created for this comparison. We also study the prediction accuracy on different ranks of taxonomic hierarchy in detail. Our results revealed that human experts using actual specimens yield the lowest classification error (CE⎯⎯⎯⎯⎯⎯⎯⎯=6.1%). However, a much faster, automated approach using deep Convolutional Neural Nets comes close to human accuracy (CE⎯⎯⎯⎯⎯⎯⎯⎯=11.4%). Contrary to previous findings in the literature, we find that for machines following a typical flat classification approach commonly used in machine learning performs better than forcing machines to adopt a hierarchical, local per parent node approach used by human taxonomic experts. Finally, we publicly share our unique dataset to serve as a public benchmark dataset in this field.
]]></description>
<dc:subject>machine-learning taxonomy field-guides natural-history image-processing classification applications-of-machine-learning field-biology rather-interesting see-also:flowers</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:294b683d5103/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:taxonomy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:field-guides"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:natural-history"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:classification"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:applications-of-machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:field-biology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:see-also:flowers"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2104.14575">
    <title>[2104.14575] Unsupervised Layered Image Decomposition into Object Prototypes</title>
    <dc:date>2022-01-24T13:58:32+00:00</dc:date>
    <link>https://arxiv.org/abs/2104.14575</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We present an unsupervised learning framework for decomposing images into layers of automatically discovered object models. Contrary to recent approaches that model image layers with autoencoder networks, we represent them as explicit transformations of a small set of prototypical images. Our model has three main components: (i) a set of object prototypes in the form of learnable images with a transparency channel, which we refer to as sprites; (ii) differentiable parametric functions predicting occlusions and transformation parameters necessary to instantiate the sprites in a given image; (iii) a layered image formation model with occlusion for compositing these instances into complete images including background. By jointly learning the sprites and occlusion/transformation predictors to reconstruct images, our approach not only yields accurate layered image decompositions, but also identifies object categories and instance parameters. We first validate our approach by providing results on par with the state of the art on standard multi-object synthetic benchmarks (Tetrominoes, Multi-dSprites, CLEVR6). We then demonstrate the applicability of our model to real images in tasks that include clustering (SVHN, GTSRB), cosegmentation (Weizmann Horse) and object discovery from unfiltered social network images. To the best of our knowledge, our approach is the first layered image decomposition algorithm that learns an explicit and shared concept of object type, and is robust enough to be applied to real images.
]]></description>
<dc:subject>machine-learning neural-networks image-processing computer-vision algorithms rather-interesting autoencoders to-understand to-write-about to-simulate consider:cut-and-paste-service</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:fa99f080eb39/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:computer-vision"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:autoencoders"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-understand"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-simulate"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:cut-and-paste-service"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1801.08267">
    <title>[1801.08267] Visual Weather Temperature Prediction</title>
    <dc:date>2021-12-19T13:15:34+00:00</dc:date>
    <link>https://arxiv.org/abs/1801.08267</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[In this paper, we attempt to employ convolutional recurrent neural networks for weather temperature estimation using only image data. We study ambient temperature estimation based on deep neural networks in two scenarios a) estimating temperature of a single outdoor image, and b) predicting temperature of the last image in an image sequence. In the first scenario, visual features are extracted by a convolutional neural network trained on a large-scale image dataset. We demonstrate that promising performance can be obtained, and analyze how volume of training data influences performance. In the second scenario, we consider the temporal evolution of visual appearance, and construct a recurrent neural network to predict the temperature of the last image in a given image sequence. We obtain better prediction accuracy compared to the state-of-the-art models. Further, we investigate how performance varies when information is extracted from different scene regions, and when images are captured in different daytime hours. Our approach further reinforces the idea of using only visual information for cost efficient weather prediction in the future.
]]></description>
<dc:subject>image-processing neural-networks computer-vision machine-learning rather-interesting consider:layers-of-deduction</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:0823c4c5e603/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:computer-vision"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:layers-of-deduction"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1611.10152">
    <title>[1611.10152] Combining Data-driven and Model-driven Methods for Robust Facial Landmark Detection</title>
    <dc:date>2021-10-01T20:57:56+00:00</dc:date>
    <link>https://arxiv.org/abs/1611.10152</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Facial landmark detection is an important yet challenging task for real-world computer vision applications. This paper proposes an effective and robust approach for facial landmark detection by combining data- and model-driven methods. Firstly, a Fully Convolutional Network (FCN) is trained to compute response maps of all facial landmark points. Such a data-driven method could make full use of holistic information in a facial image for global estimation of facial landmarks. After that, the maximum points in the response maps are fitted with a pre-trained Point Distribution Model (PDM) to generate the initial facial shape. This model-driven method is able to correct the inaccurate locations of outliers by considering the shape prior information. Finally, a weighted version of Regularized Landmark Mean-Shift (RLMS) is employed to fine-tune the facial shape iteratively. This Estimation-Correction-Tuning process perfectly combines the advantages of the global robustness of data-driven method (FCN), outlier correction capability of model-driven method (PDM) and non-parametric optimization of RLMS. Results of extensive experiments demonstrate that our approach achieves state-of-the-art performances on challenging datasets including 300W, AFLW, AFW and COFW. The proposed method is able to produce satisfying detection results on face images with exaggerated expressions, large head poses, and partial occlusions.
]]></description>
<dc:subject>face-recognition image-processing rather-interesting optimization algorithms machine-learning landmark-detection to-write-about consider:feature-discovery</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:15d538da92d2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:face-recognition"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:optimization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:landmark-detection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:feature-discovery"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1906.05856">
    <title>[1906.05856] Detecting Photoshopped Faces by Scripting Photoshop</title>
    <dc:date>2021-07-16T10:27:24+00:00</dc:date>
    <link>https://arxiv.org/abs/1906.05856</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Most malicious photo manipulations are created using standard image editing tools, such as Adobe Photoshop. We present a method for detecting one very popular Photoshop manipulation -- image warping applied to human faces -- using a model trained entirely using fake images that were automatically generated by scripting Photoshop itself. We show that our model outperforms humans at the task of recognizing manipulated images, can predict the specific location of edits, and in some cases can be used to "undo" a manipulation to reconstruct the original, unedited image. We demonstrate that the system can be successfully applied to real, artist-created image manipulations.
]]></description>
<dc:subject>image-processing rather-interesting forensics data-forensics neural-networks deep-learning machine-learning to-understand consider:metadata consider:patchiness</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:dddeac03dcf9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:forensics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:data-forensics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:deep-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-understand"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:metadata"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:patchiness"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2010.01238">
    <title>[2010.01238] A Deep Genetic Programming based Methodology for Art Media Classification Robust to Adversarial Perturbations</title>
    <dc:date>2021-06-27T12:08:53+00:00</dc:date>
    <link>https://arxiv.org/abs/2010.01238</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Art Media Classification problem is a current research area that has attracted attention due to the complex extraction and analysis of features of high-value art pieces. The perception of the attributes can not be subjective, as humans sometimes follow a biased interpretation of artworks while ensuring automated observation's trustworthiness. Machine Learning has outperformed many areas through its learning process of artificial feature extraction from images instead of designing handcrafted feature detectors. However, a major concern related to its reliability has brought attention because, with small perturbations made intentionally in the input image (adversarial attack), its prediction can be completely changed. In this manner, we foresee two ways of approaching the situation: (1) solve the problem of adversarial attacks in current neural networks methodologies, or (2) propose a different approach that can challenge deep learning without the effects of adversarial attacks. The first one has not been solved yet, and adversarial attacks have become even more complex to defend. Therefore, this work presents a Deep Genetic Programming method, called Brain Programming, that competes with deep learning and studies the transferability of adversarial attacks using two artworks databases made by art experts. The results show that the Brain Programming method preserves its performance in comparison with AlexNet, making it robust to these perturbations and competing to the performance of Deep Learning.
]]></description>
<dc:subject>genetic-programming image-processing rather-interesting deep-learning adversarial-methods security to-write-about consider:representation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:48fb04d3d3e7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:genetic-programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:deep-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:adversarial-methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:security"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:representation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2012.09444">
    <title>[2012.09444] Learning and Sharing: A Multitask Genetic Programming Approach to Image Feature Learning</title>
    <dc:date>2021-05-09T11:44:42+00:00</dc:date>
    <link>https://arxiv.org/abs/2012.09444</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Using evolutionary computation algorithms to solve multiple tasks with knowledge sharing is a promising approach. Image feature learning can be considered as a multitask problem because different tasks may have a similar feature space. Genetic programming (GP) has been successfully applied to image feature learning for classification. However, most of the existing GP methods solve one task, independently, using sufficient training data. No multitask GP method has been developed for image feature learning. Therefore, this paper develops a multitask GP approach to image feature learning for classification with limited training data. Owing to the flexible representation of GP, a new knowledge sharing mechanism based on a new individual representation is developed to allow GP to automatically learn what to share across two tasks and to improve its learning performance. The shared knowledge is encoded as a common tree, which can represent the common/general features of two tasks. With the new individual representation, each task is solved using the features extracted from a common tree and a task-specific tree representing task-specific features. To learn the best common and task-specific trees, a new evolutionary process and new fitness functions are developed. The performance of the proposed approach is examined on six multitask problems of 12 image classification datasets with limited training data and compared with three GP and 14 non-GP-based competitive methods. Experimental results show that the new approach outperforms these compared methods in almost all the comparisons. Further analysis reveals that the new approach learns simple yet effective common trees with high effectiveness and transferability.
]]></description>
<dc:subject>machine-learning genetic-programming multi-task-learning image-processing image-segmentation representation rather-interesting to-try</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:a2acce57a15f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:genetic-programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:multi-task-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-segmentation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-try"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2012.11780">
    <title>[2012.11780] Towards an Automatic System for Extracting Planar Orientations from Software Generated Point Clouds</title>
    <dc:date>2021-05-09T11:42:24+00:00</dc:date>
    <link>https://arxiv.org/abs/2012.11780</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[In geology, a key activity is the characterisation of geological structures (surface formation topology and rock units) using Planar Orientation measurements such as Strike, Dip and Dip Direction. In general these measurements are collected manually using basic equipment; usually a compass/clinometer and a backboard, recorded on a map by hand. Various computing techniques and technologies, such as Lidar, have been utilised in order to automate this process and update the collection paradigm for these types of measurements. Techniques such as Structure from Motion (SfM) reconstruct of scenes and objects by generating a point cloud from input images, with detailed reconstruction possible on the decimetre scale. SfM-type techniques provide advantages in areas of cost and usability in more varied environmental conditions, while sacrificing the extreme levels of data fidelity. Here is presented a methodology of data acquisition and a Machine Learning-based software system: GeoStructure, developed to automate the measurement of orientation measurements. Rather than deriving measurements using a method applied to the input images, such as the Hough Transform, this method takes measurements directly from the reconstructed point cloud surfaces. Point cloud noise is mitigated using a Mahalanobis distance implementation. Significant structure is characterised using a k-nearest neighbour region growing algorithm, and final surface orientations are quantified using the plane, and normal direction cosines.
]]></description>
<dc:subject>machine-learning inference data-analysis geology image-processing rather-interesting constraint-satisfaction performance-measure</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:8a8c856e99b0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:data-analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:geology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:constraint-satisfaction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:performance-measure"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2006.03829">
    <title>[2006.03829] 3D Self-Supervised Methods for Medical Imaging</title>
    <dc:date>2021-02-10T20:33:03+00:00</dc:date>
    <link>https://arxiv.org/abs/2006.03829</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Self-supervised learning methods have witnessed a recent surge of interest after proving successful in multiple application fields. In this work, we leverage these techniques, and we propose 3D versions for five different self-supervised methods, in the form of proxy tasks. Our methods facilitate neural network feature learning from unlabeled 3D images, aiming to reduce the required cost for expert annotation. The developed algorithms are 3D Contrastive Predictive Coding, 3D Rotation prediction, 3D Jigsaw puzzles, Relative 3D patch location, and 3D Exemplar networks. Our experiments show that pretraining models with our 3D tasks yields more powerful semantic representations, and enables solving downstream tasks more accurately and efficiently, compared to training the models from scratch and to pretraining them on 2D slices. We demonstrate the effectiveness of our methods on three downstream tasks from the medical imaging domain: i) Brain Tumor Segmentation from 3D MRI, ii) Pancreas Tumor Segmentation from 3D CT, and iii) Diabetic Retinopathy Detection from 2D Fundus images. In each task, we assess the gains in data-efficiency, performance, and speed of convergence. Interestingly, we also find gains when transferring the learned representations, by our methods, from a large unlabeled 3D corpus to a small downstream-specific dataset. We achieve results competitive to state-of-the-art solutions at a fraction of the computational expense. We publish our implementations for the developed algorithms (both 3D and 2D versions) as an open-source library, in an effort to allow other researchers to apply and extend our methods on their datasets.
]]></description>
<dc:subject>image-processing tomography rather-interesting operations-research optimization machine-learning medical-technology</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:afedd94e2c56/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:tomography"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:operations-research"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:optimization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:medical-technology"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://libvips.github.io/libvips/">
    <title>libvips</title>
    <dc:date>2021-01-25T14:38:11+00:00</dc:date>
    <link>https://libvips.github.io/libvips/</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[libvips is a demand-driven, horizontally threaded image processing library. Compared to similar libraries, libvips runs quickly and uses little memory. libvips is licensed under the LGPL 2.1+.

It has around 300 operations covering arithmetic, histograms, convolution, morphological operations, frequency filtering, colour, resampling, statistics and others. It supports a large range of numeric formats, from 8-bit int to 128-bit complex. Images can have any number of bands. It supports a good range of image formats, including JPEG, TIFF, PNG, WebP, FITS, Matlab, OpenEXR, PDF, SVG, HDR, PPM, CSV, GIF, Analyze, NIfTI, DeepZoom, and OpenSlide. It can also load images via ImageMagick or GraphicsMagick, letting it load formats like DICOM.

]]></description>
<dc:subject>graphics library image-processing to-understand to-use digitization consider:deskew</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:50564fcc2aee/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:graphics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:library"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-understand"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-use"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:digitization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:deskew"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1909.03856">
    <title>[1909.03856] On the Evaluation and Real-World Usage Scenarios of Deep Vessel Segmentation for Retinography</title>
    <dc:date>2020-10-07T15:50:46+00:00</dc:date>
    <link>https://arxiv.org/abs/1909.03856</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We identify and address three research gaps in the field of vessel segmentation for funduscopy. The first focuses on the task of inference on high-resolution fundus images for which only a limited set of ground-truth data is publicly available. Notably, we highlight that simple rescaling and padding or cropping of lower resolution datasets is surprisingly effective. Additionally we explore the effectiveness of semi-supervised learning for better domain adaptation. Our results show competitive performance on a set of common public retinal vessel datasets using a small and light-weight neural network. For HRF, the only very high-resolution dataset currently available, we reach new state-of-the-art performance by solely relying on training images from lower-resolution datasets. The second topic concerns evaluation metrics. We investigate the variability of the F1-score on the existing datasets and report results for recent SOTA architectures. Our evaluation show that most SOTA results are actually comparable to each other in performance. Last, we address the issue of reproducibility by open-sourcing our complete pipeline.
]]></description>
<dc:subject>image-processing medical-technology rather-interesting deep-learning to-write-about to-simulate consider:feature-discovery consider:training-data</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:8ef8e69f9d0b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:medical-technology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:deep-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-simulate"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:feature-discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:training-data"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1702.03505">
    <title>[1702.03505] A Novel Weight-Shared Multi-Stage CNN for Scale Robustness</title>
    <dc:date>2020-10-03T12:18:09+00:00</dc:date>
    <link>https://arxiv.org/abs/1702.03505</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Convolutional neural networks (CNNs) have demonstrated remarkable results in image classification for benchmark tasks and practical applications. The CNNs with deeper architectures have achieved even higher performance recently thanks to their robustness to the parallel shift of objects in images as well as their numerous parameters and the resulting high expression ability. However, CNNs have a limited robustness to other geometric transformations such as scaling and rotation. This limits the performance improvement of the deep CNNs, but there is no established solution. This study focuses on scale transformation and proposes a network architecture called the weight-shared multi-stage network (WSMS-Net), which consists of multiple stages of CNNs. The proposed WSMS-Net is easily combined with existing deep CNNs such as ResNet and DenseNet and enables them to acquire robustness to object scaling. Experimental results on the CIFAR-10, CIFAR-100, and ImageNet datasets demonstrate that existing deep CNNs combined with the proposed WSMS-Net achieve higher accuracies for image classification tasks with only a minor increase in the number of parameters and computation time.
]]></description>
<dc:subject>image-processing machine-learning deep-learning classification transducers rather-interesting algorithms separation-of-concerns specialization-in-neural-layers to-write-about to-simulate</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:074be22d1ab3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:deep-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:classification"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:transducers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:separation-of-concerns"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:specialization-in-neural-layers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-simulate"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2005.02152">
    <title>[2005.02152] Augmented Semantic Signatures of Airborne LiDAR Point Clouds for Comparison</title>
    <dc:date>2020-09-23T14:27:19+00:00</dc:date>
    <link>https://arxiv.org/abs/2005.02152</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[LiDAR point clouds provide rich geometric information, which is particularly useful for the analysis of complex scenes of urban regions. Finding structural and semantic differences between two different three-dimensional point clouds, say, of the same region but acquired at different time instances is an important problem. A comparison of point clouds involves computationally expensive registration and segmentation. We are interested in capturing the relative differences in the geometric uncertainty and semantic content of the point cloud without the registration process. Hence, we propose an orientation-invariant geometric signature of the point cloud, which integrates its probabilistic geometric and semantic classifications. We study different properties of the geometric signature, which are an image-based encoding of geometric uncertainty and semantic content. We explore different metrics to determine differences between these signatures, which in turn compare point clouds without performing point-to-point registration. Our results show that the differences in the signatures corroborate with the geometric and semantic differences of the point clouds.
]]></description>
<dc:subject>computer-vision image-processing image-segmentation classification rather-interesting machine-learning performance-measure looking-to-see to-write-about consider:stochastic-resonance consider:noise-methids</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:e52b3e458b24/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:computer-vision"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-segmentation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:classification"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:performance-measure"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:looking-to-see"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:stochastic-resonance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:noise-methids"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1903.12003">
    <title>[1903.12003] High Fidelity Face Manipulation with Extreme Poses and Expressions</title>
    <dc:date>2020-07-21T20:45:12+00:00</dc:date>
    <link>https://arxiv.org/abs/1903.12003</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Face manipulation has shown remarkable advances with the flourish of Generative Adversarial Networks. However, due to the difficulties of controlling structures and textures, it is challenging to model poses and expressions simultaneously, especially for the extreme manipulation at high-resolution. In this paper, we propose a novel framework that simplifies face manipulation into two correlated stages: a boundary prediction stage and a disentangled face synthesis stage. The first stage models poses and expressions jointly via boundary images. Specifically, a conditional encoder-decoder network is employed to predict the boundary image of the target face in a semi-supervised way. Pose and expression estimators are introduced to improve the prediction performance. In the second stage, the predicted boundary image and the input face image are encoded into the structure and the texture latent space by two encoder networks, respectively. A proxy network and a feature threshold loss are further imposed to disentangle the latent space. Furthermore, due to the lack of high-resolution face manipulation databases to verify the effectiveness of our method, we collect a new high-quality Multi-View Face (MVF-HQ) database. It contains 120,283 images at 6000x4000 resolution from 479 identities with diverse poses, expressions, and illuminations. MVF-HQ is much larger in scale and much higher in resolution than publicly available high-resolution face manipulation databases. We will release MVF-HQ soon to push forward the advance of face manipulation. Qualitative and quantitative experiments on four databases show that our method dramatically improves the synthesis quality.
]]></description>
<dc:subject>neural-networks image-processing image-synthesis generative-models face-recognition virtuality to-write-about consider:implementation consider:realtime</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:79389e231dbd/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-synthesis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:generative-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:face-recognition"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:virtuality"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:implementation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:realtime"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1912.11370">
    <title>[1912.11370] Big Transfer (BiT): General Visual Representation Learning</title>
    <dc:date>2020-06-14T12:06:08+00:00</dc:date>
    <link>https://arxiv.org/abs/1912.11370</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Transfer of pre-trained representations improves sample efficiency and simplifies hyperparameter tuning when training deep neural networks for vision. We revisit the paradigm of pre-training on large supervised datasets and fine-tuning the model on a target task. We scale up pre-training, and propose a simple recipe that we call Big Transfer (BiT). By combining a few carefully selected components, and transferring using a simple heuristic, we achieve strong performance on over 20 datasets. BiT performs well across a surprisingly wide range of data regimes -- from 1 example per class to 1M total examples. BiT achieves 87.5% top-1 accuracy on ILSVRC-2012, 99.4% on CIFAR-10, and 76.3% on the 19 task Visual Task Adaptation Benchmark (VTAB). On small datasets, BiT attains 76.8% on ILSVRC-2012 with 10 examples per class, and 97.0% on CIFAR-10 with 10 examples per class. We conduct detailed analysis of the main components that lead to high transfer performance.
]]></description>
<dc:subject>image-processing computer-vision machine-learning transfer-learning algorithms representation rather-interesting to-write-about to-generalize consider:feature-discovery</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:3c4bd990129b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:computer-vision"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:transfer-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-generalize"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:feature-discovery"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1711.09670">
    <title>[1711.09670] Improving OCR Accuracy on Early Printed Books by utilizing Cross Fold Training and Voting</title>
    <dc:date>2020-05-04T11:52:46+00:00</dc:date>
    <link>https://arxiv.org/abs/1711.09670</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[In this paper we introduce a method that significantly reduces the character error rates for OCR text obtained from OCRopus models trained on early printed books. The method uses a combination of cross fold training and confidence based voting. After allocating the available ground truth in different subsets several training processes are performed, each resulting in a specific OCR model. The OCR text generated by these models then gets voted to determine the final output by taking the recognized characters, their alternatives, and the confidence values assigned to each character into consideration. Experiments on seven early printed books show that the proposed method outperforms the standard approach considerably by reducing the amount of errors by up to 50% and more.
]]></description>
<dc:subject>OCR image-processing machine-learning collective-intelligence metaheuristics to-write-about to-simulate consider:preprocessing consider:performance-measures</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:01a08cd7c3af/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:OCR"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:collective-intelligence"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:metaheuristics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-simulate"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:preprocessing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:performance-measures"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://developer.apple.com/documentation/accelerate">
    <title>Accelerate | Apple Developer Documentation</title>
    <dc:date>2020-02-19T11:35:34+00:00</dc:date>
    <link>https://developer.apple.com/documentation/accelerate</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Make large-scale mathematical computations and image calculations, optimized for high performance and low-energy consumption.
]]></description>
<dc:subject>swift library to-understand image-processing GPU programming</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:b4c7aa41de93/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:swift"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:library"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-understand"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:programming"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1802.10038">
    <title>[1802.10038] Improving OCR Accuracy on Early Printed Books by combining Pretraining, Voting, and Active Learning</title>
    <dc:date>2020-01-26T13:51:59+00:00</dc:date>
    <link>https://arxiv.org/abs/1802.10038</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We combine three methods which significantly improve the OCR accuracy of OCR models trained on early printed books: (1) The pretraining method utilizes the information stored in already existing models trained on a variety of typesets (mixed models) instead of starting the training from scratch. (2) Performing cross fold training on a single set of ground truth data (line images and their transcriptions) with a single OCR engine (OCRopus) produces a committee whose members then vote for the best outcome by also taking the top-N alternatives and their intrinsic confidence values into account. (3) Following the principle of maximal disagreement we select additional training lines which the voters disagree most on, expecting them to offer the highest information gain for a subsequent training (active learning). Evaluations on six early printed books yielded the following results: On average the combination of pretraining and voting improved the character accuracy by 46% when training five folds starting from the same mixed model. This number rose to 53% when using different models for pretraining, underlining the importance of diverse voters. Incorporating active learning improved the obtained results by another 16% on average (evaluated on three of the six books). Overall, the proposed methods lead to an average error rate of 2.5% when training on only 60 lines. Using a substantial ground truth pool of 1,000 lines brought the error rate down even further to less than 1% on average.
]]></description>
<dc:subject>OCR digital-humanities digitization text-processing image-processing machine-learning data-cleaning the-mangle-in-practice modeling rather-interesting to-write-about to-simulate consider:stochastic-resonance</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:c2645c071b01/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:OCR"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:digital-humanities"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:digitization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:text-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:data-cleaning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:the-mangle-in-practice"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:modeling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-simulate"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:stochastic-resonance"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1903.12363">
    <title>[1903.12363] CUTIE: Learning to Understand Documents with Convolutional Universal Text Information Extractor</title>
    <dc:date>2020-01-10T20:46:35+00:00</dc:date>
    <link>https://arxiv.org/abs/1903.12363</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Extracting key information from documents, such as receipts or invoices, and preserving the interested texts to structured data is crucial in the document-intensive streamline processes of office automation in areas that includes but not limited to accounting, financial, and taxation areas. To avoid designing expert rules for each specific type of document, some published works attempt to tackle the problem by learning a model to explore the semantic context in text sequences based on the Named Entity Recognition (NER) method in the NLP field. In this paper, we propose to harness the effective information from both semantic meaning and spatial distribution of texts in documents. Specifically, our proposed model, Convolutional Universal Text Information Extractor (CUTIE), applies convolutional neural networks on gridded texts where texts are embedded as features with semantical connotations. We further explore the effect of employing different structures of convolutional neural network and propose a fast and portable structure. We demonstrate the effectiveness of the proposed method on a dataset with up to 4,484 labelled receipts, without any pre-training or post-processing, achieving state of the art performance that is much better than the NER based methods in terms of either speed and accuracy. Experimental results also demonstrate that the proposed CUTIE model being able to achieve good performance with a much smaller amount of training data.
]]></description>
<dc:subject>OCR document-processing text-processing image-processing rather-interesting neural-networks feature-detection semantic-understanding to-write-about to-simulate</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:68fe50fef126/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:OCR"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:document-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:text-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:feature-detection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:semantic-understanding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-simulate"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1904.05916">
    <title>[1904.05916] Synthetic Examples Improve Generalization for Rare Classes</title>
    <dc:date>2020-01-10T12:34:20+00:00</dc:date>
    <link>https://arxiv.org/abs/1904.05916</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[The ability to detect and classify rare occurrences in images has important applications - for example, counting rare and endangered species when studying biodiversity, or detecting infrequent traffic scenarios that pose a danger to self-driving cars. Few-shot learning is an open problem: current computer vision systems struggle to categorize objects they have seen only rarely during training, and collecting a sufficient number of training examples of rare events is often challenging and expensive, and sometimes outright impossible. We explore in depth an approach to this problem: complementing the few available training images with ad-hoc simulated data. 
Our testbed is animal species classification, which has a real-world long-tailed distribution. We analyze the effect of different axes of variation in simulation, such as pose, lighting, model, and simulation method, and we prescribe best practices for efficiently incorporating simulated data for real-world performance gain. Our experiments reveal that synthetic data can considerably reduce error rates for classes that are rare, that as the amount of simulated data is increased, accuracy on the target class improves, and that high variation of simulated data provides maximum performance gain.
]]></description>
<dc:subject>machine-learning training the-mangle-in-practice pedagogy well-duh toy-problems image-processing what-is-it-like-to-be-a-NN?</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:2d2d55718977/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:training"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:the-mangle-in-practice"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:pedagogy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:well-duh"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:toy-problems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:what-is-it-like-to-be-a-NN?"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1905.12536">
    <title>[1905.12536] A Quaternion-based Certifiably Optimal Solution to the Wahba Problem with Outliers</title>
    <dc:date>2019-12-29T11:04:29+00:00</dc:date>
    <link>https://arxiv.org/abs/1905.12536</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[The Wahba problem, also known as rotation search, seeks to find the best rotation to align two sets of vector observations given putative correspondences, and is a fundamental routine in many computer vision and robotics applications. This work proposes the first polynomial-time certifiably optimal approach for solving the Wahba problem when a large number of vector observations are outliers. Our first contribution is to formulate the Wahba problem using a Truncated Least Squares (TLS) cost that is insensitive to a large fraction of spurious correspondences. The second contribution is to rewrite the problem using unit quaternions and show that the TLS cost can be framed as a Quadratically-Constrained Quadratic Program (QCQP). Since the resulting optimization is still highly non-convex and hard to solve globally, our third contribution is to develop a convex Semidefinite Programming (SDP) relaxation. We show that while a naive relaxation performs poorly in general, our relaxation is tight even in the presence of large noise and outliers. We validate the proposed algorithm, named QUASAR (QUAternion-based Semidefinite relAxation for Robust alignment), in both synthetic and real datasets showing that the algorithm outperforms RANSAC, robust local optimization techniques, global outlier-removal procedures, and Branch-and-Bound methods. QUASAR is able to compute certifiably optimal solutions (i.e. the relaxation is exact) even in the case when 95% of the correspondences are outliers.
]]></description>
<dc:subject>image-processing optimization rather-interesting algorithms to-write-about consider:feature-discovery consider:looking-to-see consider:representation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:d78c60f9af61/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:optimization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:feature-discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:looking-to-see"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:representation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1904.08755">
    <title>[1904.08755] 4D Spatio-Temporal ConvNets: Minkowski Convolutional Neural Networks</title>
    <dc:date>2019-12-25T16:50:09+00:00</dc:date>
    <link>https://arxiv.org/abs/1904.08755</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[In many robotics and VR/AR applications, 3D-videos are readily-available sources of input (a continuous sequence of depth images, or LIDAR scans). However, those 3D-videos are processed frame-by-frame either through 2D convnets or 3D perception algorithms. In this work, we propose 4-dimensional convolutional neural networks for spatio-temporal perception that can directly process such 3D-videos using high-dimensional convolutions. For this, we adopt sparse tensors and propose the generalized sparse convolution that encompasses all discrete convolutions. To implement the generalized sparse convolution, we create an open-source auto-differentiation library for sparse tensors that provides extensive functions for high-dimensional convolutional neural networks. We create 4D spatio-temporal convolutional neural networks using the library and validate them on various 3D semantic segmentation benchmarks and proposed 4D datasets for 3D-video perception. To overcome challenges in the 4D space, we propose the hybrid kernel, a special case of the generalized sparse convolution, and the trilateral-stationary conditional random field that enforces spatio-temporal consistency in the 7D space-time-chroma space. Experimentally, we show that convolutional neural networks with only generalized 3D sparse convolutions can outperform 2D or 2D-3D hybrid methods by a large margin. Also, we show that on 3D-videos, 4D spatio-temporal convolutional neural networks are robust to noise, outperform 3D convolutional neural networks and are faster than the 3D counterpart in some cases.
]]></description>
<dc:subject>image-processing video deep-learning representation time-series neural-networks to-write-about consider:genetic-programming consider:subsetting</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:07a50c05c722/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:video"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:deep-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:time-series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:genetic-programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:subsetting"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://openpreservation.org/blog/2019/11/07/jpeg-got-the-blues/">
    <title>JPEG Got the Blues - Open Preservation Foundation</title>
    <dc:date>2019-11-27T13:05:46+00:00</dc:date>
    <link>https://openpreservation.org/blog/2019/11/07/jpeg-got-the-blues/</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[On October the 20th of 2017, the Department of Performing Arts of the National Library of France (BnF) acquired a set of 622 born-digital photographs representing architectural elements of the major Parisian theaters (palais Garnier, Comédie-Française, etc). Produced by photographers Sabine Hartl and Olaf-Daniel Meyer in 2012, these photographs were to illustrate the book Théâtres parisiens. Un patrimoine du XIXe siècle published in 2013 by editor Citadelles et Mazenod. Though all 622 photographs did not appear in the book, the collection manager decided to acquire the whole set. A contract assigned to BnF the rights to publish them online  in its digital library Gallica. The photographs were delivered in the only available format: JPEG.

]]></description>
<dc:subject>archiving digital-preservation software-development-is-not-programming bug-hunting rather-interesting file-formats image-processing to-write-about</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:371a19875650/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:archiving"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:digital-preservation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:software-development-is-not-programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:bug-hunting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:file-formats"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1803.00407">
    <title>[1803.00407] Yedrouj-Net: An efficient CNN for spatial steganalysis</title>
    <dc:date>2019-11-23T14:40:10+00:00</dc:date>
    <link>https://arxiv.org/abs/1803.00407</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[For about 10 years, detecting the presence of a secret message hidden in an image was performed with an Ensemble Classifier trained with Rich features. In recent years, studies such as Xu et al. have indicated that well-designed convolutional Neural Networks (CNN) can achieve comparable performance to the two-step machine learning approaches. 
In this paper, we propose a CNN that outperforms the state-ofthe-art in terms of error probability. The proposition is in the continuity of what has been recently proposed and it is a clever fusion of important bricks used in various papers. Among the essential parts of the CNN, one can cite the use of a pre-processing filterbank and a Truncation activation function, five convolutional layers with a Batch Normalization associated with a Scale Layer, as well as the use of a sufficiently sized fully connected section. An augmented database has also been used to improve the training of the CNN. 
Our CNN was experimentally evaluated against S-UNIWARD and WOW embedding algorithms and its performances were compared with those of three other methods: an Ensemble Classifier plus a Rich Model, and two other CNN steganalyzers.
]]></description>
<dc:subject>stenography security neural-networks image-processing cryptography pattern-discovery rather-interesting consider:looking-to-see</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:ef923682ec29/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:stenography"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:security"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:cryptography"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:pattern-discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:looking-to-see"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1803.08202">
    <title>[1803.08202] Person Following by Autonomous Robots: A Categorical Overview</title>
    <dc:date>2019-11-02T12:50:11+00:00</dc:date>
    <link>https://arxiv.org/abs/1803.08202</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[A wide range of human-robot collaborative applications in diverse domains such as manufacturing, health care, the entertainment industry, and social interactions, require an autonomous robot to follow its human companion. Different working environments and applications pose diverse challenges by adding constraints on the choice of sensors, the degree of autonomy, and dynamics of a person-following robot. Researchers have addressed these challenges in many ways and contributed to the development of a large body of literature. This paper provides a comprehensive overview of the literature by categorizing different aspects of person-following by autonomous robots. Also, the corresponding operational challenges are identified based on various design choices for ground, underwater, and aerial scenarios. In addition, state-of-the-art methods for perception, planning, control, and interaction are elaborately discussed and their applicability in varied operational scenarios are presented. Then, some of the prominent methods are qualitatively compared, corresponding practicalities are illustrated, and their feasibility is analyzed for various use-cases. Furthermore, several prospective application areas are identified, and open problems are highlighted for future research.
]]></description>
<dc:subject>robotics review rather-interesting goal-balancing planning algorithms experiment looking-to-see machine-learning adaptive-control adaptive-behavior to-write-about image-processing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:b54b85280659/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:robotics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:review"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:goal-balancing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:planning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:experiment"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:looking-to-see"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:adaptive-control"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:adaptive-behavior"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.vision.ee.ethz.ch/~timofter/publications/Gu-Chapter-2019.pdf">
    <title>[PDF] A brief review of image denoising algorithms and beyond</title>
    <dc:date>2019-09-19T22:31:10+00:00</dc:date>
    <link>http://www.vision.ee.ethz.ch/~timofter/publications/Gu-Chapter-2019.pdf</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[The recent advances in hardware and imaging systems made the digital cameras ubiquitous. Although the development of hardware has steadily improved the quality of images for the last several decades, image degradation is unavoidable due to the many factors affecting the image acquisition process and the subsequent post- processing. Image denoising, which aims to reconstruct a high quality image from its degraded observation, is a classical yet still very active topic in the area of low- level computer vision. It represents an important building block in real applications such as digital photography, medical image analysis, remote sensing, surveillance and digital entertainment. Also, image denoising constitutes an ideal test bed for evaluating image prior modeling methods. In this paper, we briefly review recent progresses in image denoising. We firstly present an overview of prior modeling approaches used in image denoising task. Then, we review conventional sparse representation based denoising algorithms, low-rank based denoising algorithms and recently proposed deep neural networks based approaches. At last, we discuss some emerging topics and open problems about image denoising.]]></description>
<dc:subject>image-processing numerical-methods approximation rather-interesting algorithms machine-learning to-write-about to-simulate</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:03bdf047658f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:numerical-methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:approximation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-simulate"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://dl.acm.org/citation.cfm?id=2601224">
    <title>The Connect-The-Dots family of puzzles</title>
    <dc:date>2019-09-10T13:05:40+00:00</dc:date>
    <link>https://dl.acm.org/citation.cfm?id=2601224</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[In this paper we introduce several innovative variants on the classic Connect-The-Dots puzzle. We study the underlying geometric principles and investigate methods for the automatic generation of high-quality puzzles from line drawings.
Specifically, we introduce three new variants of the classic Connect-The-Dots puzzle. These new variants use different rules for drawing connections, and have several advantages: no need for printed numbers in the puzzle (which look ugly in the final drawing), and perhaps more challenging "game play", making the puzzles suitable for different age groups. We study the rules of all four variants in the family, and design principles describing what makes a good puzzle. We identify general principles that apply across the different variants, as well as specific implementations of those principles in the different variants. We make these mathematically precise in the form of criteria a puzzle should satisfy.
Furthermore, we investigate methods for the automatic generation of puzzles from a plane graph that describes the input drawing. We show that the problem of generating a good puzzle --one satisfying the mentioned criteria-- is computationally hard, and present several heuristic algorithms.
Using our implementation for generating puzzles, we evaluate the quality of the resulting puzzles with respect to two parameters: one for similarity to the original line drawing, and one for ambiguity; i.e. what is the visual accuracy needed to solve the puzzle.
]]></description>
<dc:subject>image-processing feature-construction algorithms rather-interesting optimization performance-measure consider:looking-to-see consider:representation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:11bdf7491de7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:feature-construction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:optimization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:performance-measure"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:looking-to-see"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:representation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1701.07396">
    <title>[1701.07396] LAREX - A semi-automatic open-source Tool for Layout Analysis and Region Extraction on Early Printed Books</title>
    <dc:date>2019-09-08T14:35:13+00:00</dc:date>
    <link>https://arxiv.org/abs/1701.07396</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[A semi-automatic open-source tool for layout analysis on early printed books is presented. LAREX uses a rule based connected components approach which is very fast, easily comprehensible for the user and allows an intuitive manual correction if necessary. The PageXML format is used to support integration into existing OCR workflows. Evaluations showed that LAREX provides an efficient and flexible way to segment pages of early printed books.
]]></description>
<dc:subject>OCR digitization machine-learning algorithms image-processing image-segmentation digital-humanities to-understand to-write-about to-simulate</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:28b874f8086a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:OCR"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:digitization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-segmentation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:digital-humanities"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-understand"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-simulate"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1711.04226">
    <title>[1711.04226] AON: Towards Arbitrarily-Oriented Text Recognition</title>
    <dc:date>2019-08-06T09:05:23+00:00</dc:date>
    <link>https://arxiv.org/abs/1711.04226</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Recognizing text from natural images is a hot research topic in computer vision due to its various applications. Despite the enduring research of several decades on optical character recognition (OCR), recognizing texts from natural images is still a challenging task. This is because scene texts are often in irregular (e.g. curved, arbitrarily-oriented or seriously distorted) arrangements, which have not yet been well addressed in the literature. Existing methods on text recognition mainly work with regular (horizontal and frontal) texts and cannot be trivially generalized to handle irregular texts. In this paper, we develop the arbitrary orientation network (AON) to directly capture the deep features of irregular texts, which are combined into an attention-based decoder to generate character sequence. The whole network can be trained end-to-end by using only images and word-level annotations. Extensive experiments on various benchmarks, including the CUTE80, SVT-Perspective, IIIT5k, SVT and ICDAR datasets, show that the proposed AON-based method achieves the-state-of-the-art performance in irregular datasets, and is comparable to major existing methods in regular datasets.
]]></description>
<dc:subject>OCR image-processing image-segmentation machine-learning invariant-models generalization to-write-about to-simulate</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:17af25eed5ec/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:OCR"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-segmentation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:invariant-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:generalization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-simulate"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1608.02153">
    <title>[1608.02153] OCR of historical printings with an application to building diachronic corpora: A case study using the RIDGES herbal corpus</title>
    <dc:date>2019-08-03T12:49:20+00:00</dc:date>
    <link>https://arxiv.org/abs/1608.02153</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[This article describes the results of a case study that applies Neural Network-based Optical Character Recognition (OCR) to scanned images of books printed between 1487 and 1870 by training the OCR engine OCRopus [@breuel2013high] on the RIDGES herbal text corpus [@OdebrechtEtAlSubmitted]. Training specific OCR models was possible because the necessary *ground truth* is available as error-corrected diplomatic transcriptions. The OCR results have been evaluated for accuracy against the ground truth of unseen test sets. Character and word accuracies (percentage of correctly recognized items) for the resulting machine-readable texts of individual documents range from 94% to more than 99% (character level) and from 76% to 97% (word level). This includes the earliest printed books, which were thought to be inaccessible by OCR methods until recently. Furthermore, OCR models trained on one part of the corpus consisting of books with different printing dates and different typesets *(mixed models)* have been tested for their predictive power on the books from the other part containing yet other fonts, mostly yielding character accuracies well above 90%. It therefore seems possible to construct generalized models trained on a range of fonts that can be applied to a wide variety of historical printings still giving good results. A moderate postcorrection effort of some pages will then enable the training of individual models with even better accuracies. Using this method, diachronic corpora including early printings can be constructed much faster and cheaper than by manual transcription. The OCR methods reported here open up the possibility of transforming our printed textual cultural heritage into electronic text by largely automatic means, which is a prerequisite for the mass conversion of scanned books.
]]></description>
<dc:subject>OCR image-processing natural-language-processing algorithms machine-learning rather-interesting commodity-software digital-humanities to-write-about consider:swarms consider:stochastic-resonance</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:105d3a25b2ab/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:OCR"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:natural-language-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:commodity-software"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:digital-humanities"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:swarms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:stochastic-resonance"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1812.07933">
    <title>[1812.07933] Dynamic Programming Approach to Template-based OCR</title>
    <dc:date>2019-08-03T11:12:09+00:00</dc:date>
    <link>https://arxiv.org/abs/1812.07933</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[In this paper we propose a dynamic programming solution to the template-based recognition task in OCR case. We formulate a problem of optimal position search for complex objects consisting of parts forming a sequence. We limit the distance between every two adjacent elements with predefined upper and lower thresholds. We choose the sum of penalties for each part in given position as a function to be minimized. We show that such a choice of restrictions allows a faster algorithm to be used than the one for the general form of deformation penalties. We named this algorithm Dynamic Squeezeboxes Packing (DSP) and applied it to solve the two OCR problems: text fields extraction from an image of document Visual Inspection Zone (VIZ) and license plate segmentation. The quality and the performance of resulting solutions were experimentally proved to meet the requirements of the state-of-the-art industrial recognition systems.
]]></description>
<dc:subject>OCR image-segmentation image-processing algorithms optimization mathematical-programming to-write-about to-compare</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:52bfc20b7aff/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:OCR"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-segmentation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:optimization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:mathematical-programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-compare"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1711.03247">
    <title>[1711.03247] The nonsmooth landscape of phase retrieval</title>
    <dc:date>2019-07-23T20:30:39+00:00</dc:date>
    <link>https://arxiv.org/abs/1711.03247</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We consider a popular nonsmooth formulation of the real phase retrieval problem. We show that under standard statistical assumptions, a simple subgradient method converges linearly when initialized within a constant relative distance of an optimal solution. Seeking to understand the distribution of the stationary points of the problem, we complete the paper by proving that as the number of Gaussian measurements increases, the stationary points converge to a codimension two set, at a controlled rate. Experiments on image recovery problems illustrate the developed algorithm and theory.
]]></description>
<dc:subject>inverse-problems feature-extraction approximation rather-interesting nonlinear-programming to-write-about image-processing signal-processing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:195f98a6d64e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inverse-problems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:feature-extraction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:approximation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nonlinear-programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:signal-processing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://joel.franusic.com/krazy_kat/about/">
    <title>Krazy Kat Comics</title>
    <dc:date>2019-07-13T11:37:48+00:00</dc:date>
    <link>https://joel.franusic.com/krazy_kat/about/</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[This page goes into detail on how I used Machine Learning to find hundreds of Krazy Kat comics that are now in the public domain.

As a result of this project, several hundred high resolution scans of Krazy Kat comics are now easily available online, including a comic that I couldn't find in any published book!

What follows is a detailed description of what I did to find these comics in online newspaper archives.

]]></description>
<dc:subject>OCR archives newspapers rather-interesting digitization to-write-about deep-learning machine-learning image-processing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:06c7bbb8a820/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:OCR"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:archives"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:newspapers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:digitization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:deep-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1904.01175">
    <title>[1904.01175] DeepLight: Learning Illumination for Unconstrained Mobile Mixed Reality</title>
    <dc:date>2019-06-23T11:29:23+00:00</dc:date>
    <link>https://arxiv.org/abs/1904.01175</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We present a learning-based method to infer plausible high dynamic range (HDR), omnidirectional illumination given an unconstrained, low dynamic range (LDR) image from a mobile phone camera with a limited field of view (FOV). For training data, we collect videos of various reflective spheres placed within the camera's FOV, leaving most of the background unoccluded, leveraging that materials with diverse reflectance functions reveal different lighting cues in a single exposure. We train a deep neural network to regress from the LDR background image to HDR lighting by matching the LDR ground truth sphere images to those rendered with the predicted illumination using image-based relighting, which is differentiable. Our inference runs at interactive frame rates on a mobile device, enabling realistic rendering of virtual objects into real scenes for mobile mixed reality. Training on automatically exposed and white-balanced videos, we improve the realism of rendered objects compared to the state-of-the art methods for both indoor and outdoor scenes.
]]></description>
<dc:subject>augmented-reality deep-learning image-processing data-fusion rather-interesting generative-models generative-art algorithms to-write-about consider:performance-measures</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:1bb227741439/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:augmented-reality"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:deep-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:data-fusion"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:generative-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:generative-art"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:performance-measures"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1709.09683">
    <title>[1709.09683] Exact Camera Location Recovery by Least Unsquared Deviations</title>
    <dc:date>2019-04-27T11:57:40+00:00</dc:date>
    <link>https://arxiv.org/abs/1709.09683</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We establish exact recovery for the Least Unsquared Deviations (LUD) algorithm of Ozyesil and Singer. More precisely, we show that for sufficiently many cameras with given corrupted pairwise directions, where both camera locations and pairwise directions are generated by a special probabilistic model, the LUD algorithm exactly recovers the camera locations with high probability. A similar exact recovery guarantee was established for the ShapeFit algorithm by Hand, Lee and Voroninski, but with typically less corruption.
]]></description>
<dc:subject>inverse-problems image-processing rather-interesting benchmarking nudge-targets consider:feature-discovery consider:rediscovery</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:1ea9d5a60ac9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inverse-problems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:benchmarking"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:feature-discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:rediscovery"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1709.09479">
    <title>[1709.09479] Fast Convolutional Sparse Coding in the Dual Domain</title>
    <dc:date>2019-04-24T14:56:28+00:00</dc:date>
    <link>https://arxiv.org/abs/1709.09479</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Convolutional sparse coding (CSC) is an important building block of many computer vision applications ranging from image and video compression to deep learning. We present two contributions to the state of the art in CSC. First, we significantly speed up the computation by proposing a new optimization framework that tackles the problem in the dual domain. Second, we extend the original formulation to higher dimensions in order to process a wider range of inputs, such as RGB images and videos. Our results show up to 20 times speedup compared to current state-of-the-art CSC solvers.
]]></description>
<dc:subject>machine-learning representation compressed-sensing sparse-representations optimization rather-interesting feature-extraction image-processing consider:looking-to-see horse-races performance-measure algorithms computational-complexity</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:4a6eda0a2ecb/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:compressed-sensing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:sparse-representations"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:optimization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:feature-extraction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:looking-to-see"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:horse-races"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:performance-measure"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:computational-complexity"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://library.msri.org/books/Book46/files/08li.pdf">
    <title>[PDF] Image Compression: The Mathematics of JPEG 2000</title>
    <dc:date>2019-02-28T11:21:28+00:00</dc:date>
    <link>http://library.msri.org/books/Book46/files/08li.pdf</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Abstract. We briefly review the mathematics in the coding engine of JPEG 2000, a state-of-the-art image compression system. We focus in depth on the transform, entropy coding and bitstream assembler modules. Our goal is to present a general overview of the mathematics underlying a state of the art scalable image compression technology.]]></description>
<dc:subject>image-processing compression algorithms rather-interesting to-understand performance-measure feature-extraction</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:8dab56073455/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:compression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-understand"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:performance-measure"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:feature-extraction"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1711.06588">
    <title>[1711.06588] Dependent landmark drift: robust point set registration with a Gaussian mixture model and a statistical shape model</title>
    <dc:date>2019-02-23T12:03:20+00:00</dc:date>
    <link>https://arxiv.org/abs/1711.06588</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[The goal of point set registration is to find point-by-point correspondences between point sets, each of which characterizes the shape of an object. Because local preservation of object geometry is assumed, prevalent algorithms in the area can often elegantly solve the problems without using geometric information specific to the objects. This means that registration performance can be further improved by using prior knowledge of object geometry. In this paper, we propose a novel point set registration method using the Gaussian mixture model with prior shape information encoded as a statistical shape model. Our transformation model is defined as a combination of the similar transformation, motion coherence, and the statistical shape model. Therefore, the proposed method works effectively if the target point set includes outliers and missing regions, or if it is rotated. The computational cost can be reduced to linear, and therefore the method is scalable to large point sets. The effectiveness of the method will be verified through comparisons with existing algorithms using datasets concerning human body shapes, hands, and faces.]]></description>
<dc:subject>statistics algorithms rather-interesting image-processing optimization to-write-about nudge-targets</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:89cb5bb9b116/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:optimization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1809.02786">
    <title>[1809.02786] Structure-Preserving Transformation: Generating Diverse and Transferable Adversarial Examples</title>
    <dc:date>2019-02-19T10:52:27+00:00</dc:date>
    <link>https://arxiv.org/abs/1809.02786</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Adversarial examples are perturbed inputs designed to fool machine learning models. Most recent works on adversarial examples for image classification focus on directly modifying pixels with minor perturbations. A common requirement in all these works is that the malicious perturbations should be small enough (measured by an L_p norm for some p) so that they are imperceptible to humans. However, small perturbations can be unnecessarily restrictive and limit the diversity of adversarial examples generated. Further, an L_p norm based distance metric ignores important structure patterns hidden in images that are important to human perception. Consequently, even the minor perturbation introduced in recent works often makes the adversarial examples less natural to humans. More importantly, they often do not transfer well and are therefore less effective when attacking black-box models especially for those protected by a defense mechanism. In this paper, we propose a structure-preserving transformation (SPT) for generating natural and diverse adversarial examples with extremely high transferability. The key idea of our approach is to allow perceptible deviation in adversarial examples while keeping structure patterns that are central to a human classifier. Empirical results on the MNIST and the fashion-MNIST datasets show that adversarial examples generated by our approach can easily bypass strong adversarial training. Further, they transfer well to other target models with no loss or little loss of successful attack rate.
]]></description>
<dc:subject>adversarial-tricks neural-networks image-processing clustering deep-learning rather-interesting feature-intuiting to-write-about consider:genetic-programming</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:731aa2f65a42/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:adversarial-tricks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:deep-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:feature-intuiting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:genetic-programming"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1710.02574">
    <title>[1710.02574] Partially Asynchronous Distributed Unmixing of Hyperspectral Images</title>
    <dc:date>2019-02-05T10:27:35+00:00</dc:date>
    <link>https://arxiv.org/abs/1710.02574</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[So far, the problem of unmixing large or multitemporal hyperspectral datasets has been specifically addressed in the remote sensing literature only by a few dedicated strategies. Among them, some attempts have been made within a distributed estimation framework, in particular relying on the alternating direction method of multipliers (ADMM). In this paper, we propose to study the interest of a partially asynchronous distributed unmixing procedure based on a recently proposed asynchronous algorithm. Under standard assumptions, the proposed algorithm inherits its convergence properties from recent contributions in non-convex optimization, while allowing the problem of interest to be efficiently addressed. Comparisons with a distributed synchronous counterpart of the proposed unmixing procedure allow its interest to be assessed on synthetic and real data. Besides, thanks to its genericity and flexibility, the procedure investigated in this work can be implemented to address various matrix factorization problems.
]]></description>
<dc:subject>image-processing algorithms distributed-processing data-fusion (the-simplest) regression to-understand</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:c4cb22960fa3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:distributed-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:data-fusion"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:(the-simplest)"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-understand"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.jeremyjordan.me/semantic-segmentation/">
    <title>An overview of semantic image segmentation.</title>
    <dc:date>2019-01-03T12:40:42+00:00</dc:date>
    <link>https://www.jeremyjordan.me/semantic-segmentation/</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[More specifically, the goal of semantic image segmentation is to label each pixel of an image with a corresponding class of what is being represented. Because we're predicting for every pixel in the image, this task is commonly referred to as dense prediction.

]]></description>
<dc:subject>image-segmentation image-processing distributed-processing rather-interesting neural-networks to-write-about to-do nudge-targets consider:representation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:754baaca51d3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-segmentation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:distributed-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-do"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:representation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1804.01622">
    <title>[1804.01622] Image Generation from Scene Graphs</title>
    <dc:date>2018-04-15T10:05:44+00:00</dc:date>
    <link>https://arxiv.org/abs/1804.01622</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[To truly understand the visual world our models should be able not only to recognize images but also generate them. To this end, there has been exciting recent progress on generating images from natural language descriptions. These methods give stunning results on limited domains such as descriptions of birds or flowers, but struggle to faithfully reproduce complex sentences with many objects and relationships. To overcome this limitation we propose a method for generating images from scene graphs, enabling explicitly reasoning about objects and their relationships. Our model uses graph convolution to process input graphs, computes a scene layout by predicting bounding boxes and segmentation masks for objects, and converts the layout to an image with a cascaded refinement network. The network is trained adversarially against a pair of discriminators to ensure realistic outputs. We validate our approach on Visual Genome and COCO-Stuff, where qualitative results, ablations, and user studies demonstrate our method's ability to generate complex images with multiple objects.
]]></description>
<dc:subject>deep-learning image-processing generative-art generative-models turning-the-handle-the-other-way to-write-about to-do</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:3f79cf6bdc72/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:deep-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:generative-art"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:generative-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:turning-the-handle-the-other-way"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-do"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://bmcsystbiol.biomedcentral.com/articles/10.1186/s12918-017-0399-z">
    <title>Image analysis driven single-cell analytics for systems microbiology | BMC Systems Biology | Full Text</title>
    <dc:date>2018-04-02T12:27:46+00:00</dc:date>
    <link>https://bmcsystbiol.biomedcentral.com/articles/10.1186/s12918-017-0399-z</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[BaSCA can be used to analyze accurately and efficiently cell movies both at a high resolution (single-cell level) and at a large scale (communities with many dense colonies) as needed to shed light on e.g. how bacterial community effects and epigenetic information transfer play a role on important phenomena for human health, such as biofilm formation, persisters’ emergence etc. Moreover, it enables studying the role of single-cell stochasticity without losing sight of community effects that may drive it.

]]></description>
<dc:subject>machine-learning image-processing image-segmentation cell-biology indistinguishable-from-magic experimental-biology wow algorithms</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:dce14d4031a9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-segmentation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:cell-biology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:indistinguishable-from-magic"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:experimental-biology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:wow"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1611.05896">
    <title>[1611.05896] Answering Image Riddles using Vision and Reasoning through Probabilistic Soft Logic</title>
    <dc:date>2018-03-10T13:41:36+00:00</dc:date>
    <link>https://arxiv.org/abs/1611.05896</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[In this work, we explore a genre of puzzles ("image riddles") which involves a set of images and a question. Answering these puzzles require both capabilities involving visual detection (including object, activity recognition) and, knowledge-based or commonsense reasoning. We compile a dataset of over 3k riddles where each riddle consists of 4 images and a groundtruth answer. The annotations are validated using crowd-sourced evaluation. We also define an automatic evaluation metric to track future progress. Our task bears similarity with the commonly known IQ tasks such as analogy solving, sequence filling that are often used to test intelligence. 
We develop a Probabilistic Reasoning-based approach that utilizes probabilistic commonsense knowledge to answer these riddles with a reasonable accuracy. We demonstrate the results of our approach using both automatic and human evaluations. Our approach achieves some promising results for these riddles and provides a strong baseline for future attempts. We make the entire dataset and related materials publicly available to the community in ImageRiddle Website (this http URL).]]></description>
<dc:subject>machine-learning image-processing natural-language-processing deep-learning puzzles rather-interesting to-write-about nudge-targets consider:looking-to-see consider:integrating-NLP</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:c5a993f62ebd/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:natural-language-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:deep-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:puzzles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:looking-to-see"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:integrating-NLP"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1801.06769">
    <title>[1801.06769] Deep joint rain and haze removal from single images</title>
    <dc:date>2018-03-10T13:40:15+00:00</dc:date>
    <link>https://arxiv.org/abs/1801.06769</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Rain removal from a single image is a challenge which has been studied for a long time. In this paper, a novel convolutional neural network based on wavelet and dark channel is proposed. On one hand, we think that rain streaks correspond to high frequency component of the image. Therefore, haar wavelet transform is a good choice to separate the rain streaks and background to some extent. More specifically, the LL subband of a rain image is more inclined to express the background information, while LH, HL, HH subband tend to represent the rain streaks and the edges. On the other hand, the accumulation of rain streaks from long distance makes the rain image look like haze veil. We extract dark channel of rain image as a feature map in network. By increasing this mapping between the dark channel of input and output images, we achieve haze removal in an indirect way. All of the parameters are optimized by back-propagation. Experiments on both synthetic and real- world datasets reveal that our method outperforms other state-of- the-art methods from a qualitative and quantitative perspective.
]]></description>
<dc:subject>image-processing machine-learning deep-learning algorithms rather-interesting consider:performance-measures</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:a99297007a97/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:deep-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:performance-measures"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1710.00194">
    <title>[1710.00194] Where computer vision can aid physics: dynamic cloud motion forecasting from satellite images</title>
    <dc:date>2018-02-24T12:28:35+00:00</dc:date>
    <link>https://arxiv.org/abs/1710.00194</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[This paper describes a new algorithm for solar energy forecasting from a sequence of Cloud Optical Depth (COD) images. The algorithm is based on the following simple observation: the dynamics of clouds represented by COD images resembles the motion (transport) of a density in a fluid flow. This suggests that, to forecast the motion of COD images, it is sufficient to forecast the flow. The latter, in turn, can be accomplished by fitting a parametric model of the fluid flow to the COD images observed in the past. Namely, the learning phase of the algorithm is composed of the following steps: (i) given a sequence of COD images, the snapshots of the optical flow are estimated from two consecutive COD images; (ii) these snapshots are then assimilated into a Navier-Stokes Equation (NSE), i.e. an initial velocity field for NSE is selected so that the corresponding NSE' solution is as close as possible to the optical flow snapshots. The prediction phase consists of utilizing a linear transport equation, which describes the propagation of COD images in the fluid flow predicted by NSE, to estimate the future motion of the COD images. The algorithm has been tested on COD images provided by two geostationary operational environmental satellites from NOAA serving the west-hemisphere.
]]></description>
<dc:subject>image-processing prediction nudge-targets consider:looking-to-see representation low-hanging-fruit</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:5014b730df49/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:looking-to-see"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:low-hanging-fruit"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1710.06647">
    <title>[1710.06647] Image Restoration by Iterative Denoising and Backward Projections</title>
    <dc:date>2018-02-24T12:11:07+00:00</dc:date>
    <link>https://arxiv.org/abs/1710.06647</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Inverse problems appear in many applications such as image deblurring and inpainting. The common approach to address them is to design a specific algorithm for each problem. The Plug-and-Play (P&P) framework, which has been recently introduced, allows solving general inverse problems by leveraging the impressive capabilities of existing denoising algorithms. While this fresh strategy has found many applications, a burdensome parameter tuning is often required in order to obtain high-quality results. In this work, we propose an alternative method for solving inverse problems using denoising algorithms, which requires less parameter tuning. We provide a theoretical analysis of the method, and empirically demonstrate that it is competitive with task-specific techniques and the P&P approach for image inpainting and deblurring.
]]></description>
<dc:subject>inverse-problems image-processing superresolution algorithms performance-measure to-write-about nudge-targets consider:looking-to-see</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:b1b97064eb8d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inverse-problems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:superresolution"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:performance-measure"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:looking-to-see"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://design.google/library/ux-ai/">
    <title>The UX of AI - Library - Google Design</title>
    <dc:date>2018-01-27T23:27:01+00:00</dc:date>
    <link>https://design.google/library/ux-ai/</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[As was the case with the mobile revolution, and the web before that, machine learning will cause us to rethink, restructure, and reconsider what’s possible in virtually every experience we build. In the Google UX community, we’ve started an effort called “human-centered machine learning” to help focus and guide that conversation. Using this lens, we look across products to see how machine learning (ML) can stay grounded in human needs while solving for them—in ways that are uniquely possible through ML. Our team at Google works across the company to bring UXers up to speed on core ML concepts, understand how to best integrate ML into the UX utility belt, and ensure we're building ML and AI in inclusive ways.
]]></description>
<dc:subject>image-processing the-mangle-in-practice philosophy-of-engineering user-experience learning-by-doing to-write-about</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:cc8d84470a9d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:the-mangle-in-practice"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:philosophy-of-engineering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:user-experience"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-by-doing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
</rdf:Bag></taxo:topics>
</item>
</rdf:RDF>