<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (Vaguery)</title>
    <link>https://pinboard.in/u:Vaguery/public/</link>
    <description>recent bookmarks from Vaguery</description>
    <items>
      <rdf:Seq>	<rdf:li rdf:resource="https://arxiv.org/abs/2410.04480"/>
	<rdf:li rdf:resource="https://www.science.org/doi/10.1126/science.adv7924"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2107.08013"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2102.02365"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2003.01943"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2010.11831"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2012.02179"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1908.02894"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1808.06423"/>
	<rdf:li rdf:resource="https://www.microsoft.com/en-us/research/publication/automating-string-processing-spreadsheets-using-input-output-examples/"/>
	<rdf:li rdf:resource="https://projecteuclid.org/euclid.aos/1176325360"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1703.05105"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1704.08676"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1104.5557"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1703.05210"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1308.0419"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1609.08455"/>
	<rdf:li rdf:resource="http://link.springer.com/chapter/10.1007/978-3-642-03798-6_28"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1702.02680"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1702.05037"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1701.09123"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1612.02542"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1612.02710"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1612.00423"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1612.00085"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1608.03396"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1602.00061"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1512.06238"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1501.00592"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1503.03745"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1501.04656"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1403.2150"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1412.5903"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1411.0707"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1401.2431"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1408.6618"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1404.3606"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1402.3902"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1407.3017"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1312.6607"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1205.4265"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1403.1942"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1310.3167"/>
	<rdf:li rdf:resource="http://jmlr.org/papers/v14/clark13a.html"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1311.4486"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1311.5763"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1310.6767"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1309.5823"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1211.4909"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1308.6273"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1309.4061"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/0912.5193"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1303.3257"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1303.7264"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1302.4242"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1302.1543"/>
	<rdf:li rdf:resource="http://school.maths.uwa.edu.au/~gordon/sudokumin.php"/>
	<rdf:li rdf:resource="http://www.setbb.com/phpbb/viewtopic.php?mforum=sudoku&amp;p=8575"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1204.2765"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1206.0773"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1111.3304"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1201.5568"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1109.2618"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1109.3248"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1112.5794"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1105.2584"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1107.0674"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1107.0550"/>
	<rdf:li rdf:resource="http://languagelog.ldc.upenn.edu/nll/?p=3180"/>
	<rdf:li rdf:resource="http://falkenblog.blogspot.com/2011/05/high-frequency-trading-paper.html"/>
      </rdf:Seq>
    </items>
  </channel><item rdf:about="https://arxiv.org/abs/2410.04480">
    <title>[2410.04480] Learning to Solve Abstract Reasoning Problems with Neurosymbolic Program Synthesis and Task Generation</title>
    <dc:date>2026-05-24T10:59:46+00:00</dc:date>
    <link>https://arxiv.org/abs/2410.04480</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[The ability to think abstractly and reason by analogy is a prerequisite to rapidly adapt to new conditions, tackle newly encountered problems by decomposing them, and synthesize knowledge to solve problems comprehensively. We present TransCoder, a method for solving abstract problems based on neural program synthesis, and conduct a comprehensive analysis of decisions made by the generative module of the proposed architecture. At the core of TransCoder is a typed domain-specific language, designed to facilitate feature engineering and abstract reasoning. In training, we use the programs that failed to solve tasks to generate new tasks and gather them in a synthetic dataset. As each synthetic task created in this way has a known associated program (solution), the model is trained on them in supervised mode. Solutions are represented in a transparent programmatic form, which can be inspected and verified. We demonstrate TransCoder's performance using the Abstract Reasoning Corpus dataset, for which our framework generates tens of thousands of synthetic problems with corresponding solutions and facilitates systematic progress in learning.
]]></description>
<dc:subject>ARC machine-learning genetic-programming neural-networks program-synthesis learning-from-data artificial-intelligence rather-interesting hey-I-know-this-guy</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:5ba29e7a72c7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:ARC"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:genetic-programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:program-synthesis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:artificial-intelligence"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:hey-I-know-this-guy"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.science.org/doi/10.1126/science.adv7924">
    <title>DefensePredictor: A machine learning model to discover prokaryotic immune systems | Science</title>
    <dc:date>2026-04-26T12:32:02+00:00</dc:date>
    <link>https://www.science.org/doi/10.1126/science.adv7924</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Bacteria have diverse immune systems that protect them from viral infection, yet the full extent of this diversity remains unknown. Two groups of researchers have now independently developed machine learning and deep learning models that leverage protein sequences and genomic context to predict antiphage defense systems at scale. DeWeirdt et al. developed a model called DefensePredictor and applied it to Escherichia coli, experimentally validating dozens of previously uncharacterized defense systems. Mordret et al. developed several different models and applied them to over 120 million proteins from bacterial genomes, identifying hundreds of thousands of candidate antiphage families, many lacking any prior annotation. Together, these studies reveal that bacterial immunity is far more extensive than previously thought and highlight how such discoveries can inspire powerful biotechnologies. —Di Jiang
]]></description>
<dc:subject>structural-biology machine-learning bioinformatics indistinguishable-from-magic learning-from-data to-understand</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:d94701a9a214/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:structural-biology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:bioinformatics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:indistinguishable-from-magic"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-understand"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2107.08013">
    <title>[2107.08013] Machine learning of Kondo physics using variational autoencoders and symbolic regression</title>
    <dc:date>2024-07-10T13:41:50+00:00</dc:date>
    <link>https://arxiv.org/abs/2107.08013</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We employ variational autoencoders to extract physical insight from a dataset of one-particle Anderson impurity model spectral functions. Autoencoders are trained to find a low-dimensional, latent space representation that faithfully characterizes each element of the training set, as measured by a reconstruction error. Variational autoencoders, a probabilistic generalization of standard autoencoders, further condition the learned latent space to promote highly interpretable features. In our study, we find that the learned latent variables strongly correlate with well known, but nontrivial, parameters that characterize emergent behaviors in the Anderson impurity model. In particular, one latent variable correlates with particle-hole asymmetry, while another is in near one-to-one correspondence with the Kondo temperature, a dynamically generated low-energy scale in the impurity model. Using symbolic regression, we model this variable as a function of the known bare physical input parameters and "rediscover" the non-perturbative formula for the Kondo temperature. The machine learning pipeline we develop suggests a general purpose approach which opens opportunities to discover new domain knowledge in other physical systems.
]]></description>
<dc:subject>materials-science learning-from-data genetic-programming symbolic-regression machine-learning rather-interesting clustering pattern-discovery</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:af6a41ae0e59/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:materials-science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:genetic-programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:symbolic-regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:pattern-discovery"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2102.02365">
    <title>[2102.02365] Wind Field Reconstruction with Adaptive Random Fourier Features</title>
    <dc:date>2022-04-19T10:30:54+00:00</dc:date>
    <link>https://arxiv.org/abs/2102.02365</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We investigate the use of spatial interpolation methods for reconstructing the horizontal near-surface wind field given a sparse set of measurements. In particular, random Fourier features is compared to a set of benchmark methods including Kriging and Inverse distance weighting. Random Fourier features is a linear model $\beta(\mathbf{x}) = \sum_{k=1}^{K} \beta_k e^{i\omega_k \mathbf{x}}$ approximating the velocity field, with frequencies $\omega_k$ randomly sampled and amplitudes $\beta_k$ trained to minimize a loss function. We include a physically motivated divergence penalty term $|\nabla \cdot \beta(\mathbf{x})|^2$, as well as a penalty on the Sobolev norm. We derive a bound on the generalization error and derive a sampling density that minimizes the bound. Following (arXiv:2007.10683 [math.NA]), we devise an adaptive Metropolis-Hastings algorithm for sampling the frequencies of the optimal distribution. In our experiments, our random Fourier features model outperforms the benchmark models.
]]></description>
<dc:subject>approximation inverse-problems rather-interesting learning-from-data nonlinear-dynamics online-learning to-understand statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:3fa7289011de/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:approximation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inverse-problems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nonlinear-dynamics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:online-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-understand"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2003.01943">
    <title>[2003.01943] Machine Learning of Mechanical Properties of Steels</title>
    <dc:date>2021-10-31T11:05:48+00:00</dc:date>
    <link>https://arxiv.org/abs/2003.01943</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[The mechanical properties are essential for structural materials. The analyzed 360 data on four mechanical properties of steels, viz. fatigue strength, tensile strength, fracture strength, and hardness, are selected from the NIMS database, including carbon steels, and low-alloy steels. Five machine learning algorithms were applied on the 360 data to predict the mechanical properties and random forest regression illustrates the best performance. The feature selection was conducted by random forest and symbolic regressions, leading to the four most important features of tempering temperature, and alloying elements of carbon, chromium, and molybdenum to the mechanical properties of steels. Besides, mathematic expressions were generated via symbolic regression, and the expressions explicitly predict how each of the four mechanical properties varies quantitatively with the four most important features. The present work demonstrates the great potential of symbolic regression in the discovery of novel advanced materials.
]]></description>
<dc:subject>machine-learning manufacturing materials-science rather-interesting learning-from-data to-read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:55704138e300/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:manufacturing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:materials-science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2010.11831">
    <title>[2010.11831] Data-driven RANS closures for three-dimensional flows around bluff bodies</title>
    <dc:date>2021-10-22T10:37:33+00:00</dc:date>
    <link>https://arxiv.org/abs/2010.11831</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[In this short note we apply the recently proposed data-driven RANS closure modelling framework of Schmelzer et al. (2020) to fully three-dimensional, high Reynolds number flows: namely wall-mounted cubes and cuboids at Re=40,000, and a cylinder at Re=140,000. For each flow, a new RANS closure is generated using sparse symbolic regression based on LES or DES reference data. This new model is implemented in a CFD solver, and subsequently applied to prediction of the other flows. We see consistent improvements compared to the baseline k−ω SST model in predictions of mean-velocity in the complete flow domain.
]]></description>
<dc:subject>symbolic-regression genetic-programming learning-from-data fluid-dynamics rather-interesting nonlinear-dynamics to-write-about consider:representation consider:rediscovery</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:19ae5ae3a8af/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:symbolic-regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:genetic-programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:fluid-dynamics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nonlinear-dynamics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:rediscovery"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2012.02179">
    <title>[2012.02179] Reconstructing cellular automata rules from observations at nonconsecutive times</title>
    <dc:date>2020-12-05T01:11:57+00:00</dc:date>
    <link>https://arxiv.org/abs/2012.02179</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Recent experiments by Springer and Kenyon have shown that a deep neural network can be trained to predict the action of t steps of Conway's Game of Life automaton given millions of examples of this action on random initial states. However, training was never completely successful for t>1, and even when successful, a reconstruction of the elementary rule (t=1) from t>1 data is not within the scope of what the neural network can deliver. We describe an alternative network-like method, based on constraint projections, where this is possible. From a single data item this method perfectly reconstructs not just the automaton rule but also the states in the time steps it did not see. For a unique reconstruction, the size of the initial state need only be large enough that it and the t−1 states it evolves into contain all possible automaton input patterns. We demonstrate the method on 1D binary cellular automata that take inputs from n adjacent cells. The unknown rules in our experiments are not restricted to simple rules derived from a few linear functions on the inputs (as in Game of Life), but include all $2^{2^n}$ possible rules on n inputs. Our results extend to n=6, for which exhaustive rule-search is not feasible. By relaxing translational symmetry in space and also time, our method is attractive as a platform for the learning of binary data, since the discreteness of the variables does not pose the same challenge it does for gradient-based methods.
]]></description>
<dc:subject>via:cshalizi cellular-automata prediction learning-from-data rather-interesting robustness inference to-write-about consider:representation to-simulate</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:1a035887ab30/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:via:cshalizi"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:cellular-automata"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:robustness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-simulate"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1908.02894">
    <title>[1908.02894] How much data is sufficient to learn high-performing algorithms?</title>
    <dc:date>2019-09-22T12:05:29+00:00</dc:date>
    <link>https://arxiv.org/abs/1908.02894</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Algorithms for scientific analysis typically have tunable parameters that significantly influence computational efficiency and solution quality. If a parameter setting leads to strong algorithmic performance on average over a set of typical problem instances, that parameter setting---ideally---will perform well in the future. However, if the set of typical problem instances is small, average performance will not generalize to future performance. This raises the question: how large should this set be? We answer this question for any algorithm satisfying an easy-to-describe, ubiquitous property: its performance is a piecewise-structured function of its parameters. We are the first to provide a unified sample complexity framework for algorithm parameter configuration; prior research followed case-by-case analyses. We present applications from diverse domains, including biology, political science, and economics.
]]></description>
<dc:subject>machine-learning looking-to-see learning-from-data rather-interesting to-write-about to-simulate consider:performance-measures consider:lexicase consider:symbolic-regression</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:275778e41fa8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:looking-to-see"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-simulate"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:performance-measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:lexicase"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:symbolic-regression"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1808.06423">
    <title>[1808.06423] What Stands-in for a Missing Tool? A Prototypical Grounded Knowledge-based Approach to Tool Substitution</title>
    <dc:date>2019-02-05T11:45:34+00:00</dc:date>
    <link>https://arxiv.org/abs/1808.06423</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[When a robot is operating in a dynamic environment, it cannot be assumed that a tool required to solve a given task will always be available. In case of a missing tool, an ideal response would be to find a substitute to complete the task. In this paper, we present a proof of concept of a grounded knowledge-based approach to tool substitution. In order to validate the suitability of a substitute, we conducted experiments involving 22 substitution scenarios. The substitutes computed by the proposed approach were validated on the basis of the experts' choices for each scenario. Our evaluation showed, in 20 out of 22 scenarios (91%), the approach identified the same substitutes as experts.
]]></description>
<dc:subject>artificial-intelligence feature-construction learning-from-data rather-interesting to-write-about consider:looking-to-see</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:1333d51fe74f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:artificial-intelligence"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:feature-construction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:looking-to-see"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.microsoft.com/en-us/research/publication/automating-string-processing-spreadsheets-using-input-output-examples/">
    <title>Automating String Processing in Spreadsheets using Input-Output Examples - Microsoft Research</title>
    <dc:date>2018-11-18T13:46:14+00:00</dc:date>
    <link>https://www.microsoft.com/en-us/research/publication/automating-string-processing-spreadsheets-using-input-output-examples/</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We describe the design of a string programming/expression language that supports restricted forms of regular expressions, conditionals and loops. The language is expressive enough to represent a wide variety of string manipulation tasks that end-users struggle with. We describe an algorithm based on several novel concepts for synthesizing a desired program in this language from input-output examples. The synthesis algorithm is very efficient taking fraction of a second for various benchmark examples. The synthesis algorithm is interactive and has several desirable features: it can rank multiple solutions and has fast convergence, it can detect noise in the user input, and it supports an active interaction model wherein the user is prompted to provide outputs on inputs that may have multiple computational interpretations.

The algorithm has been implemented as an interactive add-in for Microsoft Excel spreadsheet system. The prototype tool has met the golden test – it has synthesized part of itself, and has been used to solve problems beyond authors’ imagination.

]]></description>
<dc:subject>learning-from-data microsoft software-synthesis rather-interesting pattern-discovery to-write-about</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:201a3ea0697e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:microsoft"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:software-synthesis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:pattern-discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://projecteuclid.org/euclid.aos/1176325360">
    <title>Dembo , Peres : A Topological Criterion for Hypothesis Testing</title>
    <dc:date>2018-08-07T10:39:15+00:00</dc:date>
    <link>https://projecteuclid.org/euclid.aos/1176325360</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[A simple topological criterion is given for the existence of a sequence of tests for composite hypothesis testing problems, such that almost surely only finitely many errors are made.]]></description>
<dc:subject>statistics learning-from-data algorithms existence-proof consider:looking-to-see to-write-about nudge-targets via:cshalizi</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:929d3ba7c032/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:existence-proof"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:looking-to-see"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:via:cshalizi"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1703.05105">
    <title>[1703.05105] A Data Driven Approach for Compound Figure Separation Using Convolutional Neural Networks</title>
    <dc:date>2017-08-14T13:13:32+00:00</dc:date>
    <link>https://arxiv.org/abs/1703.05105</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[A key problem in automatic analysis and understanding of scientific papers is to extract semantic information from non-textual paper components like figures, diagrams, tables, etc. This research always requires a very first preprocessing step: decomposing compound multi-part figures into individual subfigures. Previous work in compound figure separation has been based on manually designed features and separation rules, which often fail for less common figure types and layouts. Moreover, no implementation for compound figure decomposition is publicly available. 
This paper proposes a data driven approach to separate compound figures using modern deep Convolutional Neural Networks (CNNs) to train the separator in an end-to-end manner. CNNs eliminate the need for manually designing features and separation rules, but require large amount of annotated training data. We overcome this challenge using transfer learning as well as automatically synthesizing training exemplars. We evaluate our technique on the ImageCLEF Medical dataset, achieving 85.9% accuracy and outperforming manually engineered previous techniques. We made the resulting approach available as an easy-to-use Python library, aiming to promote further research in scientific figure mining.]]></description>
<dc:subject>OCR neural-networks image-processing page-structure learning-from-data rather-interesting algorithms machine-learning feature-extraction nudge-targets consider:looking-to-see</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:8d8df90b8562/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:OCR"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:page-structure"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:feature-extraction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:looking-to-see"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1704.08676">
    <title>[1704.08676] A quantitative assessment of the effect of different algorithmic schemes to the task of learning the structure of Bayesian Networks</title>
    <dc:date>2017-08-07T11:24:37+00:00</dc:date>
    <link>https://arxiv.org/abs/1704.08676</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[One of the most challenging tasks when adopting Bayesian Networks (BNs) is the one of learning their structure from data. This task is complicated by the huge search space of possible solutions and turned out to be a well-known NP-hard problem and, hence, approximations are required. However, to the best of our knowledge, a quantitative analysis of the performance and characteristics of the different heuristics to solve this problem has never been done before. 
For this reason, in this work, we provide a detailed study of the different state-of-the-arts methods for structural learning on simulated data considering both BNs with discrete and continuous variables, and with different rates of noise in the data. In particular, we investigate the characteristics of different widespread scores proposed for the inference and the statistical pitfalls within them.]]></description>
<dc:subject>learning-from-data machine-learning statistics algorithms rather-interesting inference nudge-targets consider:looking-to-see consider:representation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:64171189960c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:looking-to-see"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:representation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1104.5557">
    <title>[1104.5557] Randomized algorithms for matrices and data</title>
    <dc:date>2017-05-09T16:02:22+00:00</dc:date>
    <link>https://arxiv.org/abs/1104.5557</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Randomized algorithms for very large matrix problems have received a great deal of attention in recent years. Much of this work was motivated by problems in large-scale data analysis, and this work was performed by individuals from many different research communities. This monograph will provide a detailed overview of recent work on the theory of randomized matrix algorithms as well as the application of those ideas to the solution of practical problems in large-scale data analysis. An emphasis will be placed on a few simple core ideas that underlie not only recent theoretical advances but also the usefulness of these tools in large-scale data applications. Crucial in this context is the connection with the concept of statistical leverage. This concept has long been used in statistical regression diagnostics to identify outliers; and it has recently proved crucial in the development of improved worst-case matrix algorithms that are also amenable to high-quality numerical implementation and that are useful to domain scientists. Randomized methods solve problems such as the linear least-squares problem and the low-rank matrix approximation problem by constructing and operating on a randomized sketch of the input matrix. Depending on the specifics of the situation, when compared with the best previously-existing deterministic algorithms, the resulting randomized algorithms have worst-case running time that is asymptotically faster; their numerical implementations are faster in terms of clock-time; or they can be implemented in parallel computing environments where existing numerical algorithms fail to run at all. Numerous examples illustrating these observations will be described in detail.
]]></description>
<dc:subject>via:arthegall data-analysis matrices feature-extraction learning-from-data data-mining rather-interesting to-read to-understand</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:39273ba6e6fb/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:via:arthegall"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:data-analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:matrices"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:feature-extraction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-understand"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1703.05210">
    <title>[1703.05210] Neural Networks retrieving Boolean patterns in a sea of Gaussian ones</title>
    <dc:date>2017-04-26T11:39:39+00:00</dc:date>
    <link>https://arxiv.org/abs/1703.05210</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Restricted Boltzmann Machines are key tools in Machine Learning and are described by the energy function of bipartite spin-glasses. From a statistical mechanical perspective, they share the same Gibbs measure of Hopfield networks for associative memory. In this equivalence, weights in the former play as patterns in the latter. As Boltzmann machines usually require real weights to be trained with gradient descent like methods, while Hopfield networks typically store binary patterns to be able to retrieve, the investigation of a mixed Hebbian network, equipped with both real (e.g., Gaussian) and discrete (e.g., Boolean) patterns naturally arises. We prove that, in the challenging regime of a high storage of real patterns, where retrieval is forbidden, an extra load of Boolean patterns can still be retrieved, as long as the ratio among the overall load and the network size does not exceed a critical threshold, that turns out to be the same of the standard Amit-Gutfreund-Sompolinsky theory. Assuming replica symmetry, we study the case of a low load of Boolean patterns combining the stochastic stability and Hamilton-Jacobi interpolating techniques. The result can be extended to the high load by a non rigorous but standard replica computation argument.
]]></description>
<dc:subject>machine-learning learning-from-data unsupervised-learning rather-interesting information-theory to-write-about nudge-targets consider:robustness</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:92a40590865b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:unsupervised-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:information-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:robustness"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1308.0419">
    <title>[1308.0419] Inverse Procedural Modeling of Facade Layouts</title>
    <dc:date>2017-04-23T10:46:19+00:00</dc:date>
    <link>https://arxiv.org/abs/1308.0419</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[In this paper, we address the following research problem: How can we generate a meaningful split grammar that explains a given facade layout? To evaluate if a grammar is meaningful, we propose a cost function based on the description length and minimize this cost using an approximate dynamic programming framework. Our evaluation indicates that our framework extracts meaningful split grammars that are competitive with those of expert users, while some users and all competing automatic solutions are less successful.]]></description>
<dc:subject>grammar L-systems generative-models image-processing learning-from-data machine-learning inverse-problems nudge-targets consider:representation consider:feature-discovery</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:7c0dd60c4f76/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:grammar"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:L-systems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:generative-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inverse-problems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:feature-discovery"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1609.08455">
    <title>[1609.08455] Stratified construction of neural network based interatomic models for multicomponent materials</title>
    <dc:date>2017-02-28T12:21:52+00:00</dc:date>
    <link>https://arxiv.org/abs/1609.08455</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Recent application of neural networks (NNs) to modeling interatomic interactions has shown the learning machines' encouragingly accurate performance for select elemental and multicomponent systems. In this study, we explore the possibility of building a library of NN-based models by introducing a hierarchical NN training. In such a stratified procedure NNs for multicomponent systems are obtained by sequential training from the bottom up: first unaries, then binaries, and so on. Advantages of constructing NN sets with shared parameters include acceleration of the training process and intact description of the constituent systems. We use an automated generation of diverse structure sets for NN training on density functional theory-level reference energies. In the test case of Cu, Pd, Ag, Cu-Pd, Cu-Ag, Pd-Ag, and Cu-Pd-Ag systems, NNs trained in the traditional and stratified fashions are found to have essentially identical accuracy for defect energies, phonon dispersions, formation energies, etc. The models' robustness is further illustrated via unconstrained evolutionary structure searches in which the NN is used for the local optimization of crystal unit cells.
]]></description>
<dc:subject>neural-networks metaheuristics materials-science rather-interesting learning-from-data empirical-models to-write-about nudge-targets consider:feature-discovery consider:representation consider:interpretability</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:9144b5d5821e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:metaheuristics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:materials-science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:empirical-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:feature-discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:interpretability"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://link.springer.com/chapter/10.1007/978-3-642-03798-6_28">
    <title>Making Archetypal Analysis Practical - Springer</title>
    <dc:date>2017-02-27T01:31:46+00:00</dc:date>
    <link>http://link.springer.com/chapter/10.1007/978-3-642-03798-6_28</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Archetypal analysis represents the members of a set of multivariate data as a convex combination of extremal points of the data. It allows for dimensionality reduction and clustering and is particularly useful whenever the data are superpositions of basic entities. However, since its computation costs grow quadratically with the number of data points, the original algorithm hardly applies to modern pattern recognition or data mining settings. In this paper, we introduce ways of notably accelerating archetypal analysis. Our experiments are the first successful application of the technique to large scale data analysis problems.
]]></description>
<dc:subject>archetypal-analysis dimension-reduction learning-from-data machine-learning algorithms rather-interesting to-write-about</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:d3551f0801e9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:archetypal-analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:dimension-reduction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1702.02680">
    <title>[1702.02680] Manifold Based Low-rank Regularization for Image Restoration and Semi-supervised Learning</title>
    <dc:date>2017-02-26T12:33:44+00:00</dc:date>
    <link>https://arxiv.org/abs/1702.02680</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Low-rank structures play important role in recent advances of many problems in image science and data science. As a natural extension of low-rank structures for data with nonlinear structures, the concept of the low-dimensional manifold structure has been considered in many data processing problems. Inspired by this concept, we consider a manifold based low-rank regularization as a linear approximation of manifold dimension. This regularization is less restricted than the global low-rank regularization, and thus enjoy more flexibility to handle data with nonlinear structures. As applications, we demonstrate the proposed regularization to classical inverse problems in image sciences and data sciences including image inpainting, image super-resolution, X-ray computer tomography (CT) image reconstruction and semi-supervised learning. We conduct intensive numerical experiments in several image restoration problems and a semi-supervised learning problem of classifying handwritten digits using the MINST data. Our numerical tests demonstrate the effectiveness of the proposed methods and illustrate that the new regularization methods produce outstanding results by comparing with many existing methods.
]]></description>
<dc:subject>image-processing superresolution rather-interesting inference learning-from-data algorithms nudge-targets consider:feature-discovery consider:looking-to-see</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:9233f0754a0b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:superresolution"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:feature-discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:looking-to-see"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1702.05037">
    <title>[1702.05037] Additive Models with Trend Filtering</title>
    <dc:date>2017-02-23T22:13:41+00:00</dc:date>
    <link>https://arxiv.org/abs/1702.05037</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We consider additive models built with trend filtering, i.e., additive models whose components are each regularized by the (discrete) total variation of their (k+1)st (discrete) derivative, for a chosen integer k≥0. This results in kth degree piecewise polynomial components, (e.g., k=0 gives piecewise constant components, k=1 gives piecewise linear, k=2 gives piecewise quadratic, etc.). In univariate nonparametric regression, the localized nature of the total variation regularizer used by trend filtering has been shown to produce estimates with superior local adaptivity to those from smoothing splines (and linear smoothers, more generally) (Tibshirani [2014]). Further, the structured nature of this regularizer has been shown to lead to highly efficient computational routines for trend filtering (Kim et al. [2009], Ramdas and Tibshirani [2016]). In this paper, we argue that both of these properties carry over to the additive models setting. We derive fast error rates for additive trend filtering estimates, and prove that these rates are minimax optimal when the underlying function is itself additive and has components whose derivatives are of bounded variation. We argue that backfitting provides an efficient algorithm for additive trend filtering, as it is built around the fast univariate trend filtering solvers; furthermore, we describe a modified backfitting procedure whose iterations can be run in parallel. Finally, we present experiments that examine the empirical properties of additive trend filtering models, and outline some interesting extensions.
]]></description>
<dc:subject>statistics curve-fitting models learning-from-data rather-interesting representation nudge-targets consider:primitives</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:3af91983d932/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:curve-fitting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:primitives"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1701.09123">
    <title>[1701.09123] Robust Multilingual Named Entity Recognition with Shallow Semi-Supervised Features</title>
    <dc:date>2017-02-12T11:54:39+00:00</dc:date>
    <link>https://arxiv.org/abs/1701.09123</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We present a multilingual Named Entity Recognition approach based on a robust and general set of features across languages and datasets. Our system combines shallow local information with clustering semi-supervised features induced on large amounts of unlabeled text. Understanding via empirical experimentation how to effectively combine various types of clustering features allows us to seamlessly export our system to other datasets and languages. The result is a simple but highly competitive system which obtains state of the art results across five languages and twelve datasets. The results are reported on standard shared task evaluation data such as CoNLL for English, Spanish and Dutch. Furthermore, and despite the lack of linguistically motivated features, we also report best results for languages such as Basque and German. In addition, we demonstrate that our method also obtains very competitive results even when the amount of supervised data is cut by half, alleviating the dependency on manually annotated data. Finally, the results show that our emphasis on clustering features is crucial to develop robust out-of-domain models. The system and models are freely available to facilitate its use and guarantee the reproducibility of results.
]]></description>
<dc:subject>natural-language-processing classification algorithms machine-learning rather-interesting linguistics corpus learning-from-data nudge-targets consider:feature-discovery data-fusion</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:0d5dd9d19980/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:natural-language-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:classification"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:linguistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:corpus"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:feature-discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:data-fusion"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1612.02542">
    <title>[1612.02542] Minimum Rates of Approximate Sufficient Statistics</title>
    <dc:date>2017-02-01T12:03:05+00:00</dc:date>
    <link>https://arxiv.org/abs/1612.02542</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Given a sufficient statistic for a parametric family of distributions, one can estimate the parameter without access to the data itself. However, the memory or code size for storing the sufficient statistic may nonetheless still be prohibitive. Indeed, for n independent data samples drawn from a k-nomial distribution with d=k−1 degrees of freedom, the length of the code scales as dlogn+O(1). In many applications though, we may not have a useful notion of sufficient statistics (e.g., when the parametric family is not an exponential family) and also may not need to reconstruct the generating distribution exactly. By adopting a Shannon-theoretic approach in which we consider allow a small error in estimating the generating distribution, we construct various notions of {\em approximate sufficient statistics} and show that the code length can be reduced to d2logn+O(1). We also note that the locality assumption that is used to describe the notion of local approximate sufficient statistics when the parametric family is not an exponential family can be dispensed of. We consider errors measured according to the relative entropy and variational distance criteria. For the code construction parts, we leverage Rissanen's minimum description length principle, which yields a non-vanishing error measured using the relative entropy. For the converse parts, we use Clarke and Barron's asymptotic expansion for the relative entropy of a parametrized distribution and the corresponding mixture distribution. The limitation of this method is that only a weak converse for the variational distance can be shown. We develop new techniques to achieve vanishing errors and we also prove strong converses for all our statements. The latter means that even if the code is allowed to have a non-vanishing error, its length must still be at least d2logn.
]]></description>
<dc:subject>statistics information-theory learning-from-data proof nudge-targets consider:performance-measures</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:aa823c77a8f1/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:information-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:proof"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:performance-measures"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1612.02710">
    <title>[1612.02710] Monte Carlo profile confidence intervals</title>
    <dc:date>2017-01-10T13:44:52+00:00</dc:date>
    <link>https://arxiv.org/abs/1612.02710</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Monte Carlo methods to evaluate and maximize the likelihood function enable the construction of confidence intervals and hypothesis tests, facilitating scientific investigation using models for which the likelihood function is intractable. When Monte Carlo error can be made small, by sufficiently exhaustive computation, then the standard theory and practice of likelihood-based inference applies. As data become larger, and models more complex, situations arise where no reasonable amount of computation can render Monte Carlo error negligible. We present profile likelihood methodology to provide frequentist inferences that take into account Monte Carlo uncertainty. We demonstrate our methodology in three situations, analyzing nonlinear dynamic models for spatiotemporal data, panel data, and genetic sequence data.
]]></description>
<dc:subject>statistics algorithms inference learning-from-data simulation rather-interesting nudge-targets consider:looking-to-see consider:performance-measures performance-measure</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:3ec57ea8a6fc/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:simulation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:looking-to-see"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:performance-measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:performance-measure"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1612.00423">
    <title>[1612.00423] TorontoCity: Seeing the World with a Million Eyes</title>
    <dc:date>2017-01-07T14:47:10+00:00</dc:date>
    <link>https://arxiv.org/abs/1612.00423</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[In this paper we introduce the TorontoCity benchmark, which covers the full greater Toronto area (GTA) with 712.5 km2 of land, 8439 km of road and around 400,000 buildings. Our benchmark provides different perspectives of the world captured from airplanes, drones and cars driving around the city. Manually labeling such a large scale dataset is infeasible. Instead, we propose to utilize different sources of high-precision maps to create our ground truth. Towards this goal, we develop algorithms that allow us to align all data sources with the maps while requiring minimal human supervision. We have designed a wide variety of tasks including building height estimation (reconstruction), road centerline and curb extraction, building instance segmentation, building contour extraction (reorganization), semantic labeling and scene type classification (recognition). Our pilot study shows that most of these tasks are still difficult for modern convolutional neural networks.
]]></description>
<dc:subject>learning-from-data benchmarking rather-interesting data-integration nudge-targets consider:looking-to-see consider:the-right-tool-for-the-job to-write-about</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:dc6a30789bf2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:benchmarking"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:data-integration"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:looking-to-see"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:the-right-tool-for-the-job"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1612.00085">
    <title>[1612.00085] Texture Enhancement via High-Resolution Style Transfer for Single-Image Super-Resolution</title>
    <dc:date>2016-12-17T14:54:37+00:00</dc:date>
    <link>https://arxiv.org/abs/1612.00085</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Recently, various deep-neural-network (DNN)-based approaches have been proposed for single-image super-resolution (SISR). Despite their promising results on major structure regions such as edges and lines, they still suffer from limited performance on texture regions that consist of very complex and fine patterns. This is because, during the acquisition of a low-resolution (LR) image via down-sampling, these regions lose most of the high frequency information necessary to represent the texture details. In this paper, we present a novel texture enhancement framework for SISR to effectively improve the spatial resolution in the texture regions as well as edges and lines. We call our method, high-resolution (HR) style transfer algorithm. Our framework consists of three steps: (i) generate an initial HR image from an interpolated LR image via an SISR algorithm, (ii) generate an HR style image from the initial HR image via down-scaling and tiling, and (iii) combine the HR style image with the initial HR image via a customized style transfer algorithm. Here, the HR style image is obtained by down-scaling the initial HR image and then repetitively tiling it into an image of the same size as the HR image. This down-scaling and tiling process comes from the idea that texture regions are often composed of small regions that similar in appearance albeit sometimes different in scale. This process creates an HR style image that is rich in details, which can be used to restore high-frequency texture details back into the initial HR image via the style transfer algorithm. Experimental results on a number of texture datasets show that our proposed HR style transfer algorithm provides more visually pleasing results compared with competitive methods.
]]></description>
<dc:subject>superresolution image-processing learning-from-data neural-networks feature-construction nudge-targets consider:representation consider:looking-to-see</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:356414ec0b74/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:superresolution"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:feature-construction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:looking-to-see"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1608.03396">
    <title>[1608.03396] A machine learning method for the large-scale evaluation of urban visual environment</title>
    <dc:date>2016-09-05T12:00:40+00:00</dc:date>
    <link>http://arxiv.org/abs/1608.03396</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Given the size of modern cities in the urbanising age, it is beyond the perceptual capacity of most people to develop a good knowledge about the beauty and ugliness of the city at every street corner. Correspondingly, for planners, it is also difficult to accurately answer questions like 'where are the worst-looking places in the city that regeneration should give first consideration', or 'in the fast urbanising cities, how is the city appearance changing', etc. To address this issue, we here present a computer vision method for the large-scale and automatic evaluation of the urban visual environment, by leveraging state-of-the-art machine learning techniques and the wide-coverage street view images. From the various factors that are at work, we choose two key features, the visual quality of street facade and the continuity of street wall, as the starting point of this line of analysis. In order to test the validity of this method, we further compare the machine ratings with ratings collected on site from 752 passers-by on fifty-six locations. We show that the machine learning model can produce a good estimation of people's real visual experience, and it holds much potential for various tasks in terms of urban design evaluation, culture identification, etc.
]]></description>
<dc:subject>image-processing machine-learning rather-interesting learning-from-data classification nudge-targets consider:rediscovery consider:feature-discovery</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:7f28b990387f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:classification"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:rediscovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:feature-discovery"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1602.00061">
    <title>[1602.00061] Spectrum Estimation from Samples</title>
    <dc:date>2016-08-06T13:03:36+00:00</dc:date>
    <link>http://arxiv.org/abs/1602.00061</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We consider the problem of approximating the set of eigenvalues of the covariance matrix of a multivariate distribution (equivalently, the problem of approximating the "population spectrum"), given access to samples drawn from the distribution. The eigenvalues of the covariance of a distribution contain basic information about the distribution, including the presence or lack of structure in the distribution, the effective dimensionality of the distribution, and the applicability of higher-level machine learning and multivariate statistical tools. We consider this fundamental recovery problem in the regime where the number of samples is comparable, or even sublinear in the dimensionality of the distribution in question. First, we propose a theoretically optimal and computationally efficient algorithm for recovering the moments of the eigenvalues of the population covariance matrix. We then leverage this accurate moment recovery, via a Wasserstein distance argument, to show that the vector of eigenvalues can be accurately recovered. Specifically, we show that our eigenvalue reconstruction algorithm is asymptotically consistent as the dimensionality of the distribution and sample size tend towards infinity, even in the sublinear sample regime where the ratio of the sample size to the dimensionality tends to zero. In addition to our theoretical results, we show that our approach performs well in practice for a broad range of distributions and sample sizes.
]]></description>
<dc:subject>matrices learning-from-data inference nudge-targets algorithms inverse-problems consider:feature-discovery</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:57b9af7313ac/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:matrices"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inverse-problems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:feature-discovery"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1512.06238">
    <title>[1512.06238] The Limitations of Optimization from Samples</title>
    <dc:date>2015-12-28T13:17:13+00:00</dc:date>
    <link>http://arxiv.org/abs/1512.06238</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[As we grow highly dependent on data for making predictions, we translate these predictions into models that help us make informed decisions. But what are the guarantees we have? Can we optimize decisions on models learned from data and be guaranteed that we achieve desirable outcomes? In this paper we formalize this question through a novel model called optimization from samples (OPS). In the OPS model, we are given sampled values of a function drawn from some distribution and our objective is to optimize the function under some constraint. Our main interest is in the following question: are functions that are learnable (from samples) and approximable (given oracle access to the function) also optimizable from samples? 
We show that there are classes of submodular functions which have desirable approximation and learnability guarantees and for which no reasonable approximation for optimizing from samples is achievable. In particular, our main result shows that even for maximization of coverage functions under a cardinality constraint k, there exists a hypothesis class of functions that cannot be approximated within a factor of $n^{-1/4+\epsilon}$ (for any constant $\epsilon>0$) of the optimal solution, from samples drawn from the uniform distribution over all sets of size at most k. In the general case of monotone submodular functions, we show an $n^{-1/3+\epsilon}$ lower bound and an almost matching $\tilde{\Omega}(n^{-1/3})$-optimization from samples algorithm. Additive and unit-demand functions can be optimized from samples to within arbitrarily good precision. Finally, we also consider a corresponding notion of additive approximation for continuous optimization from samples, and show near-optimal hardness for concave maximization and convex minimization.
]]></description>
<dc:subject>theory-and-practice-sitting-in-a-tree statistics inference learning-from-data no-free-lunch nudge-targets consider:stress-testing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:e3e7d573d961/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:theory-and-practice-sitting-in-a-tree"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:no-free-lunch"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:stress-testing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1501.00592">
    <title>[1501.00592] Robust Classification of High Dimension Low Sample Size Data</title>
    <dc:date>2015-07-05T11:27:46+00:00</dc:date>
    <link>http://arxiv.org/abs/1501.00592</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[The robustification of pattern recognition techniques has been the subject of intense research in recent years. Despite the multiplicity of papers on the subject, very few articles have deeply explored the topic of robust classification in the high dimension low sample size context. In this work, we explore and compare the predictive performances of robust classification techniques with a special concentration on robust discriminant analysis and robust PCA applied to a wide variety of large p small n data sets. We also explore the performance of random forest by way of comparing and contrasting the differences between single model methods and ensemble methods in this context. Our work reveals that Random Forest, although not inherently designed to be robust to outliers, substantially outperforms the existing techniques specifically designed to achieve robustness. Indeed, random forest emerges as the best predictively on both real life and simulated data.
]]></description>
<dc:subject>classification robustness horse-races wide-data learning-from-data nudge-targets consider:stress-testing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:176eacdf8fca/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:classification"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:robustness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:horse-races"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:wide-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:stress-testing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1503.03745">
    <title>[1503.03745] Weiqi games as a loop-free tree</title>
    <dc:date>2015-03-15T12:20:58+00:00</dc:date>
    <link>http://arxiv.org/abs/1503.03745</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Weiqi is one of the most complex board games played by two persons. The placement strategies adopted by Weiqi players are often used to analog the philosophy of human wars. Contrary to the western chess, Weiqi games are less studied by academics partially because Weiqi is popular only in East Asia, especially in China, Japan and Korea. Here, we propose to construct a directed tree using a database of extensive Weiqi games and perform a quantitative analysis of the Weiqi tree. We find that the popularity distribution of Weiqi openings with a same number of moves is distributed according to a power law and the tail exponent increases with the number of moves. Intriguingly, the superposition of the popularity distributions of Weiqi openings with the number of moves no more than a given number also has a power-law tail in which the tail exponent increases with the number of moves, and the superposed distribution approaches to the Zipf law. These findings are the same as for chess and support the conjecture that the popularity distribution of board game openings follows the Zipf law with a universal exponent. We also find that the distribution of out-degrees has a power-law form, the distribution of branching ratios has a very complicated pattern, and the distribution of uniqueness scores defined by the path lengths from the root vertex to the leaf vertices exhibits a unimodal shape. Our work provides a promising direction for the study of the decision making process of Weiqi playing from the angle of directed branching tree.
]]></description>
<dc:subject>games board-games planning hard-problems strategy learning-from-data rather-interesting nudge-targets Go consider:mining-db-and-coevolving</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:a6016bb2084c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:games"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:board-games"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:planning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:hard-problems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:strategy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:Go"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:mining-db-and-coevolving"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1501.04656">
    <title>[1501.04656] Microscopic Advances with Large-Scale Learning: Stochastic Optimization for Cryo-EM</title>
    <dc:date>2015-02-06T12:01:47+00:00</dc:date>
    <link>http://arxiv.org/abs/1501.04656</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Determining the 3D structures of biological molecules is a key problem for both biology and medicine. Electron Cryomicroscopy (Cryo-EM) is a promising technique for structure estimation which relies heavily on computational methods to reconstruct 3D structures from 2D images. This paper introduces the challenging Cryo-EM density estimation problem as a novel application for stochastic optimization techniques. Structure discovery is formulated as MAP estimation in a probabilistic latent-variable model, resulting in an optimization problem to which an array of seven stochastic optimization methods are applied. The methods are tested on both real and synthetic data, with some methods recovering reasonable structures in less than one epoch from a random initialization. Complex quasi-Newton methods are found to converge more slowly than simple gradient-based methods, but all stochastic methods are found to converge to similar optima. This method represents a major improvement over existing methods as it is significantly faster and is able to converge from a random initialization.
]]></description>
<dc:subject>microscopy machine-learning learning-from-data algorithms nudge-targets image-processing inverse-problems inference simulation rather-interesting</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:8aa670d0f4ca/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:microscopy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inverse-problems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:simulation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1403.2150">
    <title>[1403.2150] Constraint-based Causal Discovery from Multiple Interventions over Overlapping Variable Sets</title>
    <dc:date>2015-01-11T19:51:23+00:00</dc:date>
    <link>http://arxiv.org/abs/1403.2150</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Scientific practice typically involves repeatedly studying a system, each time trying to unravel a different perspective. In each study, the scientist may take measurements under different experimental conditions (interventions, manipulations, perturbations) and measure different sets of quantities (variables). The result is a collection of heterogeneous data sets coming from different data distributions. In this work, we present algorithm COmbINE, which accepts a collection of data sets over overlapping variable sets under different experimental conditions; COmbINE then outputs a summary of all causal models indicating the invariant and variant structural characteristics of all models that simultaneously fit all of the input data sets. COmbINE converts estimated dependencies and independencies in the data into path constraints on the data-generating causal model and encodes them as a SAT instance. The algorithm is sound and complete in the sample limit. To account for conflicting constraints arising from statistical errors, we introduce a general method for sorting constraints in order of confidence, computed as a function of their corresponding p-values. In our empirical evaluation, COmbINE outperforms in terms of efficiency the only pre-existing similar algorithm; the latter additionally admits feedback cycles, but does not admit conflicting constraints which hinders the applicability on real data. As a proof-of-concept, COmbINE is employed to co-analyze 4 real, mass-cytometry data sets measuring phosphorylated protein concentrations of overlapping protein sets under 3 different interventions.
]]></description>
<dc:subject>machine-learning algorithms experimental-design learning-from-data nudge-targets consistency</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:675e5c65190f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:experimental-design"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consistency"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1412.5903">
    <title>[1412.5903] Deep Structured Output Learning for Unconstrained Text Recognition</title>
    <dc:date>2014-12-25T20:54:42+00:00</dc:date>
    <link>http://arxiv.org/abs/1412.5903</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We develop a representation suitable for the unconstrained recognition of words in natural images: the general case of no fixed lexicon and unknown length. 
To this end we propose a convolutional neural network (CNN) based architecture which incorporates a Conditional Random Field (CRF) graphical model, taking the whole word image as a single input. The unaries of the CRF are provided by a CNN that predicts characters at each position of the output, while higher order terms are provided by another CNN that detects the presence of N-grams. We show that this entire model (CRF, character predictor, N-gram predictor) can be jointly optimised by back-propagating the structured output loss, essentially requiring the system to perform multi-task learning, and training uses purely synthetically generated data. The resulting model is a more accurate system on standard real-world text recognition benchmarks than character prediction alone, setting a benchmark for systems that have not been trained on a particular lexicon. In addition, our model achieves state-of-the-art accuracy in lexicon-constrained scenarios, without being specifically modelled for constrained recognition. To test the generalisation of our model, we also perform experiments with random alpha-numeric strings to evaluate the method when no visual language model is applicable.
]]></description>
<dc:subject>deep-learning OCR image-analysis learning-from-data image-segmentation rather-interesting data-fusion metaheuristics nudge-targets algorithms note:datasets</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:1a9ad135b427/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:deep-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:OCR"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-segmentation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:data-fusion"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:metaheuristics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:note:datasets"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1411.0707">
    <title>[1411.0707] A Nonparametric Adaptive Nonlinear Statistical Filter</title>
    <dc:date>2014-12-14T14:18:01+00:00</dc:date>
    <link>http://arxiv.org/abs/1411.0707</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We use statistical learning methods to construct an adaptive state estimator for nonlinear stochastic systems. Optimal state estimation, in the form of a Kalman filter, requires knowledge of the system's process and measurement uncertainty. We propose that these uncertainties can be estimated from (conditioned on) past observed data, and without making any assumptions of the system's prior distribution. The system's prior distribution at each time step is constructed from an ensemble of least-squares estimates on sub-sampled sets of the data via jackknife sampling. As new data is acquired, the state estimates, process uncertainty, and measurement uncertainty are updated accordingly, as described in this manuscript.
]]></description>
<dc:subject>statistics learning-from-data modeling algorithms nudge-targets consider:stress-testing consider:computational-effort</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:9b257c881478/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:modeling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:stress-testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:computational-effort"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1401.2431">
    <title>[1401.2431] Numerical methods for multiscale inverse problems</title>
    <dc:date>2014-12-13T12:35:12+00:00</dc:date>
    <link>http://arxiv.org/abs/1401.2431</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We will consider inverse problems for multiscale partial differential equations of the form $-\nabla\cdot(a^{\epsilon}\nabla u^{\epsilon})+b^{\epsilon}u^{\epsilon}=f$ in which solution data is used to determine coefficients in the equation. Such problems contain both the general difficulty of finding an inverse and the challenge of multiscale modeling, which is hard even for forward computations. The problem in its full generality is typically ill-posed and one approach is to reduce the dimensionality of the original problem by just considering the inverse of an effective equation without microscale $\epsilon$. We will here include microscale features directly in the inverse problem. In order to reduce the dimension of the unknowns and avoid ill-posedness, we will assume that the microscale can be accurately parametrized by piecewise smooth coefficients. We indicate in numerical examples how the technique can be applied to medical imaging and exploration seismology.
]]></description>
<dc:subject>inverse-problems models learning-from-data machine-learning operations-research nudge-targets</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:550c516cdd9a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inverse-problems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:operations-research"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1408.6618">
    <title>[1408.6618] Falsifiable implies Learnable</title>
    <dc:date>2014-12-08T21:51:16+00:00</dc:date>
    <link>http://arxiv.org/abs/1408.6618</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[The paper demonstrates that falsifiability is fundamental to learning. We prove the following theorem for statistical learning and sequential prediction: If a theory is falsifiable then it is learnable -- i.e. admits a strategy that predicts optimally. An analogous result is shown for universal induction.
]]></description>
<dc:subject>philosophy-of-science learning-from-data rather-odd rather-interesting machine-learning</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:a791ff531b0b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:philosophy-of-science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-odd"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1404.3606">
    <title>[1404.3606] PCANet: A Simple Deep Learning Baseline for Image Classification?</title>
    <dc:date>2014-11-29T20:19:25+00:00</dc:date>
    <link>http://arxiv.org/abs/1404.3606</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[In this work, we propose a very simple deep learning network for image classification which comprises only the very basic data processing components: cascaded principal component analysis (PCA), binary hashing, and block-wise histograms. In the proposed architecture, PCA is employed to learn multistage filter banks. It is followed by simple binary hashing and block histograms for indexing and pooling. This architecture is thus named as a PCA network (PCANet) and can be designed and learned extremely easily and efficiently. For comparison and better understanding, we also introduce and study two simple variations to the PCANet, namely the RandNet and LDANet. They share the same topology of PCANet but their cascaded filters are either selected randomly or learned from LDA. We have tested these basic networks extensively on many benchmark visual datasets for different tasks, such as LFW for face verification, MultiPIE, Extended Yale B, AR, FERET datasets for face recognition, as well as MNIST for hand-written digits recognition. Surprisingly, for all tasks, such a seemingly naive PCANet model is on par with the state of the art features, either prefixed, highly hand-crafted or carefully learned (by DNNs). Even more surprisingly, it sets new records for many classification tasks in Extended Yale B, AR, FERET datasets, and MNIST variations. Additional experiments on other public datasets also demonstrate the potential of the PCANet serving as a simple but highly competitive baseline for texture classification and object recognition.
]]></description>
<dc:subject>deep-learning neural-networks image-analysis image-processing learning-from-data machine-learning nudge-targets algorithms</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:934feb187e3a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:deep-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1402.3902">
    <title>[1402.3902] Sparse Polynomial Learning and Graph Sketching</title>
    <dc:date>2014-11-14T11:38:51+00:00</dc:date>
    <link>http://arxiv.org/abs/1402.3902</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Let $f:\{-1,1\}^n$ be a polynomial with at most $s$ non-zero real coefficients. We give an algorithm for exactly reconstructing $f$ given random examples from the uniform distribution on $\{-1,1\}^n$ that runs in time polynomial in $n$ and $2^s$ and succeeds if the function satisfies the unique sign property: there is one output value which corresponds to a unique set of values of the participating parities. This sufficient condition is satisfied when every coefficient of $f$ is perturbed by a small random noise, or satisfied with high probability when $s$ parity functions are chosen randomly or when all the coefficients are positive. Learning sparse polynomials over the Boolean domain in time polynomial in $n$ and $2^s$ is considered notoriously hard in the worst-case. Our result shows that the problem is tractable for almost all sparse polynomials. Then, we show an application of this result to hypergraph sketching which is the problem of learning a sparse (both in the number of hyperedges and the size of the hyperedges) hypergraph from uniformly drawn random cuts. We also provide experimental results on a real world dataset.
]]></description>
<dc:subject>inference learning-from-data learning-theory algorithms formalization rather-odd rather-interesting nudge-targets not-quite-sure-I-see-the-points</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:f61be40c858f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:formalization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-odd"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:not-quite-sure-I-see-the-points"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1407.3017">
    <title>[1407.3017] A Bayesian Approach for Parameter Estimation and Prediction using a Computationally Intensive Model</title>
    <dc:date>2014-10-09T11:39:00+00:00</dc:date>
    <link>http://arxiv.org/abs/1407.3017</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Bayesian methods have been very successful in quantifying uncertainty in physics-based problems in parameter estimation and prediction. In these cases, physical measurements y are modeled as the best fit of a physics-based model η(θ) where θ denotes the uncertain, best input setting. Hence the statistical model is of the form y=η(θ)+ϵ, where ϵ accounts for measurement, and possibly other error sources. When non-linearity is present in η(⋅), the resulting posterior distribution for the unknown parameters in the Bayesian formulation is typically complex and non-standard, requiring computationally demanding computational approaches such as Markov chain Monte Carlo (MCMC) to produce multivariate draws from the posterior. While quite generally applicable, MCMC requires thousands, or even millions of evaluations of the physics model η(⋅). This is problematic if the model takes hours or days to evaluate. To overcome this computational bottleneck, we present an approach adapted from Bayesian model calibration. This approach combines output from an ensemble of computational model runs with physical measurements, within a statistical formulation, to carry out inference. A key component of this approach is a statistical response surface, or emulator, estimated from the ensemble of model runs. We demonstrate this approach with a case study in estimating parameters for a density functional theory (DFT) model, using experimental mass/binding energy measurements from a collection of atomic nuclei. We also demonstrate how this approach produces uncertainties in predictions for recent mass measurements obtained at Argonne National Laboratory (ANL).
]]></description>
<dc:subject>modeling probability-theory inference nudge-targets learning-from-data machine-learning consider:all-kinds-of-stuff</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:7305ecc0b9fd/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:modeling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:probability-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:all-kinds-of-stuff"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1312.6607">
    <title>[1312.6607] Using Latent Binary Variables for Online Reconstruction of Large Scale Systems</title>
    <dc:date>2014-04-19T08:17:56+00:00</dc:date>
    <link>http://arxiv.org/abs/1312.6607</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We propose a probabilistic graphical model realizing a minimal encoding of real variables dependencies based on possibly incomplete observation and an empirical cumulative distribution function per variable. The target application is a large scale partially observed system, like e.g. a traffic network, where a small proportion of real valued variables are observed, and the other variables have to be predicted. Our design objective is therefore to have good scalability in a real-time setting. Instead of attempting to encode the dependencies of the system directly in the description space, we propose a way to encode them in a latent space of binary variables, reflecting a rough perception of the observable (congested/non-congested for a traffic road). The method relies in part on message passing algorithms, i.e. belief propagation, but the core of the work concerns the definition of meaningful latent variables associated to the variables of interest and their pairwise dependencies. Numerical experiments demonstrate the applicability of the method in practice.
]]></description>
<dc:subject>graphical-models inference statistics learning-from-data algorithms nudge-targets consider:the-other-path consider:complexity-vs-accuracy</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:905f20e1710e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:graphical-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:the-other-path"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:complexity-vs-accuracy"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1205.4265">
    <title>[1205.4265] Quantifying synergistic mutual information</title>
    <dc:date>2014-04-19T07:58:02+00:00</dc:date>
    <link>http://arxiv.org/abs/1205.4265</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Quantifying cooperation or synergy among random variables in predicting a single target random variable is an important problem in many complex systems. We review three prior information-theoretic measures of synergy and introduce a novel synergy measure defined as the difference between the whole and the union of its parts. We apply all four measures against a suite of binary circuits to demonstrate that our measure alone quantifies the intuitive concept of synergy across all examples. We show that for our measure of synergy that independent predictors can have positive redundant information.
]]></description>
<dc:subject>information-theory statistics prediction models synergy learning-from-data nudge</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:8eeae2c1c345/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:information-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:synergy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1403.1942">
    <title>[1403.1942] Predictive Overlapping Co-Clustering</title>
    <dc:date>2014-04-04T11:33:17+00:00</dc:date>
    <link>http://arxiv.org/abs/1403.1942</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[In the past few years co-clustering has emerged as an important data mining tool for two way data analysis. Co-clustering is more advantageous over traditional one dimensional clustering in many ways such as, ability to find highly correlated sub-groups of rows and columns. However, one of the overlooked benefits of co-clustering is that, it can be used to extract meaningful knowledge for various other knowledge extraction purposes. For example, building predictive models with high dimensional data and heterogeneous population is a non-trivial task. Co-clusters extracted from such data, which shows similar pattern in both the dimension, can be used for a more accurate predictive model building. Several applications such as finding patient-disease cohorts in health care analysis, finding user-genre groups in recommendation systems and community detection problems can benefit from co-clustering technique that utilizes the predictive power of the data to generate co-clusters for improved data analysis. 
In this paper, we present the novel idea of Predictive Overlapping Co-Clustering (POCC) as an optimization problem for a more effective and improved predictive analysis. Our algorithm generates optimal co-clusters by maximizing predictive power of the co-clusters subject to the constraints on the number of row and column clusters. In this paper precision, recall and f-measure have been used as evaluation measures of the resulting co-clusters. Results of our algorithm has been compared with two other well-known techniques - K-means and Spectral co-clustering, over four real data set namely, Leukemia, Internet-Ads, Ovarian cancer and MovieLens data set. The results demonstrate the effectiveness and utility of our algorithm POCC in practice.
]]></description>
<dc:subject>clustering data-analysis nudge-targets learning-from-data machine-learning algorithms statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:cdd52fd98630/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:data-analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1310.3167">
    <title>[1310.3167] Well-Posedness And Accuracy Of The Ensemble Kalman Filter In Discrete And Continuous Time</title>
    <dc:date>2014-03-31T11:16:25+00:00</dc:date>
    <link>http://arxiv.org/abs/1310.3167</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[The ensemble Kalman filter (EnKF) is a method for combining a dynamical model with data in a sequential fashion. Despite its widespread use, there has been little analysis of its theoretical properties. Many of the algorithmic innovations associated with the filter, which are required to make a useable algorithm in practice, are derived in an ad hoc fashion. The aim of this paper is to initiate the development of a systematic analysis of the EnKF, in particular to do so in the small ensemble size limit. The perspective is to view the method as a state estimator, and not as an algorithm which approximates the true filtering distribution. The perturbed observation version of the algorithm is studied, without and with variance inflation. Without variance inflation well-posedness of the filter is established; with variance inflation accuracy of the filter, with respect to the true signal underlying the data, is established. The algorithm is considered in discrete time, and also for a continuous time limit arising when observations are frequent and subject to large noise. The underlying dynamical model, and assumptions about it, is sufficiently general to include the Lorenz '63 and '96 models, together with the incompressible Navier-Stokes equation on a two-dimensional torus. The analysis is limited to the case of complete observation of the signal with additive white noise. Numerical results are presented for the Navier-Stokes equation on a two-dimensional torus for both complete and partial observations of the signal with additive white noise.
]]></description>
<dc:subject>theory-and-practice-sitting-in-a-tree statistics algorithms interesting learning-from-data learning-by-doing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:b33bdbc4df9b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:theory-and-practice-sitting-in-a-tree"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-by-doing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://jmlr.org/papers/v14/clark13a.html">
    <title>Learning Trees from Strings: A Strong Learning Algorithm for some Context-Free Grammars</title>
    <dc:date>2014-01-28T11:26:49+00:00</dc:date>
    <link>http://jmlr.org/papers/v14/clark13a.html</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Standard models of language learning are concerned with weak learning: the learner, receiving as input only information about the strings in the language, must learn to generalise and to generate the correct, potentially infinite, set of strings generated by some target grammar. Here we define the corresponding notion of strong learning: the learner, again only receiving strings as input, must learn a grammar that generates the correct set of structures or parse trees. We formalise this using a modification of Gold's identification in the limit model, requiring convergence to a grammar that is isomorphic to the target grammar. We take as our starting point a simple learning algorithm for substitutable context-free languages, based on principles of distributional learning, and modify it so that it will converge to a canonical grammar for each language. We prove a corresponding strong learning result for a subclass of context-free grammars.
]]></description>
<dc:subject>via:cshalizi learning-from-data inference grammar nudge-targets see:Sipper see:Fogel</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:d2b7453ea92c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:via:cshalizi"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:grammar"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:see:Sipper"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:see:Fogel"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1311.4486">
    <title>[1311.4486] Discriminative Density-ratio Estimation</title>
    <dc:date>2014-01-14T11:59:00+00:00</dc:date>
    <link>http://arxiv.org/abs/1311.4486</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[The covariate shift is a challenging problem in supervised learning that results from the discrepancy between the training and test distributions. An effective approach which recently drew a considerable attention in the research community is to reweight the training samples to minimize that discrepancy. In specific, many methods are based on developing Density-ratio (DR) estimation techniques that apply to both regression and classification problems. Although these methods work well for regression problems, their performance on classification problems is not satisfactory. This is due to a key observation that these methods focus on matching the sample marginal distributions without paying attention to preserving the separation between classes in the reweighted space. In this paper, we propose a novel method for Discriminative Density-ratio (DDR) estimation that addresses the aforementioned problem and aims at estimating the density-ratio of joint distributions in a class-wise manner. The proposed algorithm is an iterative procedure that alternates between estimating the class information for the test data and estimating new density ratio for each class. To incorporate the estimated class information of the test data, a soft matching technique is proposed. In addition, we employ an effective criterion which adopts mutual information as an indicator to stop the iterative procedure while resulting in a decision boundary that lies in a sparse region. Experiments on synthetic and benchmark datasets demonstrate the superiority of the proposed method in terms of both accuracy and robustness.
]]></description>
<dc:subject>performance-measure learning-from-data the-mangle-in-practice covariance statistics nudge-targets consider:define-your-goals consider:data-balancing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:8187ff7d0f6d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:performance-measure"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:the-mangle-in-practice"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:covariance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:define-your-goals"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:data-balancing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1311.5763">
    <title>[1311.5763] Automated and Weighted Self-Organizing Time Maps</title>
    <dc:date>2013-11-29T13:21:34+00:00</dc:date>
    <link>http://arxiv.org/abs/1311.5763</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[This paper proposes schemes for automated and weighted Self-Organizing Time Maps (SOTMs). The SOTM provides means for a visual approach to evolutionary clustering, which aims at producing a sequence of clustering solutions. This task we denote as visual dynamic clustering. The implication of an automated SOTM is not only a data-driven parametrization of the SOTM, but also the feature of adjusting the training to the characteristics of the data at each time step. The aim of the weighted SOTM is to improve learning from more trustworthy or important data with an instance-varying weight. The schemes for automated and weighted SOTMs are illustrated on two real-world datasets: (i) country-level risk indicators to measure the evolution of global imbalances, and (ii) credit applicant data to measure the evolution of firm-level credit risks.
]]></description>
<dc:subject>visualization time-series SOMs statistics learning-from-data interesting feature-extraction</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:598552ce08d0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:visualization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:time-series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:SOMs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:feature-extraction"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1310.6767">
    <title>[1310.6767] Curiosity Based Exploration for Learning Terrain Models</title>
    <dc:date>2013-11-03T12:19:37+00:00</dc:date>
    <link>http://arxiv.org/abs/1310.6767</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We present a robotic exploration technique in which the goal is to learn a visual model and be able to distinguish between different terrains and other visual components in an unknown environment. We use ROST, a realtime online spatiotemporal topic modeling framework to model these terrains using the observations made by the robot, and then use an information theoretic path planning technique to define the exploration path. We conduct experiments with aerial view and underwater datasets with millions of observations and varying path lengths, and find that paths that are biased towards locations with high topic perplexity produce better terrain models with high discriminative power, especially with paths of length close to the diameter of the world.
]]></description>
<dc:subject>algorithms exploration robotics curiosity diversity nudge-targets learning-from-data planning</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:64d8485d0abd/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:exploration"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:robotics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:curiosity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:diversity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:planning"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1309.5823">
    <title>[1309.5823] A Kernel Classification Framework for Metric Learning</title>
    <dc:date>2013-09-24T11:29:42+00:00</dc:date>
    <link>http://arxiv.org/abs/1309.5823</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Learning a distance metric from the given training samples plays a crucial role in many machine learning tasks, and various models and optimization algorithms have been proposed in the past decade. In this paper, we generalize several state-of-the-art metric learning methods, such as large margin nearest neighbor (LMNN) and information theoretic metric learning (ITML), into a kernel classification framework. First, doublets and triplets are constructed from the training samples, and a family of degree-2 polynomial kernel functions are proposed for pairs of doublets or triplets. Then, a kernel classification framework is established, which can not only generalize many popular metric learning methods such as LMNN and ITML, but also suggest new metric learning methods, which can be efficiently implemented, interestingly, by using the standard support vector machine (SVM) solvers. Two novel metric learning methods, namely doublet-SVM and triplet-SVM, are then developed under the proposed framework. Experimental results show that doublet-SVM and triplet-SVM achieve competitive classification accuracies with state-of-the-art metric learning methods such as ITML and LMNN but with significantly less training time.
]]></description>
<dc:subject>learning-from-data machine-learning algorithms metrics nudge-targets</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:2d6c8b29f665/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:metrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1211.4909">
    <title>[1211.4909] Fast Marginalized Block Sparse Bayesian Learning Algorithm</title>
    <dc:date>2013-09-20T12:21:03+00:00</dc:date>
    <link>http://arxiv.org/abs/1211.4909</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[The performance of sparse signal recovery from noise corrupted, underdetermined measurements can be improved if both sparsity and correlation structure of signals are exploited. One typical correlation structure is the intra-block correlation in block sparse signals. To exploit this structure, a framework, called block sparse Bayesian learning (BSBL), has been proposed recently. Algorithms derived from this framework showed superior performance but they are not very fast, which limits their applications. This work derives an efficient algorithm from this framework, using a marginalized likelihood maximization method. Compared to existing BSBL algorithms, it has close recovery performance but is much faster. Therefore, it is more suitable for large scale datasets and applications requiring real-time implementation.
]]></description>
<dc:subject>signal-processing learning-from-data sparse-stuff nudge-targets algorithms consider:symbolic-regression</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:fc03a58966cb/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:signal-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:sparse-stuff"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:symbolic-regression"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1308.6273">
    <title>[1308.6273] New Algorithms for Learning Incoherent and Overcomplete Dictionaries</title>
    <dc:date>2013-09-17T12:29:41+00:00</dc:date>
    <link>http://arxiv.org/abs/1308.6273</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[...Here we give the first polynomial time algorithm for dictionary learning when $A$ is overcomplete. It succeeds under natural conditions on how $X$ is generated, provided that $X$ has at most $$k \leq c \min(\sqrt{n}/\mu \log n, m^{1/2 - \epsilon})$$ non-zero entries (for any $\epsilon > 0$). In other words it can handle almost as many non-zeros as the best sparse recovery algorithms could tolerate {\em even if one knew the dictionary $A$ exactly}.
]]></description>
<dc:subject>compressed-sensing sparse-learning optimization representation learning-from-data nudge-targets computational-complexity consider:stress-testing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:b846c1cfe73b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:compressed-sensing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:sparse-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:optimization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:computational-complexity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:stress-testing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1309.4061">
    <title>[1309.4061] Learning a Loopy Model For Semantic Segmentation Exactly</title>
    <dc:date>2013-09-17T11:16:10+00:00</dc:date>
    <link>http://arxiv.org/abs/1309.4061</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Learning structured models using maximum margin techniques has become an indispensable tool for computer vision researchers, as many computer vision applications can be cast naturally as an image labeling problem. Pixel-based or superpixel-based conditional random fields are particularly popular examples. Typically, neighborhood graphs, which contain a large number of cycles, are used. As exact inference in loopy graphs is NP-hard in general, learning these models without approximations is usually deemed infeasible. In this work we show that, despite the theoretical hardness, it is possible to learn loopy models exactly in practical applications. To this end, we analyze the use of multiple approximate inference techniques together with cutting plane training of structural SVMs. We show that our proposed method yields exact solutions with an optimality guarantees in a computer vision application, for little additional computational cost. We also propose a dynamic caching scheme to accelerate training further, yielding runtimes that are comparable with approximate methods. We hope that this insight can lead to a reconsideration of the tractability of loopy models in computer vision.
]]></description>
<dc:subject>image-segmentation learning-from-data algorithms nudge-targets interesting collective-intelligence</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:95182a628714/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-segmentation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:collective-intelligence"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/0912.5193">
    <title>[0912.5193] Ranking relations using analogies in biological and information networks</title>
    <dc:date>2013-09-03T18:08:54+00:00</dc:date>
    <link>http://arxiv.org/abs/0912.5193</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Analogical reasoning depends fundamentally on the ability to learn and generalize about relations between objects. We develop an approach to relational learning which, given a set of pairs of objects $\mathbf{S}=\{A^{(1)}:B^{(1)},A^{(2)}:B^{(2)},\ldots,A^{(N)}:B ^{(N)}\}$, measures how well other pairs A:B fit in with the set $\mathbf{S}$. Our work addresses the following question: is the relation between objects A and B analogous to those relations found in $\mathbf{S}$? Such questions are particularly relevant in information retrieval, where an investigator might want to search for analogous pairs of objects that match the query set of interest. There are many ways in which objects can be related, making the task of measuring analogies very challenging. Our approach combines a similarity measure on function spaces with Bayesian analysis to produce a ranking. It requires data containing features of the objects of interest and a link matrix specifying which relationships exist; no further attributes of such relationships are necessary. We illustrate the potential of our method on text analysis and information networks. An application on discovering functional interactions between pairs of proteins is discussed in detail, where we show that our approach can work in practice even if a small set of protein pairs is provided.
]]></description>
<dc:subject>analogies learning-from-data machine-learning algorithms natural-language-processing artificial-intelligence nudge-targets digital-humanities</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:a9165c420b21/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:analogies"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:natural-language-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:artificial-intelligence"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:digital-humanities"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1303.3257">
    <title>[1303.3257] Ranking and combining multiple predictors without labeled data</title>
    <dc:date>2013-08-31T21:02:26+00:00</dc:date>
    <link>http://arxiv.org/abs/1303.3257</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[In a broad range of classification and decision making problems, one is given the advice or predictions of several classifiers, of unknown reliability, over multiple questions or queries. This scenario is different from the standard supervised setting, where each classifier accuracy can be assessed using available labeled data, and raises two questions: given only the predictions of several classifiers over a large set of unlabeled test data, is it possible to a) reliably rank them; and b) construct a meta-classifier more accurate than most classifiers in the ensemble? 
Here we present a novel spectral approach to address these questions. First, assuming conditional independence between classifiers, we show that the off-diagonal entries of their covariance matrix correspond to a rank-one matrix. Moreover, the classifiers can be ranked using the leading eigenvector of this covariance matrix, as its entries are proportional to their balanced accuracies. Second, via a linear approximation to the maximum likelihood estimator, we derive the Spectral Meta-Learner (SML), a novel ensemble classifier whose weights are equal to this eigenvector entries. On both simulated and real data, SML typically achieves a higher accuracy than most classifiers in the ensemble and can provide a better starting point than majority voting, for estimating the maximum likelihood solution. Furthermore, SML is robust to the presence of small malicious groups of classifiers designed to veer the ensemble prediction away from the (unknown) ground truth.
]]></description>
<dc:subject>statistics learning-from-data meta-optimization machine-learning algorithms nudge-targets interesting</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:bea7799e3f05/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:meta-optimization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:interesting"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1303.7264">
    <title>[1303.7264] Scalable Text and Link Analysis with Mixed-Topic Link Models</title>
    <dc:date>2013-04-01T15:50:37+00:00</dc:date>
    <link>http://arxiv.org/abs/1303.7264</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Many data sets contain rich information about objects, as well as pairwise relations between them. For instance, in networks of websites, scientific papers, and other documents, each node has content consisting of a collection of words, as well as hyperlinks or citations to other nodes. In order to perform inference on such data sets, and make predictions and recommendations, it is useful to have models that are able to capture the processes which generate the text at each node and the links between them. In this paper, we combine classic ideas in topic modeling with a variant of the mixed-membership block model recently developed in the statistical physics community. The resulting model has the advantage that its parameters, including the mixture of topics of each document and the resulting overlapping communities, can be inferred with a simple and scalable expectation-maximization algorithm. We test our model on three data sets, performing unsupervised topic classification and link prediction. For both tasks, our model outperforms several existing state-of-the-art methods, achieving higher accuracy with significantly less computation, analyzing a data set with 1.3 million words and 44 thousand links in a few minutes.]]></description>
<dc:subject>text-mining digital-humanities algorithms natural-language-processing clustering learning-from-data nudge-targets Cris-Moore</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:2aec8f71fc29/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:text-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:digital-humanities"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:natural-language-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:Cris-Moore"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1302.4242">
    <title>[1302.4242] Metrics for Multivariate Dictionaries</title>
    <dc:date>2013-03-10T11:45:45+00:00</dc:date>
    <link>http://arxiv.org/abs/1302.4242</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Overcomplete representations and dictionary learning algorithms kept attracting a growing interest in the machine learning community. This paper addresses the emerging problem of comparing multivariate overcomplete representations. Despite a recurrent need to rely on a distance for learning or assessing multivariate overcomplete representations, no metrics in their underlying spaces have yet been proposed. Henceforth we propose to study overcomplete representations from the perspective of frame theory and matrix manifolds. We consider distances between multivariate dictionaries as distances between their spans which reveal to be elements of a Grassmannian manifold. We introduce Wasserstein-like set-metrics defined on Grassmannian spaces and study their properties both theoretically and numerically. Indeed a deep experimental study based on tailored synthetic datasets and real EEG signals for Brain-Computer Interfaces (BCI) have been conducted. In particular, the introduced metrics have been embedded in clustering algorithm and applied to BCI Competition IV-2a for dataset quality assessment. Besides, a principled connection is made between three close but still disjoint research fields, namely, Grassmannian packing, dictionary learning and compressed sensing.]]></description>
<dc:subject>feature-extraction representation nudge-targets models learning-from-data data-analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:69ecea8c9037/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:feature-extraction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:data-analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1302.1543">
    <title>[1302.1543] Probability Update: Conditioning vs. Cross-Entropy</title>
    <dc:date>2013-02-17T12:50:05+00:00</dc:date>
    <link>http://arxiv.org/abs/1302.1543</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Conditioning is the generally agreed-upon method for updating probability distributions when one learns that an event is certainly true. But it has been argued that we need other rules, in particular the rule of cross-entropy minimization, to handle updates that involve uncertain information. In this paper we re-examine such a case: van Fraassen's Judy Benjamin problem, which in essence asks how one might update given the value of a conditional probability. We argue that -- contrary to the suggestions in the literature -- it is possible to use simple conditionalization in this case, and thereby obtain answers that agree fully with intuition. This contrasts with proposals such as cross-entropy, which are easier to apply but can give unsatisfactory answers. Based on the lessons from this example, we speculate on some general philosophical issues concerning probability update.]]></description>
<dc:subject>probability-theory philosophy learning-from-data theory-and-practice-sitting-in-a-tree</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:30e3a83ce5c1/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:probability-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:philosophy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:theory-and-practice-sitting-in-a-tree"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://school.maths.uwa.edu.au/~gordon/sudokumin.php">
    <title>Minimum Sudoku</title>
    <dc:date>2013-01-23T00:49:07+00:00</dc:date>
    <link>http://school.maths.uwa.edu.au/~gordon/sudokumin.php</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[This page is concerned with the question of what is the smallest number of entries in a Sudoku puzzle that has a unique completion.

At the moment, there are examples of 17-hint uniquely completable Sudoku puzzles, but no known 16-hint examples. Hence I am collecting as many 17-hint examples as possible, in the hope that their analysis will yield some insight.

Currently I have a collection of 49151 distinct Sudoku configurations with 17 entries

]]></description>
<dc:subject>sudoku training-data learning-from-data genetic-programming nudge-targets</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:8c7c52a17497/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:sudoku"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:training-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:genetic-programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.setbb.com/phpbb/viewtopic.php?mforum=sudoku&amp;p=8575">
    <title>Sudoku Programmers :: View topic - Datasets for solver benchmarking</title>
    <dc:date>2013-01-23T00:39:34+00:00</dc:date>
    <link>http://www.setbb.com/phpbb/viewtopic.php?mforum=sudoku&amp;p=8575</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[the data files (except s17) are posted at: 
http://www.research.att.com/~gsf/sudoku/data/m1n-100000.dat.gz etc. 

each contains: 
Code:

puzzle # Cclues[.minimal] [Ssymmetry.order] active-constraints 

where active constraints are constraints that produce at least one placement/elimination 
and minimal is m for minimal and M for symmetric minimal 
]]></description>
<dc:subject>sudoku puzzles datasets learning-from-data learning-by-watching nudge-targets genetic-programming</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:79ad923f85c9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:sudoku"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:puzzles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:datasets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-by-watching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:genetic-programming"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1204.2765">
    <title>[1204.2765] A practical approach to language complexity: a Wikipedia case study</title>
    <dc:date>2012-09-23T12:04:42+00:00</dc:date>
    <link>http://arxiv.org/abs/1204.2765</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[In this paper we present statistical analysis of English texts from Wikipedia. We try to address the issue of language complexity empirically by comparing the simple English Wikipedia (Simple) to comparable samples of the main English Wikipedia (Main). Simple is supposed to use a more simplified language with a limited vocabulary, and editors are explicitly requested to follow this guideline, yet in practice the vocabulary richness of both samples are at the same level. Detailed analysis of longer units (n-grams of words and part of speech tags) shows that the language of Simple is less complex than that of Main primarily due to the use of shorter sentences, as opposed to drastically simplified syntax or vocabulary. Comparing the two language varieties by the Gunning readability index supports this conclusion. We also report on the topical dependence of language complexity, e.g. that the language is more advanced in conceptual articles compared to person-based (biographical) and object-based articles. Finally, we investigate the relation between conflict and language complexity by analyzing the content of the talk pages associated to controversial and peacefully developing articles, concluding that controversy has the effect of reducing language complexity.]]></description>
<dc:subject>learning-from-data natural-language-processing statistics subjective-measures corpora wikipedia</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:9eaeba0a36d9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:natural-language-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:subjective-measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:corpora"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:wikipedia"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1206.0773">
    <title>[1206.0773] Changepoint Detection over Graphs with the Spectral Scan Statistic</title>
    <dc:date>2012-08-29T11:13:54+00:00</dc:date>
    <link>http://arxiv.org/abs/1206.0773</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We consider the change-point detection problem of deciding, based on noisy measurements, whether an unknown signal over a given graph is constant or is instead piecewise constant over two connected induced subgraphs of relatively low cut size. We analyze the corresponding generalized likelihood ratio (GLR) statistics and relate it to the problem of finding a sparsest cut in a graph. We develop a tractable relaxation of the GLR statistic based on the combinatorial Laplacian of the graph, which we call the spectral scan statistic, and analyze its properties. We show how its performance as a testing procedure depends directly on the spectrum of the graph, and use this result to explicitly derive its asymptotic properties on few significant graph topologies. Finally, we demonstrate both theoretically and by simulations that the spectral scan statistic can outperform naive testing procedures based on edge thresholding and $\chi^2$ testing.]]></description>
<dc:subject>statistics inference graphs learning-from-data nudge-targets</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:3d6493317387/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:graphs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1111.3304">
    <title>[1111.3304] Eigenvector Synchronization, Graph Rigidity and the Molecule Problem</title>
    <dc:date>2012-03-06T11:59:41+00:00</dc:date>
    <link>http://arxiv.org/abs/1111.3304</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA["The graph realization problem has received a great deal of attention in recent years, due to its importance in applications such as wireless sensor networks and structural biology.…"]]></description>
<dc:subject>algorithms statistics structure learning-from-data nudge-targets</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:e751fb31531d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:structure"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1201.5568">
    <title>[1201.5568] Dynamic trees for streaming and massive data contexts</title>
    <dc:date>2012-01-30T21:11:20+00:00</dc:date>
    <link>http://arxiv.org/abs/1201.5568</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA["Data collection at a massive scale is becoming ubiquitous in a wide variety of settings, from vast offline databases to streaming real-time information. Learning algorithms deployed in such contexts must rely on single-pass inference, where the data history is never revisited. In streaming contexts, learning must also be temporally adaptive to remain up-to-date against unforeseen changes in the data generating mechanism. Although rapidly growing, the online Bayesian inference literature remains challenged by massive data and transient, evolving data streams. Non-parametric modelling techniques can prove particularly ill-suited, as the complexity of the model is allowed to increase with the sample size. In this work, we take steps to overcome these challenges by porting standard streaming techniques, like data discarding and downweighting, into a fully Bayesian framework via the use of informative priors and active learning heuristics. We showcase our methods by augmenting a modern non-parametric modelling framework, dynamic trees, and illustrate its performance on a number of practical examples. The end product is a powerful streaming regression and classification tool, whose performance compares favourably to the state-of-the-art."]]></description>
<dc:subject>data-analysis learning-from-data algorithms drinking-from-the-firehose nudge data-mining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:3f6d28022889/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:data-analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:drinking-from-the-firehose"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:data-mining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1109.2618">
    <title>[1109.2618] Fast and Accurate Modeling of Molecular Atomization Energies with Machine Learning</title>
    <dc:date>2012-01-05T13:34:06+00:00</dc:date>
    <link>http://arxiv.org/abs/1109.2618</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We introduce a machine learning model to predict atomization energies of a diverse set of organic molecules, based on nuclear charges and atomic positions only. The problem of solving the molecular Schrödinger equation is mapped onto a non-linear statistical regression problem of reduced complexity. Regression models are trained on and compared to atomization energies computed with hybrid density-functional theory. Cross-validation over more than seven thousand small organic molecules yields a mean absolute error of ~10 kcal/mol. Applicability is demonstrated for the prediction of molecular atomization potential energy curves.
]]></description>
<dc:subject>machine-learning learning-from-data biochemistry computational-science nudge-targets</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:4990c6d3af52/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:biochemistry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:computational-science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1109.3248">
    <title>[1109.3248] Reconstruction of sequential data with density models</title>
    <dc:date>2012-01-03T11:33:09+00:00</dc:date>
    <link>http://arxiv.org/abs/1109.3248</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We introduce the problem of reconstructing a sequence of multidimensional real vectors where some of the data are missing. This problem contains regression and mapping inversion as particular cases where the pattern of missing data is independent of the sequence index. The problem is hard because it involves possibly multivalued mappings at each vector in the sequence, where the missing variables can take more than one value given the present variables; and the set of missing variables can vary from one vector to the next. To solve this problem, we propose an algorithm based on two redundancy assumptions: vector redundancy (the data live in a low-dimensional manifold), so that the present variables constrain the missing ones; and sequence redundancy (e.g. continuity), so that consecutive vectors constrain each other. We capture the low-dimensional nature of the data in a probabilistic way with a joint density model, here the generative topographic mapping, which results in a Gaussian mixture. Candidate reconstructions at each vector are obtained as all the modes of the conditional distribution of missing variables given present variables. The reconstructed sequence is obtained by minimising a global constraint, here the sequence length, by dynamic programming. We present experimental results for a toy problem and for inverse kinematics of a robot arm.
]]></description>
<dc:subject>inverse-problems statistics algorithms learning-from-data nudge-targets</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:6479f3030a70/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inverse-problems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1112.5794">
    <title>[1112.5794] BATMAN-an R package for the automated quantification of metabolites from NMR spectra using a Bayesian Model</title>
    <dc:date>2012-01-02T15:16:22+00:00</dc:date>
    <link>http://arxiv.org/abs/1112.5794</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Motivation: NMR spectra are widely used in metabolomics to obtain metabolite profiles in complex biological mixtures. Common methods used to assign and estimate concentrations of metabolite involve either an expert manual peak fitting or extra pre-processing steps, such as peak alignment and binning. Peak fitting is very time consuming and is subject to human error. Conversely, alignment and binning can introduce artifacts and limit immediate biological interpretation of models. Results: We present the Bayesian AuTomated Metabolite Analyser for NMR spectra (BATMAN), an R package which deconvolves peaks from 1-dimensional NMR spectra, automatically assigns them to specific metabolites and obtains concentration estimates. The Bayesian model incorporates information on characteristic peak patterns of metabolites and is able to account for shifts in the position of peaks commonly seen in NMR spectra of biological samples. It applies a Markov Chain Monte Carlo (MCMC) algorithm to sample from a joint posterior distribution of the model parameters and obtains concentration estimates with reduced mean estimation error compared with conventional numerical integration methods.]]></description>
<dc:subject>learning-from-data statistics modeling biochemistry nudge-targets image-segmentation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:1a3062560e0a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:modeling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:biochemistry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-segmentation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1105.2584">
    <title>[1105.2584] Workload Classification &amp; Software Energy Measurement for Efficient Scheduling on Private Cloud Platforms</title>
    <dc:date>2011-10-10T10:58:16+00:00</dc:date>
    <link>http://arxiv.org/abs/1105.2584</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA["At present there are a number of barriers to creating an energy efficient workload scheduler for a Private Cloud based data center. Firstly, the relationship between different workloads and power consumption must be investigated. Secondly, current hardware-based solutions to providing energy usage statistics are unsuitable in warehouse scale data centers where low cost and scalability are desirable properties. In this paper we discuss the effect of different workloads on server power consumption in a Private Cloud platform. We display a noticeable difference in energy consumption when servers are given tasks that dominate various resources (CPU, Memory, Hard Disk and Network). We then use this insight to develop CloudMonitor, a software utility that is capable of >95% accurate power predictions from monitoring resource consumption of workloads, after a "training phase" in which a dynamic power model is developed."]]></description>
<dc:subject>operations-research cloud-computing system-administration learning-from-data nudge-targets</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:c2e098238a82/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:operations-research"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:cloud-computing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:system-administration"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1107.0674">
    <title>[1107.0674] &quot;Memory foam&quot; approach to unsupervised learning</title>
    <dc:date>2011-08-03T15:11:16+00:00</dc:date>
    <link>http://arxiv.org/abs/1107.0674</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA["We propose an alternative approach to construct an artificial learning system, which naturally learns in an unsupervised manner. Its mathematical prototype is a dynamical system, which automatically shapes its vector field in response to the input signal. The vector field converges to a gradient of a multi-dimensional probability density distribution of the input process, taken with negative sign. The most probable patterns are represented by the stable fixed points, whose basins of attraction are formed automatically. The performance of this system is illustrated with musical signals."]]></description>
<dc:subject>machine-learning classification learning-from-data algorithms nudge-targets rather-interesting</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:b6fc80c9eb86/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:classification"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1107.0550">
    <title>[1107.0550] 3D Terrestrial LiDAR data classification of complex natural scenes using a multi-scale dimensionality criterion: applications in geomorphology</title>
    <dc:date>2011-08-03T15:07:52+00:00</dc:date>
    <link>http://arxiv.org/abs/1107.0550</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA["3D point clouds of natural environments relevant to geomorphology problems (rivers, cliffs...) often require to classify the data into elementary relevant classes. A typical example is the separation of riparian vegetation from soil in fluvial environments, the distinction between fresh surfaces and rockfall in cliff environments, or more generally the classification of surfaces according to their morphology (ripples, grain size...). Natural surfaces are very heterogeneous and their distinctive properties are seldom defined at a unique scale. We have thus defined a multi-scale measure of the point cloud dimensionality around each point. The dimensionality characterizes the local 3D organization of the point cloud and varies from being 1D (points set along a line) to really taking all 3D volume, at each scale. We present the technique and illustrate its efficiency in separating riparian vegetation from ground and classifying a mountain stream in vegetation, rock, gravel and water surface. The superiority of the multi-scale analysis in enhancing class separability and spatial resolution of the classification is also demonstrated. Large scenes can be classified on a commodity laptop in a reasonable time. The technique is robust to missing data and especially shadow zones. The classification is fast and accurate and can account for some degree of intra-class morphological variability such as different vegetation types. A probabilistic confidence in the classification result is given at each point allowing the user to remove the points for which the classification is uncertain. The process can be both fully automated but also fully customized by the user including a graphical definition of the classifiers if so desired. Although developed for fully 3D data, the method can be readily applied to 2.5D airborne LiDAR data."]]></description>
<dc:subject>image-analysis image-segmentation learning-from-data classification nudge-targets</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:76c6c581fd14/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-segmentation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:classification"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
</rdf:Bag></taxo:topics>
</item>
<!-- Bookmark record for the Language Log post "Straw men and Bee Science".
     The description is a quoted excerpt; the space-separated tags in
     dc:subject are repeated as one rdf:li per tag under taxo:topics. -->
<item rdf:about="http://languagelog.ldc.upenn.edu/nll/?p=3180">
    <title>Language Log » Straw men and Bee Science</title>
    <dc:date>2011-06-10T14:33:56+00:00</dc:date>
    <link>http://languagelog.ldc.upenn.edu/nll/?p=3180</link>
    <dc:creator>Vaguery</dc:creator>
    <description><![CDATA["Let me start by saying that there's a way to take all this that makes it entirely correct. The key motive of science is explanation, and it's often essential to abstract away from the complexities of raw observation, and so on. I took courses from Chomsky as an undergraduate and a graduate student, and I'm grateful for what I learned from him, and for the eminently fair way that he always treated me. But increasingly, it seems to me, he has been elevating his personal distaste for the complexities of the real world into a systematic philosophy. To the extent that others accept these views, it excludes them from participation in (what I think are) the most promising and exciting current directions in the sciences of speech and language."]]></description>
    <dc:subject>Noam-Chomsky theory-and-practice-sitting-in-a-tree bias science learning-from-data</dc:subject>
    <dc:source>https://pinboard.in/</dc:source>
    <dc:identifier>https://pinboard.in/u:Vaguery/b:de3d20e7f395/</dc:identifier>
    <taxo:topics>
        <rdf:Bag>
            <rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:Noam-Chomsky"/>
            <rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:theory-and-practice-sitting-in-a-tree"/>
            <rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:bias"/>
            <rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:science"/>
            <rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
        </rdf:Bag>
    </taxo:topics>
</item>
<!-- Bookmark record for the Falkenblog post "High Frequency Trading Paper".
     The description is a quoted excerpt; the space-separated tags in
     dc:subject are repeated as one rdf:li per tag under taxo:topics. -->
<item rdf:about="http://falkenblog.blogspot.com/2011/05/high-frequency-trading-paper.html">
    <title>Falkenblog: High Frequency Trading Paper</title>
    <dc:date>2011-06-05T14:45:19+00:00</dc:date>
    <link>http://falkenblog.blogspot.com/2011/05/high-frequency-trading-paper.html</link>
    <dc:creator>Vaguery</dc:creator>
    <description><![CDATA["The point is that in fast moving markets, one needs something a little better than simple historical moving averages of daily closing prices. This is better, and extending the idea of 'volume time' vs. 'chronological time' is an intriguing direction. But one can also look at bid-ask spreads directly, or the VIX futures, or its etf, the VXX, and combinations, to gauge intraday volatility as well. Further, one can better estimate 'buy volume' using the transaction price relative to the then extant bid-ask spread, rather than if the price was weakly increasing, though this then involves syncing the trade information with quote information, and for academics such data are often hard to come by (further, quote information is often 10 times as large)."]]></description>
    <dc:subject>learning-from-data financial-engineering trading analytics nudge-targets</dc:subject>
    <dc:source>https://pinboard.in/</dc:source>
    <dc:identifier>https://pinboard.in/u:Vaguery/b:f4083f480cae/</dc:identifier>
    <taxo:topics>
        <rdf:Bag>
            <rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:learning-from-data"/>
            <rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:financial-engineering"/>
            <rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:trading"/>
            <rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:analytics"/>
            <rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
        </rdf:Bag>
    </taxo:topics>
</item>
</rdf:RDF>