<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (mraginsky)</title>
    <link>https://pinboard.in/u:mraginsky/public/</link>
    <description>recent bookmarks from mraginsky</description>
    <items>
      <rdf:Seq>	<rdf:li rdf:resource="https://arxiv.org/abs/2602.04770"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2507.10524"/>
	<rdf:li rdf:resource="https://link.springer.com/article/10.1007/s11186-021-09452-2"/>
	<rdf:li rdf:resource="https://www.jmlr.org/papers/v3/bengio03a.html"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2606.13280"/>
	<rdf:li rdf:resource="https://journals.sagepub.com/doi/10.1177/1468795X18817146"/>
	<rdf:li rdf:resource="https://journals.aps.org/prl/abstract/10.1103/g1cz-wk1l"/>
	<rdf:li rdf:resource="https://www.pnas.org/doi/full/10.1073/pnas.2502353122"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2603.11784"/>
	<rdf:li rdf:resource="https://www.sciencedirect.com/science/article/pii/S0951832006000330"/>
	<rdf:li rdf:resource="https://www.pnas.org/doi/10.1073/pnas.2509612123"/>
	<rdf:li rdf:resource="https://www.cell.com/neuron/fulltext/S0896-6273(25)00716-0?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS0896627325007160%3Fshowall%3Dtrue"/>
	<rdf:li rdf:resource="https://www.cambridge.org/core/elements/small-probabilities-and-high-stakes/13D562255077904A204C8659BD0AB34F?utm_id=97758_v0_s00_e231_tv4_tp1_a1dennhawg2fhs&amp;fbclid=IwY2xjawR5aspleHRuA2FlbQIxMABicmlkETFCNDcyd0ZBT0g0aWMwN01ic3J0YwZhcHBfaWQQMjIyMDM5MTc4ODIwMDg5MgABHsqqQ3oKimlTk8OOI5JVSdwaeJ14tae_kosyVOjfV8lMxZevM9qLTga0e12V_aem_7OG_-ZBz2GaUiibkJ0j-bA"/>
	<rdf:li rdf:resource="https://www.pnas.org/doi/10.1073/pnas.2108492118"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2510.15464"/>
	<rdf:li rdf:resource="https://seantrott.substack.com/p/the-problem-of-induction-heads-pt"/>
	<rdf:li rdf:resource="https://www.pnas.org/doi/10.1073/pnas.2514107123"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2410.01576"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2605.01172"/>
	<rdf:li rdf:resource="https://www.sciencedirect.com/science/article/pii/000510987790022X"/>
	<rdf:li rdf:resource="https://royalsocietypublishing.org/rspa/article/482/2336/20250413/481461/On-computing-quantum-waves-exactly-from-classical"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2602.07488"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2604.21691"/>
	<rdf:li rdf:resource="https://projecteuclid.org/journals/statistical-science/volume-29/issue-3/Instrumental-Variables-An-Econometricians-Perspective/10.1214/14-STS480.full"/>
	<rdf:li rdf:resource="https://proceedings.mlr.press/r2/jaakkola99a.html"/>
	<rdf:li rdf:resource="https://proceedings.neurips.cc/paper_files/paper/2024/hash/b78adc1c1558bf344809275854d40fd6-Abstract-Conference.html"/>
	<rdf:li rdf:resource="https://openreview.net/forum?id=FGTDe6EA0B&amp;referrer=%5Bthe%20profile%20of%20Jon%20Kleinberg%5D(%2Fprofile%3Fid%3D~Jon_Kleinberg3)"/>
	<rdf:li rdf:resource="https://www.cambridge.org/core/journals/philosophy-of-science/article/standards-for-modest-bayesian-credences/6FE3D870363912FE2EE5596E734D4B0B"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2306.00802"/>
	<rdf:li rdf:resource="https://www.sciencedirect.com/science/article/pii/S0167691123001950?via%3Dihub"/>
	<rdf:li rdf:resource="https://www.cambridge.org/core/journals/economics-and-philosophy/article/abstraction-as-flexibility-the-veil-of-evaluative-uncertainty/FA12D9D5BB3783F8EACF396DFE4B93EB"/>
	<rdf:li rdf:resource="https://www.cambridge.org/core/journals/glasgow-mathematical-journal/article/linear-pde-with-constant-coefficients/E406F1D5A27DC4B4414B8A3DF3A225F6"/>
	<rdf:li rdf:resource="https://www.cell.com/trends/cognitive-sciences/fulltext/S1364-6613(26)00052-5"/>
	<rdf:li rdf:resource="https://www.fabricatedknowledge.com/p/mythos-and-engels-pause?fbclid=IwY2xjawRPrTNleHRuA2FlbQIxMQBicmlkETE4eDlndjllUld2cjlBTkFzc3J0YwZhcHBfaWQQMjIyMDM5MTc4ODIwMDg5MgABHsMwf89zs3VKJoXCiZg-1RshLfpVCcwbEgNOrrL2erKG5FX6sPEz6qE5axUQ_aem_RVUflDX1Xm89FsiXcKYlDQ"/>
	<rdf:li rdf:resource="https://thinkingmachines.ai/blog/defeating-nondeterminism-in-llm-inference/"/>
	<rdf:li rdf:resource="https://archive.is/DKKXJ"/>
	<rdf:li rdf:resource="https://www.thephilosopher1923.org/post/artificial-bodies-and-the-promise-of-abstraction"/>
	<rdf:li rdf:resource="https://hbr.org/2026/02/ai-doesnt-reduce-work-it-intensifies-it"/>
	<rdf:li rdf:resource="https://www.cambridge.org/core/journals/economics-and-philosophy/article/market-as-a-creative-process/4A62A60230535D4A8553A7C26A184F56"/>
	<rdf:li rdf:resource="https://newlinesinstitute.org/tech-econ-sov-sec/making-machine-learning-safe-for-the-world/"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2604.06107"/>
	<rdf:li rdf:resource="https://jmlr.org/papers/v16/santhanam15a.html"/>
	<rdf:li rdf:resource="https://direct.mit.edu/neco/article/35/3/277/112379/Feelings-Are-the-Source-of-Consciousness"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2104.01489"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2104.01490"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2602.23268"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2602.24083"/>
	<rdf:li rdf:resource="https://economics.mit.edu/sites/default/files/2026-02/AI%2C%20Human%20Cognition%20and%20Knowledge%20Collapse%2002-20-26.pdf"/>
	<rdf:li rdf:resource="https://link.springer.com/article/10.1023/B:JOSS.0000033245.43421.14"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2602.22271"/>
	<rdf:li rdf:resource="https://www.thelancet.com/journals/lancet/article/PIIS0140-6736(11)60563-1/fulltext?wptouch_preview_theme=enabled"/>
	<rdf:li rdf:resource="https://link.springer.com/article/10.1007/s11084-016-9494-1"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2512.24999"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2512.24945"/>
	<rdf:li rdf:resource="https://link.springer.com/article/10.1007/s00332-002-0506-0"/>
	<rdf:li rdf:resource="https://www.bostonreview.net/articles/how-to-lie-with-political-statistics/"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2510.15511"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2508.11990"/>
	<rdf:li rdf:resource="https://link.springer.com/article/10.1007/s10450-006-9683-8"/>
	<rdf:li rdf:resource="https://www.sciencedirect.com/science/article/pii/0095895677900570"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2509.19601"/>
	<rdf:li rdf:resource="https://proceedings.mlr.press/v195/bosch23a.html"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2209.08832"/>
	<rdf:li rdf:resource="https://www.pnas.org/doi/full/10.1073/pnas.2502599122"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2301.10414"/>
	<rdf:li rdf:resource="https://link.springer.com/article/10.1007/BF00212098"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2508.05776"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2010.11929"/>
	<rdf:li rdf:resource="https://aeon.co/essays/the-sovereign-individual-and-the-paradox-of-the-digital-age"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2302.10488"/>
      </rdf:Seq>
    </items>
  </channel><item rdf:about="https://arxiv.org/abs/2602.04770">
    <title>[2602.04770] Generative Modeling via Drifting</title>
    <dc:date>2026-06-24T17:26:20+00:00</dc:date>
    <link>https://arxiv.org/abs/2602.04770</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Generative modeling can be formulated as learning a mapping f such that its pushforward distribution matches the data distribution. The pushforward behavior can be carried out iteratively at inference time, for example in diffusion and flow-based models. In this paper, we propose a new paradigm called Drifting Models, which evolve the pushforward distribution during training and naturally admit one-step inference. We introduce a drifting field that governs the sample movement and achieves equilibrium when the distributions match. This leads to a training objective that allows the neural network optimizer to evolve the distribution. In experiments, our one-step generator achieves state-of-the-art results on ImageNet at 256 x 256 resolution, with an FID of 1.54 in latent space and 1.61 in pixel space. We hope that our work opens up new opportunities for high-quality one-step generation. ]]></description>
<dc:subject>papers to-read generative-models diffusions neural-networks</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:23902c99e580/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:generative-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:diffusions"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:neural-networks"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2507.10524">
    <title>[2507.10524] Mixture-of-Recursions: Learning Dynamic Recursive Depths for Adaptive Token-Level Computation</title>
    <dc:date>2026-06-24T17:25:58+00:00</dc:date>
    <link>https://arxiv.org/abs/2507.10524</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Scaling language models unlocks impressive capabilities, but the accompanying computational and memory demands make both training and deployment expensive. Existing efficiency efforts typically target either parameter sharing or adaptive computation, leaving open the question of how to attain both simultaneously. We introduce Mixture-of-Recursions (MoR), a unified framework that combines the two axes of efficiency inside a single Recursive Transformer. MoR reuses a shared stack of layers across recursion steps to achieve parameter efficiency, while lightweight routers enable adaptive token-level thinking by dynamically assigning different recursion depths to individual tokens. This allows MoR to focus quadratic attention computation only among tokens still active at a given recursion depth, further improving memory access efficiency by selectively caching only their key-value pairs. Beyond these core mechanisms, we also propose a KV sharing variant that reuses KV pairs from the first recursion, specifically designed to further decrease memory footprint. Across model scales ranging from 135M to 1.7B parameters, MoR forms a new Pareto frontier: at equal training FLOPs and smaller model sizes, it significantly lowers validation perplexity and improves few-shot accuracy, while delivering higher throughput compared with vanilla and existing recursive baselines. These gains demonstrate that MoR is an effective path towards large-model quality without incurring large-model cost. ]]></description>
<dc:subject>papers to-read machine-learning neural-networks large-language-models transformers</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:766a4f286462/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:large-language-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:transformers"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://link.springer.com/article/10.1007/s11186-021-09452-2">
    <title>For a probabilistic sociology: A history of concept formation with Pierre Bourdieu | Theory and Society | Springer Nature Link</title>
    <dc:date>2026-06-21T00:40:23+00:00</dc:date>
    <link>https://link.springer.com/article/10.1007/s11186-021-09452-2</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[This article uses a history of concept formation focused on Pierre Bourdieu’s probabilism to provide the groundwork for a probabilistic sociology. We argue that not only was Bourdieu a probabilist, but that reframing probability along heterodox lines holds empirical promise when it is also linked to new concept formation, as evident in the case of Bourdieu. For the anglophone sociological field, probability is of primary significance for method and epistemic commitment. Sociological theory continues to react to the integral role of probability used for the purposes of sociological knowledge but finds very little in the way of concept formation that does not adopt the same commitments as the methodologists. The history we outline retrieves a different approach, one which finds Bourdieu aligned with objective probability borrowed from the sociology of Max Weber. This version of probabilism locates probability directly in the world and makes it a source of concept formation without the intervention of the methodologists. This article follows Bourdieu as he recognizes objective probability in the work of Weber (around 1973) and then engages in novel concept formation on these grounds. Ranging between spaces of objective probability (fields), spaces of randomness (games of chance), and spaces of determinism (apparatus), Bourdieu’s mature probabilism reveals the conceptual and meta-methodological differences that come with making probability objective. Probabilistic expectations derive from the world itself, rather than existing as part of explanation or method. Specifically, this history of concept formation reveals a looping relation between objective probability (chances) and learned probability (expectations) that, as Bourdieu himself appreciated, holds wide-ranging implications for best knowledge practices and empirical sociological research.

-- Since the authors throw the term "probabilism" around so much, I was expecting to see a reference to De Finetti (http://users.df.uba.ar/alejo/materias/InferenciaBayesiana/lecturas/De_Finetti_--_Probabilism.pdf) ...]]></description>
<dc:subject>papers to-read sociology via:rvenkat</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:c10a7d697e14/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:sociology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:via:rvenkat"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.jmlr.org/papers/v3/bengio03a.html">
    <title>jmlr.org/papers/v3/bengio03a.html</title>
    <dc:date>2026-06-17T20:39:23+00:00</dc:date>
    <link>https://www.jmlr.org/papers/v3/bengio03a.html</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[A goal of statistical language modeling is to learn the joint probability function of sequences of words in a language. This is intrinsically difficult because of the curse of dimensionality: a word sequence on which the model will be tested is likely to be different from all the word sequences seen during training. Traditional but very successful approaches based on n-grams obtain generalization by concatenating very short overlapping sequences seen in the training set. We propose to fight the curse of dimensionality by learning a distributed representation for words which allows each training sentence to inform the model about an exponential number of semantically neighboring sentences. The model learns simultaneously (1) a distributed representation for each word along with (2) the probability function for word sequences, expressed in terms of these representations. Generalization is obtained because a sequence of words that has never been seen before gets high probability if it is made of words that are similar (in the sense of having a nearby representation) to words forming an already seen sentence. Training such large models (with millions of parameters) within a reasonable time is itself a significant challenge. We report on experiments using neural networks for the probability function, showing on two text corpora that the proposed approach significantly improves on state-of-the-art n-gram models, and that the proposed approach allows to take advantage of longer contexts. ]]></description>
<dc:subject>papers to-read language machine-learning neural-networks large-language-models</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:4c52bf105324/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:language"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:large-language-models"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2606.13280">
    <title>[2606.13280] Generalization Bounds for Transformer-Based Next-Token Prediction in a Language Model</title>
    <dc:date>2026-06-16T13:55:17+00:00</dc:date>
    <link>https://arxiv.org/abs/2606.13280</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[A refined statistical understanding of LLM pre-training requires the analysis of the transformer architecture for data distributions that encapsulate key characteristics of text data. To address this, we propose a text data distribution based on an extension of the log-bilinear language model from the natural language processing literature. For this data generating process, we derive generalization bounds for deep transformer architectures, highlighting the dependence on the network architecture, the vocabulary size, the number of documents and the document length. ]]></description>
<dc:subject>papers to-read machine-learning generalization-bounds transformers large-language-models</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:a5c4d1b8ae13/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:generalization-bounds"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:transformers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:large-language-models"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://journals.sagepub.com/doi/10.1177/1468795X18817146">
    <title>Reassessing the “rules of the game”: Max Weber and Peter Winch on rule-following - Akos Sivado, 2020</title>
    <dc:date>2026-06-09T15:50:47+00:00</dc:date>
    <link>https://journals.sagepub.com/doi/10.1177/1468795X18817146</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Peter Winch’s critical remarks concerning Max Weber’s interpretive sociology are centered around the notions of “rule” and “rule-following.” While Winch gave credit to Weber for much of his theoretical insight, he nevertheless found his account unsatisfactory for two reasons: its neglect of rules and rule-following in social life, and its apparent reliance on causal explanations. This article attempts to show how Winch might have been less than charitable on both of these accounts: that once one pays close attention to Weber’s concept of a “rule,” and to his ideas concerning “adequate causation,” the two frameworks for interpretive sociology could turn out to be much more similar than it is usually assumed.]]></description>
<dc:subject>papers to-read philosophy sociology rules</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:2157d12afade/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:philosophy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:sociology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:rules"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://journals.aps.org/prl/abstract/10.1103/g1cz-wk1l">
    <title>Random Tree Model of Meaningful Memory | Phys. Rev. Lett.</title>
    <dc:date>2026-06-08T17:18:34+00:00</dc:date>
    <link>https://journals.aps.org/prl/abstract/10.1103/g1cz-wk1l</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Traditional studies of memory for meaningful narratives focus on specific stories and their semantic structures but do not address common quantitative features of recall across different narratives. We introduce a statistical ensemble of random trees to represent narratives as hierarchies of key points, where each node is a compressed representation of its descendant leaves, which are the original narrative segments. Recall from this hierarchical representation is constrained by working memory capacity. Our analytical solution aligns with observations from large-scale narrative recall experiments. Specifically, our model explains that (1) average recall length increases sublinearly with narrative length and (2) individuals summarize increasingly longer narrative segments in each recall sentence. Additionally, the theory predicts that for sufficiently long narratives, a universal, scale-invariant limit emerges, where the fraction of a narrative summarized by a single recall sentence follows a distribution independent of narrative length.]]></description>
<dc:subject>papers to-read memory random-structures language statistical-physics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:5718d5c7a886/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:memory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:random-structures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:language"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:statistical-physics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.pnas.org/doi/full/10.1073/pnas.2502353122">
    <title>Information rate of meaningful communication | PNAS</title>
    <dc:date>2026-06-08T17:17:22+00:00</dc:date>
    <link>https://www.pnas.org/doi/full/10.1073/pnas.2502353122</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[In Shannon’s seminal paper, the entropy of printed English, treated as a stationary stochastic process, was estimated to be roughly 1 bit per character. However, considered as a means of communication, language differs considerably from its printed form: i) the units of information are not characters or even words but clauses, i.e., shortest meaningful parts of speech; and ii) what is transmitted is principally the meaning of what is being said or written, while the precise phrasing that was used to communicate the meaning is typically ignored. In this study, we show that one can leverage recently developed large language models to quantify information communicated in meaningful narratives in terms of bits of meaning per clause.]]></description>
<dc:subject>papers to-read information-theory language large-language-models communication semantic-communication</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:c67a307c255b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:information-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:language"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:large-language-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:communication"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:semantic-communication"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2603.11784">
    <title>[2603.11784] Language Generation with Replay: A Learning-Theoretic View of Model Collapse</title>
    <dc:date>2026-06-08T13:12:15+00:00</dc:date>
    <link>https://arxiv.org/abs/2603.11784</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[As scaling laws push the training of frontier large language models (LLMs) toward ever-growing data requirements, training pipelines are approaching a regime where much of the publicly available online text may be consumed. At the same time, widespread LLM usage increases the volume of machine-generated content on the web; together, these trends raise the likelihood of generated text re-entering future training corpora, increasing the associated risk of performance degradation often called model collapse. In practice, model developers address this concern through data cleaning, watermarking, synthetic-data policies, or, in some cases, blissful ignorance. However, the problem of model collapse in generative models has not been examined from a learning-theoretic perspective: we study it through the theoretical lens of the language generation in the limit framework, introducing a replay adversary that augments the example stream with the generator's own past outputs. Our main contribution is a fine-grained learning-theoretic characterization of when replay fundamentally limits generation: while replay is benign for the strongest notion of uniform generation, it provably creates separations for the weaker notions of non-uniform generation and generation in the limit. Interestingly, our positive results mirror heuristics widely used in practice, such as data cleaning, watermarking, and output filtering, while our separations show when these ideas can fail. ]]></description>
<dc:subject>papers to-read large-language-models model-collapse learning-theory</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:00b4a96208c9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:large-language-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:model-collapse"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:learning-theory"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.sciencedirect.com/science/article/pii/S0951832006000330">
    <title>What are emergent properties and how do they affect the engineering of complex systems? - ScienceDirect</title>
    <dc:date>2026-06-07T21:39:25+00:00</dc:date>
    <link>https://www.sciencedirect.com/science/article/pii/S0951832006000330</link>
    <dc:creator>mraginsky</dc:creator><dc:subject>papers to-read complex-systems engineering</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:777c41184f9a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:complex-systems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:engineering"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.pnas.org/doi/10.1073/pnas.2509612123">
    <title>Resolving Feynman’s restaurant problem reveals optimal solutions and human strategies | PNAS</title>
    <dc:date>2026-06-07T20:54:23+00:00</dc:date>
    <link>https://www.pnas.org/doi/10.1073/pnas.2509612123</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[In the 1970s, physicist Richard Feynman turned lunch with a friend into a math problem—how to optimize dish selection over multiple meals—but his handwritten notes remained a mystery for decades. Here we present the fully deciphered problem and solution, prove its optimality, generalize it to related problems, and compare the results to human behavior. The optimal policy specifies decreasing thresholds for switching from exploring new dishes to exploiting the best, with thresholds varying based on the distribution of the quality of dishes. We connect these results to the existing psychological literature on optimal stopping problems, which has explored close variants on Feynman’s problem, and use our generalization of the solution to explore how the underlying distribution of the quality of the options influences people’s choices. A preregistered experiment with 2,520 participants shows that people adopt thresholds that decrease linearly with the proportion of trials remaining, consistent with the observation of linear thresholds in other optimal stopping problems. However, we show that people tend to explore more than predicted by linear thresholds, and that different distributions of quality result in thresholds with the same slope but different intercepts. These results indicate that people adapt linear thresholds used in optimal stopping tasks in a way that is sensitive to the underlying distribution—a simple strategy that we show is nearly as effective as Feynman’s solution.]]></description>
<dc:subject>papers to-read optimal-stopping decision-making Feynman</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:ed1a3861ba75/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:optimal-stopping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:decision-making"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:Feynman"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.cell.com/neuron/fulltext/S0896-6273(25)00716-0?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS0896627325007160%3Fshowall%3Dtrue">
    <title>It’s not the thought that counts: Allostasis at the core of brain function: Neuron</title>
    <dc:date>2026-06-02T23:33:52+00:00</dc:date>
    <link>https://www.cell.com/neuron/fulltext/S0896-6273(25)00716-0?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS0896627325007160%3Fshowall%3Dtrue</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[In psychology and neuroscience, scientific questions are often framed in terms of mental activity (e.g., cognition, emotion, and perception); however, the brain is an organ with a particular function that only it can fulfill. Converging evidence suggests that this function is allostasis: the predictive regulation of competing demands from internal bodily systems. We review evidence for a distributed allostatic system that organizes whole-brain signaling, scaffolds psychological phenomena, and places bodily regulation at the core of brain structure. We also demonstrate, with an example from Alzheimer’s disease, how an “allostasis-first” perspective might transform hypothesis generation in the context of neurological health and disease. In sum, the common conception that the brain is primarily for thinking, or other cognitive processes, is potentially misleading, and neuroscience may benefit from a theoretical structure that centers on basic questions of how the brain coordinates and efficiently regulates the body.]]></description>
<dc:subject>papers to-read neuroscience control-theory dynamical-systems cognition</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:0eecd28934b3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:neuroscience"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:control-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:dynamical-systems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:cognition"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.cambridge.org/core/elements/small-probabilities-and-high-stakes/13D562255077904A204C8659BD0AB34F?utm_id=97758_v0_s00_e231_tv4_tp1_a1dennhawg2fhs&amp;fbclid=IwY2xjawR5aspleHRuA2FlbQIxMABicmlkETFCNDcyd0ZBT0g0aWMwN01ic3J0YwZhcHBfaWQQMjIyMDM5MTc4ODIwMDg5MgABHsqqQ3oKimlTk8OOI5JVSdwaeJ14tae_kosyVOjfV8lMxZevM9qLTga0e12V_aem_7OG_-ZBz2GaUiibkJ0j-bA">
    <title>Small Probabilities and High Stakes</title>
    <dc:date>2026-05-19T16:16:30+00:00</dc:date>
    <link>https://www.cambridge.org/core/elements/small-probabilities-and-high-stakes/13D562255077904A204C8659BD0AB34F?utm_id=97758_v0_s00_e231_tv4_tp1_a1dennhawg2fhs&amp;fbclid=IwY2xjawR5aspleHRuA2FlbQIxMABicmlkETFCNDcyd0ZBT0g0aWMwN01ic3J0YwZhcHBfaWQQMjIyMDM5MTc4ODIwMDg5MgABHsqqQ3oKimlTk8OOI5JVSdwaeJ14tae_kosyVOjfV8lMxZevM9qLTga0e12V_aem_7OG_-ZBz2GaUiibkJ0j-bA</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[What price should you be willing to pay for a tiny probability of an astronomically large gain, or to avoid a tiny probability of an astronomically large loss? Should you be willing to pay any finite price, if the potential gains or losses are large enough? Fanaticism says you should, while anti-fanaticism says you should not. Focusing on morally motivated decision-making, this Element explores arguments for and against both positions, ultimately defending the intermediate view that rationality permits a range of dispositions toward extreme risks, while ruling out the most comprehensive forms of both fanaticism and anti-fanaticism. The final section considers practical implications, arguing that under real-world circumstances any view satisfying a minimal principle of rationality must very often rank options by expected value, and thus sometimes give great weight to intuitively small probabilities, but that we nonetheless retain rational flexibility in sufficiently extreme cases.]]></description>
<dc:subject>to-read books-noted philosophy decision-theory probabilism</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:fe7b313a9824/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:books-noted"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:philosophy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:decision-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:probabilism"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.pnas.org/doi/10.1073/pnas.2108492118">
    <title>The overlap gap property: A topological barrier to optimizing over random structures | PNAS</title>
    <dc:date>2026-05-13T17:37:50+00:00</dc:date>
    <link>https://www.pnas.org/doi/10.1073/pnas.2108492118</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[The problem of optimizing over random structures emerges in many areas of science and engineering, ranging from statistical physics to machine learning and artificial intelligence. For many such structures, finding optimal solutions by means of fast algorithms is not known and often is believed not to be possible. At the same time, the formal hardness of these problems in the form of the complexity-theoretic NP-hardness is lacking. A new approach for algorithmic intractability in random structures is described in this article, which is based on the topological disconnectivity property of the set of pairwise distances of near-optimal solutions, called the Overlap Gap Property. The article demonstrates how this property 1) emerges in most models known to exhibit an apparent algorithmic hardness; 2) is consistent with the hardness/tractability phase transition for many models analyzed to the day; and, importantly, 3) allows to mathematically rigorously rule out a large class of algorithms as potential contenders, specifically the algorithms that exhibit the input stability (insensitivity).]]></description>
<dc:subject>papers to-read computational-complexity random-structures statistical-physics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:483a65f6c008/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:computational-complexity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:random-structures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:statistical-physics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2510.15464">
    <title>[2510.15464] Learning to Answer from Correct Demonstrations</title>
    <dc:date>2026-05-11T19:34:28+00:00</dc:date>
    <link>https://arxiv.org/abs/2510.15464</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[We study the problem of learning to generate an answer (or completion) to a question (or prompt), where there could be multiple correct answers, any one of which is acceptable at test time. Learning is based on demonstrations of some correct answer to each training question, as in Supervised Fine Tuning (SFT). We formalize the problem as imitation learning (i.e., apprenticeship learning) in contextual bandits, with offline demonstrations from some expert (optimal, or very good) policy, without explicitly observed rewards. In contrast to prior work, which assumes the demonstrator belongs to a bounded-complexity policy class, we propose relying only on the underlying reward model (i.e., specifying which answers are correct) being in a bounded-complexity class, which we argue is a strictly weaker assumption. We show that likelihood-maximization methods can fail in this setting, and instead present an approach that learns to answer nearly as well as the demonstrator, with sample complexity logarithmic in the cardinality of the reward class. Our method is similar to Syed and Schapire 2007, when adapted to a contextual bandit (i.e., single step) setup, but is a simple one-pass online approach that enjoys an "optimistic rate" (i.e., $1/\varepsilon$ when the demonstrator is optimal, versus $1/\varepsilon^2$ in Syed and Schapire), and works even with arbitrarily adaptive demonstrations. ]]></description>
<dc:subject>papers to-read heard-the-talk large-language-models machine-learning</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:f5836b2d0d6b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:heard-the-talk"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:large-language-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:machine-learning"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://seantrott.substack.com/p/the-problem-of-induction-heads-pt">
    <title>The problem of induction (heads), pt. I - by Sean Trott</title>
    <dc:date>2026-05-10T00:52:13+00:00</dc:date>
    <link>https://seantrott.substack.com/p/the-problem-of-induction-heads-pt</link>
    <dc:creator>mraginsky</dc:creator><dc:subject>blogs to-read large-language-models machine-learning neural-networks</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:7330918ac7dd/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:blogs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:large-language-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:neural-networks"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.pnas.org/doi/10.1073/pnas.2514107123">
    <title>Neural correlates of perceptual decision-making in the primary somatosensory cortex | PNAS</title>
    <dc:date>2026-05-10T00:51:01+00:00</dc:date>
    <link>https://www.pnas.org/doi/10.1073/pnas.2514107123</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[The brain is thought to produce decisions by gradual accumulation of sensory evidence through a hierarchically organized feedforward cascade of neuronal activities that transforms early stimulus representations in the primary somatosensory cortex (S1) to a perceptual decision processed in premotor areas. Recently, this prevailing view has been challenged by observation of choice-correlated neural activity as early in the hierarchy as S1. Here, to reconcile these seemingly controversial observations, we employ ethological whisker-guided navigation of mice in a tactile virtual reality paradigm combined with dense electrophysiological recordings in whisker-related wS1. Leaving only a pair of C2 whiskers for mice to navigate with, we effectively designed an information bottleneck for sensory input to decision-making. We show that neural activity during sensory evidence accumulation exhibits dramatic collapse of the high-dimensional spiking activity to just a single latent variable followed by a slower and almost synchronous ramping up across the whole cortical column. We show that this variable is consistent with models of gradual accumulation of noisy sensory evidence to a decision bound. These observations indicate that S1 may directly participate in a categorical coding of all-or-none decision variable via cortico-cortical feedback loops through which sensory information reverberates to be transformed into perception and action.]]></description>
<dc:subject>papers to-read neuroscience decision-making perception</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:5060bdd7f851/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:neuroscience"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:decision-making"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:perception"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2410.01576">
    <title>[2410.01576] On incompressible flows in discrete networks and Shnirelman's inequality</title>
    <dc:date>2026-05-10T00:50:27+00:00</dc:date>
    <link>https://arxiv.org/abs/2410.01576</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Let $f$ and $g$ be two volume-preserving diffeomorphisms on the cube $Q=[0,1]^{\nu}$, $\nu \geq 3$. We show that there is a divergence-free vector field $v \in L^1((0,1);L^p(Q))$ such that $v$ connects $f$ and $g$ through the corresponding flow and $\Vert v \Vert_{L^1_t L^p_x} \leq C_{p,\nu} \Vert f- g \Vert_{L^p_x}$. In particular we show Shnirelman's inequality, cf. [Shnirelman, Generalized fluid flows, their approximation and applications (1994)], for the optimal Hölder exponent $\alpha =1$, thus proving that the metric on the group of volume-preserving diffeomorphisms of $Q$ is equivalent to the $L^2$-distance. To achieve this, we discretise our problem, use some results on flows in discrete networks and then construct a flow in non-discrete space-time out of the discrete solution. ]]></description>
<dc:subject>papers to-read dynamical-systems PDEs</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:0d9ac4075df5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:dynamical-systems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:PDEs"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2605.01172">
    <title>[2605.01172] A Theory of Generalization in Deep Learning</title>
    <dc:date>2026-05-10T00:44:31+00:00</dc:date>
    <link>https://arxiv.org/abs/2605.01172</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[We present a non-asymptotic theory of generalization in deep learning where the empirical neural tangent kernel partitions the output space. In directions corresponding to signal, error dissipates rapidly; in the vast orthogonal dimensions corresponding to noise, the kernel's near-zero eigenvalues trap residual error in a test-invisible reservoir. Within the signal channel, minibatch SGD ensures that coherent population signal accumulates via fast linear drift, while idiosyncratic memorization is suppressed into a slow, diffusive random walk. We prove generalization survives even when the kernel evolves $\mathcal{O}(1)$ in operator norm, the full feature-learning regime. This theory naturally explains disparate phenomena in deep learning theory, such as benign overfitting, double descent, implicit bias, and grokking. Lastly, we derive an exact population-risk objective from a single training run with no validation data, for any architecture, loss, or optimizer, and prove that it measures precisely the noise in the signal channel. This objective reduces in practice to an SNR preconditioner on top of Adam, adding one state vector at no extra cost; it accelerates grokking by $5\times$, suppresses memorization in PINNs and implicit neural representations, and improves DPO fine-tuning under noisy preferences while staying $3\times$ closer to the reference policy. ]]></description>
<dc:subject>papers to-read learning-theory machine-learning deep-learning dynamical-systems optimization neural-networks generalization-bounds</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:5ed6efa9c148/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:learning-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:deep-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:dynamical-systems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:optimization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:generalization-bounds"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.sciencedirect.com/science/article/pii/000510987790022X">
    <title>The future of control - ScienceDirect</title>
    <dc:date>2026-05-05T18:12:23+00:00</dc:date>
    <link>https://www.sciencedirect.com/science/article/pii/000510987790022X</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[The development of control is briefly reviewed. It is suggested that ‘modern’ control has two aspects: a mathematical investigation of basic properties of dynamical systems, and the development of algorithmic methods of synthesis. Reasons are given for believing that the first of these will have more enduring value than the second. Algorithmic methods which try to eliminate the skill of the designer are contrasted with alternative methods which accept his skill and make it more productive. It is finally suggested that the impact of computers upon industry may give the opportunity for a similar development of production methods which accept and enhance the skill of manual workers.]]></description>
<dc:subject>to-read control-theory engineering complex-systems computation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:5c08fa3aa2c7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:control-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:engineering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:complex-systems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:computation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://royalsocietypublishing.org/rspa/article/482/2336/20250413/481461/On-computing-quantum-waves-exactly-from-classical">
    <title>On computing quantum waves exactly from classical action | Proceedings A | The Royal Society</title>
    <dc:date>2026-05-01T15:55:32+00:00</dc:date>
    <link>https://royalsocietypublishing.org/rspa/article/482/2336/20250413/481461/On-computing-quantum-waves-exactly-from-classical</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[We show that the Schrödinger equation can be solved exactly based only on classical least action. Fundamental postulates of quantum mechanics can in turn be derived directly from this construction. The results extend to the relativistic Klein-Gordon, Pauli, and Dirac equations, and suggest a smooth transition between physics across scales. Most quantum mechanics problems have classical versions which involve multiple least action solutions. The associated classical multipaths stem either from the initial position or momentum distribution, or from branch points, generated, e.g. by a multiply connected manifold (double slit experiment), by spatial inequality constraints (particle in a box), or by a singularity (Coulomb potential). We show that the exact Schrödinger wave function can be constructed by combining this classical multi-valued action with the classical density ⁠, computed analytically from along each extremal action path. The construction is general and does not involve any semi-classical approximation. Quantum wave collapse at measurement can be derived from the classical density change. Entanglement corresponds to a sum of classical particle actions mapping to a tensor product of spinors. The results also provide a simpler computational alternative to Feynman path integrals, as they use only a minimal subset of classical paths.

-- Need to read carefully, but I am disposed toward skepticism. For one, no citation to Ed Nelson's stochastic mechanics or to Guerra and Morato's paper on optimal control and quantum mechanics: https://journals.aps.org/prd/abstract/10.1103/PhysRevD.27.1774.]]></description>
<dc:subject>papers to-read physics dynamical-systems quantum-mechanics color-me-skeptical</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:10c52d2df623/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:physics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:dynamical-systems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:quantum-mechanics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:color-me-skeptical"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2602.07488">
    <title>[2602.07488] Deriving Neural Scaling Laws from the statistics of natural language</title>
    <dc:date>2026-05-01T15:53:17+00:00</dc:date>
    <link>https://arxiv.org/abs/2602.07488</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Despite the fact that experimental neural scaling laws have substantially guided empirical progress in large-scale machine learning, no existing theory can quantitatively predict the exponents of these important laws for any modern LLM trained on any natural language dataset. We provide the first such theory in the case of data-limited scaling laws. We isolate two key statistical properties of language that alone can predict neural scaling exponents: (i) the decay of pairwise token correlations with time separation between token pairs, and (ii) the decay of the next-token conditional entropy with the length of the conditioning context. We further derive a simple formula in terms of these statistics that predicts data-limited neural scaling exponents from first principles without any free parameters or synthetic data models. Our theory exhibits a remarkable match with experimentally measured neural scaling laws obtained from training GPT-2 and LLaMA style models from scratch on two qualitatively different benchmarks, TinyStories and WikiText. ]]></description>
<dc:subject>papers to-read large-language-models neural-networks deep-learning machine-learning</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:c9205d53524e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:large-language-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:deep-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:machine-learning"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2604.21691">
    <title>[2604.21691] There Will Be a Scientific Theory of Deep Learning</title>
    <dc:date>2026-05-01T15:46:32+00:00</dc:date>
    <link>https://arxiv.org/abs/2604.21691</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[In this paper, we make the case that a scientific theory of deep learning is emerging. By this we mean a theory which characterizes important properties and statistics of the training process, hidden representations, final weights, and performance of neural networks. We pull together major strands of ongoing research in deep learning theory and identify five growing bodies of work that point toward such a theory: (a) solvable idealized settings that provide intuition for learning dynamics in realistic systems; (b) tractable limits that reveal insights into fundamental learning phenomena; (c) simple mathematical laws that capture important macroscopic observables; (d) theories of hyperparameters that disentangle them from the rest of the training process, leaving simpler systems behind; and (e) universal behaviors shared across systems and settings which clarify which phenomena call for explanation.
Taken together, these bodies of work share certain broad traits: they are concerned with the dynamics of the training process; they primarily seek to describe coarse aggregate statistics; and they emphasize falsifiable quantitative predictions. We argue that the emerging theory is best thought of as a mechanics of the learning process, and suggest the name learning mechanics. We discuss the relationship between this mechanics perspective and other approaches for building a theory of deep learning, including the statistical and information-theoretic perspectives. In particular, we anticipate a symbiotic relationship between learning mechanics and mechanistic interpretability.
We also review and address common arguments that fundamental theory will not be possible or is not important. We conclude with a portrait of important open directions in learning mechanics and advice for beginners. We host further introductory materials, perspectives, and open questions at this http URL. 

-- associated website: https://learningmechanics.pub/]]></description>
<dc:subject>papers to-read machine-learning deep-learning neural-networks</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:42dc3c1e01b6/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:deep-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:neural-networks"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://projecteuclid.org/journals/statistical-science/volume-29/issue-3/Instrumental-Variables-An-Econometricians-Perspective/10.1214/14-STS480.full">
    <title>Instrumental Variables: An Econometrician’s Perspective</title>
    <dc:date>2026-04-29T14:47:59+00:00</dc:date>
    <link>https://projecteuclid.org/journals/statistical-science/volume-29/issue-3/Instrumental-Variables-An-Econometricians-Perspective/10.1214/14-STS480.full</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[I review recent work in the statistics literature on instrumental variables methods from an econometrics perspective. I discuss some of the older, economic, applications including supply and demand models and relate them to the recent applications in settings of randomized experiments with noncompliance. I discuss the assumptions underlying instrumental variables methods and in what settings these may be plausible. By providing context to the current applications, a better understanding of the applicability of these methods may arise. ]]></description>
<dc:subject>papers to-read statistics inference instrumental-variables econometrics randomized-experiments experimental-design</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:ea6321f5a7bc/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:instrumental-variables"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:econometrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:randomized-experiments"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:experimental-design"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://proceedings.mlr.press/r2/jaakkola99a.html">
    <title>Probabilistic Kernel Regression Models</title>
    <dc:date>2026-04-25T02:13:48+00:00</dc:date>
    <link>https://proceedings.mlr.press/r2/jaakkola99a.html</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[We introduce a class of flexible conditional probability models and techniques for classification/regression problems. Many existing methods such as generalized linear models and support vector machines are subsumed under this class. The flexibility of this class of techniques comes from the use of kernel functions as in support vector machines, and the generality from dual formulations of standard regression models. ]]></description>
<dc:subject>papers to-read kernel-methods machine-learning optimization</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:2ecbc0a0a1a8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:kernel-methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:optimization"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://proceedings.neurips.cc/paper_files/paper/2024/hash/b78adc1c1558bf344809275854d40fd6-Abstract-Conference.html">
    <title>Unveiling the Hidden Structure of Self-Attention via Kernel Principal Component Analysis</title>
    <dc:date>2026-04-25T02:13:05+00:00</dc:date>
    <link>https://proceedings.neurips.cc/paper_files/paper/2024/hash/b78adc1c1558bf344809275854d40fd6-Abstract-Conference.html</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[The remarkable success of transformers in sequence modeling tasks, spanning various applications in natural language processing and computer vision, is attributed to the critical role of self-attention. Similar to the development of most deep learning models, the construction of these attention mechanisms relies on heuristics and experience. In our work, we derive self-attention from kernel principal component analysis (kernel PCA) and show that self-attention projects its query vectors onto the principal component axes of its key matrix in a feature space. We then formulate the exact formula for the value matrix in self-attention, theoretically and empirically demonstrating that this value matrix captures the eigenvectors of the Gram matrix of the key vectors in self-attention. Leveraging our kernel PCA framework, we propose Attention with Robust Principal Components (RPC-Attention), a novel class of robust attention that is resilient to data contamination. We empirically demonstrate the advantages of RPC-Attention over softmax attention on the ImageNet-1K object classification, WikiText-103 language modeling, and ADE20K image segmentation tasks.]]></description>
<dc:subject>papers to-read transformers large-language-models kernel-methods</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:150df429d4fb/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:transformers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:large-language-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:kernel-methods"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://openreview.net/forum?id=FGTDe6EA0B&amp;referrer=%5Bthe%20profile%20of%20Jon%20Kleinberg%5D(%2Fprofile%3Fid%3D~Jon_Kleinberg3)">
    <title>Language Generation in the Limit | OpenReview</title>
    <dc:date>2026-04-24T20:43:14+00:00</dc:date>
    <link>https://openreview.net/forum?id=FGTDe6EA0B&amp;referrer=%5Bthe%20profile%20of%20Jon%20Kleinberg%5D(%2Fprofile%3Fid%3D~Jon_Kleinberg3)</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Although current large language models are complex, the most basic specifications of the underlying language generation problem itself are simple to state: given a finite set of training samples from an unknown language, produce valid new strings from the language that don't already appear in the training data. Here we ask what we can conclude about language generation using only this specification, without further assumptions. In particular, suppose that an adversary enumerates the strings of an unknown target language L that is known only to come from one of a possibly infinite list of candidates. A computational agent is trying to learn to generate from this language; we say that the agent generates from L in the limit if after some finite point in the enumeration of L, the agent is able to produce new elements that come exclusively from L and that have not yet been presented by the adversary. Our main result is that there is an agent that is able to generate in the limit for every countable list of candidate languages. This contrasts dramatically with negative results due to Gold and Angluin in a well-studied model of language learning where the goal is to identify an unknown language from samples; the difference between these results suggests that identifying a language is a fundamentally different problem than generating from it.]]></description>
<dc:subject>papers to-read heard-the-talk language generative-models computer-science</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:cf4b581b7488/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:heard-the-talk"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:language"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:generative-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:computer-science"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.cambridge.org/core/journals/philosophy-of-science/article/standards-for-modest-bayesian-credences/6FE3D870363912FE2EE5596E734D4B0B">
    <title>Standards for Modest Bayesian Credences | Philosophy of Science | Cambridge Core</title>
    <dc:date>2026-04-24T15:58:54+00:00</dc:date>
    <link>https://www.cambridge.org/core/journals/philosophy-of-science/article/standards-for-modest-bayesian-credences/6FE3D870363912FE2EE5596E734D4B0B</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Gordon Belot argues that Bayesian theory is epistemologically immodest. In response, we show that the topological conditions that underpin his criticisms of asymptotic Bayesian conditioning are self-defeating. They require extreme a priori credences regarding, for example, the limiting behavior of observed relative frequencies. We offer a different explication of Bayesian modesty using a goal of consensus: rival scientific opinions should be responsive to new facts as a way to resolve their disputes. Also we address Adam Elga’s rebuttal to Belot’s analysis, which focuses attention on the role that the assumption of countable additivity plays in Belot’s criticisms.]]></description>
<dc:subject>papers to-read philosophy-of-probability epistemology bayesianism probabilism</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:a7a9d7acc836/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:philosophy-of-probability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:epistemology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:bayesianism"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:probabilism"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2306.00802">
    <title>[2306.00802] Birth of a Transformer: A Memory Viewpoint</title>
    <dc:date>2026-04-24T15:07:11+00:00</dc:date>
    <link>https://arxiv.org/abs/2306.00802</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Large language models based on transformers have achieved great empirical successes. However, as they are deployed more widely, there is a growing need to better understand their internal mechanisms in order to make them more reliable. These models appear to store vast amounts of knowledge from their training data, and to adapt quickly to new information provided in their context or prompt. We study how transformers balance these two types of knowledge by considering a synthetic setup where tokens are generated from either global or context-specific bigram distributions. By a careful empirical analysis of the training process on a simplified two-layer transformer, we illustrate the fast learning of global bigrams and the slower development of an "induction head" mechanism for the in-context bigrams. We highlight the role of weight matrices as associative memories, provide theoretical insights on how gradients enable their learning during training, and study the role of data-distributional properties. ]]></description>
<dc:subject>papers to-read transformers large-language-models</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:d133c450f867/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:transformers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:large-language-models"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.sciencedirect.com/science/article/pii/S0167691123001950?via%3Dihub">
    <title>Attaining the Chebyshev bound for optimal learning: A numerical algorithm - ScienceDirect</title>
    <dc:date>2026-04-23T02:52:00+00:00</dc:date>
    <link>https://www.sciencedirect.com/science/article/pii/S0167691123001950?via%3Dihub</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Given a compact subset of a Banach space, the Chebyshev center problem consists of finding a minimal circumscribing ball containing the set. In this article we establish a numerically tractable algorithm for solving the Chebyshev center problem in the context of optimal learning from a finite set of data points. For a hypothesis space realized as a compact but not necessarily convex subset of a finite-dimensional subspace of some underlying Banach space, this algorithm computes the Chebyshev radius and the Chebyshev center of the hypothesis space, thereby solving the problem of optimal recovery of functions from data. The algorithm itself is based on, and significantly extends, recent results for near-optimal solutions of convex semi-infinite problems by means of targeted sampling, and it is of independent interest. Several examples of numerical computations of Chebyshev centers are included in order to illustrate the effectiveness of the algorithm.]]></description>
<dc:subject>papers to-read optimal-recovery interpolation numerical-methods optimization</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:f58628d99e17/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:optimal-recovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:interpolation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:numerical-methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:optimization"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.cambridge.org/core/journals/economics-and-philosophy/article/abstraction-as-flexibility-the-veil-of-evaluative-uncertainty/FA12D9D5BB3783F8EACF396DFE4B93EB">
    <title>Abstraction as flexibility: the veil of evaluative uncertainty | Economics &amp; Philosophy | Cambridge Core</title>
    <dc:date>2026-04-22T00:51:02+00:00</dc:date>
    <link>https://www.cambridge.org/core/journals/economics-and-philosophy/article/abstraction-as-flexibility-the-veil-of-evaluative-uncertainty/FA12D9D5BB3783F8EACF396DFE4B93EB</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[I argue that evaluative uncertainty gives rational agents instrumental reasons to abstract from some of their salient preferences when bargaining about social institutions. Because agents cannot assume stability in their future evaluative outlook, it is rational to favour rules that preserve options that may become salient. Building on Kreps (1979), I show how flexibility-driven abstraction expands the bargaining set, enabling convergence on rules while preserving motivational continuity. Since options are endogenous, bargainers also have reason to deliberate about option-generating and option-filtering meta-rules that structure the emergence, appraisal and revision of options over time.]]></description>
<dc:subject>papers to-read abstractions rationality decision-theory game-theory economics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:31b1026460d0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:abstractions"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:rationality"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:decision-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:game-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:economics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.cambridge.org/core/journals/glasgow-mathematical-journal/article/linear-pde-with-constant-coefficients/E406F1D5A27DC4B4414B8A3DF3A225F6">
    <title>Linear PDE with constant coefficients | Glasgow Mathematical Journal | Cambridge Core</title>
    <dc:date>2026-04-22T00:50:12+00:00</dc:date>
    <link>https://www.cambridge.org/core/journals/glasgow-mathematical-journal/article/linear-pde-with-constant-coefficients/E406F1D5A27DC4B4414B8A3DF3A225F6</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[We discuss practical methods for computing the space of solutions to an arbitrary homogeneous linear system of partial differential equations with constant coefficients. These rest on the Fundamental Principle of Ehrenpreis–Palamodov from the 1960s. We develop this further using recent advances in computational commutative algebra.]]></description>
<dc:subject>papers to-read PDEs differential-algebra polynomials commutative-algebra</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:3447f4cbf0c1/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:PDEs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:differential-algebra"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:polynomials"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:commutative-algebra"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.cell.com/trends/cognitive-sciences/fulltext/S1364-6613(26)00052-5">
    <title>Dependency syntax as the simplest theory of grammar: Trends in Cognitive Sciences</title>
    <dc:date>2026-04-20T18:04:17+00:00</dc:date>
    <link>https://www.cell.com/trends/cognitive-sciences/fulltext/S1364-6613(26)00052-5</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[The syntax of human languages has long been argued to be complex and even unlearnable from the input alone. However, the success of large language models (LLMs) has challenged this idea. I argue for a simple view of syntax, where the syntax of a language is just the set of dependency rules, with no phrase structure or transformation rules—constructs central to Chomsky’s transformational grammar. This approach accounts for diverse phenomena in human language processing and explains crosslinguistic word order universals. Moreover, it better explains human data for cases that differentiate these accounts and eliminates the syntax learnability problem. I speculate that LLMs, similar to children, learn the dependency grammar from linguistic patterns, leading to their impressive syntactic competence.]]></description>
<dc:subject>papers to-read language learning grammar syntax</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:710cc64ef785/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:language"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:grammar"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:syntax"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.fabricatedknowledge.com/p/mythos-and-engels-pause?fbclid=IwY2xjawRPrTNleHRuA2FlbQIxMQBicmlkETE4eDlndjllUld2cjlBTkFzc3J0YwZhcHBfaWQQMjIyMDM5MTc4ODIwMDg5MgABHsMwf89zs3VKJoXCiZg-1RshLfpVCcwbEgNOrrL2erKG5FX6sPEz6qE5axUQ_aem_RVUflDX1Xm89FsiXcKYlDQ">
    <title>Engels' Pause and the Permanent Underclass</title>
    <dc:date>2026-04-19T01:49:23+00:00</dc:date>
    <link>https://www.fabricatedknowledge.com/p/mythos-and-engels-pause?fbclid=IwY2xjawRPrTNleHRuA2FlbQIxMQBicmlkETE4eDlndjllUld2cjlBTkFzc3J0YwZhcHBfaWQQMjIyMDM5MTc4ODIwMDg5MgABHsMwf89zs3VKJoXCiZg-1RshLfpVCcwbEgNOrrL2erKG5FX6sPEz6qE5axUQ_aem_RVUflDX1Xm89FsiXcKYlDQ</link>
    <dc:creator>mraginsky</dc:creator><dc:subject>blogs to-read ai security computation economics technology</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:4b2e91f0df03/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:blogs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:security"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:computation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:economics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:technology"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://thinkingmachines.ai/blog/defeating-nondeterminism-in-llm-inference/">
    <title>Defeating Nondeterminism in LLM Inference - Thinking Machines Lab</title>
    <dc:date>2026-04-18T22:13:14+00:00</dc:date>
    <link>https://thinkingmachines.ai/blog/defeating-nondeterminism-in-llm-inference/</link>
    <dc:creator>mraginsky</dc:creator><dc:subject>to-read ai machine-learning determinism large-language-models computation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:24aaffeb64a7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:determinism"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:large-language-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:computation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://archive.is/DKKXJ">
    <title>AI Can’t Replace Free Markets - WSJ</title>
    <dc:date>2026-04-18T22:11:27+00:00</dc:date>
    <link>https://archive.is/DKKXJ</link>
    <dc:creator>mraginsky</dc:creator><dc:subject>to-read ai economics decentralized-control complexity</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:80d5ce52266b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:economics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:decentralized-control"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:complexity"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.thephilosopher1923.org/post/artificial-bodies-and-the-promise-of-abstraction">
    <title>&quot;Artificial Bodies and the Promise of Abstraction&quot;: a conversation with Peter Wolfendale (Keywords: Philosophy of Mind; Phenomenology; Embodiment)</title>
    <dc:date>2026-04-18T21:20:47+00:00</dc:date>
    <link>https://www.thephilosopher1923.org/post/artificial-bodies-and-the-promise-of-abstraction</link>
    <dc:creator>mraginsky</dc:creator><dc:subject>to-read philosophy cognitive-science embodiment ai</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:aa3cab335182/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:philosophy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:cognitive-science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:embodiment"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:ai"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://hbr.org/2026/02/ai-doesnt-reduce-work-it-intensifies-it">
    <title>AI Doesn’t Reduce Work—It Intensifies It</title>
    <dc:date>2026-04-18T21:20:15+00:00</dc:date>
    <link>https://hbr.org/2026/02/ai-doesnt-reduce-work-it-intensifies-it</link>
    <dc:creator>mraginsky</dc:creator><dc:subject>to-read ai economics labor capital productivity</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:f56f3e30c0cd/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:economics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:labor"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:capital"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:productivity"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.cambridge.org/core/journals/economics-and-philosophy/article/market-as-a-creative-process/4A62A60230535D4A8553A7C26A184F56">
    <title>The Market as a Creative Process | Economics &amp; Philosophy | Cambridge Core</title>
    <dc:date>2026-04-18T21:19:33+00:00</dc:date>
    <link>https://www.cambridge.org/core/journals/economics-and-philosophy/article/market-as-a-creative-process/4A62A60230535D4A8553A7C26A184F56</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Contributions in modern theoretical physics and chemistry on the behavior of nonlinear systems, exemplified by Ilya Prigogine's work on the thermodynamics of open systems (Prigogine and Stengers, 1984), attract growing attention in economics (Anderson, Arrow, and Pines, 1988; Arthur, 1990; Baumol and Benhabib, 1989; Mirowski, 1990; Radzicki, 1990). Our purpose here is to relate the new orientation in the natural sciences to a particular nonorthodox strand of thought within economics. All that is needed for this purpose is some appreciation of the general thrust of the enterprise, which involves a shift of perspective from the determinism of conventional physics (which presumably inspired the neoclassical research program in economics) to the nonteleological open-endedness, creative, and nondetermined nature of evolutionary processes.]]></description>
<dc:subject>papers to-read economics dynamical-systems complex-systems process-philosophy evolution</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:7f64a7342065/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:economics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:dynamical-systems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:complex-systems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:process-philosophy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:evolution"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://newlinesinstitute.org/tech-econ-sov-sec/making-machine-learning-safe-for-the-world/">
    <title>Making Machine Learning Safe for the World  - New Lines Institute</title>
    <dc:date>2026-04-15T19:44:51+00:00</dc:date>
    <link>https://newlinesinstitute.org/tech-econ-sov-sec/making-machine-learning-safe-for-the-world/</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[The productive impacts of state-of-the-art machine learning (ML) have been, and will continue to be, sharply limited given the difficulty or inability to integrate it with safety-critical applications – those in which failures of a system during operation would cause harm to individuals, the public, or the environment. Safety-critical domains are the nexus at which problems that plague state-of-the-art ML in lower-stakes domains meet, revolving principally around matters of reliability, human interpretability, and the ability to intervene in the system’s internal mechanisms during operation. That general-purpose ML models today cannot meet the bar for their adoption in safety-critical domains contributes to a sense that they are ever-present with only modest social and economic impacts.

ML, having compelled the world to accommodate it in its most versatile forms, must change if it is to succeed in the most sensitive domains of application. Such changes should follow in the lineage of the most impactful engineering marvels of recent history by providing guarantees on performance in safety-critical domains, minimizing the probability of harmful outputs and reducing their severity.]]></description>
<dc:subject>to-read machine-learning ai economics policy technology security</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:7a98e5d9333b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:economics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:policy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:technology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:security"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2604.06107">
    <title>[2604.06107] Artificial Intelligence and the Structure of Mathematics</title>
    <dc:date>2026-04-12T16:10:36+00:00</dc:date>
    <link>https://arxiv.org/abs/2604.06107</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Recent progress in artificial intelligence (AI) is unlocking transformative capabilities for mathematics. There is great hope that AI will help solve major open problems and autonomously discover new mathematical concepts. In this essay, we further consider how AI may open a grand perspective on mathematics by forging a new route, complementary to mathematical logic, to understanding the global structure of formal proofs. We begin by providing a sketch of the formal structure of mathematics in terms of universal proof and structural hypergraphs and discuss questions this raises about the foundational structure of mathematics. We then outline the main ingredients and provide a set of criteria to be satisfied for AI models capable of automated mathematical discovery. As we send AI agents to traverse Platonic mathematical worlds, we expect they will teach us about the nature of mathematics: both as a whole, and the small ribbons conducive to human understanding. Perhaps they will shed light on the old question: "Is mathematics discovered or invented?" Can we grok the terrain of these Platonic worlds? ]]></description>
<dc:subject>papers to-read mathematics ai philosophy</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:2445d9d7f6fd/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:mathematics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:philosophy"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://jmlr.org/papers/v16/santhanam15a.html">
    <title>Agnostic Insurability of Model Classes</title>
    <dc:date>2026-04-01T15:00:12+00:00</dc:date>
    <link>https://jmlr.org/papers/v16/santhanam15a.html</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Motivated by problems in insurance, our task is to predict finite upper bounds on a future draw from an unknown distribution p over natural numbers. We can only use past observations generated independently and identically distributed according to p. While p is unknown, it is known to belong to a given collection P of probability distributions on the natural numbers.

The support of the distributions p∈P may be unbounded, and the prediction game goes on for infinitely many draws. We are allowed to make observations without predicting upper bounds for some time. But we must, with probability 1, start and then continue to predict upper bounds after a finite time irrespective of which p∈P governs the data.

If it is possible, without knowledge of p and for any prescribed confidence however close to 1, to come up with a sequence of upper bounds that is never violated over an infinite time window with confidence at least as big as prescribed, we say the model class P is insurable. We completely characterize the insurability of any class P of distributions over natural numbers by means of a condition on how the neighborhoods of distributions in P should be, one that is both necessary and sufficient. ]]></description>
<dc:subject>papers to-read machine-learning universal-prediction</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:02e7d901124d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:universal-prediction"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://direct.mit.edu/neco/article/35/3/277/112379/Feelings-Are-the-Source-of-Consciousness">
    <title>Feelings Are the Source of Consciousness | Neural Computation | MIT Press</title>
    <dc:date>2026-03-06T15:38:38+00:00</dc:date>
    <link>https://direct.mit.edu/neco/article/35/3/277/112379/Feelings-Are-the-Source-of-Consciousness</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[In this view, we address the problem of consciousness, and although we focus on its human presentation, we note that the phenomenon is present in numerous nonhuman species and use findings from a variety of animal studies to explain our hypothesis for how consciousness is made.

Consciousness occurs when mind contents, such as perceptions and thoughts, are spontaneously identified as belonging to a specific organism/owner. Conscious minds are said to have a self that experiences mental events. We hypothesize that the automatic identification that associates minds and organisms is provided by a continuous flow of homeostatic feelings. Those feelings arise from the uninterrupted process of life regulation and correspond to both salient physiological fluctuations such as hunger, pain, well-being, or malaise, as well as to states closer to metabolic equilibrium and best described as feelings of life/existence, such as breathing or body temperature. We also hypothesize that homeostatic feelings were the inaugural phenomena of consciousness in biological evolution and venture that they were selected because the information they provided regarding the current state of life regulation conferred extraordinary advantages to the organisms so endowed. The “knowledge” carried by conscious homeostatic feelings provided “overt” guidance for life regulation, an advance over the covert regulation present in nonconscious organisms. Finally, we outline a mechanism for the generation of feelings based on a two-way interaction between interoceptive components of the nervous system and a particular set of nonneural components of the organism's interior, namely, viscera and circulating chemical molecules involved in their operations. Feelings emerge from this interaction as continuous and hybrid phenomena, related simultaneously to two series of events. The first is best described by the terms neural/representational/and mental and the second by the terms nonneural/visceral/and chemical. We note that this account offers a solution for the mind-body problem: homeostatic feelings constitute the “mental” version of bodily processes.]]></description>
<dc:subject>papers to-read neuroscience cognition</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:6ba7c1caaa70/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:neuroscience"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:cognition"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2104.01489">
    <title>[2104.01489] Explanatory models in neuroscience: Part 2 -- constraint-based intelligibility</title>
    <dc:date>2026-03-04T22:42:22+00:00</dc:date>
    <link>https://arxiv.org/abs/2104.01489</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Computational modeling plays an increasingly important role in neuroscience, highlighting the philosophical question of how computational models explain. In the context of neural network models for neuroscience, concerns have been raised about model intelligibility, and how they relate (if at all) to what is found in the brain. We claim that what makes a system intelligible is an understanding of the dependencies between its behavior and the factors that are causally responsible for that behavior. In biological systems, many of these dependencies are naturally "top-down": ethological imperatives interact with evolutionary and developmental constraints under natural selection. We describe how the optimization techniques used to construct NN models capture some key aspects of these dependencies, and thus help explain why brain systems are as they are -- because when a challenging ecologically-relevant goal is shared by a NN and the brain, it places tight constraints on the possible mechanisms exhibited in both kinds of systems. By combining two familiar modes of explanation -- one based on bottom-up mechanism (whose relation to neural network models we address in a companion paper) and the other on top-down constraints, these models illuminate brain function. ]]></description>
<dc:subject>papers to-read neuroscience</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:a5a0c4e93864/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:neuroscience"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2104.01490">
    <title>[2104.01490] Explanatory models in neuroscience: Part 1 -- taking mechanistic abstraction seriously</title>
    <dc:date>2026-03-04T22:41:58+00:00</dc:date>
    <link>https://arxiv.org/abs/2104.01490</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Despite the recent success of neural network models in mimicking animal performance on visual perceptual tasks, critics worry that these models fail to illuminate brain function. We take it that a central approach to explanation in systems neuroscience is that of mechanistic modeling, where understanding the system is taken to require fleshing out the parts, organization, and activities of a system, and how those give rise to behaviors of interest. However, it remains somewhat controversial what it means for a model to describe a mechanism, and whether neural network models qualify as explanatory.
We argue that certain kinds of neural network models are actually good examples of mechanistic models, when the right notion of mechanistic mapping is deployed. Building on existing work on model-to-mechanism mapping (3M), we describe criteria delineating such a notion, which we call 3M++. These criteria require us, first, to identify a level of description that is both abstract but detailed enough to be "runnable", and then, to construct model-to-brain mappings using the same principles as those employed for brain-to-brain mapping across individuals. Perhaps surprisingly, the abstractions required are those already in use in experimental neuroscience, and are of the kind deployed in the construction of more familiar computational models, just as the principles of inter-brain mappings are very much in the spirit of those already employed in the collection and analysis of data across animals.
In a companion paper, we address the relationship between optimization and intelligibility, in the context of functional evolutionary explanations. Taken together, mechanistic interpretations of computational models and the dependencies between form and function illuminated by optimization processes can help us to understand why brain systems are built the way they are. ]]></description>
<dc:subject>papers to-read neuroscience</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:a4cbe2094f06/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:neuroscience"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2602.23268">
    <title>[2602.23268] The selfish ribosome</title>
    <dc:date>2026-03-04T19:58:55+00:00</dc:date>
    <link>https://arxiv.org/abs/2602.23268</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[The ribosome is responsible for protein synthesis in all cells, and is the largest energy consumer in the cell. We propose that the ribosome originated as a mutualistic symbiont of an RNA-dependent RNA polymerase ribozyme, supplying peptides that enhanced replication. As life transitioned from the RNA to the RNA-protein world, autonomous replicators became irreversibly addicted to the ribosome for producing replication proteins. Subsequent evolution is construed as a ribosomal takeover, whereby the ribosome evolved to consume most of the resources of the cell, while other cellular componentry ensured the propagation of the ribosome. Under this perspective, the ribosome is the ultimate biological selfish element. ]]></description>
<dc:subject>papers to-read biology</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:458e18545cd4/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:biology"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2602.24083">
    <title>[2602.24083] Neural Diffusion Intensity Models for Point Process Data</title>
    <dc:date>2026-03-03T17:42:33+00:00</dc:date>
    <link>https://arxiv.org/abs/2602.24083</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Cox processes model overdispersed point process data via a latent stochastic intensity, but both nonparametric estimation of the intensity model and posterior inference over intensity paths are typically intractable, relying on expensive MCMC methods. We introduce Neural Diffusion Intensity Models, a variational framework for Cox processes driven by neural SDEs. Our key theoretical result, based on enlargement of filtrations, shows that conditioning on point process observations preserves the diffusion structure of the latent intensity with an explicit drift correction. This guarantees the variational family contains the true posterior, so that ELBO maximization coincides with maximum likelihood estimation under sufficient model capacity. We design an amortized encoder architecture that maps variable-length event sequences to posterior intensity paths by simulating the drift-corrected SDE, replacing repeated MCMC runs with a single forward pass. Experiments on synthetic and real-world data demonstrate accurate recovery of latent intensity dynamics and posterior paths, with orders-of-magnitude speedups over MCMC-based methods. ]]></description>
<dc:subject>papers to-read diffusions point-processes generative-models neural-networks</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:1b6c3bd2bc9c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:diffusions"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:point-processes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:generative-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:neural-networks"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://economics.mit.edu/sites/default/files/2026-02/AI%2C%20Human%20Cognition%20and%20Knowledge%20Collapse%2002-20-26.pdf">
    <title>AI, Human Cognition and Knowledge Collapse (Daron Acemoglu, Dingwen Kong, Asuman Ozdaglar)</title>
    <dc:date>2026-03-03T15:19:16+00:00</dc:date>
    <link>https://economics.mit.edu/sites/default/files/2026-02/AI%2C%20Human%20Cognition%20and%20Knowledge%20Collapse%2002-20-26.pdf</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA["We study how generative AI, and in particular agentic AI, shapes human learning incentives and the long-run evolution of society’s information ecosystem. We build a dynamic model of learning and decision-making in which successful decisions require combining shared, community-level general knowledge with individual-level, context-specific knowledge; these two inputs are complements. Learning exhibits economies of scope: costly human effort jointly produces a private signal about their own context and a “thin” public signal that accumulates into the community’s stock of general knowledge, generating a learning externality. Agentic AI delivers context-specific recommendations that substitute for human effort. By contrast, a richer stock of general knowledge complements human effort by raising its marginal return. The model highlights a sharp dynamic tension: while agentic AI can improve contemporaneous decision quality, it can also erode learning incentives that sustain long-run collective knowledge. When human effort is sufficiently elastic and agentic recommendations exceed an accuracy threshold, the economy can tip into a knowledge-collapse steady state in which general knowledge vanishes ultimately, despite high-quality personalized advice. Welfare is generally non-monotone in agentic accuracy, implying an interior, welfare-maximizing level of agentic precision and motivating information-design regulations. In contrast, greater aggregation capacity for general knowledge—meaning more effective sharing and pooling of human-generated general knowledge—unambiguously raises welfare and increases resilience to knowledge collapse."

Good critique by Carlo Ludovico Cordasco here: https://carlolc.substack.com/p/acemoglu-et-al-2026-are-wrong-about

My own 2c: Need to read the paper carefully, but the main issue seems to be that the model is formulated in a "small world" (in the sense of Savage) of fixed and known forms of knowledge, abilities, and preferences. This is Cordasco's critique as well.]]></description>
<dc:subject>papers to-read economics decision-making ai</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:ec1b7439f4e7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:economics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:decision-making"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:ai"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://link.springer.com/article/10.1023/B:JOSS.0000033245.43421.14">
    <title>Feynman's Ratchet and Pawl | Journal of Statistical Physics | Springer Nature Link</title>
    <dc:date>2026-03-02T01:40:25+00:00</dc:date>
    <link>https://link.springer.com/article/10.1023/B:JOSS.0000033245.43421.14</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[While many papers in the last few years have dealt with various equations euphemistically called “ratchets,” the original Feynman two-temperature setup has been left largely unchallenged. We present here a look at the details of how this famous engine actually generates motion from a temperature difference.]]></description>
<dc:subject>papers to-read thermodynamics statistical-physics stochastic-thermodynamics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:4410250fda4b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:thermodynamics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:statistical-physics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:stochastic-thermodynamics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2602.22271">
    <title>[2602.22271] Support Tokens, Stability Margins, and a New Foundation for Robust LLMs</title>
    <dc:date>2026-02-27T03:36:04+00:00</dc:date>
    <link>https://arxiv.org/abs/2602.22271</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Self-attention is usually described as a flexible, content-adaptive way to mix a token with information from its past. We re-interpret causal self-attention transformers, the backbone of modern foundation models, within a probabilistic framework, much like how classical PCA is extended to probabilistic PCA. However, this re-formulation reveals a surprising and deeper structural insight: due to a change-of-variables phenomenon, a barrier constraint emerges on the self-attention parameters. This induces a highly structured geometry on the token space, providing theoretical insights into the dynamics of LLM decoding. This reveals a boundary where attention becomes ill-conditioned, leading to a margin interpretation similar to classical support vector machines. Just like support vectors, this naturally gives rise to the concept of support tokens.
Furthermore, we show that LLMs can be interpreted as a stochastic process over the power set of the token space, providing a rigorous probabilistic framework for sequence modeling. We propose a Bayesian framework and derive a MAP estimation objective that requires only a minimal modification to standard LLM training: the addition of a smooth log-barrier penalty to the usual cross-entropy loss. We demonstrate that this provides more robust models without sacrificing out-of-sample accuracy and that it is straightforward to incorporate in practice. ]]></description>
<dc:subject>papers to-read large-language-models transformers stochastic-processes</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:863fccab184b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:large-language-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:transformers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:stochastic-processes"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.thelancet.com/journals/lancet/article/PIIS0140-6736(11)60563-1/fulltext?wptouch_preview_theme=enabled">
    <title>A philosopher's view of the long road from RCTs to effectiveness - The Lancet</title>
    <dc:date>2026-01-09T15:29:39+00:00</dc:date>
    <link>https://www.thelancet.com/journals/lancet/article/PIIS0140-6736(11)60563-1/fulltext?wptouch_preview_theme=enabled</link>
    <dc:creator>mraginsky</dc:creator><dc:subject>papers to-read philosophy-of-science statistics policy</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:7fff32228f1f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:philosophy-of-science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:policy"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://link.springer.com/article/10.1007/s11084-016-9494-1">
    <title>The Logic of Life | Discover Life</title>
    <dc:date>2026-01-08T21:32:48+00:00</dc:date>
    <link>https://link.springer.com/article/10.1007/s11084-016-9494-1</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[In this paper we propose a logical connection between the physical and biological worlds, one resting on a broader understanding of the stability concept. We propose that stability manifests two facets - time and energy, and that stability’s time facet, expressed as persistence, is more general than its energy facet. That insight leads to the logical formulation of the Persistence Principle, which describes the general direction of material change in the universe, and which can be stated most simply as: nature seeks persistent forms. Significantly, the principle is found to express itself in two mathematically distinct ways: in the replicative world through Malthusian exponential growth, and in the ‘regular’ physical/chemical world through Boltzmann’s probabilistic considerations. By encompassing both ‘regular’ and replicative worlds, the principle appears to be able to help reconcile two of the major scientific theories of the 19th century – the Second Law of Thermodynamics and Darwin’s theory of evolution – within a single conceptual framework.]]></description>
<dc:subject>papers to-read biology evolution physics thermodynamics statistical-physics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:2248c2f5d970/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:biology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:evolution"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:physics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:thermodynamics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:statistical-physics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2512.24999">
    <title>[2512.24999] Basic Inequalities for First-Order Optimization with Applications to Statistical Risk Analysis</title>
    <dc:date>2026-01-06T17:04:39+00:00</dc:date>
    <link>https://arxiv.org/abs/2512.24999</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[We introduce \textit{basic inequalities} for first-order iterative optimization algorithms, forming a simple and versatile framework that connects implicit and explicit regularization. While related inequalities appear in the literature, we isolate and highlight a specific form and develop it as a well-rounded tool for statistical analysis. Let $f$ denote the objective function to be optimized. Given a first-order iterative algorithm initialized at $\theta_0$ with current iterate $\theta_T$, the basic inequality upper bounds $f(\theta_T)-f(z)$ for any reference point $z$ in terms of the accumulated step sizes and the distances between $\theta_0$, $\theta_T$, and $z$. The bound translates the number of iterations into an effective regularization coefficient in the loss function. We demonstrate this framework through analyses of training dynamics and prediction risk bounds. In addition to revisiting and refining known results on gradient descent, we provide new results for mirror descent with Bregman divergence projection, for generalized linear models trained by gradient descent and exponentiated gradient descent, and for randomized predictors. We illustrate and supplement these theoretical findings with experiments on generalized linear models. ]]></description>
<dc:subject>papers to-read statistics statistical-learning optimization</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:5ee3b8230fe6/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:statistical-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:optimization"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2512.24945">
    <title>[2512.24945] Dynamic response phenotypes and model discrimination in systems and synthetic biology</title>
    <dc:date>2026-01-06T03:50:39+00:00</dc:date>
    <link>https://arxiv.org/abs/2512.24945</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Biological systems encode function not primarily in steady states, but in the structure of transient responses elicited by time-varying stimuli. Overshoots, biphasic dynamics, adaptation kinetics, fold-change detection, entrainment, and cumulative exposure effects often determine phenotypic outcomes, yet are poorly captured by classical steady-state or dose-response analyses. This paper develops an input-output perspective on such "dynamic phenotypes," emphasizing how qualitative features of transient behavior constrain underlying network architectures independently of detailed parameter values. A central theme is the role of sign structure and interconnection logic, particularly the contrast between monotone systems and architectures containing antagonistic pathways. We show how incoherent feedforward (IFF) motifs provide a simple and recurrent mechanism for generating non-monotonic and adaptive responses across multiple levels of biological organization, from molecular signaling to immune regulation and population dynamics. Conversely, monotonicity imposes sharp impossibility results that can be used to falsify entire classes of models from transient data alone. Beyond step inputs, we highlight how periodic forcing, ramps, and integral-type readouts such as cumulative dose responses offer powerful experimental probes that reveal otherwise hidden structure, separate competing motifs, and expose invariances such as fold-change detection. Throughout, we illustrate how control-theoretic concepts, including monotonicity, equivariance, and input-output analysis, can be used not as engineering metaphors, but as precise mathematical tools for biological model discrimination. Thus we argue for a shift in emphasis from asymptotic behavior to transient and input-driven dynamics as a primary lens for understanding, testing, and reverse-engineering biological networks. ]]></description>
<dc:subject>papers to-read systems-biology control-theory dynamical-systems</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:dbf668c42dc8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:systems-biology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:control-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:dynamical-systems"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://link.springer.com/article/10.1007/s00332-002-0506-0">
    <title>For Differential Equations with r Parameters, 2r+1 Experiments Are Enough for Identification | Journal of Nonlinear Science</title>
    <dc:date>2025-11-14T03:26:20+00:00</dc:date>
    <link>https://link.springer.com/article/10.1007/s00332-002-0506-0</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Given a set of differential equations whose description involves unknown parameters, such as reaction constants in chemical kinetics, and supposing that one may at any time measure the values of some of the variables and possibly choose external inputs to help excite the system, how many experiments are sufficient in order to obtain all the information that is potentially available about the parameters? This paper shows that the best possible answer (assuming exact measurements) is 2r+1 experiments, where r is the number of parameters. Moreover, a generic set of such experiments suffices. ]]></description>
<dc:subject>papers to-read dynamical-systems differential-equations system-identification</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:cb57577cf584/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:dynamical-systems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:differential-equations"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:system-identification"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.bostonreview.net/articles/how-to-lie-with-political-statistics/">
    <title>How to Lie with (Political) Statistics - Boston Review</title>
    <dc:date>2025-11-05T16:02:55+00:00</dc:date>
    <link>https://www.bostonreview.net/articles/how-to-lie-with-political-statistics/</link>
    <dc:creator>mraginsky</dc:creator><dc:subject>to-read statistics politics data-analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:de5bdbd84c17/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:politics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:data-analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2510.15511">
    <title>[2510.15511] Language Models are Injective and Hence Invertible</title>
    <dc:date>2025-11-05T16:01:19+00:00</dc:date>
    <link>https://arxiv.org/abs/2510.15511</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Transformer components such as non-linear activations and normalization are inherently non-injective, suggesting that different inputs could map to the same output and prevent exact recovery of the input from a model's representations. In this paper, we challenge this view. First, we prove mathematically that transformer language models mapping discrete input sequences to their corresponding sequence of continuous representations are injective and therefore lossless, a property established at initialization and preserved during training. Second, we confirm this result empirically through billions of collision tests on six state-of-the-art language models, and observe no collisions. Third, we operationalize injectivity: we introduce SipIt, the first algorithm that provably and efficiently reconstructs the exact input text from hidden activations, establishing linear-time guarantees and demonstrating exact invertibility in practice. Overall, our work establishes injectivity as a fundamental and exploitable property of language models, with direct implications for transparency, interpretability, and safe deployment. ]]></description>
<dc:subject>papers to-read large-language-models dynamical-systems neural-networks</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:b44b817515cc/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:large-language-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:dynamical-systems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:neural-networks"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2508.11990">
    <title>[2508.11990] Universal Learning of Nonlinear Dynamics</title>
    <dc:date>2025-10-22T16:19:25+00:00</dc:date>
    <link>https://arxiv.org/abs/2508.11990</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[We study the fundamental problem of learning a marginally stable unknown nonlinear dynamical system. We describe an algorithm for this problem, based on the technique of spectral filtering, which learns a mapping from past observations to the next based on a spectral representation of the system. Using techniques from online convex optimization, we prove vanishing prediction error for any nonlinear dynamical system that has finitely many marginally stable modes, with rates governed by a novel quantitative control-theoretic notion of learnability. The main technical component of our method is a new spectral filtering algorithm for linear dynamical systems, which incorporates past observations and applies to general noisy and marginally stable systems. This significantly generalizes the original spectral filtering algorithm to both asymmetric dynamics as well as incorporating noise correction, and is of independent interest. ]]></description>
<dc:subject>papers to-read system-identification control-theory learning-theory dynamical-systems</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:9164da4f209d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:system-identification"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:control-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:learning-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:dynamical-systems"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://link.springer.com/article/10.1007/s10450-006-9683-8">
    <title>The Symplectic Semigroup and Riccati Differential Equations | Journal of Dynamical and Control Systems</title>
    <dc:date>2025-10-15T15:41:07+00:00</dc:date>
    <link>https://link.springer.com/article/10.1007/s10450-006-9683-8</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[In this paper, we study close connections that exist between the Riccati operator (differential) equation that arises in linear control systems and the symplectic group and its subsemigroup of symplectic Hamiltonian operators. A canonical triple factorization is derived for the symplectic Hamiltonian operators, and their closure under multiplication is deduced from this property. This semigroup of Hamiltonian operators, which we call the symplectic semigroup, is studied from the viewpoint of Lie semigroup theory, and resulting consequences for the theory of the Riccati equation are delineated. Among other things, these developments provide an elementary proof for the existence of a solution of the Riccati equation for all t ≥ 0 under rather general hypotheses.]]></description>
<dc:subject>papers to-read dynamical-systems control-theory differential-equations symplectic-geometry</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:adfbeaa3393e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:dynamical-systems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:control-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:differential-equations"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:symplectic-geometry"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.sciencedirect.com/science/article/pii/0095895677900570">
    <title>Balancing games - ScienceDirect</title>
    <dc:date>2025-10-04T17:25:07+00:00</dc:date>
    <link>https://www.sciencedirect.com/science/article/pii/0095895677900570</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[We consider a variety of two person perfect information games of the following sort. On the $i$th round Player I selects a vector $v_i$ of a certain prescribed form and Player II either adds or subtracts $v_i$ from a cumulative sum. Player II's object is to keep the cumulative sum as small as possible. We give bounds on the value of such games under a variety of conditions.]]></description>
<dc:subject>papers to-read game-theory geometry optimization</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:d079fe4b582d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:game-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:geometry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:optimization"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2509.19601">
    <title>[2509.19601] Modular Machine Learning with Applications to Genetic Circuit Composition</title>
    <dc:date>2025-09-29T01:53:44+00:00</dc:date>
    <link>https://arxiv.org/abs/2509.19601</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[In several applications, including in synthetic biology, one often has input/output data on a system composed of many modules, and although the modules' input/output functions and signals may be unknown, knowledge of the composition architecture can significantly reduce the amount of training data required to learn the system's input/output mapping. Learning the modules' input/output functions is also necessary for designing new systems from different composition architectures. Here, we propose a modular learning framework, which incorporates prior knowledge of the system's compositional structure to (a) identify the composing modules' input/output functions from the system's input/output data and (b) achieve this by using a reduced amount of data compared to what would be required without knowledge of the compositional structure. To achieve this, we introduce the notion of modular identifiability, which allows recovery of modules' input/output functions from a subset of the system's input/output data, and provide theoretical guarantees on a class of systems motivated by genetic circuits. We demonstrate the theory on computational studies showing that a neural network (NNET) that accounts for the compositional structure can learn the composing modules' input/output functions and predict the system's output on inputs outside of the training set distribution. By contrast, a neural network that is agnostic of the structure is unable to predict on inputs that fall outside of the training set distribution. By reducing the need for experimental data and allowing module identification, this framework offers the potential to ease the design of synthetic biological circuits and of multi-module systems more generally. ]]></description>
<dc:subject>papers to-read systems-biology control-theory dynamical-systems</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:cd5e939dcbbb/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:systems-biology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:control-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:dynamical-systems"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://proceedings.mlr.press/v195/bosch23a.html">
    <title>Precise Asymptotic Analysis of Deep Random Feature Models</title>
    <dc:date>2025-09-25T14:13:27+00:00</dc:date>
    <link>https://proceedings.mlr.press/v195/bosch23a.html</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[We provide exact asymptotic expressions for the performance of regression by an L−layer deep random feature (RF) model, where the input is mapped through multiple random embedding and non-linear activation functions. For this purpose, we establish two key steps: First, we prove a novel universality result for RF models and deterministic data, by which we demonstrate that a deep random feature model is equivalent to a deep linear Gaussian model that matches it in the first and second moments, at each layer. Second, we make use of the convex Gaussian Min-Max theorem multiple times to obtain the exact behavior of deep RF models. We further characterize the variation of the eigendistribution in different layers of the equivalent Gaussian model, demonstrating that depth has a tangible effect on model performance despite the fact that only the last layer of the model is being trained. ]]></description>
<dc:subject>papers to-read machine-learning probability random-matrices</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:d2c36fdb39a5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:probability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:random-matrices"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2209.08832">
    <title>[2209.08832] From microscopic to macroscopic scale equations: mean field, hydrodynamic and graph limits</title>
    <dc:date>2025-09-25T13:36:29+00:00</dc:date>
    <link>https://arxiv.org/abs/2209.08832</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Considering finite particle systems, we elaborate on various ways to pass to the limit as the number of agents tends to infinity, either by mean field limit, deriving the Vlasov equation, or by hydrodynamic or graph limit, obtaining the Euler equation. We provide convergence estimates. We also show how to pass from Liouville to Vlasov or to Euler by taking adequate moments. Our results encompass and generalize a number of known results of the literature. As a surprising consequence of our analysis, we show that sufficiently regular solutions of any linear PDE can be approximated by solutions of systems of N particles, to within 1/log log(N). ]]></description>
<dc:subject>papers to-read statistical-physics dynamical-systems interacting-particle-systems</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:b2fd1757e956/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:statistical-physics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:dynamical-systems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:interacting-particle-systems"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.pnas.org/doi/full/10.1073/pnas.2502599122">
    <title>Asymptotic theory of in-context learning by linear attention | PNAS</title>
    <dc:date>2025-09-25T13:34:23+00:00</dc:date>
    <link>https://www.pnas.org/doi/full/10.1073/pnas.2502599122</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Transformers have a remarkable ability to learn and execute tasks based on examples provided within the input itself, without explicit prior training. It has been argued that this capability, known as in-context learning (ICL), is a cornerstone of Transformers’ success, yet questions about the necessary sample complexity, pretraining task diversity, and context length for successful ICL remain unresolved. Here, we provide a precise answer to these questions in an exactly solvable model of ICL of a linear regression task by linear attention. We derive sharp asymptotics for the learning curve in a phenomenologically rich scaling regime where the token dimension is taken to infinity; the context length and pretraining task diversity scale proportionally with the token dimension; and the number of pretraining examples scales quadratically. We demonstrate a double-descent learning curve with increasing pretraining examples, and uncover a phase transition in the model’s behavior between low and high task diversity regimes: in the low diversity regime, the model tends toward memorization of training tasks, whereas in the high diversity regime, it achieves genuine ICL and generalization beyond the scope of pretrained tasks. These theoretical insights are empirically validated through experiments with both linear attention and full nonlinear Transformer architectures.]]></description>
<dc:subject>papers to-read transformers machine-learning neural-networks</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:2ca76927ce32/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:transformers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:neural-networks"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2301.10414">
    <title>[2301.10414] Towards a Unification of Logic and Information Theory</title>
    <dc:date>2025-09-17T21:27:23+00:00</dc:date>
    <link>https://arxiv.org/abs/2301.10414</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Today, the vast majority of the world's digital information is represented using the fundamental assumption, introduced by Claude Shannon in 1948, that ``...the semantic aspects of communication are irrelevant to the engineering problem (of the design of communication systems)...''. Consider, nonetheless, the observation that we often combine a message with other information in order to deduce new facts, thereby expanding the value of such a message. It is noteworthy that to-date, no rigorous theory of communication has been put forth which postulates the existence of deductive capabilities on the receiver's side.
The purpose of this paper is to present such a theory. We formally model such deductive capabilities using logic reasoning, and present a rigorous theory which covers the following generic scenario: Alice and Bob each have knowledge of some logic sentence, and they wish to communicate as efficiently as possible with the shared goal that, following their communication, Bob should be able to deduce a particular logic sentence that Alice knows to be true, but that Bob currently cannot prove. Many variants of this general setup are considered in this article; in all cases we are able to provide sharp upper and lower bounds. Our contribution includes the identification of the most fundamental requirements that we place on a logic and associated logical language for all of our results to apply. Practical algorithms that are in some cases asymptotically optimal are provided, and we illustrate the potential practical value of the design of communication systems that incorporate the assumption of deductive capabilities at the receiver using experimental results that suggest significant possible gains compared to classical systems. ]]></description>
<dc:subject>papers to-read information-theory logic formal-methods</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:463dce340e59/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:information-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:logic"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:formal-methods"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://link.springer.com/article/10.1007/BF00212098">
    <title>Bergsonism in Russia: The case of Bakhtin | Neophilologus</title>
    <dc:date>2025-09-04T19:05:03+00:00</dc:date>
    <link>https://link.springer.com/article/10.1007/BF00212098</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[“Bergsonism in Russia: The Case of Bakhtin” examines an important question of Russian intellectual history and Bakhtin studies, namely Bergson's impact on Bakhtin, and traces similarities and differences between the two thinkers' views. Bergson's influence on the cultural and intellectual climate in Europe was profound. For a new generation of thinkers, he was a symbol of opposition to traditional philosophy, determinism, and mechanistic science. All major works by Bergson were translated into Russian by 1914 and widely read and discussed in intellectual and artistic circles. The Formalists, Osip Mandelstam, Nikolai Gumilev, Alexandr Voronskii, to name but a few Russian intellectuals, were interested in his philosophy, and so was Mikhail Bakhtin.

Although Bergson's influence on Bakhtin has been briefly mentioned in Michael Holquist's works on Bakhtin, no substantial research has appeared on this subject. This study establishes Bergson as a formative source of Bakhtin's thinking and provides a comparative analysis of their patterns of thought. Specifically, it discusses Bakhtin's and Bergson's theories of the self, their views on time and space, and their approaches to ethics. This paper not only traces Bakhtin's and Bergson's patterns of thought and Bakhtin's polemics against Bergson, it also places Bakhtin within a wider group of twentieth-century thinkers who attempted to create new approaches to knowledge.]]></description>
<dc:subject>papers to-read philosophy history_of_ideas via:_onionesque</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:f63e5d35035a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:philosophy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:history_of_ideas"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:via:_onionesque"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2508.05776">
    <title>[2508.05776] Whither symbols in the era of advanced neural networks?</title>
    <dc:date>2025-08-29T14:15:06+00:00</dc:date>
    <link>https://arxiv.org/abs/2508.05776</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[Some of the strongest evidence that human minds should be thought about in terms of symbolic systems has been the way they combine ideas, produce novelty, and learn quickly. We argue that modern neural networks -- and the artificial intelligence systems built upon them -- exhibit similar abilities. This undermines the argument that the cognitive processes and representations used by human minds are symbolic, although the fact that these neural networks are typically trained on data generated by symbolic systems illustrates that such systems play an important role in characterizing the abstract problems that human minds have to solve. This argument leads us to offer a new agenda for research on the symbolic basis of human thought. ]]></description>
<dc:subject>papers to-read ai large-language-models cognitive-science</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:a55868592f48/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:large-language-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:cognitive-science"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2010.11929">
    <title>[2010.11929] An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale</title>
    <dc:date>2025-08-24T17:45:28+00:00</dc:date>
    <link>https://arxiv.org/abs/2010.11929</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[While the Transformer architecture has become the de-facto standard for natural language processing tasks, its applications to computer vision remain limited. In vision, attention is either applied in conjunction with convolutional networks, or used to replace certain components of convolutional networks while keeping their overall structure in place. We show that this reliance on CNNs is not necessary and a pure transformer applied directly to sequences of image patches can perform very well on image classification tasks. When pre-trained on large amounts of data and transferred to multiple mid-sized or small image recognition benchmarks (ImageNet, CIFAR-100, VTAB, etc.), Vision Transformer (ViT) attains excellent results compared to state-of-the-art convolutional networks while requiring substantially fewer computational resources to train. ]]></description>
<dc:subject>papers to-read deep-learning computer-vision transformers neural-networks machine-learning</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:c7da9245f8ce/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:deep-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:computer-vision"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:transformers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:neural-networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:machine-learning"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://aeon.co/essays/the-sovereign-individual-and-the-paradox-of-the-digital-age">
    <title>The sovereign individual and the paradox of the digital age | Aeon Essays</title>
    <dc:date>2025-08-24T17:44:21+00:00</dc:date>
    <link>https://aeon.co/essays/the-sovereign-individual-and-the-paradox-of-the-digital-age</link>
    <dc:creator>mraginsky</dc:creator><dc:subject>to-read ai algorithms computation economics capital decision-making</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:31de9a14c266/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:computation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:economics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:capital"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:decision-making"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2302.10488">
    <title>[2302.10488] The informativity approach to data-driven analysis and control</title>
    <dc:date>2025-08-24T17:41:38+00:00</dc:date>
    <link>https://arxiv.org/abs/2302.10488</link>
    <dc:creator>mraginsky</dc:creator><description><![CDATA[The goal of this paper is to provide a tutorial on the so-called informativity framework for direct data-driven analysis and control. This framework achieves certified data-based analysis and control by assessing system properties and determining controllers for sets of systems unfalsified by the data. We will first introduce the informativity approach at an abstract level. Thereafter, we will report case studies where we highlight the strength of the framework in the context of various problems involving both noiseless and noisy data. In particular, we will treat controllability and stabilizability, and stabilization, linear quadratic regulation, and tracking and regulation using exact input-state measurements. Thereafter, we will treat dissipativity analysis, stabilization, and H_inf control using noisy input-state data. Finally, we will study dynamic measurement feedback stabilization using noisy input-output data. We will provide several examples to illustrate the approach. In addition, we will highlight the main tools underlying the framework, such as quadratic matrix inequalities in robust control and quadratic difference forms in behavioral systems theory. ]]></description>
<dc:subject>papers to-read control-theory behavioral-control data-driven-control</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:mraginsky/b:ca74e0e3ac7a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:control-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:behavioral-control"/>
	<rdf:li rdf:resource="https://pinboard.in/u:mraginsky/t:data-driven-control"/>
</rdf:Bag></taxo:topics>
</item>
</rdf:RDF>