<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (cshalizi)</title>
    <link>https://pinboard.in/u:cshalizi/public/</link>
    <description>recent bookmarks from cshalizi</description>
    <items>
      <rdf:Seq>	<rdf:li rdf:resource="https://arxiv.org/abs/2512.01819"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2510.16174"/>
	<rdf:li rdf:resource="https://www.propublica.org/article/inside-ai-tool-doge-veterans-affairs-contracts-sahil-lavingia"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2408.08823"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2203.05551"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2211.13000"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2402.07999"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2301.11562"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2206.07801"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2301.07015"/>
	<rdf:li rdf:resource="https://www.wired.com/story/welfare-state-algorithms/"/>
	<rdf:li rdf:resource="https://jmlr.org/papers/v23/21-1427.html"/>
	<rdf:li rdf:resource="https://www.tandfonline.com/doi/full/10.1080/01621459.2021.1979010"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2112.00329"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2206.01295"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2205.03009"/>
	<rdf:li rdf:resource="https://www.nytimes.com/2022/02/19/technology/qanon-messages-authors.html"/>
	<rdf:li rdf:resource="https://hci.stanford.edu/publications/2019/streetlevelalgorithms/streetlevelalgorithms-chi2019.pdf"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2003.08907"/>
	<rdf:li rdf:resource="http://proceedings.mlr.press/v139/kandiros21a.html"/>
	<rdf:li rdf:resource="https://www.nature.com/articles/s41386-021-01020-7"/>
	<rdf:li rdf:resource="https://doxa.substack.com/p/phrenology-insurance-claims-and-digital?token=eyJ1c2VyX2lkIjozMTk2MjUwOSwicG9zdF9pZCI6MzcxODg4MDYsIl8iOiI5RlR3eiIsImlhdCI6MTYyMjc3NzYxOSwiZXhwIjoxNjIyNzgxMjE5LCJpc3MiOiJwdWItMjM5NjUzIiwic3ViIjoicG9zdC1yZWFjdGlvbiJ9.r6AUl5BSCgCU4aMl6zL1Pt8xcC3cnNU5E7J6LDGlQbs"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2011.02407"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2105.08742"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2105.11490"/>
	<rdf:li rdf:resource="https://doi.org/10.1111/rssb.12425"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2105.07283"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2105.04648"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2104.12553"/>
	<rdf:li rdf:resource="https://www.annualreviews.org/doi/abs/10.1146/annurev-statistics-040720-022432"/>
	<rdf:li rdf:resource="https://www.sciencedirect.com/science/article/pii/S0047259X02000210"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2003.01908"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2101.11815"/>
	<rdf:li rdf:resource="https://projecteuclid.org/euclid.aos/1611889233"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2101.06309"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2012.15863"/>
	<rdf:li rdf:resource="https://www.washingtonpost.com/technology/2020/12/08/huawei-tested-ai-software-that-could-recognize-uighur-minorities-alert-police-report-says/"/>
	<rdf:li rdf:resource="https://projecteuclid.org/euclid.aos/1594972839"/>
	<rdf:li rdf:resource="https://projecteuclid.org/euclid.ejs/1601085759"/>
	<rdf:li rdf:resource="https://projecteuclid.org/euclid.ejs/1602900015"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2010.13993"/>
	<rdf:li rdf:resource="https://www.pnas.org/content/116/52/26459"/>
	<rdf:li rdf:resource="https://blog.piekniewski.info/2018/07/14/autopsy-dl-paper/"/>
	<rdf:li rdf:resource="https://www.nytimes.com/2020/06/24/technology/facial-recognition-arrest.html?action=click&amp;module=Top%20Stories&amp;pgtype=Homepage"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2006.03895"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1905.12516"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2001.01987"/>
	<rdf:li rdf:resource="https://www.mitpressjournals.org/doi/abs/10.1162/evco_a_00252"/>
	<rdf:li rdf:resource="https://www.jstatsoft.org/article/view/v054i02"/>
	<rdf:li rdf:resource="https://advances.sciencemag.org/content/4/1/eaao5580"/>
	<rdf:li rdf:resource="https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3489440"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1911.00483"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1902.02979"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1910.12163"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1910.12756"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1910.11299"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1909.10831"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1910.06772"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1909.12475"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1909.12434"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1909.03801"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1909.06788"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1909.04791"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1503.06410"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1908.08973"/>
	<rdf:li rdf:resource="https://jamanetwork.com/journals/jamadermatology/article-abstract/2740808"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1908.06852"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1908.06319"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1908.03000"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1908.02591"/>
      </rdf:Seq>
    </items>
  </channel><item rdf:about="https://arxiv.org/abs/2512.01819">
    <title>[2512.01819] Decision Tree Embedding by Leaf-Means</title>
    <dc:date>2025-12-06T14:30:34+00:00</dc:date>
    <link>https://arxiv.org/abs/2512.01819</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Decision trees and random forest remain highly competitive for classification on medium-sized, standard datasets due to their robustness, minimal preprocessing requirements, and interpretability. However, a single tree suffers from high estimation variance, while large ensembles reduce this variance at the cost of substantial computational overhead and diminished interpretability. In this paper, we propose Decision Tree Embedding (DTE), a fast and effective method that leverages the leaf partitions of a trained classification tree to construct an interpretable feature representation. By using the sample means within each leaf region as anchor points, DTE maps inputs into an embedding space defined by the tree's partition structure, effectively circumventing the high variance inherent in decision-tree splitting rules. We further introduce an ensemble extension based on additional bootstrap trees, and pair the resulting embedding with linear discriminant analysis for classification. We establish several population-level theoretical properties of DTE, including its preservation of conditional density under mild conditions and a characterization of the resulting classification error. Empirical studies on synthetic and real datasets demonstrate that DTE strikes a strong balance between accuracy and computational efficiency, outperforming or matching random forest and shallow neural networks while requiring only a fraction of their training time in most cases. Overall, the proposed DTE method can be viewed either as a scalable decision tree classifier that improves upon standard split rules, or as a neural network model whose weights are learned from tree-derived anchor points, achieving an intriguing integration of both paradigms."]]></description>
<dc:subject>to:NB decision_trees neural_networks classifiers priebe.carey_e.</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:a1ccf503ec38/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:decision_trees"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neural_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:priebe.carey_e."/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2510.16174">
    <title>[2510.16174] COWs and their Hybrids: A Statistical View of Custom Orthogonal Weights</title>
    <dc:date>2025-10-24T19:41:22+00:00</dc:date>
    <link>https://arxiv.org/abs/2510.16174</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["A recurring challenge in high energy physics is inference of the signal component from a distribution for which observations are assumed to be a mixture of signal and background events. A standard assumption is that there exists information encoded in a discriminant variable that is effective at separating signal and background. This can be used to assign a signal weight to each event, with these weights used in subsequent analyses of one or more control variables of interest. The custom orthogonal weights (COWs) approach of Dembinski, et al.(2022), a generalization of the sPlot approach of Barlow (1987) and Pivk and Le Diberder (2005), is tailored to address this objective. The problem, and this method, present interesting and novel statistical issues. Here we formalize the assumptions needed and the statistical properties, while also considering extensions and alternative approaches."]]></description>
<dc:subject>to:NB classifiers hypothesis_testing statistics particle_physics kith_and_kin wasserman.larry</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:d79263d603f5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:particle_physics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:wasserman.larry"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.propublica.org/article/inside-ai-tool-doge-veterans-affairs-contracts-sahil-lavingia">
    <title>Inside the AI Tool Used by DOGE to Review Veterans Affairs Contracts — ProPublica</title>
    <dc:date>2025-06-15T15:50:36+00:00</dc:date>
    <link>https://www.propublica.org/article/inside-ai-tool-doge-veterans-affairs-contracts-sahil-lavingia</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>utter_stupidity us_politics large_language_models_(so_called) programming classifiers to_teach:data-mining have_read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:8dfe8878ce5a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:utter_stupidity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:us_politics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:large_language_models_(so_called)"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2408.08823">
    <title>[2408.08823] Optimal Symmetries in Binary Classification</title>
    <dc:date>2024-12-11T19:53:45+00:00</dc:date>
    <link>https://arxiv.org/abs/2408.08823</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We explore the role of group symmetries in binary classification tasks, presenting a novel framework that leverages the principles of Neyman-Pearson optimality. Contrary to the common intuition that larger symmetry groups lead to improved classification performance, our findings show that selecting the appropriate group symmetries is crucial for optimising generalisation and sample efficiency. We develop a theoretical foundation for designing group equivariant neural networks that align the choice of symmetries with the underlying probability distributions of the data. Our approach provides a unified methodology for improving classification accuracy across a broad range of applications by carefully tailoring the symmetry group to the specific characteristics of the problem. Theoretical analysis and experimental results demonstrate that optimal classification performance is not always associated with the largest equivariant groups possible in the domain, even when the likelihood ratio is invariant under one of its proper subgroups, but rather with those subgroups themselves. This work offers insights and practical guidelines for constructing more effective group equivariant architectures in diverse machine-learning contexts."]]></description>
<dc:subject>to:NB symmetry neural_networks classifiers</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:11063aae71d2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:symmetry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neural_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2203.05551">
    <title>[2203.05551] Cellular automata can classify data by inducing trajectory phase coexistence</title>
    <dc:date>2024-12-11T15:53:04+00:00</dc:date>
    <link>https://arxiv.org/abs/2203.05551</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We show that cellular automata can classify data by inducing a form of dynamical phase coexistence. We use Monte Carlo methods to search for general two-dimensional deterministic automata that classify images on the basis of activity, the number of state changes that occur in a trajectory initiated from the image. When the number of timesteps of the automaton is a trainable parameter, the search scheme identifies automata that generate a population of dynamical trajectories displaying high or low activity, depending on initial conditions. Automata of this nature behave as nonlinear activation functions with an output that is effectively binary, resembling an emergent version of a spiking neuron."]]></description>
<dc:subject>to:NB classifiers cellular_automata</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c120322757d0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:cellular_automata"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2211.13000">
    <title>[2211.13000] A Network Classification Method based on Density Time Evolution Patterns Extracted from Network Automata</title>
    <dc:date>2024-07-23T14:56:27+00:00</dc:date>
    <link>https://arxiv.org/abs/2211.13000</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Network modeling has proven to be an efficient tool for many interdisciplinary areas, including social, biological, transport, and many other real world complex systems. In addition, cellular automata (CA) are a formalism that has been studied in the last decades as a model for exploring patterns in the dynamic spatio-temporal behavior of these systems based on local rules. Some studies explore the use of cellular automata to analyze the dynamic behavior of networks, denominating them as network automata (NA). Recently, NA proved to be efficient for network classification, since it uses a time-evolution pattern (TEP) for the feature extraction. However, the TEPs explored by previous studies are composed of binary values, which does not represent detailed information on the network analyzed. Therefore, in this paper, we propose alternate sources of information to use as descriptor for the classification task, which we denominate as density time-evolution pattern (D-TEP) and state density time-evolution pattern (SD-TEP). We explore the density of alive neighbors of each node, which is a continuous value, and compute feature vectors based on histograms of the TEPs. Our results show a significant improvement compared to previous studies at five synthetic network databases and also seven real world databases. Our proposed method demonstrates not only a good approach for pattern recognition in networks, but also shows great potential for other kinds of data, such as images."]]></description>
<dc:subject>to:NB to_read network_data_analysis cellular_automata classifiers via:vaguery</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c05ac37b45b9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:cellular_automata"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:vaguery"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2402.07999">
    <title>[2402.07999] NetInfoF Framework: Measuring and Exploiting Network Usable Information</title>
    <dc:date>2024-03-12T01:33:46+00:00</dc:date>
    <link>https://arxiv.org/abs/2402.07999</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Given a node-attributed graph, and a graph task (link prediction or node classification), can we tell if a graph neural network (GNN) will perform well? More specifically, do the graph structure and the node features carry enough usable information for the task? Our goals are (1) to develop a fast tool to measure how much information is in the graph structure and in the node features, and (2) to exploit the information to solve the task, if there is enough. We propose NetInfoF, a framework including NetInfoF_Probe and NetInfoF_Act, for the measurement and the exploitation of network usable information (NUI), respectively. Given a graph data, NetInfoF_Probe measures NUI without any model training, and NetInfoF_Act solves link prediction and node classification, while two modules share the same backbone. In summary, NetInfoF has following notable advantages: (a) General, handling both link prediction and node classification; (b) Principled, with theoretical guarantee and closed-form solution; (c) Effective, thanks to the proposed adjustment to node similarity; (d) Scalable, scaling linearly with the input size. In our carefully designed synthetic datasets, NetInfoF correctly identifies the ground truth of NUI and is the only method being robust to all graph scenarios. Applied on real-world datasets, NetInfoF wins in 11 out of 12 times on link prediction compared to general GNN baselines."]]></description>
<dc:subject>to:NB network_data_analysis classifiers entropy_estimation information_theory faloutsos.christos</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:d6e3be34531e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:entropy_estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:information_theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:faloutsos.christos"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2301.11562">
    <title>[2301.11562] Is My Prediction Arbitrary? The Confounding Effects of Variance in Fair Classification Benchmarks</title>
    <dc:date>2023-09-15T19:39:01+00:00</dc:date>
    <link>https://arxiv.org/abs/2301.11562</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Variance in predictions across different trained models is a significant, under-explored source of error in fair classification. In practice, the variance on some data examples is so large that decisions can be effectively arbitrary. To investigate this problem, we take an experimental approach and make four overarching contributions: We 1) Define a metric called self-consistency, derived from variance, which we use as a proxy for measuring and reducing arbitrariness; 2) Develop an ensembling algorithm that abstains from classification when a prediction would be arbitrary; 3) Conduct the largest to-date empirical study of the role of variance (vis-a-vis self-consistency and arbitrariness) in fair classification; and, 4) Release a toolkit that makes the US Home Mortgage Disclosure Act (HMDA) datasets easily usable for future research. Altogether, our experiments reveal shocking insights about the reliability of conclusions on benchmark datasets. Most fairness classification benchmarks are close-to-fair when taking into account the amount of arbitrariness present in predictions -- before we even try to apply common fairness interventions. This finding calls into question the practical utility of common algorithmic fairness methods, and in turn suggests that we should fundamentally reconsider how we choose to measure fairness in machine learning."

--- "Variance" here is defined slightly non-standardly, as E[loss(Y_1, Y_2)] where Y_1 and Y_2 are (distinct) draws from the distribution.  (If loss is squared error, this comes out to twice the usual definition of variance.)  "Self-consistency" is just the probability that two models, bootstrapped from the same data set, give the same classification for a given individual.]]></description>
<dc:subject>algorithmic_fairness via:rvenkat classifiers have_read ensemble_methods uncertainty_for_neural_networks in_NB to_teach:data-mining</dc:subject>
<dc:identifier>https://pinboard.in/u:cshalizi/b:bec8057ed430/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:algorithmic_fairness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:rvenkat"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:ensemble_methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:uncertainty_for_neural_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2206.07801">
    <title>[2206.07801] Beyond Adult and COMPAS: Fairness in Multi-Class Prediction</title>
    <dc:date>2023-06-28T16:26:36+00:00</dc:date>
    <link>https://arxiv.org/abs/2206.07801</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We consider the problem of producing fair probabilistic classifiers for multi-class classification tasks. We formulate this problem in terms of "projecting" a pre-trained (and potentially unfair) classifier onto the set of models that satisfy target group-fairness requirements. The new, projected model is given by post-processing the outputs of the pre-trained classifier by a multiplicative factor. We provide a parallelizable iterative algorithm for computing the projected classifier and derive both sample complexity and convergence guarantees. Comprehensive numerical comparisons with state-of-the-art benchmarks demonstrate that our approach maintains competitive performance in terms of accuracy-fairness trade-off curves, while achieving favorable runtime on large datasets. We also evaluate our method at scale on an open dataset with multiple classes, multiple intersectional protected groups, and over 1M samples."]]></description>
<dc:subject>in_NB classifiers algorithmic_fairness</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:1ea2ba74e86b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:algorithmic_fairness"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2301.07015">
    <title>[2301.07015] Simplistic Collection and Labeling Practices Limit the Utility of Benchmark Datasets for Twitter Bot Detection</title>
    <dc:date>2023-05-01T20:37:24+00:00</dc:date>
    <link>https://arxiv.org/abs/2301.07015</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Accurate bot detection is necessary for the safety and integrity of online platforms. It is also crucial for research on the influence of bots in elections, the spread of misinformation, and financial market manipulation. Platforms deploy infrastructure to flag or remove automated accounts, but their tools and data are not publicly available. Thus, the public must rely on third-party bot detection. These tools employ machine learning and often achieve near perfect performance for classification on existing datasets, suggesting bot detection is accurate, reliable and fit for use in downstream applications. We provide evidence that this is not the case and show that high performance is attributable to limitations in dataset collection and labeling rather than sophistication of the tools. Specifically, we show that simple decision rules -- shallow decision trees trained on a small number of features -- achieve near-state-of-the-art performance on most available datasets and that bot detection datasets, even when combined together, do not generalize well to out-of-sample datasets. Our findings reveal that predictions are highly dependent on each dataset's collection and labeling procedures rather than fundamental differences between bots and humans. These results have important implications for both transparency in sampling and labeling procedures and potential biases in research using existing bot detection tools for pre-processing."]]></description>
<dc:subject>to:NB classifiers networked_life deceiving_us_has_become_an_industrial_process decision_trees to_teach:data-mining philip_k_dick_and_the_fake_humans_rules_everything_around_me</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:26709234aea1/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:networked_life"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:deceiving_us_has_become_an_industrial_process"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:decision_trees"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:philip_k_dick_and_the_fake_humans_rules_everything_around_me"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.wired.com/story/welfare-state-algorithms/">
    <title>Inside the Suspicion Machine | WIRED</title>
    <dc:date>2023-03-21T15:43:38+00:00</dc:date>
    <link>https://www.wired.com/story/welfare-state-algorithms/</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[--- Last tag for the underlying analysis.
--- The bit about coding _any_ comment from the social worker as a flag for trouble is mind-blowing, and not in a good way.]]></description>
<dc:subject>classifiers risk_assessment welfare_state algorithmic_fairness have_read to_teach:data-mining track_down_references bad_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:102b2cb016b2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:risk_assessment"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:welfare_state"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:algorithmic_fairness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:track_down_references"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://jmlr.org/papers/v23/21-1427.html">
    <title>Inherent Tradeoffs in Learning Fair Representations</title>
    <dc:date>2022-07-19T14:03:30+00:00</dc:date>
    <link>https://jmlr.org/papers/v23/21-1427.html</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Real-world applications of machine learning tools in high-stakes domains are often regulated to be fair, in the sense that the predicted target should satisfy some quantitative notion of parity with respect to a protected attribute. However, the exact tradeoff between fairness and accuracy is not entirely clear, even for the basic paradigm of classification problems. In this paper, we characterize an inherent tradeoff between statistical parity and accuracy in the classification setting by providing a lower bound on the sum of group-wise errors of any fair classifiers. Our impossibility theorem could be interpreted as a certain uncertainty principle in fairness: if the base rates differ among groups, then any fair classifier satisfying statistical parity has to incur a large error on at least one of the groups. We further extend this result to give a lower bound on the joint error of any (approximately) fair classifiers, from the perspective of learning fair representations. To show that our lower bound is tight, assuming oracle access to Bayes (potentially unfair) classifiers, we also construct an algorithm that returns a randomized classifier which is both optimal (in terms of accuracy) and fair. Interestingly, when the protected attribute can take more than two values, an extension of this lower bound does not admit an analytic solution. Nevertheless, in this case, we show that the lower bound can be efficiently computed by solving a linear program, which we term as the TV-Barycenter problem, a barycenter problem under the TV-distance. On the upside, we prove that if the group-wise Bayes optimal classifiers are close, then learning fair representations leads to an alternative notion of fairness, known as the accuracy parity, which states that the error rates are close between groups. Finally, we also conduct experiments on real-world datasets to confirm our theoretical findings."

--- I am sure this is not _just_ Chouldechova (2016), because Geoff wouldn't do that.
(Also, to keep repeating a point, suppose your sibling or your spouse was having their fate determined by a _randomized_ classifier, with the judge [or loan officer, etc.] rolling the d20 in front of you so there's no hiding what's going on.  Would you really think they'd been treated _fairly_?!?)]]></description>
<dc:subject>in_NB algorithmic_fairness classifiers gordon.geoffrey to_teach:data-mining to_teach:statistics_of_inequality_and_discrimination</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:8e8eb5d2fc4d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:algorithmic_fairness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:gordon.geoffrey"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:statistics_of_inequality_and_discrimination"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.tandfonline.com/doi/full/10.1080/01621459.2021.1979010">
    <title>Is a Classification Procedure Good Enough?—A Goodness-of-Fit Assessment Tool for Classification Learning: Journal of the American Statistical Association: Vol 0, No 0</title>
    <dc:date>2022-06-11T04:55:24+00:00</dc:date>
    <link>https://www.tandfonline.com/doi/full/10.1080/01621459.2021.1979010</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In recent years, many nontraditional classification methods, such as random forest, boosting, and neural network, have been widely used in applications. Their performance is typically measured in terms of classification accuracy. While the classification error rate and the like are important, they do not address a fundamental question: Is the classification method underfitted? To our best knowledge, there is no existing method that can assess the goodness of fit of a general classification procedure. Indeed, the lack of a parametric assumption makes it challenging to construct proper tests. To overcome this difficulty, we propose a methodology called BAGofT that splits the data into a training set and a validation set. First, the classification procedure to assess is applied to the training set, which is also used to adaptively find a data grouping that reveals the most severe regions of underfitting. Then, based on this grouping, we calculate a test statistic by comparing the estimated success probabilities and the actual observed responses from the validation set. The data splitting guarantees that the size of the test is controlled under the null hypothesis, and the power of the test goes to one as the sample size increases under the alternative hypothesis. For testing parametric classification models, the BAGofT has a broader scope than the existing methods since it is not restricted to specific parametric models (e.g., logistic regression). Extensive simulation studies show the utility of the BAGofT when assessing general classification procedures and its strengths over some existing methods when testing parametric classification models."

--- I'm sure this is more than just out-of-sample calibration-checking, somehow.]]></description>
<dc:subject>to:NB classifiers goodness-of-fit</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:7a692cbcec00/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:goodness-of-fit"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2112.00329">
    <title>[2112.00329] Non-splitting Neyman-Pearson Classifiers</title>
    <dc:date>2022-06-09T08:29:57+00:00</dc:date>
    <link>https://arxiv.org/abs/2112.00329</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The Neyman-Pearson (NP) binary classification paradigm constrains the more severe type of error (e.g., the type I error) under a preferred level while minimizing the other (e.g., the type II error). This paradigm is suitable for applications such as severe disease diagnosis, fraud detection, among others. A series of NP classifiers have been developed to guarantee the type I error control with high probability. However, these existing classifiers involve a sample splitting step: a mixture of class 0 and class 1 observations to construct a scoring function and some left-out class 0 observations to construct a threshold. This splitting enables classifier construction built upon independence, but it amounts to insufficient use of data for training and a potentially higher type II error. Leveraging a canonical linear discriminant analysis model, we derive a quantitative CLT for a certain functional of quadratic forms of the inverse of sample and population covariance matrices, and based on this result, develop for the first time NP classifiers without splitting the training sample. Numerical experiments have confirmed the advantages of our new non-splitting parametric strategy."]]></description>
<dc:subject>to:NB classifiers neyman-pearson_classification</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:fa5984dd2b33/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neyman-pearson_classification"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2206.01295">
    <title>[2206.01295] Rashomon Capacity: A Metric for Predictive Multiplicity in Probabilistic Classification</title>
    <dc:date>2022-06-09T08:28:56+00:00</dc:date>
    <link>https://arxiv.org/abs/2206.01295</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Predictive multiplicity occurs when classification models with nearly indistinguishable average performances assign conflicting predictions to individual samples. When used for decision-making in applications of consequence (e.g., lending, education, criminal justice), models developed without regard for predictive multiplicity may result in unjustified and arbitrary decisions for specific individuals. We introduce a new measure of predictive multiplicity in probabilistic classification called Rashomon Capacity. Prior metrics for predictive multiplicity focus on classifiers that output thresholded (i.e., 0-1) predicted classes. In contrast, Rashomon Capacity applies to probabilistic classifiers, capturing more nuanced score variations for individual samples. We provide a rigorous derivation for Rashomon Capacity, argue its intuitive appeal, and demonstrate how to estimate it in practice. We show that Rashomon Capacity yields principled strategies for disclosing conflicting models to stakeholders. Our numerical experiments illustrate how Rashomon Capacity captures predictive multiplicity in various datasets and learning models, including neural networks. The tools introduced in this paper can help data scientists measure, report, and ultimately resolve predictive multiplicity prior to model deployment."

--- Ain't this just the reference class problem?]]></description>
<dc:subject>to:NB prediction classifiers</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:ebd80ca97d0b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2205.03009">
    <title>[2205.03009] Watching the watchers: bias and vulnerability in remote proctoring software</title>
    <dc:date>2022-05-23T15:01:27+00:00</dc:date>
    <link>https://arxiv.org/abs/2205.03009</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Educators are rapidly switching to remote proctoring and examination software for their testing needs, both due to the COVID-19 pandemic and the expanding virtualization of the education sector. State boards are increasingly utilizing these software for high stakes legal and medical licensing exams. Three key concerns arise with the use of these complex software: exam integrity, exam procedural fairness, and exam-taker security and privacy. We conduct the first technical analysis of each of these concerns through a case study of four primary proctoring suites used in U.S. law school and state attorney licensing exams. We reverse engineer these proctoring suites and find that despite promises of high-security, all their anti-cheating measures can be trivially bypassed and can pose significant user security risks. We evaluate current facial recognition classifiers alongside the classifier used by Examplify, the legal exam proctoring suite with the largest market share, to ascertain their accuracy and determine whether faces with certain skin tones are more readily flagged for cheating. Finally, we offer recommendations to improve the integrity and fairness of the remotely proctored exam experience."

--- As yorksranter says, the fact that in some conditions all of these give error rates above 20% for all groups says that the big problem here isn't _unfairness_, it's _not working well enough to be reliable for anyone_.]]></description>
<dc:subject>to:NB pattern_recognition classifiers algorithmic_fairness to_teach:data-mining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:697655fc032e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:pattern_recognition"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:algorithmic_fairness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.nytimes.com/2022/02/19/technology/qanon-messages-authors.html">
    <title>Who Is Behind QAnon? Linguistic Detectives Find Fingerprints - The New York Times</title>
    <dc:date>2022-02-27T03:28:42+00:00</dc:date>
    <link>https://www.nytimes.com/2022/02/19/technology/qanon-messages-authors.html</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[Last tag is probably asking for trouble.]]></description>
<dc:subject>qanon conspiracy_theories psychoceramics stylometrics text_mining natural_language_processing classifiers to_teach:data-mining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:cf3f4ef95f36/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:qanon"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:conspiracy_theories"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:psychoceramics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:stylometrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:text_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:natural_language_processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://hci.stanford.edu/publications/2019/streetlevelalgorithms/streetlevelalgorithms-chi2019.pdf">
    <title>Street–Level Algorithms: A Theory at the Gaps Between Policy and Decisions</title>
    <dc:date>2022-02-26T19:03:55+00:00</dc:date>
    <link>https://hci.stanford.edu/publications/2019/streetlevelalgorithms/streetlevelalgorithms-chi2019.pdf</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Errors and biases are earning algorithms increasingly malignant reputations in society. A central challenge is that
algorithms must bridge the gap between high–level policy
and on–the–ground decisions, making inferences in novel
situations where the policy or training data do not readily
apply. In this paper, we draw on the theory of street–level
bureaucracies, how human bureaucrats such as police and
judges interpret policy to make on–the–ground decisions.
We present by analogy a theory of street–level algorithms,
the algorithms that bridge the gaps between policy and decisions about people in a socio-technical system. We argue that
unlike street–level bureaucrats, who reflexively refine their
decision criteria as they reason through a novel situation,
street–level algorithms at best refine their criteria only after
the decision is made. This loop–and–a–half delay results in
illogical decisions when handling new or extenuating circumstances. This theory suggests designs for street–level
algorithms that draw on historical design patterns for street–level
bureaucracies, including mechanisms for self–policing
and recourse in the case of error."

--- ETA after reading: I like the framing, mostly, and I appreciate the point about "reflexively" considering the decision boundary before making a decision vs. at best updating with feedback after the fact.  But even then, what humans really do, _sometimes_, is alter the rule based on what they _imagine_ the consequences of a decision would be, in light of what they _conceive_ the purpose of the behavior to be.  I emphasize the subjective, mental terms, because matching this, or coming close, is perilously close to AI-complete.

Further unfair complaints:
- A persistent conflation of "marginal", in the sense of lower-status social categories, with "marginal", in the sense of "marginal case", i.e., one near a decision boundary.
- Somewhat uncritical presentations of the case studies:
    + No references are given for the algorithmic reasons why videos with "transgender" in the title flipped YouTube's de-monetization switch.  It _could_ be that those algorithms somehow encoded "gender = sex (noun) = sex (verb)".  But, well, there's a hell of a lot of TG porn online (proof: omitted), and while this would still be a kind of stupidity on the part of the YouTube algorithm, and indeed an instance of data-set shift, it'd be one with a rather different valence.  (If this conjecture is even close to right, people making videos about dealing with the trauma of incest were probably also de-monetized.)
    + Similarly, in disputes about crowd-sourcing, they very plainly take the side of the workers, not those paying for work.  Now my bias, too, is to always side with the workers, but if you're really going to do critical social science, you need to at least peer into that bias.  (It's quite possible that existing practices are bad for both sides of the worker-employer divide!)
- I do like the idea that when people ask for recourse, the system should provide the representations (they say "embeddings", but I forgive them) for similar cases.  I'd amend this to a selection of the most similar cases with the same outcome as the one being appealed, and the most similar cases with different outcomes.  ("Similar" needs specification here, yes.)  However, the presentation of the appeal/recourse process seems to presume that recourse will be granted --- it would instead make more sense to allow for the possibility that the system got it right the first time, and whatever the complainant alleges as distinguishing special features of their case are _properly_ ignored.]]></description>
<dc:subject>via:henry_farrell classifiers algorithmic_fairness to_teach:data-mining decision-making bureaucracy have_read in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:4aa7f6ce05aa/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:henry_farrell"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:algorithmic_fairness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:decision-making"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bureaucracy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2003.08907">
    <title>[2003.08907] Overinterpretation reveals image classification model pathologies</title>
    <dc:date>2021-12-22T18:10:28+00:00</dc:date>
    <link>https://arxiv.org/abs/2003.08907</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Image classifiers are typically scored on their test set accuracy, but high accuracy can mask a subtle type of model failure. We find that high scoring convolutional neural networks (CNNs) on popular benchmarks exhibit troubling pathologies that allow them to display high accuracy even in the absence of semantically salient features. When a model provides a high-confidence decision without salient supporting input features, we say the classifier has overinterpreted its input, finding too much class-evidence in patterns that appear nonsensical to humans. Here, we demonstrate that neural networks trained on CIFAR-10 and ImageNet suffer from overinterpretation, and we find models on CIFAR-10 make confident predictions even when 95% of input images are masked and humans cannot discern salient features in the remaining pixel-subsets. We introduce Batched Gradient SIS, a new method for discovering sufficient input subsets for complex datasets, and use this method to show the sufficiency of border pixels in ImageNet for training and testing. Although these patterns portend potential model fragility in real-world deployment, they are in fact valid statistical patterns of the benchmark that alone suffice to attain high test accuracy. Unlike adversarial examples, overinterpretation relies upon unmodified image pixels. We find ensembling and input dropout can each help mitigate overinterpretation."

--- ImageNet can work from just the _border_?!?]]></description>
<dc:subject>classifiers your_favorite_deep_neural_network_sucks via:? adversarial_examples have_skimmed in_NB have_read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:628357aee15e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:your_favorite_deep_neural_network_sucks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:?"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:adversarial_examples"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_skimmed"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://proceedings.mlr.press/v139/kandiros21a.html">
    <title>Statistical Estimation from Dependent Data</title>
    <dc:date>2021-07-11T16:44:59+00:00</dc:date>
    <link>http://proceedings.mlr.press/v139/kandiros21a.html</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We consider a general statistical estimation problem wherein binary labels across different observations are not independent conditioning on their feature vectors, but dependent, capturing settings where e.g. these observations are collected on a spatial domain, a temporal domain, or a social network, which induce dependencies. We model these dependencies in the language of Markov Random Fields and, importantly, allow these dependencies to be substantial, i.e. do not assume that the Markov Random Field capturing these dependencies is in high temperature. As our main contribution we provide algorithms and statistically efficient estimation rates for this model, giving several instantiations of our bounds in logistic regression, sparse logistic regression, and neural network regression settings with dependent data. Our estimation guarantees follow from novel results for estimating the parameters (i.e. external fields and interaction strengths) of Ising models from a single sample."]]></description>
<dc:subject>to:NB learning_theory random_fields classifiers dependent_learning of_course_its_really_a_spin_glass statistics learning_under_dependence</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:a1d41909f983/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:learning_theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:random_fields"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dependent_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:of_course_its_really_a_spin_glass"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:learning_under_dependence"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.nature.com/articles/s41386-021-01020-7">
    <title>Systematic misestimation of machine learning performance in neuroimaging studies of depression | Neuropsychopharmacology</title>
    <dc:date>2021-06-11T18:03:12+00:00</dc:date>
    <link>https://www.nature.com/articles/s41386-021-01020-7</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We currently observe a disconcerting phenomenon in machine learning studies in psychiatry: While we would expect larger samples to yield better results due to the availability of more data, larger machine learning studies consistently show much weaker performance than the numerous small-scale studies. Here, we systematically investigated this effect focusing on one of the most heavily studied questions in the field, namely the classification of patients suffering from Major Depressive Disorder (MDD) and healthy controls based on neuroimaging data. Drawing upon structural MRI data from a balanced sample of N = 1868 MDD patients and healthy controls from our recent international Predictive Analytics Competition (PAC), we first trained and tested a classification model on the full dataset which yielded an accuracy of 61%. Next, we mimicked the process by which researchers would draw samples of various sizes (N = 4 to N = 150) from the population and showed a strong risk of misestimation. Specifically, for small sample sizes (N = 20), we observe accuracies of up to 95%. For medium sample sizes (N = 100) accuracies up to 75% were found. Importantly, further investigation showed that sufficiently large test sets effectively protect against performance misestimation whereas larger datasets per se do not. While these results question the validity of a substantial part of the current literature, we outline the relatively low-cost remedy of larger test sets, which is readily available in most cases."

--- I haven't read the paper yet so there might be alternative explanations, but I can't help noting that this is 100% consistent with the most cynical possible interpretation of [http://bactra.org/weblog/698.html].]]></description>
<dc:subject>to:NB neural_data_analysis statistics classifiers to_teach:data-mining re:neutral_model_of_inquiry data_mining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:a31d8ee83a33/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neural_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:neutral_model_of_inquiry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:data_mining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://doxa.substack.com/p/phrenology-insurance-claims-and-digital?token=eyJ1c2VyX2lkIjozMTk2MjUwOSwicG9zdF9pZCI6MzcxODg4MDYsIl8iOiI5RlR3eiIsImlhdCI6MTYyMjc3NzYxOSwiZXhwIjoxNjIyNzgxMjE5LCJpc3MiOiJwdWItMjM5NjUzIiwic3ViIjoicG9zdC1yZWFjdGlvbiJ9.r6AUl5BSCgCU4aMl6zL1Pt8xcC3cnNU5E7J6LDGlQbs">
    <title>Phrenology, insurance claims, and digital gaydar - doxa</title>
    <dc:date>2021-06-04T03:34:23+00:00</dc:date>
    <link>https://doxa.substack.com/p/phrenology-insurance-claims-and-digital?token=eyJ1c2VyX2lkIjozMTk2MjUwOSwicG9zdF9pZCI6MzcxODg4MDYsIl8iOiI5RlR3eiIsImlhdCI6MTYyMjc3NzYxOSwiZXhwIjoxNjIyNzgxMjE5LCJpc3MiOiJwdWItMjM5NjUzIiwic3ViIjoicG9zdC1yZWFjdGlvbiJ9.r6AUl5BSCgCU4aMl6zL1Pt8xcC3cnNU5E7J6LDGlQbs</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>classifiers algorithmic_fairness to_teach:data-mining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:903e4f804855/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:algorithmic_fairness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2011.02407">
    <title>[2011.02407] Debiasing classifiers: is reality at variance with expectation?</title>
    <dc:date>2021-06-01T13:36:59+00:00</dc:date>
    <link>https://arxiv.org/abs/2011.02407</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We present an empirical study of debiasing methods for classifiers, showing that debiasers often fail in practice to generalize out-of-sample, and can in fact make fairness worse rather than better. A rigorous evaluation of the debiasing treatment effect requires extensive cross-validation beyond what is usually done. We demonstrate that this phenomenon can be explained as a consequence of bias-variance trade-off, with an increase in variance necessitated by imposing a fairness constraint. Follow-up experiments validate the theoretical prediction that the estimation variance depends strongly on the base rates of the protected class. Considering fairness--performance trade-offs justifies the counterintuitive notion that partial debiasing can actually yield better results in practice on out-of-sample data."]]></description>
<dc:subject>classifiers algorithmic_fairness in_NB statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:ed5dccd4c10d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:algorithmic_fairness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2105.08742">
    <title>[2105.08742] Uncertainty Aware Learning for High Energy Physics</title>
    <dc:date>2021-05-30T20:45:29+00:00</dc:date>
    <link>https://arxiv.org/abs/2105.08742</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Machine learning techniques are becoming an integral component of data analysis in High Energy Physics (HEP). These tools provide a significant improvement in sensitivity over traditional analyses by exploiting subtle patterns in high-dimensional feature spaces. These subtle patterns may not be well-modeled by the simulations used for training machine learning methods, resulting in an enhanced sensitivity to systematic uncertainties.
"Contrary to the traditional wisdom of constructing an analysis strategy that is invariant to systematic uncertainties, we study the use of a classifier that is fully aware of uncertainties and their corresponding nuisance parameters. We show that this dependence can actually enhance the sensitivity to parameters of interest. Studies are performed using a synthetic Gaussian dataset as well as a more realistic HEP dataset based on Higgs boson decays to tau leptons. For both cases, we show that the uncertainty aware approach can achieve a better sensitivity than alternative machine learning strategies."]]></description>
<dc:subject>to:NB classifiers statistics particle_physics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:41957b05e9e8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:particle_physics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2105.11490">
    <title>[2105.11490] Hidden Markov and semi-Markov models: When and why are these models useful to classify states in time series data?</title>
    <dc:date>2021-05-26T18:30:10+00:00</dc:date>
    <link>https://arxiv.org/abs/2105.11490</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Hidden Markov models (HMMs) and their extensions have proven to be powerful tools for classification of observations that stem from systems with temporal dependence as they take into account that observations close in time to one another are likely generated from the same state (i.e. class). In this paper, we provide details for the implementation of four models for classification in a supervised learning context: HMMs, hidden semi-Markov models (HSMMs), autoregressive-HMMs and autoregressive-HSMMs. Using simulations, we study the classification performance under various degrees of model misspecification to characterize when it would be important to extend a basic HMM to an HSMM. As an application of these techniques we use the models to classify accelerometer data from Merino sheep to distinguish between four different behaviors of interest. In particular in the field of movement ecology, collection of fine-scale animal movement data over time to identify behavioral states has become ubiquitous, necessitating models that can account for the dependence structure in the data. We demonstrate that when the aim is to conduct classification, various degrees of model misspecification of the proposed model may not impede good classification performance unless there is high overlap between the state-dependent distributions."]]></description>
<dc:subject>to:NB state-space_models time_series classifiers state_estimation misspecification to_teach:data_over_space_and_time statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:14bc8be5c08a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:state-space_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:state_estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:misspecification"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data_over_space_and_time"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://doi.org/10.1111/rssb.12425">
    <title>AMF: Aggregated Mondrian forests for online learning - Mourtada - - Journal of the Royal Statistical Society: Series B (Statistical Methodology) - Wiley Online Library</title>
    <dc:date>2021-05-20T13:53:05+00:00</dc:date>
    <link>https://doi.org/10.1111/rssb.12425</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Random forest (RF) is one of the algorithms of choice in many supervised learning applications, be it classification or regression. The appeal of such tree-ensemble methods comes from a combination of several characteristics: a remarkable accuracy in a variety of tasks, a small number of parameters to tune, robustness with respect to features scaling, a reasonable computational cost for training and prediction, and their suitability in high-dimensional settings. The most commonly used RF variants, however, are ‘offline’ algorithms, which require the availability of the whole dataset at once. In this paper, we introduce AMF, an online RF algorithm based on Mondrian Forests. Using a variant of the context tree weighting algorithm, we show that it is possible to efficiently perform an exact aggregation over all prunings of the trees; in particular, this enables to obtain a truly online parameter-free algorithm which is competitive with the optimal pruning of the Mondrian tree, and thus adaptive to the unknown regularity of the regression function. Numerical experiments show that AMF is competitive with respect to several strong baselines on a large number of datasets for multi-class classification."]]></description>
<dc:subject>to:NB to_read ensemble_methods random_forests regression classifiers to_teach:data-mining online_learning statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:42bd56d40bd2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:ensemble_methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:random_forests"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:online_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2105.07283">
    <title>[2105.07283] Calibrating sufficiently</title>
    <dc:date>2021-05-18T14:11:08+00:00</dc:date>
    <link>https://arxiv.org/abs/2105.07283</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["When probabilistic classifiers are trained and calibrated, the so-called grouping loss component of the calibration loss can easily be overlooked. Grouping loss refers to the gap between observable information and information actually exploited in the calibration exercise. We investigate the relation between grouping loss and the concept of sufficiency, identifying comonotonicity as a useful criterion for sufficiency. We revisit the probing reduction approach of Langford & Zadrozny (2005) and find that it produces an estimator of probabilistic classifiers that reduces information loss. Finally, we discuss Brier curves as tools to support training and `sufficient' calibration of probabilistic classifiers."

--- Pre-written commentary incorporated by reference: [http://bactra.org/notebooks/prediction-process.html] and [https://arxiv.org/abs/nlin/0006025]]]></description>
<dc:subject>to:NB classifiers prediction calibration sufficiency information_theory statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:fe18352f7b8e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:calibration"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sufficiency"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:information_theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2105.04648">
    <title>[2105.04648] Joint Fairness Model with Applications to Risk Predictions for Under-represented Populations</title>
    <dc:date>2021-05-12T18:29:05+00:00</dc:date>
    <link>https://arxiv.org/abs/2105.04648</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Under-representation of certain populations, based on gender, race/ethnicity, and age, in data collection for predictive modeling may yield less-accurate predictions for the under-represented groups. Recently, this issue of fairness in predictions has attracted significant attention, as data-driven models are increasingly utilized to perform crucial decision-making tasks. Methods to achieve fairness in the machine learning literature typically build a single prediction model subject to some fairness criteria in a manner that encourages fair prediction performances for all groups. These approaches have two major limitations: i) fairness is often achieved by compromising accuracy for some groups; ii) the underlying relationship between dependent and independent variables may not be the same across groups. We propose a Joint Fairness Model (JFM) approach for binary outcomes that estimates group-specific classifiers using a joint modeling objective function that incorporates fairness criteria for prediction. We introduce an Accelerated Smoothing Proximal Gradient Algorithm to solve the convex objective function, and demonstrate the properties of the proposed JFM estimates. Next, we presented the key asymptotic properties for the JFM parameter estimates. We examined the efficacy of the JFM approach in achieving prediction performances and parities, in comparison with the Single Fairness Model, group-separate model, and group-ignorant model through extensive simulations. Finally, we demonstrated the utility of the JFM method in the motivating example to obtain fair risk predictions for under-represented older patients diagnosed with coronavirus disease 2019 (COVID-19)."]]></description>
<dc:subject>prediction classifiers algorithmic_fairness smyth.padhraic to_teach:data-mining in_NB re:codename:one_law_for_the_lion_and_ox_is_oppression</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:9778e5e6bcab/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:algorithmic_fairness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:smyth.padhraic"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:codename:one_law_for_the_lion_and_ox_is_oppression"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2104.12553">
    <title>[2104.12553] Avoiding bias when inferring race using name-based approaches</title>
    <dc:date>2021-05-06T13:24:01+00:00</dc:date>
    <link>https://arxiv.org/abs/2104.12553</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Racial disparity in academia is a widely acknowledged problem. The quantitative understanding of racial-based systemic inequalities is an important step towards a more equitable research system. However, few large-scale analyses have been performed on this topic, mostly because of the lack of robust race-disambiguation algorithms. Identifying author information does not generally include the author's race. Therefore, an algorithm needs to be employed, using known information about authors, i.e., their names, to infer their perceived race. Nevertheless, as any other algorithm, the process of racial inference can generate biases if it is not carefully considered. When the research is focused on the understanding of racial-based inequalities, such biases undermine the objectives of the investigation and may perpetuate inequities. The goal of this article is to assess the biases introduced by the different approaches used name-based racial inference. We use information from US census and mortgage applications to infer the race of US author names in the Web of Science. We estimate the effects of using given and family names, thresholds or continuous distributions, and imputation. Our results demonstrate that the validity of name-based inference varies by race and ethnicity and that threshold approaches underestimate Black authors and overestimate White authors. We conclude with recommendations to avoid potential biases. This article fills an important research gap that will allow more systematic and unbiased studies on racial disparity in science."

--- This seems like an elaborate re-discovery of the fact that, for obvious historical reasons lots of African Americans have names like WASPs or Irish-Americans (to name only prominent intellectuals: "Chloe  Morrison", "Henry Gates", "John McWhorter",  "David Blackwell", etc.).  Also, if you didn't know the context but heard that someone was insisting "bibliographic databases do not classify scientists by race, so we need to create files giving the racial classification of everyone!", would you be astonished if they were creepy "race realist" pseudo-scientists of the Pioneer Fund or "human biodiversity" ilk?  I get why it's got a beneficent purpose, the French attitude of refusing to gather data on racial inequalities in the hope that that will make them go away still doesn't make sense to me, but sheesh.]]></description>
<dc:subject>to:NB text_mining classifiers sociology_of_science the_american_dilemma to_teach:statistics_of_inequality_and_discrimination color_me_skeptical</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c0dddef66827/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:text_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sociology_of_science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:the_american_dilemma"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:statistics_of_inequality_and_discrimination"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:color_me_skeptical"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.annualreviews.org/doi/abs/10.1146/annurev-statistics-040720-022432">
    <title>Statistical Evaluation of Medical Tests | Annual Review of Statistics and Its Application</title>
    <dc:date>2021-04-14T22:24:48+00:00</dc:date>
    <link>https://www.annualreviews.org/doi/abs/10.1146/annurev-statistics-040720-022432</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>to:NB to_teach:data-mining classifiers medicine statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:7109aea5926e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:medicine"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.sciencedirect.com/science/article/pii/S0047259X02000210">
    <title>Results in statistical discriminant analysis: a review of the former Soviet Union literature - ScienceDirect</title>
    <dc:date>2021-03-10T15:49:35+00:00</dc:date>
    <link>https://www.sciencedirect.com/science/article/pii/S0047259X02000210</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[This looks very interesting, and makes me want to go back to the notes/materials from my first pattern recognition/ML course as a graduate student at Wisconsin, taught by a Russian mechanical engineering professor (V. J. Lumelsky, Ph.D. 1970, Moscow Institute of Control Sciences).]]></description>
<dc:subject>to:NB history_of_statistics ussr via:rvenkat classifiers learning_theory to_teach:childs_garden_of_statistical_learning_theory re:paradigm_formation_in_statistical_learning</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:e51a3efdacdf/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:history_of_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:ussr"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:rvenkat"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:learning_theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:childs_garden_of_statistical_learning_theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:paradigm_formation_in_statistical_learning"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2003.01908">
    <title>[2003.01908] Denoised Smoothing: A Provable Defense for Pretrained Classifiers</title>
    <dc:date>2021-02-11T22:41:03+00:00</dc:date>
    <link>https://arxiv.org/abs/2003.01908</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We present a method for provably defending any pretrained image classifier against ℓ_p adversarial attacks. This method, for instance, allows public vision API providers and users to seamlessly convert pretrained non-robust classification services into provably robust ones. By prepending a custom-trained denoiser to any off-the-shelf image classifier and using randomized smoothing, we effectively create a new classifier that is guaranteed to be ℓ_p-robust to adversarial examples, without modifying the pretrained classifier. Our approach applies to both the white-box and the black-box settings of the pretrained classifier. We refer to this defense as denoised smoothing, and we demonstrate its effectiveness through extensive experimentation on ImageNet and CIFAR-10. Finally, we use our approach to provably defend the Azure, Google, AWS, and ClarifAI image classification APIs."

--- From a quick scan, worth reading carefully.  It looks like the idea is roughly as follows.  Start with a correctly classified image x, so m(x) = c(x) where m() is classifier output and c() is true class.  (Let's assume true class _is_ a function of the image for now.)  An adversarial example would be a _small_ perturbation a such that m(x+a) = d \neq c(x).  But the adversarial perturbations aren't just small, they're a very particular set, so if we add random noise R we typically get kicked back out of the adversarial set and back in to the pre-image of c(x), thus m(x+a+R) = c(x) with high probability.  So it's somehow relying on adversarial perturbations being atypical; maybe not topological "meagre" in the strict sense, but presumably also not a generic set.  So for this to work must tell us something about the geometry of the decision boundaries, but I'm not smart enough to say what.  Again, to be studied.]]></description>
<dc:subject>classifiers adversarial_examples neural_networks kolter.j._zico to_read via:? in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:23b656d669b5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:adversarial_examples"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neural_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kolter.j._zico"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:?"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2101.11815">
    <title>[2101.11815] Interpolating Classifiers Make Few Mistakes</title>
    <dc:date>2021-02-04T15:33:47+00:00</dc:date>
    <link>https://arxiv.org/abs/2101.11815</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This paper provides elementary analyses of the regret and generalization of minimum-norm interpolating classifiers (MNIC). The MNIC is the function of smallest Reproducing Kernel Hilbert Space norm that perfectly interpolates a label pattern on a finite data set. We derive a mistake bound for MNIC and a regularized variant that holds for all data sets. This bound follows from elementary properties of matrix inverses. Under the assumption that the data is independently and identically distributed, the mistake bound implies that MNIC generalizes at a rate proportional to the norm of the interpolating solution and inversely proportional to the number of data points. This rate matches similar rates derived for margin classifiers and perceptrons. We derive several plausible generative models where the norm of the interpolating classifier is bounded or grows at a rate sublinear in n. We also show that as long as the population class conditional distributions are sufficiently separable in total variation, then MNIC generalizes with a fast rate."]]></description>
<dc:subject>learning_theory recht.benjamin classifiers to_teach:childs_garden_of_statistical_learning_theory in_NB interpolation_aka_memorizing_the_training_data</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:d6b07112ccd5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:learning_theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:recht.benjamin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:childs_garden_of_statistical_learning_theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:interpolation_aka_memorizing_the_training_data"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://projecteuclid.org/euclid.aos/1611889233">
    <title>Kim , Ramdas , Singh , Wasserman : Classification accuracy as a proxy for two-sample testing</title>
    <dc:date>2021-02-04T15:31:39+00:00</dc:date>
    <link>https://projecteuclid.org/euclid.aos/1611889233</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["When data analysts train a classifier and check if its accuracy is significantly different from chance, they are implicitly performing a two-sample test. We investigate the statistical properties of this flexible approach in the high-dimensional setting. We prove two results that hold for all classifiers in any dimensions: if its true error remains ϵ-better than chance for some ϵ>0 as d,n→∞, then (a) the permutation-based test is consistent (has power approaching to one), (b) a computationally efficient test based on a Gaussian approximation of the null distribution is also consistent. To get a finer understanding of the rates of consistency, we study a specialized setting of distinguishing Gaussians with mean-difference δ and common (known or unknown) covariance Σ, when d/n→c∈(0,∞). We study variants of Fisher’s linear discriminant analysis (LDA) such as “naive Bayes” in a nontrivial regime when ϵ→0 (the Bayes classifier has true accuracy approaching 1/2), and contrast their power with corresponding variants of Hotelling’s test. Surprisingly, the expressions for their power match exactly in terms of n, d, δ, Σ, and the LDA approach is only worse by a constant factor, achieving an asymptotic relative efficiency (ARE) of 1/√π for balanced samples. We also extend our results to high-dimensional elliptical distributions with finite kurtosis. Other results of independent interest include minimax lower bounds, and the optimality of Hotelling’s test when d=o(n). Simulation results validate our theory, and we present practical takeaway messages along with natural open problems."]]></description>
<dc:subject>to:NB hypothesis_testing two-sample_tests classifiers high-dimensional_statistics heard_the_talk kith_and_kin singh.aarti wasserman.larry ramdas.aaditya</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:9a8de542290c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:two-sample_tests"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:heard_the_talk"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:singh.aarti"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:wasserman.larry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:ramdas.aaditya"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2101.06309">
    <title>[2101.06309] Fundamental Tradeoffs in Distributionally Adversarial Training</title>
    <dc:date>2021-01-19T18:33:29+00:00</dc:date>
    <link>https://arxiv.org/abs/2101.06309</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Adversarial training is among the most effective techniques to improve the robustness of models against adversarial perturbations. However, the full effect of this approach on models is not well understood. For example, while adversarial training can reduce the adversarial risk (prediction error against an adversary), it sometimes increase standard risk (generalization error when there is no adversary). Even more, such behavior is impacted by various elements of the learning problem, including the size and quality of training data, specific forms of adversarial perturbations in the input, model overparameterization, and adversary's power, among others. In this paper, we focus on \emph{distribution perturbing} adversary framework wherein the adversary can change the test distribution within a neighborhood of the training data distribution. The neighborhood is defined via Wasserstein distance between distributions and the radius of the neighborhood is a measure of adversary's manipulative power. We study the tradeoff between standard risk and adversarial risk and derive the Pareto-optimal tradeoff, achievable over specific classes of models, in the infinite data limit with features dimension kept fixed. We consider three learning settings: 1) Regression with the class of linear models; 2) Binary classification under the Gaussian mixtures data model, with the class of linear classifiers; 3) Regression with the class of random features model (which can be equivalently represented as two-layer neural network with random first-layer weights). We show that a tradeoff between standard and adversarial risk is manifested in all three settings. We further characterize the Pareto-optimal tradeoff curves and discuss how a variety of factors, such as features correlation, adversary's power or the width of two-layer neural network would affect this tradeoff."

--- Ain't this just robustness to mis-specification?]]></description>
<dc:subject>to:NB statistics misspecification regression classifiers adversarial_examples robustness color_me_skeptical</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:b9c81909cdee/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:misspecification"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:adversarial_examples"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:robustness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:color_me_skeptical"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2012.15863">
    <title>[2012.15863] Empirically Classifying Network Mechanisms</title>
    <dc:date>2021-01-03T19:53:41+00:00</dc:date>
    <link>https://arxiv.org/abs/2012.15863</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Network models are used to study interconnected systems across many physical, biological, and social disciplines. Such models often assume a particular network-generating mechanism, which when fit to data produces estimates of mechanism-specific parameters that describe how systems function. For instance, a social network model might assume new individuals connect to others with probability proportional to their number of pre-existing connections ('preferential attachment'), and then estimate the disparity in interactions between famous and obscure individuals with similar qualifications. However, without a means of testing the relevance of the assumed mechanism, conclusions from such models could be misleading. Here we introduce a simple empirical approach which can mechanistically classify arbitrary network data. Our approach compares empirical networks to model networks from a user-provided candidate set of mechanisms, and classifies each network--with high accuracy--as originating from either one of the mechanisms or none of them. We tested 373 empirical networks against five of the most widely studied network mechanisms and found that most (228) were unlike any of these mechanisms. This raises the possibility that some empirical networks arise from mixtures of mechanisms. We show that mixtures are often unidentifiable because different mixtures can produce functionally equivalent networks. In such systems, which are governed by multiple mechanisms, our approach can still accurately predict out-of-sample functional properties."

--- Didn't Wiggins have a paper doing essentially this years and years ago?]]></description>
<dc:subject>to:NB network_data_analysis network_formation classifiers to_teach:baby-nets</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:451433286efe/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_formation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:baby-nets"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.washingtonpost.com/technology/2020/12/08/huawei-tested-ai-software-that-could-recognize-uighur-minorities-alert-police-report-says/">
    <title>Huawei tested AI software that could recognize Uighur minorities and alert police, report says - The Washington Post</title>
    <dc:date>2020-12-09T13:16:12+00:00</dc:date>
    <link>https://www.washingtonpost.com/technology/2020/12/08/huawei-tested-ai-software-that-could-recognize-uighur-minorities-alert-police-report-says/</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The Chinese tech giant Huawei has tested facial recognition software that could send automated “Uighur alarms” to government authorities when its camera systems identify members of the oppressed minority group, according to an internal document that provides further details about China’s artificial-intelligence surveillance regime.
"A document signed by Huawei representatives — discovered by the research organization IPVM and shared exclusively with The Washington Post — shows that the telecommunications firm worked in 2018 with the facial recognition start-up Megvii to test an artificial-intelligence camera system that could scan faces in a crowd and estimate each person’s age, sex and ethnicity.
"If the system detected the face of a member of the mostly Muslim minority group, the test report said, it could trigger a “Uighur alarm” — potentially flagging them for police in China, where members of the group have been detained en masse as part of a brutal government crackdown. The document, which was found on Huawei’s website, was removed shortly after The Post and IPVM asked the companies for comment."]]></description>
<dc:subject>to_teach:data-mining xinjiang china:prc classifiers</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:953580a10026/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:xinjiang"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:china:prc"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://projecteuclid.org/euclid.aos/1594972839">
    <title>Cannings , Berrett , Samworth : Local nearest neighbour classification with applications to semi-supervised learning</title>
    <dc:date>2020-11-18T22:44:02+00:00</dc:date>
    <link>https://projecteuclid.org/euclid.aos/1594972839</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We derive a new asymptotic expansion for the global excess risk of a local-k-nearest neighbour classifier, where the choice of k may depend upon the test point. This expansion elucidates conditions under which the dominant contribution to the excess risk comes from the decision boundary of the optimal Bayes classifier, but we also show that if these conditions are not satisfied, then the dominant contribution may arise from the tails of the marginal distribution of the features. Moreover, we prove that, provided the d-dimensional marginal distribution of the features has a finite ρth moment for some ρ>4 (as well as other regularity conditions), a local choice of k can yield a rate of convergence of the excess risk of O(n^(−4/(d+4))), where n is the sample size, whereas for the standard k-nearest neighbour classifier, our theory would require d≥5 and ρ>4d/(d−4) finite moments to achieve this rate. These results motivate a new k-nearest neighbour classifier for semi-supervised learning problems, where the unlabelled data are used to obtain an estimate of the marginal feature density, and fewer neighbours are used for classification when this density estimate is small. Our worst-case rates are complemented by a minimax lower bound, which reveals that the local, semi-supervised k-nearest neighbour classifier attains the minimax optimal rate over our classes for the excess risk, up to a subpolynomial factor in n. These theoretical improvements over the standard k-nearest neighbour classifier are also illustrated through a simulation study."]]></description>
<dc:subject>classifiers nearest_neighbors statistics nonparametrics samworth.richard_j. in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:357925370f16/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:nearest_neighbors"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:nonparametrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:samworth.richard_j."/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://projecteuclid.org/euclid.ejs/1601085759">
    <title>Barber : Is distribution-free inference possible for binary regression?</title>
    <dc:date>2020-11-16T16:22:04+00:00</dc:date>
    <link>https://projecteuclid.org/euclid.ejs/1601085759</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["For a regression problem with a binary label response, we examine the problem of constructing confidence intervals for the label probability conditional on the features. In a setting where we do not have any information about the underlying distribution, we would ideally like to provide confidence intervals that are distribution-free—that is, valid with no assumptions on the distribution of the data. Our results establish an explicit lower bound on the length of any distribution-free confidence interval, and construct a procedure that can approximately achieve this length. In particular, this lower bound is independent of the sample size and holds for all distributions with no point masses, meaning that it is not possible for any distribution-free procedure to be adaptive with respect to any type of special structure in the distribution."]]></description>
<dc:subject>to:NB classifiers statistics confidence_sets prediction</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:f65bda46c0b5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:confidence_sets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://projecteuclid.org/euclid.ejs/1602900015">
    <title>Liu , Goldberg : Kernel machines with missing responses</title>
    <dc:date>2020-11-16T16:20:21+00:00</dc:date>
    <link>https://projecteuclid.org/euclid.ejs/1602900015</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Missing responses is a common type of data where the interested outcomes are not always observed. In this paper, we develop two new kernel machines to handle such a case, which can be used for both regression and classification. The first proposed kernel machine uses only the complete cases where both response and covariates are observed. It is, however, subject to some assumption limitations. Our second proposed doubly-robust kernel machine overcomes such limitations regardless of the misspecification of either the missing mechanism or the conditional distribution of the response. Theoretical properties, including the oracle inequalities for the excess risk, universal consistency, and learning rates are established. We demonstrate the superiority of the proposed methods to some existing methods by simulation and illustrate their application to a real data set concerning a survey about homeless people."]]></description>
<dc:subject>to:NB missing_data kernel_methods statistics regression classifiers prediction</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:b8bca80a71f4/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:missing_data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kernel_methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2010.13993">
    <title>[2010.13993] Combining Label Propagation and Simple Models Out-performs Graph Neural Networks</title>
    <dc:date>2020-11-08T08:47:34+00:00</dc:date>
    <link>https://arxiv.org/abs/2010.13993</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Graph Neural Networks (GNNs) are the predominant technique for learning over graphs. However, there is relatively little understanding of why GNNs are successful in practice and whether they are necessary for good performance. Here, we show that for many standard transductive node classification benchmarks, we can exceed or match the performance of state-of-the-art GNNs by combining shallow models that ignore the graph structure with two simple post-processing steps that exploit correlation in the label structure: (i) an "error correlation" that spreads residual errors in training data to correct errors in test data and (ii) a "prediction correlation" that smooths the predictions on the test data. We call this overall procedure Correct and Smooth (C&S), and the post-processing steps are implemented via simple modifications to standard label propagation techniques from early graph-based semi-supervised learning methods. Our approach exceeds or nearly matches the performance of state-of-the-art GNNs on a wide variety of benchmarks, with just a small fraction of the parameters and orders of magnitude faster runtime. For instance, we exceed the best known GNN performance on the OGB-Products dataset with 137 times fewer parameters and greater than 100 times less training time. The performance of our methods highlights how directly incorporating label information into the learning algorithm (as was done in traditional techniques) yields easy and substantial performance gains. We can also incorporate our techniques into big GNN models, providing modest gains."

--- Perhaps I'm biased, but the fact that "run simple models that use node-level features, then smooth" comes within inches of the state of the art, if not actually beating it, while using a minute fraction of the resources, seems like an extraordinary indictment of the whole GNN industry.  (Hell, sheer "label propagation" [i.e., smoothing] seems pretty competitive.)  ]]></description>
<dc:subject>network_data_analysis prediction classifiers smoothing your_favorite_deep_neural_network_sucks have_read to_teach:baby-nets in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:7e9c5fb62793/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:smoothing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:your_favorite_deep_neural_network_sucks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:baby-nets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.pnas.org/content/116/52/26459">
    <title>Social media-predicted personality traits and values can help match people to their ideal jobs | PNAS</title>
    <dc:date>2020-07-16T15:49:42+00:00</dc:date>
    <link>https://www.pnas.org/content/116/52/26459</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Work is thought to be more enjoyable and beneficial to individuals and society when there is congruence between one’s personality and one’s occupation. We provide large-scale evidence that occupations have distinctive psychological profiles, which can successfully be predicted from linguistic information unobtrusively collected through social media. Based on 128,279 Twitter users representing 3,513 occupations, we automatically assess user personalities and visually map the personality profiles of different professions. Similar occupations cluster together, pointing to specific sets of jobs that one might be well suited for. Observations that contradict existing classifications may point to emerging occupations relevant to the 21st century workplace. Findings illustrate how social media can be used to match people to their ideal occupation."

--- Some observations:
1. They did not actually measure people's personality traits; they _assumed_ that a commercial IBM product can map word usage to personality traits.
1a. In particular, they _assumed_ that this remains accurate for what people write on Twitter, as opposed to whatever context IBM developed their system in (not specified here).
2. They did not actually measure "ideal" occupations; they saw whether a classifier using the estimated personality traits could map people to their actual occupations.
2a. They artificially balance their 10 professions so that each has 955 members.  (I presume that they randomly sampled the occupations with more members, though I don't quite see them saying that; maybe I missed it.  Also, I presume they did _not_ go hunting for the best group of 10 occupations.)  So the baseline accuracy would be only 10%, and getting about 70% under CV does indeed mean that there's some signal here.
2b. It's good that they include error bars on their accuracy figures!
2c.  Since they include those error bars, we can see that the differences in classification accuracy between the different methods are both small and statistically insignificant.  In particular, good old fashioned logistic regression is pretty much on par with everything else.
2d. They don't seem to have actually tried the obvious classifier here, which would map each person to the occupation whose feature-vector center ("medoid") was closest to the person's feature-vector ("prototype method").  But they did at least use k-nearest-neighbors, which performed about as well as all the others.
3. Calling this evidence that we could go from analyzing Twitter word usage to "ideal" job recommendations presumes that most people are _already_ in their ideal jobs.
4. This was edited by Susan Fiske [https://statmodeling.stat.columbia.edu/2017/02/08/authority-figures-spread-happy-talk-still-dont-get-it/].

_Maybe_ people reveal their personalities, in the Big 5 sense, by what they write on Twitter.  (Operationally, "personality" in the Big 5 sense is pretty close to "what words would you use to describe yourself on a questionnaire?")  And _maybe_ the way people reveal their personalities in their word usage on Twitter is so context-independent that it can reliably generalize across all the different sub-cultures and sub-societies and self-organized genre conventions of Twitter, so there is one globally reliable mapping.  (I am not going to repeat all of [http://bactra.org/weblog/770.html], but I could.)  And _maybe_ IBM has provided that mapping with an API.  And _maybe_ people with different personalities select in to different professions.  (As an alternative: different occupations train people differently, which alters their personalities, or at least the verbal expressions thereof, and different occupations expose people to different situations, which alters what they say and maybe even shapes their personalities.)  And _maybe_ people select in to professions where they are happier.  And _maybe_ if we looked at how young people talk on Twitter, before they've chosen an occupation, and extract their personality from it, and map them to a profession with lots of similar personality vectors already in it, they'll be happier in that occupation than in others.  But this study provides at best very, very weak evidence for all this.  (I want to say "no evidence at all", but I also don't want to get into arguments about the theory of evidence.)  What the study does show is that people in different occupations use different words on Twitter, and that these differences are detectable through the filter of IBM's purported personality estimator.

]]></description>
<dc:subject>to:NB have_read bad_science bad_data_analysis classifiers text_mining personality_tests logistic_regression social_media psychology why_oh_why_cant_we_have_a_better_academic_publishing_system to_teach:data-mining forty_minutes_of_my_life_im_not_getting_back trapped_in_plutos_republic to:blog twitter re:career_advising_in_plutos_republic</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:780cca65f6d0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:text_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:personality_tests"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:logistic_regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_media"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:psychology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:why_oh_why_cant_we_have_a_better_academic_publishing_system"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:forty_minutes_of_my_life_im_not_getting_back"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:trapped_in_plutos_republic"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:blog"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:twitter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:career_advising_in_plutos_republic"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://blog.piekniewski.info/2018/07/14/autopsy-dl-paper/">
    <title>Autopsy of a deep learning paper – Piekniewski's blog</title>
    <dc:date>2020-07-13T17:58:44+00:00</dc:date>
    <link>https://blog.piekniewski.info/2018/07/14/autopsy-dl-paper/</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>neural_networks classifiers your_favorite_deep_neural_network_sucks</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:12413aefa499/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neural_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:your_favorite_deep_neural_network_sucks"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.nytimes.com/2020/06/24/technology/facial-recognition-arrest.html?action=click&amp;module=Top%20Stories&amp;pgtype=Homepage">
    <title>Wrongfully Accused by an Algorithm - The New York Times</title>
    <dc:date>2020-06-25T16:05:42+00:00</dc:date>
    <link>https://www.nytimes.com/2020/06/24/technology/facial-recognition-arrest.html?action=click&amp;module=Top%20Stories&amp;pgtype=Homepage</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>classifiers data_mining police utter_stupidity to_teach:data-mining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:b4f394d2fc25/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:data_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:police"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:utter_stupidity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2006.03895">
    <title>[2006.03895] The Criminality From Face Illusion</title>
    <dc:date>2020-06-19T17:30:25+00:00</dc:date>
    <link>https://arxiv.org/abs/2006.03895</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The automatic analysis of face images can generate predictions about a person's gender, age, race, facial expression, body mass index, and various other indices and conditions. A few recent publications have claimed success in analyzing an image of a person's face in order to predict the person's status as Criminal / Non-Criminal. Predicting criminality from face may initially seem similar to other facial analytics, but we argue that attempts to create a criminality-from-face algorithm are necessarily doomed to fail, that apparently promising experimental results in recent publications are an illusion resulting from inadequate experimental design, and that there is potentially a large social cost to belief in the criminality from face illusion."]]></description>
<dc:subject>to:NB to_read prediction crime classifiers bad_data_analysis to_teach:data-mining to_teach:statistics_of_inequality_and_discrimination via:yorksranter trapped_in_plutos_republic</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:34b1929c5c7c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:crime"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:statistics_of_inequality_and_discrimination"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:yorksranter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:trapped_in_plutos_republic"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1905.12516">
    <title>[1905.12516] Racial Bias in Hate Speech and Abusive Language Detection Datasets</title>
    <dc:date>2020-05-06T20:12:10+00:00</dc:date>
    <link>https://arxiv.org/abs/1905.12516</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Technologies for abusive language detection are being developed and applied with little consideration of their potential biases. We examine racial bias in five different sets of Twitter data annotated for hate speech and abusive language. We train classifiers on these datasets and compare the predictions of these classifiers on tweets written in African-American English with those written in Standard American English. The results show evidence of systematic racial bias in all datasets, as classifiers trained on them tend to predict that tweets written in African-American English are abusive at substantially higher rates. If these abusive language detection systems are used in the field they will therefore have a disproportionate negative impact on African-American social media users. Consequently, these systems may discriminate against the groups who are often the targets of the abuse we are trying to detect."]]></description>
<dc:subject>algorithmic_fairness text_mining classifiers to_teach:data-mining to_teach:statistics_of_inequality_and_discrimination in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:1ce9a1ebca05/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:algorithmic_fairness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:text_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:statistics_of_inequality_and_discrimination"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2001.01987">
    <title>[2001.01987] Softmax-based Classification is k-means Clustering: Formal Proof, Consequences for Adversarial Attacks, and Improvement through Centroid Based Tailoring</title>
    <dc:date>2020-03-18T17:56:52+00:00</dc:date>
    <link>https://arxiv.org/abs/2001.01987</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We formally prove the connection between k-means clustering and the predictions of neural networks based on the softmax activation layer. In existing work, this connection has been analyzed empirically, but it has never before been mathematically derived. The softmax function partitions the transformed input space into cones, each of which encompasses a class. This is equivalent to putting a number of centroids in this transformed space at equal distance from the origin, and k-means clustering the data points by proximity to these centroids. Softmax only cares in which cone a data point falls, and not how far from the centroid it is within that cone. We formally prove that networks with a small Lipschitz modulus (which corresponds to a low susceptibility to adversarial attacks) map data points closer to the cluster centroids, which results in a mapping to a k-means-friendly space. To leverage this knowledge, we propose Centroid Based Tailoring as an alternative to the softmax function in the last layer of a neural network. The resulting Gauss network has similar predictive accuracy as traditional networks, but is less susceptible to one-pixel attacks; while the main contribution of this paper is theoretical in nature, the Gauss network contributes empirical auxiliary benefits."]]></description>
<dc:subject>to:NB neural_networks classifiers clustering k-means adversarial_examples via:arsyed</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:1f1d75e91ff6/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neural_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:k-means"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:adversarial_examples"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:arsyed"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.mitpressjournals.org/doi/abs/10.1162/evco_a_00252">
    <title>A Tandem Evolutionary Algorithm for Identifying Causal Rules from Complex Data | Evolutionary Computation | MIT Press Journals</title>
    <dc:date>2020-03-02T14:53:54+00:00</dc:date>
    <link>https://www.mitpressjournals.org/doi/abs/10.1162/evco_a_00252</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We propose a new evolutionary approach for discovering causal rules in complex classification problems from batch data. Key aspects include (a) the use of a hypergeometric probability mass function as a principled statistic for assessing fitness that quantifies the probability that the observed association between a given clause and target class is due to chance, taking into account the size of the dataset, the amount of missing data, and the distribution of outcome categories, (b) tandem age-layered evolutionary algorithms for evolving parsimonious archives of conjunctive clauses, and disjunctions of these conjunctions, each of which have probabilistically significant associations with outcome classes, and (c) separate archive bins for clauses of different orders, with dynamically adjusted order-specific thresholds. The method is validated on majority-on and multiplexer benchmark problems exhibiting various combinations of heterogeneity, epistasis, overlap, noise in class associations, missing data, extraneous features, and imbalanced classes. We also validate on a more realistic synthetic genome dataset with heterogeneity, epistasis, extraneous features, and noise. In all synthetic epistatic benchmarks, we consistently recover the true causal rule sets used to generate the data. Finally, we discuss an application to a complex real-world survey dataset designed to inform possible ecohealth interventions for Chagas disease."

--- I don't see what's _causal_ here, as opposed to just searching for tractably-simple classification rules.]]></description>
<dc:subject>to:NB causal_inference genetic_algorithms classifiers color_me_skeptical</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:33c9b7e64857/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:causal_inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:genetic_algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:color_me_skeptical"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.jstatsoft.org/article/view/v054i02">
    <title>adabag: An R Package for Classification with Boosting and Bagging | Alfaro | Journal of Statistical Software</title>
    <dc:date>2019-12-01T15:47:34+00:00</dc:date>
    <link>https://www.jstatsoft.org/article/view/v054i02</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Boosting and bagging are two widely used ensemble methods for classification. Their common goal is to improve the accuracy of a classifier combining single classifiers which are slightly better than random guessing. Among the family of boosting algorithms, AdaBoost (adaptive boosting) is the best known, although it is suitable only for dichotomous tasks. AdaBoost.M1 and SAMME (stagewise additive modeling using a multi-class exponential loss function) are two easy and natural extensions to the general case of two or more classes. In this paper, the adabag R package is introduced. This version implements AdaBoost.M1, SAMME and bagging algorithms with classification trees as base classifiers. Once the ensembles have been trained, they can be used to predict the class of new samples. The accuracy of these classifiers can be estimated in a separated data set or through cross validation. Moreover, the evolution of the error as the ensemble grows can be analysed and the ensemble can be pruned. In addition, the margin in the class prediction and the probability of each class for the observations can be calculated. Finally, several classic examples in classification literature are shown to illustrate the use of this package."]]></description>
<dc:subject>to:NB boosting bagging ensemble_methods classifiers decision_trees R to_teach:data-mining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:af00024c2969/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:boosting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bagging"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:ensemble_methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:decision_trees"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:R"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://advances.sciencemag.org/content/4/1/eaao5580">
    <title>The accuracy, fairness, and limits of predicting recidivism | Science Advances</title>
    <dc:date>2019-11-28T21:01:50+00:00</dc:date>
    <link>https://advances.sciencemag.org/content/4/1/eaao5580</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Algorithms for predicting recidivism are commonly used to assess a criminal defendant’s likelihood of committing a crime. These predictions are used in pretrial, parole, and sentencing decisions. Proponents of these systems argue that big data and advanced machine learning make these analyses more accurate and less biased than humans. We show, however, that the widely used commercial risk assessment software COMPAS is no more accurate or fair than predictions made by people with little or no criminal justice expertise. In addition, despite COMPAS’s collection of 137 features, the same accuracy can be achieved with a simple linear classifier with only two features."

--- Pretty sure that The Kids matched the last result using decision trees (with the same two features).]]></description>
<dc:subject>have_skimmed crime prediction algorithmic_fairness statistics classifiers to_teach:data-mining to_teach:statistics_of_inequality_and_discrimination in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:ebd017b79dc3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_skimmed"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:crime"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:algorithmic_fairness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:statistics_of_inequality_and_discrimination"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3489440">
    <title>Algorithmic Risk Assessment in the Hands of Humans by Megan T. Stevenson, Jennifer L. Doleac :: SSRN</title>
    <dc:date>2019-11-21T03:32:13+00:00</dc:date>
    <link>https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3489440</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We evaluate the impacts of adopting algorithmic predictions of future offending (risk assessments) as an aid to judicial discretion in felony sentencing. We find that judges' decisions are influenced by the risk score, leading to longer sentences for defendants with higher scores and shorter sentences for those with lower scores. However, we find no robust evidence that this reshuffling led to a decline in recidivism, and, over time, judges appeared to use the risk scores less. Risk assessment's failure to reduce recidivism is at least partially explained by judicial discretion in its use. Judges systematically grant leniency to young defendants, despite their high risk of reoffending. This is in line with a long standing practice of treating youth as a mitigator in sentencing, due to lower perceived culpability. Such a conflict in goals may have led prior studies to overestimate the extent to which judges make prediction errors. Since one of the most important inputs to the risk score is effectively off-limits, risk assessment's expected benefits are curtailed. We find no evidence that risk assessment affected racial disparities statewide, although there was a relative increase in sentences for black defendants in courts that appeared to use risk assessment most. We conduct simulations to evaluate how race and age disparities would have changed if judges had fully complied with the sentencing recommendations associated with the algorithm. Racial disparities might have increased slightly, but the largest change would have been higher relative incarceration rates for defendants under the age of 23. In the context of contentious public discussions about algorithms, our results highlight the importance of thinking about how man and machine interact."]]></description>
<dc:subject>algorithmic_fairness prediction classifiers crime to_teach:data-mining scores_and_classes to_teach:statistics_of_inequality_and_discrimination in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:553fdf367655/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:algorithmic_fairness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:crime"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:scores_and_classes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:statistics_of_inequality_and_discrimination"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1911.00483">
    <title>[1911.00483] Explanation by Progressive Exaggeration</title>
    <dc:date>2019-11-11T15:27:15+00:00</dc:date>
    <link>https://arxiv.org/abs/1911.00483</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["As machine learning methods see greater adoption and implementation in high stakes applications such as medical image diagnosis, the need for model interpretability and explanation has become more critical. Classical approaches that assess feature importance (e.g. saliency maps) do not explain how and why a particular region of an image is relevant to the prediction. We propose a method that explains the outcome of a classification black-box by gradually exaggerating the semantic effect of a given class. Given a query input to a classifier, our method produces a progressive set of plausible variations of that query, which gradually changes the posterior probability from its original class to its negation. These counter-factually generated samples preserve features unrelated to the classification decision, such that a user can employ our method as a "tuning knob" to traverse a data manifold while crossing the decision boundary. Our method is model agnostic and only requires the output value and gradient of the predictor with respect to its input."]]></description>
<dc:subject>to:NB classifiers explanation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:7501ccff54db/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:explanation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1902.02979">
    <title>[1902.02979] Fair Decisions Despite Imperfect Predictions</title>
    <dc:date>2019-10-29T15:01:36+00:00</dc:date>
    <link>https://arxiv.org/abs/1902.02979</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Consequential decisions are increasingly informed by sophisticated data-driven predictive models. However, consistently learning accurate predictive models requires access to ground truth labels. Unfortunately, in practice, labels may only exist conditional on certain decisions---if a loan is denied, there is not even an option for the individual to pay back the loan. In this paper, we show that, in this selective labels setting, learning to predict is suboptimal in terms of both fairness and utility. To avoid this undesirable behavior, we propose to directly learn stochastic decision policies that maximize utility under fairness constraints. In the context of fair machine learning, our results suggest the need for a paradigm shift from "learning to predict" to "learning to decide". Experiments on synthetic and real-world data illustrate the favorable properties of learning to decide, in terms of both utility and fairness."]]></description>
<dc:subject>missing_data classifiers algorithmic_fairness statistics to_teach:data-mining to_teach:statistics_of_inequality_and_discrimination in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:3e6f8835dbd4/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:missing_data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:algorithmic_fairness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:statistics_of_inequality_and_discrimination"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1910.12163">
    <title>[1910.12163] Understanding and Quantifying Adversarial Examples Existence in Linear Classification</title>
    <dc:date>2019-10-29T14:18:08+00:00</dc:date>
    <link>https://arxiv.org/abs/1910.12163</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["State-of-art deep neural networks (DNN) are vulnerable to attacks by adversarial examples: a carefully designed small perturbation to the input, that is imperceptible to human, can mislead DNN. To understand the root cause of adversarial examples, we quantify the probability of adversarial example existence for linear classifiers. Previous mathematical definition of adversarial examples only involves the overall perturbation amount, and we propose a more practical relevant definition of strong adversarial examples that separately limits the perturbation along the signal direction also. We show that linear classifiers can be made robust to strong adversarial examples attack in cases where no adversarial robust linear classifiers exist under the previous definition. The quantitative formulas are confirmed by numerical experiments using a linear support vector machine (SVM) classifier. The results suggest that designing general strong-adversarial-robust learning systems is feasible but only through incorporating human knowledge of the underlying classification problem."]]></description>
<dc:subject>adversarial_examples classifiers in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:266946ee11ba/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:adversarial_examples"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1910.12756">
    <title>[1910.12756] Fast classification rates without standard margin assumptions</title>
    <dc:date>2019-10-29T02:24:09+00:00</dc:date>
    <link>https://arxiv.org/abs/1910.12756</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We consider the classical problem of learning rates for classes with finite VC dimension. It is well known that fast learning rates are achievable by the empirical risk minimization algorithm (ERM) if one of the low noise/margin assumptions such as Tsybakov's and Massart's condition is satisfied. In this paper, we consider an alternative way of obtaining fast learning rates in classification if none of these conditions are met.
"We first consider Chow's reject option model and show that by lowering the impact of a small fraction of hard instances, fast learning rate is achievable in an agnostic model by a specific learning algorithm. Similar results were only known under special versions of margin assumptions. We also show that the learning algorithm achieving these rates is adaptive to standard margin assumptions and always satisfies the risk bounds achieved by ERM.
"Based on our results on Chow's model, we then analyze a particular family of VC classes, namely classes with finite combinatorial diameter. Using their special structure, we show that there is an improper learning algorithm that provides fast rates of convergence even in the (poorly understood) situations where ERM is suboptimal. This provides the first setup in which an improper learning algorithm may significantly improve the learning rates for non-convex losses.
"Finally, we discuss some implications of our techniques to the analysis of ERM."]]></description>
<dc:subject>to:NB learning_theory classifiers</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:5060e7be5448/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:learning_theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1910.11299">
    <title>[1910.11299] Fraud Detection in Networks: State-of-the-art</title>
    <dc:date>2019-10-25T14:40:13+00:00</dc:date>
    <link>https://arxiv.org/abs/1910.11299</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Financial fraud detection represents the challenge of finding anomalies in networks of financial transactions. In general, the anomaly detection (AD) is the problem of distinguishing between normal data samples with well defined patterns or signatures and those that do not conform to the expected profiles. The fraudulent behaviour in money laundering may manifest itself through unusual patterns in financial transaction networks. In such networks, nodes represents customers and the edges are transactions: a directed edge between two nodes illustrates that there is a money transfer in the respective direction, where the weight on the edge is the transferred amount. In this paper we present a survey on the fundamental anomaly detection techniques and then present briefly the relevant literature in connection with fraud detection context."]]></description>
<dc:subject>to:NB fraud classifiers network_data_analysis relational_learning statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:b2f24aaae87e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:fraud"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:relational_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1909.10831">
    <title>[1909.10831] Entropy from Machine Learning</title>
    <dc:date>2019-10-25T14:28:25+00:00</dc:date>
    <link>https://arxiv.org/abs/1909.10831</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We translate the problem of calculating the entropy of a set of binary configurations/signals into a sequence of supervised classification tasks. Subsequently, one can use virtually any machine learning classification algorithm for computing entropy. This procedure can be used to compute entropy, and consequently the free energy directly from a set of Monte Carlo configurations at a given temperature. As a test of the proposed method, using an off-the-shelf machine learning classifier we reproduce the entropy and free energy of the 2D Ising model from Monte Carlo configurations at various temperatures throughout its phase diagram. Other potential applications include computing the entropy of spiking neurons or any other multidimensional binary signals."]]></description>
<dc:subject>to:NB classifiers entropy_estimation statistics color_me_skeptical</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:220e73253c27/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:entropy_estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:color_me_skeptical"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1910.06772">
    <title>[1910.06772] Counterfactual diagnosis</title>
    <dc:date>2019-10-16T15:50:04+00:00</dc:date>
    <link>https://arxiv.org/abs/1910.06772</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Causal knowledge is vital for effective reasoning in science and medicine. In medical diagnosis for example, a doctor aims to explain a patient's symptoms by determining the diseases causing them. However, all previous approaches to Machine-Learning assisted diagnosis, including Deep Learning and model-based Bayesian approaches, learn by association and do not distinguish correlation from causation. Here, we propose a new diagnostic algorithm based on counterfactual inference which captures the causal aspect of diagnosis overlooked by previous approaches. Using a statistical disease model, which describes the relations between hundreds of diseases, symptoms and risk factors, we compare our counterfactual algorithm to the standard Bayesian diagnostic algorithm, and test these against a cohort of 44 doctors. We use 1763 clinical vignettes created by a separate panel of doctors to benchmark performance. Each vignette provides a non-exhaustive list of symptoms and medical history simulating a single presentation of a disease. The algorithms and doctors are tasked with determining the underlying disease for each vignette from symptom and medical history information alone. While the Bayesian algorithm achieves the accuracy comparable to the average doctor, placing in the top 49% of doctors in our cohort, our counterfactual algorithm places in the top 20% of doctors, achieving expert clinical accuracy. Our results demonstrate the advantage of counterfactual over associative reasoning in a complex real-world task, and show that counterfactual reasoning is a vital missing ingredient for applying machine learning to medical diagnosis."]]></description>
<dc:subject>to:NB causal_inference classifiers data_mining to_read to_teach:data-mining color_me_skeptical</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:595c9e6af36b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:causal_inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:data_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:color_me_skeptical"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1909.12475">
    <title>[1909.12475] Hidden Stratification Causes Clinically Meaningful Failures in Machine Learning for Medical Imaging</title>
    <dc:date>2019-10-01T17:19:57+00:00</dc:date>
    <link>https://arxiv.org/abs/1909.12475</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Machine learning models for medical image analysis often suffer from poor performance on important subsets of a population that are not identified during training or testing. For example, overall performance of a cancer detection model may be high, but the model still consistently misses a rare but aggressive cancer subtype. We refer to this problem as hidden stratification, and observe that it results from incompletely describing the meaningful variation in a dataset. While hidden stratification can substantially reduce the clinical efficacy of machine learning models, its effects remain difficult to measure. In this work, we assess the utility of several possible techniques for measuring and describing hidden stratification effects, and characterize these effects both on multiple medical imaging datasets and via synthetic experiments on the well-characterised CIFAR-100 benchmark dataset. We find evidence that hidden stratification can occur in unidentified imaging subsets with low prevalence, low label quality, subtle distinguishing features, or spurious correlates, and that it can result in relative performance differences of over 20% on clinically important subsets. Finally, we explore the clinical implications of our findings, and suggest that evaluation of hidden stratification should be a critical component of any machine learning deployment in medical imaging."]]></description>
<dc:subject>to:NB classifiers data_mining prediction bad_data_analysis statistics to_teach:data-mining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:48588e6ab9e2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:data_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1909.12434">
    <title>[1909.12434] Learning the Difference that Makes a Difference with Counterfactually-Augmented Data</title>
    <dc:date>2019-10-01T17:18:54+00:00</dc:date>
    <link>https://arxiv.org/abs/1909.12434</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Despite alarm over the reliance of machine learning systems on so-called spurious patterns in training data, the term lacks coherent meaning in standard statistical frameworks. However, the language of causality offers clarity: spurious associations are those due to a common cause (confounding) vs direct or indirect effects. In this paper, we focus on NLP, introducing methods and resources for training models insensitive to spurious patterns. Given documents and their initial labels, we task humans with revising each document to accord with a counterfactual target label, asking that the revised documents be internally coherent while avoiding any gratuitous changes. Interestingly, on sentiment analysis and natural language inference tasks, classifiers trained on original data fail on their counterfactually-revised counterparts and vice versa. Classifiers trained on combined datasets perform remarkably well, just shy of those specialized to either domain. While classifiers trained on either original or manipulated data alone are sensitive to spurious features (e.g., mentions of genre), models trained on the combined data are insensitive to this signal. We will publicly release both datasets."]]></description>
<dc:subject>to:NB classifiers statistics experimental_psychology of_a_sort</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:41d582ec8253/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:experimental_psychology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:of_a_sort"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1909.03801">
    <title>[1909.03801] Aim for clinical utility, not just predictive accuracy</title>
    <dc:date>2019-09-26T18:39:39+00:00</dc:date>
    <link>https://arxiv.org/abs/1909.03801</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The predictions from an accurate prognostic model can be of great interest to patients and clinicians. When predictions are reported to individuals, they may decide to take action to improve their health or they may simply be comforted by the knowledge. However, if there is a clearly defined space of actions in the clinical context, a formal decision rule based on the prediction has the potential to have a much broader impact. Even if it is not the intended use of a developed prediction model, informal decision rules can often be found in practice. The use of a prediction-based decision rule should be formalized and compared to the standard of care in a randomized trial to assess its clinical utility, however, evidence is needed to motivate such a trial. We outline how observational data can be used to propose a decision rule based on a prognostic prediction model. We then propose a framework for emulating a prediction driven trial to evaluate the utility of a prediction-based decision rule in observational data. A split-sample structure can and should be used to develop the prognostic model, define the decision rule, and evaluate its clinical utility."]]></description>
<dc:subject>to:NB decision-making prediction classifiers data_mining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:182a39f8e19b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:decision-making"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:data_mining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1909.06788">
    <title>[1909.06788] Inner-product Kernels are Asymptotically Equivalent to Binary Discrete Kernels</title>
    <dc:date>2019-09-18T12:54:06+00:00</dc:date>
    <link>https://arxiv.org/abs/1909.06788</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This article investigates the eigenspectrum of the inner product-type kernel matrix $\sqrt{p}\,K=\{f(x_i^{\mathsf{T}}x_j/\sqrt{p})\}_{i,j=1}^{n}$ under a binary mixture model in the high dimensional regime where the number of data n and their dimension p are both large and comparable. Based on recent advances in random matrix theory, we show that, for a wide range of nonlinear functions f, the eigenspectrum behavior is asymptotically equivalent to that of an (at most) cubic function. This sheds new light on the understanding of nonlinearity in large dimensional problems. As a byproduct, we propose a simple function prototype valued in (−1,0,1) that, while reducing both storage memory and running time, achieves the same (asymptotic) classification performance as any arbitrary function f."]]></description>
<dc:subject>to:NB kernel_methods high-dimensional_probability high-dimensional_statistics to_read statistics classifiers</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c966754e5c49/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kernel_methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_probability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1909.04791">
    <title>[1909.04791] Techniques All Classifiers Can Learn from Deep Networks: Models, Optimizations, and Regularization</title>
    <dc:date>2019-09-15T17:23:40+00:00</dc:date>
    <link>https://arxiv.org/abs/1909.04791</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Deep neural networks have introduced novel and useful tools to the machine learning community, and other types of classifiers can make use of these tools to improve their performance and generality. This paper reviews the current state of the art for deep learning classifier technologies that are being used outside of deep neural networks. Many components of existing deep neural network architectures can be employed by non-network classifiers. In this paper, we review the feature learning, optimization, and regularization methods that form a core of deep network technologies. We then survey non-neural network learning algorithms that make innovative use of these methods to improve classification. We conclude by discussing directions that can be pursued to expand the area of deep learning for a variety of classification algorithms."]]></description>
<dc:subject>to:NB classifiers machine_learning optimization computational_statistics statistics neural_networks</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:d7aabdc62b1e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:machine_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:optimization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:computational_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neural_networks"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1503.06410">
    <title>[1503.06410] What the F-measure doesn't measure: Features, Flaws, Fallacies and Fixes</title>
    <dc:date>2019-09-13T13:17:32+00:00</dc:date>
    <link>https://arxiv.org/abs/1503.06410</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The F-measure or F-score is one of the most commonly used single number measures in Information Retrieval, Natural Language Processing and Machine Learning, but it is based on a mistake, and the flawed assumptions render it unsuitable for use in most contexts! Fortunately, there are better alternatives."

--- Not quite a crank, but definitely crank-y.]]></description>
<dc:subject>information_retrieval classifiers NOT_to_teach:data-mining my_initial_skeptical_coloration_became_on_examination_a_permanent_stain</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:55d719338a6d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:information_retrieval"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:NOT_to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:my_initial_skeptical_coloration_became_on_examination_a_permanent_stain"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1908.08973">
    <title>[1908.08973] No evidence for critical slowing down prior to human epileptic seizures</title>
    <dc:date>2019-08-27T15:41:38+00:00</dc:date>
    <link>https://arxiv.org/abs/1908.08973</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["There is an ongoing debate whether generic early warning signals for critical transitions exist that can be applied across diverse systems. The human epileptic brain is often considered as a prototypical system, given the devastating and, at times, even life-threatening nature of the extreme event epileptic seizure. More than three decades of international effort has successfully identified predictors of imminent seizures. However, the suitability of typically applied early warning indicators for critical slowing down, namely variance and lag-1 autocorrelation, for indexing seizure susceptibility is still controversially discussed. Here, we investigated long-term, multichannel recordings of brain dynamics from 28 subjects with epilepsy. Using a surrogate-based evaluation procedure of sensitivity and specificity of time-resolved estimates of early warning indicators, we found no evidence for critical slowing down prior to 105 epileptic seizures."]]></description>
<dc:subject>to:NB time_series classifiers phase_transitions statistics epilepsy neuroscience</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:513f153be229/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:phase_transitions"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:epilepsy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neuroscience"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://jamanetwork.com/journals/jamadermatology/article-abstract/2740808">
    <title>Association Between Surgical Skin Markings in Dermoscopic Images and Diagnostic Performance of a Deep Learning Convolutional Neural Network for Melanoma Recognition | Dermatology | JAMA Dermatology | JAMA Network</title>
    <dc:date>2019-08-20T14:46:42+00:00</dc:date>
    <link>https://jamanetwork.com/journals/jamadermatology/article-abstract/2740808</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Question  Are surgical skin markings in dermoscopic images associated with the diagnostic performance of a trained and validated deep learning convolutional neural network?
"Findings  In this cross-sectional study of 130 skin lesions, skin markings by standard surgical ink markers were associated with a significant reduction in the specificity of a convolutional neural network by increasing the melanoma probability scores, consequently increasing the false-positive rate of benign nevi by approximately 40%.
"Meaning  This study suggests that the use of surgical skin markers should be avoided in dermoscopic images intended for analysis by a convolutional neural network."]]></description>
<dc:subject>to:NB classifiers to_teach:data-mining via:tslumley</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:1d52eaad8ebe/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:tslumley"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1908.06852">
    <title>[1908.06852] SIRUS: making random forests interpretable</title>
    <dc:date>2019-08-20T14:22:39+00:00</dc:date>
    <link>https://arxiv.org/abs/1908.06852</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["State-of-the-art learning algorithms, such as random forests or neural networks, are often qualified as "black-boxes" because of the high number and complexity of operations involved in their prediction mechanism. This lack of interpretability is a strong limitation for applications involving critical decisions, typically the analysis of production processes in the manufacturing industry. In such critical contexts, models have to be interpretable, i.e., simple, stable, and predictive. To address this issue, we design SIRUS (Stable and Interpretable RUle Set), a new classification algorithm based on random forests, which takes the form of a short list of rules. While simple models are usually unstable with respect to data perturbation, SIRUS achieves a remarkable stability improvement over cutting-edge methods. Furthermore, SIRUS inherits a predictive accuracy close to random forests, combined with the simplicity of decision trees. These properties are assessed both from a theoretical and empirical point of view, through extensive numerical experiments based on our R/C++ software implementation sirus."

--- Not sure that there's really much new here, beyond limiting the forest to very shallow trees.]]></description>
<dc:subject>to:NB classifiers ensemble_methods random_forests decision_trees data_mining statistics to_teach:data-mining have_skimmed</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:e72b24d4a589/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:ensemble_methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:random_forests"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:decision_trees"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:data_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_skimmed"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1908.06319">
    <title>[1908.06319] Locally Linear Embedding and fMRI feature selection in psychiatric classification</title>
    <dc:date>2019-08-20T14:19:42+00:00</dc:date>
    <link>https://arxiv.org/abs/1908.06319</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Background: Functional magnetic resonance imaging (fMRI) provides non-invasive measures of neuronal activity using an endogenous Blood Oxygenation-Level Dependent (BOLD) contrast. This article introduces a nonlinear dimensionality reduction (Locally Linear Embedding) to extract informative measures of the underlying neuronal activity from BOLD time-series. The method is validated using the Leave-One-Out-Cross-Validation (LOOCV) accuracy of classifying psychiatric diagnoses using resting-state and task-related fMRI. Methods: Locally Linear Embedding of BOLD time-series (into each voxel's respective tensor) was used to optimise feature selection. This uses Gauß' Principle of Least Constraint to conserve quantities over both space and time. This conservation was assessed using LOOCV to greedily select time points in an incremental fashion on training data that was categorised in terms of psychiatric diagnoses. Findings: The embedded fMRI gave highly diagnostic performances (> 80%) on eleven publicly-available datasets containing healthy controls and patients with either Schizophrenia, Attention-Deficit Hyperactivity Disorder (ADHD), or Autism Spectrum Disorder (ASD). Furthermore, unlike the original fMRI data before or after using Principal Component Analysis (PCA) for artefact reduction, the embedded fMRI furnished significantly better than chance classification (defined as the majority class proportion) on ten of eleven datasets. Interpretation: Locally Linear Embedding appears to be a useful feature extraction procedure that retains important information about patterns of brain activity distinguishing among psychiatric cohorts."

--- Last tag is because I plan to teach LLE and this might make a good example or assignment, if I like how it was actually done.

--- ETA: It's... not horrible (though the writing is bad and far too pretentious), but not very insightful, and too complicated to make a good teaching example.]]></description>
<dc:subject>to:NB locally_linear_embedding classifiers fmri dimension_reduction to_teach:data-mining have_read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:ac687c5e5458/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:locally_linear_embedding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:fmri"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dimension_reduction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1908.03000">
    <title>[1908.03000] Feature selection of neural networks is skewed towards the less abstract cue</title>
    <dc:date>2019-08-09T13:40:03+00:00</dc:date>
    <link>https://arxiv.org/abs/1908.03000</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Artificial neural networks (ANNs) have become an important tool for image classification with many applications in research and industry. However, it remains largely unknown how relevant image features are selected and how data properties affect this process. In particular, we are interested whether the abstraction level of image cues correlating with class membership influences feature selection. We perform experiments with binary images that contain a combination of cues, representing two different levels of abstractions: one is a pattern drawn from a random distribution where class membership correlates with the statistics of the pattern, the other a combination of symbol-like entities, where the symbolic code correlates with class membership. When the network is trained with data in which both cues are equally significant, we observe that the cues at the lower abstraction level, i.e., the pattern, is learned, while the symbolic information is largely ignored, even in networks with many layers. Symbol-like entities are only learned if the importance of low-level cues is reduced compared to the high-level ones. These findings raise important questions about the relevance of features that are learned by deep ANNs and how learning could be shifted towards symbolic features."]]></description>
<dc:subject>to:NB neural_networks classifiers</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:4c351f450ae7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neural_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1908.02591">
    <title>[1908.02591] Anti-Money Laundering in Bitcoin: Experimenting with Graph Convolutional Networks for Financial Forensics</title>
    <dc:date>2019-08-08T12:59:09+00:00</dc:date>
    <link>https://arxiv.org/abs/1908.02591</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Anti-money laundering (AML) regulations play a critical role in safeguarding financial systems, but bear high costs for institutions and drive financial exclusion for those on the socioeconomic and international margins. The advent of cryptocurrency has introduced an intriguing paradox: pseudonymity allows criminals to hide in plain sight, but open data gives more power to investigators and enables the crowdsourcing of forensic analysis. Meanwhile advances in learning algorithms show great promise for the AML toolkit. In this workshop tutorial, we motivate the opportunity to reconcile the cause of safety with that of financial inclusion. We contribute the Elliptic Data Set, a time series graph of over 200K Bitcoin transactions (nodes), 234K directed payment flows (edges), and 166 node features, including ones based on non-public data; to our knowledge, this is the largest labelled transaction data set publicly available in any cryptocurrency. We share results from a binary classification task predicting illicit transactions using variations of Logistic Regression (LR), Random Forest (RF), Multilayer Perceptrons (MLP), and Graph Convolutional Networks (GCN), with GCN being of special interest as an emergent new method for capturing relational information. The results show the superiority of Random Forest (RF), but also invite algorithmic work to combine the respective powers of RF and graph methods. Lastly, we consider visualization for analysis and explainability, which is difficult given the size and dynamism of real-world transaction graphs, and we offer a simple prototype capable of navigating the graph and observing model performance on illicit activity over time. With this tutorial and data set, we hope to a) invite feedback in support of our ongoing inquiry, and b) inspire others to work on this societally important challenge."]]></description>
<dc:subject>to:NB bitcoin network_data_analysis classifiers statistics data_mining crime to_teach:data-mining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:82c164a2a4b9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bitcoin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:data_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:crime"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
</rdf:Bag></taxo:topics>
</item>
</rdf:RDF>