<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (cshalizi)</title>
    <link>https://pinboard.in/u:cshalizi/public/</link>
    <description>recent bookmarks from cshalizi</description>
    <items>
      <rdf:Seq>	<rdf:li rdf:resource="https://arxiv.org/abs/2009.13807"/>
	<rdf:li rdf:resource="https://www.nber.org/papers/w33962"/>
	<rdf:li rdf:resource="https://www.cambridge.org/core/journals/journal-of-economic-history/article/we-do-not-know-the-population-of-every-country-in-the-world-for-the-past-two-thousand-years/D747DDC6E499C799B0471DBE33FEB0BB"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2304.01315"/>
	<rdf:li rdf:resource="https://retractionwatch.com/2015/07/21/to-our-horror-widely-reported-study-suggesting-divorce-is-more-likely-when-wives-fall-ill-gets-axed/"/>
	<rdf:li rdf:resource="https://www.nature.com/articles/s41562-023-01749-9"/>
	<rdf:li rdf:resource="https://statmodeling.stat.columbia.edu/2024/08/05/adverse-adult-research-outcomes-increased-after-increased-willingness-of-public-health-journals-to-publish-absolute-crap/"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2402.14583"/>
	<rdf:li rdf:resource="https://retractionwatch.com/2023/08/25/ex-cops-tangle-with-journals-over-strip-clubs-and-sex-crimes/"/>
	<rdf:li rdf:resource="https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4528229"/>
	<rdf:li rdf:resource="https://www.wired.com/story/welfare-state-algorithms/"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2203.06498"/>
	<rdf:li rdf:resource="https://mobile.twitter.com/benmschmidt/status/1419497587296571395"/>
	<rdf:li rdf:resource="https://www.technologyreview.com/2021/07/07/1027916/we-tested-ai-interview-tools/?_hsmi=140522126&amp;_hsenc=p2ANqtz-8vjTOrf7wPGyBHzGvNM6HAsRT9_ivd6OAndeGlgd7q_DeKQzL8wzhIWUxeUBctrU37fL6-4CUvXLmfyBLOvUsDCnCCddi0lNRwTMkkbRP71Z9mu90"/>
	<rdf:li rdf:resource="https://mobile.twitter.com/HistDem/status/1395774558096039938"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2105.03948"/>
	<rdf:li rdf:resource="https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3814191"/>
	<rdf:li rdf:resource="https://twitter.com/literalbanana/status/1380386461728522243"/>
	<rdf:li rdf:resource="https://kieranhealy.org/blog/archives/2021/01/26/income-and-happiness/"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2101.07097"/>
	<rdf:li rdf:resource="https://www.mcgill.ca/oss/article/critical-thinking/dunning-kruger-effect-probably-not-real"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1707.06289"/>
	<rdf:li rdf:resource="https://www.npr.org/sections/ed/2018/08/27/640323347/the-school-shootings-that-werent"/>
	<rdf:li rdf:resource="https://projects.tampabay.com/projects/2020/investigations/police-pasco-sheriff-targeted/intelligence-led-policing/"/>
	<rdf:li rdf:resource="https://www.bbc.com/worklife/article/20200123-how-your-twitter-feed-could-help-find-your-dream-job"/>
	<rdf:li rdf:resource="http://nautil.us/blog/scientists-can-predict-your-job-by-your-social_media-personality"/>
	<rdf:li rdf:resource="https://www.sophieheloisebennett.com/posts/a-levels-2020/"/>
	<rdf:li rdf:resource="https://www.pnas.org/content/116/52/26459"/>
	<rdf:li rdf:resource="https://github.com/andrewkern/gdp_pgs/blob/master/araThal.ipynb"/>
	<rdf:li rdf:resource="https://unherd.com/2020/07/why-we-stopped-trusting-experts/"/>
	<rdf:li rdf:resource="https://www.nber.org/papers/w26480"/>
	<rdf:li rdf:resource="https://www-brookings-edu.cdn.ampproject.org/v/s/www.brookings.edu/blog/brown-center-chalkboard/2020/01/21/can-a-teacher-really-impact-student-height-a-cautionary-tale-on-value-added-models/amp/?usqp=mq331AQCKAE%3D&amp;amp_js_v=0.1#referrer=https%3A%2F%2Fwww.google.com&amp;amp_tf=From%20%251%24s&amp;ampshare=https%3A%2F%2Fwww.brookings.edu%2Fblog%2Fbrown-center-chalkboard%2F2020%2F01%2F21%2Fcan-a-teacher-really-impact-student-height-a-cautionary-tale-on-value-added-models%2F"/>
	<rdf:li rdf:resource="http://teaching.sociology.ul.ie/bhalpin/wordpress/?p=669"/>
	<rdf:li rdf:resource="https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0235318"/>
	<rdf:li rdf:resource="https://medium.com/@jon.mummolo/prominent-claims-that-policing-is-not-racially-biased-rest-on-flawed-science-6f66535dc7e5"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2006.03895"/>
	<rdf:li rdf:resource="https://www.washingtonpost.com/opinions/2020/01/28/it-took-us-months-contest-flawed-study-police-bias-heres-why-thats-dangerous/"/>
	<rdf:li rdf:resource="https://twitter.com/WhiteHouseCEA/status/1257680258364555264"/>
	<rdf:li rdf:resource="https://theconversation.com/three-charts-that-show-where-the-coronavirus-death-rate-is-heading-137103"/>
	<rdf:li rdf:resource="https://rexdouglass.github.io/TIGR/Douglass_2020_How_To_Be_Curious_Instead_of_Contrarian_About_Covid19.nb.html"/>
	<rdf:li rdf:resource="https://journals.sagepub.com/doi/abs/10.1207/s15327957pspr0203_4"/>
	<rdf:li rdf:resource="http://models.street-artists.org/2020/01/09/nothing-to-see-here-move-along-regression-discontinuity-edition/"/>
	<rdf:li rdf:resource="https://www.nber.org/papers/w26480#fromrss"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1909.12475"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1909.06539"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1908.08702"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1909.04436"/>
	<rdf:li rdf:resource="https://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-1044-7"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1905.11052"/>
	<rdf:li rdf:resource="https://www.annualreviews.org/doi/abs/10.1146/annurev-criminol-011518-024638"/>
	<rdf:li rdf:resource="https://www.annualreviews.org/doi/abs/10.1146/annurev-soc-073117-041429"/>
	<rdf:li rdf:resource="https://arstechnica.com/cars/2019/02/in-2017-the-feds-said-tesla-autopilot-cut-crashes-40-that-was-bogus/"/>
	<rdf:li rdf:resource="http://www.pnas.org/content/108/42/E833.full"/>
	<rdf:li rdf:resource="http://www.pnas.org/content/115/10/2305"/>
	<rdf:li rdf:resource="http://dx.doi.org/10.1111/ecoj.12461"/>
	<rdf:li rdf:resource="https://scatter.wordpress.com/2014/06/10/the-hurricane-name-study-gets-worse/"/>
	<rdf:li rdf:resource="https://arstechnica.com/science/2017/04/the-peer-reviewed-saga-of-mindless-eating-mindless-research-is-bad-too/"/>
	<rdf:li rdf:resource="https://extranewsfeed.com/the-noise-miners-cffe6c14b626"/>
	<rdf:li rdf:resource="http://sloanreview.mit.edu/article/moneyball-for-professors/"/>
	<rdf:li rdf:resource="https://www.washingtonpost.com/news/monkey-cage/wp/2016/12/05/that-viral-graph-about-millennials-declining-support-for-democracy-its-very-misleading/"/>
	<rdf:li rdf:resource="https://baselinescenario.com/2016/10/17/you-cant-get-there-from-here/"/>
	<rdf:li rdf:resource="https://violentmetaphors.com/2014/05/21/nicholas-wade-and-race-building-a-scientific-facade/"/>
	<rdf:li rdf:resource="http://www.pnas.org/content/early/2016/06/27/1602413113.long"/>
	<rdf:li rdf:resource="http://arstechnica.co.uk/security/2016/02/the-nsas-skynet-program-may-be-killing-thousands-of-innocent-people/"/>
	<rdf:li rdf:resource="http://www.harrowell.org.uk/blog/2015/12/05/that-time-i-was-nearly-burned-alive-by-a-machine-learning-model-and-didnt-even-notice-for-33-years/"/>
	<rdf:li rdf:resource="https://medium.com/bull-market/digital-locability-and-interocular-trauma-973397192975"/>
	<rdf:li rdf:resource="http://www.statschat.org.nz/2015/01/20/ask-a-silly-question-get-a-silly-answer/"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1407.4240"/>
	<rdf:li rdf:resource="http://www.chicagomag.com/Chicago-Magazine/June-2014/Chicago-crime-statistics/"/>
	<rdf:li rdf:resource="http://news.sciencemag.org/biology/2014/08/ecology-explaining-less-and-less"/>
      </rdf:Seq>
    </items>
  </channel><item rdf:about="https://arxiv.org/abs/2009.13807">
    <title>[2009.13807] Current Time Series Anomaly Detection Benchmarks are Flawed and are Creating the Illusion of Progress</title>
    <dc:date>2025-08-05T13:08:41+00:00</dc:date>
    <link>https://arxiv.org/abs/2009.13807</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Time series anomaly detection has been a perennially important topic in data science, with papers dating back to the 1950s. However, in recent years there has been an explosion of interest in this topic, much of it driven by the success of deep learning in other domains and for other time series tasks. Most of these papers test on one or more of a handful of popular benchmark datasets, created by Yahoo, Numenta, NASA, etc. In this work we make a surprising claim. The majority of the individual exemplars in these datasets suffer from one or more of four flaws. Because of these four flaws, we believe that many published comparisons of anomaly detection algorithms may be unreliable, and more importantly, much of the apparent progress in recent years may be illusionary. In addition to demonstrating these claims, with this paper we introduce the UCR Time Series Anomaly Archive. We believe that this resource will perform a similar role as the UCR Time Series Classification Archive, by providing the community with a benchmark that allows meaningful comparisons between approaches and a meaningful gauge of overall progress."]]></description>
<dc:subject>to:NB have_read time_series anomaly_detection evisceration keogh.eamonn_j. bad_data_analysis bad_science to_teach:data_over_space_and_time</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:2a8c232d3c05/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:anomaly_detection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:evisceration"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:keogh.eamonn_j."/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data_over_space_and_time"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.nber.org/papers/w33962">
    <title>Uncertainty in Empirical Economics | NBER</title>
    <dc:date>2025-07-02T18:36:14+00:00</dc:date>
    <link>https://www.nber.org/papers/w33962</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Econometricians invest substantial effort in constructing standard errors that yield valid inference under a hypothetical data-generating process. This paper asks a fundamental question: Are the uncertainty statements reported by applied researchers consistent with empirical frequencies? The short answer is no. Drawing on the forecasting literature, we predict estimates from “new” studies using estimates from corresponding baseline studies. By doing this across a large number of study groups and linking parameters through a hierarchical model, we compare stated probabilities to observed empirical frequencies. Alignment occurs only under limited external validity, namely, that the studies estimate different parameters."]]></description>
<dc:subject>to:NB bad_data_analysis econometrics calibration</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:2cd96b2a0835/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:econometrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:calibration"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.cambridge.org/core/journals/journal-of-economic-history/article/we-do-not-know-the-population-of-every-country-in-the-world-for-the-past-two-thousand-years/D747DDC6E499C799B0471DBE33FEB0BB">
    <title>We Do Not Know the Population of Every Country in the World for the Past Two Thousand Years | The Journal of Economic History | Cambridge Core</title>
    <dc:date>2025-03-10T14:07:54+00:00</dc:date>
    <link>https://www.cambridge.org/core/journals/journal-of-economic-history/article/we-do-not-know-the-population-of-every-country-in-the-world-for-the-past-two-thousand-years/D747DDC6E499C799B0471DBE33FEB0BB</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Economists have reported results based on populations for every country in the world for the past two thousand years. The source, McEvedy and Jones’ Atlas of World Population History, includes many estimates that are little more than guesses and that do not reflect research since 1978. McEvedy and Jones often infer population sizes from their view of a particular economy, making their estimates poor proxies for economic growth. Their rounding means their measurement error is not “classical.” Some economists augment that error by disaggregating regions in unfounded ways. Econometric results that rest on McEvedy and Jones are unreliable.
"“… we haven’t just pulled the figures out of the sky. Well, not often.”
"—McEvedy and Jones (1978, p. 11)"

--- I want to teach this to The Kids, but it simultaneously expects too much historical knowledge on their part, and would make too many of them nihilists about social science.]]></description>
<dc:subject>to:NB have_read history economic_history econometrics social_measurement bad_data_collection demography bad_data_analysis to_teach:undergrad-ADA tab_closure</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:b68c7ada7248/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:history"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:economic_history"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:econometrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_measurement"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_collection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:demography"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:undergrad-ADA"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:tab_closure"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2304.01315">
    <title>[2304.01315] Empirical Design in Reinforcement Learning</title>
    <dc:date>2025-01-01T15:57:49+00:00</dc:date>
    <link>https://arxiv.org/abs/2304.01315</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Empirical design in reinforcement learning is no small task. Running good experiments requires attention to detail and at times significant computational resources. While compute resources available per dollar have continued to grow rapidly, so have the scale of typical experiments in reinforcement learning. It is now common to benchmark agents with millions of parameters against dozens of tasks, each using the equivalent of 30 days of experience. The scale of these experiments often conflict with the need for proper statistical evidence, especially when comparing algorithms. Recent studies have highlighted how popular algorithms are sensitive to hyper-parameter settings and implementation details, and that common empirical practice leads to weak statistical evidence (Machado et al., 2018; Henderson et al., 2018). Here we take this one step further.
"This manuscript represents both a call to action, and a comprehensive resource for how to do good experiments in reinforcement learning. In particular, we cover: the statistical assumptions underlying common performance measures, how to properly characterize performance variation and stability, hypothesis testing, special considerations for comparing multiple agents, baseline and illustrative example construction, and how to deal with hyper-parameters and experimenter bias. Throughout we highlight common mistakes found in the literature and the statistical consequences of those in example experiments. The objective of this document is to provide answers on how we can use our unprecedented compute to do good science in reinforcement learning, as well as stay alert to potential pitfalls in our empirical design."]]></description>
<dc:subject>experimental_design reinforcement_learning bad_data_analysis in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:a6dc8ef33a95/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:experimental_design"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:reinforcement_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://retractionwatch.com/2015/07/21/to-our-horror-widely-reported-study-suggesting-divorce-is-more-likely-when-wives-fall-ill-gets-axed/">
    <title>“To our horror”: Widely reported study suggesting divorce is more likely when wives fall ill gets axed – Retraction Watch</title>
    <dc:date>2024-12-22T03:24:34+00:00</dc:date>
    <link>https://retractionwatch.com/2015/07/21/to-our-horror-widely-reported-study-suggesting-divorce-is-more-likely-when-wives-fall-ill-gets-axed/</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[The first tag is a little harsh on the authors...]]></description>
<dc:subject>bad_data_analysis to_teach</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:4a289433710a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.nature.com/articles/s41562-023-01749-9">
    <title>RETRACTED ARTICLE: High replicability of newly discovered social-behavioural findings is achievable | Nature Human Behaviour</title>
    <dc:date>2024-10-02T13:48:30+00:00</dc:date>
    <link>https://www.nature.com/articles/s41562-023-01749-9</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[--- The "Matters Arising" piece which led to the retraction is incredible.]]></description>
<dc:subject>via:auerbach replication_crisis science_as_a_social_process bad_science bad_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:141d1209c84a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:auerbach"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:replication_crisis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:science_as_a_social_process"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://statmodeling.stat.columbia.edu/2024/08/05/adverse-adult-research-outcomes-increased-after-increased-willingness-of-public-health-journals-to-publish-absolute-crap/">
    <title>Adverse Adult Research Outcomes Increased After Increased Willingness of Public Health Journals to Publish Absolute Crap | Statistical Modeling, Causal Inference, and Social Science</title>
    <dc:date>2024-08-20T18:23:47+00:00</dc:date>
    <link>https://statmodeling.stat.columbia.edu/2024/08/05/adverse-adult-research-outcomes-increased-after-increased-willingness-of-public-health-journals-to-publish-absolute-crap/</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>debunking bad_data_analysis bad_science gelman.andrew have_read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:cf82544c4363/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:debunking"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:gelman.andrew"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2402.14583">
    <title>[2402.14583] Dataset Artefacts are the Hidden Drivers of the Declining Disruptiveness in Science</title>
    <dc:date>2024-03-05T15:03:31+00:00</dc:date>
    <link>https://arxiv.org/abs/2402.14583</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Park et al. [1] reported a decline in the disruptiveness of scientific and technological knowledge over time. Their main finding is based on the computation of CD indices, a measure of disruption in citation networks [2], across almost 45 million papers and 3.9 million patents. Due to a factual plotting mistake, database entries with zero references were omitted in the CD index distributions, hiding a large number of outliers with a maximum CD index of one, while keeping them in the analysis [1]. Our reanalysis shows that the reported decline in disruptiveness can be attributed to a relative decline of these database entries with zero references. Notably, this was not caught by the robustness checks included in the manuscript. The regression adjustment fails to control for the hidden outliers as they correspond to a discontinuity in the CD index. Proper evaluation of the Monte-Carlo simulations reveals that, because of the preservation of the hidden outliers, even random citation behaviour replicates the observed decline in disruptiveness. Finally, while these papers and patents with supposedly zero references are the hidden drivers of the reported decline, their source documents predominantly do make references, exposing them as pure dataset artefacts."

--- This looks pretty convincing, from a quick skim.]]></description>
<dc:subject>to:NB sociology_of_science bibliometry bad_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:a4b217521fcf/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sociology_of_science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bibliometry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://retractionwatch.com/2023/08/25/ex-cops-tangle-with-journals-over-strip-clubs-and-sex-crimes/">
    <title>Ex-cops tangle with journals over strip clubs and sex crimes – Retraction Watch</title>
    <dc:date>2023-09-20T17:57:15+00:00</dc:date>
    <link>https://retractionwatch.com/2023/08/25/ex-cops-tangle-with-journals-over-strip-clubs-and-sex-crimes/</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[Cf. [https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4528229]]]></description>
<dc:subject>social_measurement social_science_methodology crime practices_relating_to_the_transmission_of_genetic_information bad_data_analysis moskos.peter via:peter_moskos to_teach:if_i_ever_really_need_to_get_cancelled in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:e6b151cb8004/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_measurement"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_science_methodology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:crime"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:practices_relating_to_the_transmission_of_genetic_information"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:moskos.peter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:peter_moskos"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:if_i_ever_really_need_to_get_cancelled"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4528229">
    <title>Registering a Proposed Business Reduces Police Stops of Innocent People? Reconsidering the Effects of Sex Work on Sex Crimes Found in Ciacci &amp; Sviatschi’s Study of New York City by Brandon del Pozo, Peter Moskos, John Donohue, John Hall :: SSRN</title>
    <dc:date>2023-09-20T17:55:31+00:00</dc:date>
    <link>https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4528229</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Ciacci & Sviatschi’s (2021) ‘The Effect of Adult Entertainment Establishments on Sex Crime: Evidence from New York City,’ published in The Economic Journal, concluded that opening new adult entertainment businesses reduces sex crimes, with the most compelling finding that ‘[strip clubs, gentleman’s clubs, and escort services] decrease sex crime by 13% per police precinct one week after the opening.’ We contend that the study’s conclusions speak beyond the data, which cannot support these findings because they do not measure the necessary variables. The study uses the date a business is registered with New York State as a proxy for its opening date, but the actual date of opening comes weeks or months later, after requirements such as inspections, licensure, and community board approval. The study then uses police Stop, Question and Frisk Reports as data about subsequent crimes. As reports created to memorialize forcible police stops based on less than probable cause, 94% of these reports document that the police had an unfounded belief in criminal activity, and the person stopped was innocent of any crime. In effect, what the study has done is measure changes in police encounters with innocent people in the week after an entity has filed the paperwork that will eventually allow it to open as a business. The study lacks construct validity, cannot reject the null hypothesis of its most important finding, and its methods fall short of the rigor necessary to permit replication."

--- Cf. [https://retractionwatch.com/2023/08/25/ex-cops-tangle-with-journals-over-strip-clubs-and-sex-crimes/]]]></description>
<dc:subject>social_measurement social_science_methodology crime practices_relating_to_the_transmission_of_genetic_information bad_data_analysis moskos.peter via:peter_moskos to_teach:if_i_ever_really_need_to_get_cancelled in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:23a33d86df87/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_measurement"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_science_methodology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:crime"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:practices_relating_to_the_transmission_of_genetic_information"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:moskos.peter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:peter_moskos"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:if_i_ever_really_need_to_get_cancelled"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.wired.com/story/welfare-state-algorithms/">
    <title>Inside the Suspicion Machine | WIRED</title>
    <dc:date>2023-03-21T15:43:38+00:00</dc:date>
    <link>https://www.wired.com/story/welfare-state-algorithms/</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[--- Last tag for the underlying analysis.
--- The bit about coding _any_ comment from the social worker as a flag for trouble is mind-blowing, and not in a good way.]]></description>
<dc:subject>classifiers risk_assessment welfare_state algorithmic_fairness have_read to_teach:data-mining track_down_references bad_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:102b2cb016b2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:risk_assessment"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:welfare_state"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:algorithmic_fairness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:track_down_references"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2203.06498">
    <title>[2203.06498] The worst of both worlds: A comparative analysis of errors in learning from data in psychology and machine learning</title>
    <dc:date>2022-03-31T23:35:44+00:00</dc:date>
    <link>https://arxiv.org/abs/2203.06498</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>data_analysis bad_data_analysis psychology data_mining gelman.andrew to_teach:data-mining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:6c960a97eea4/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:psychology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:data_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:gelman.andrew"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://mobile.twitter.com/benmschmidt/status/1419497587296571395">
    <title>Benjamin Schmidt on Twitter: &quot;This PNAS article claiming to find a world-wide outbreak of depression since 2000 is shockingly bad. The authors don't bother to understand the 2019 Google Books &quot;corpus&quot; a tiny bit; everything they find is explained by Googl</title>
    <dc:date>2021-07-28T04:35:11+00:00</dc:date>
    <link>https://mobile.twitter.com/benmschmidt/status/1419497587296571395</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["That's not getting into the run-of-the-mill badness of believing that using the phrases 'will end' and 'will not end' transparently both somehow transparently reflect the pathology of 'catastrophizing.' The discipline of psychology will not end anytime soon--we're stuck wth that."

--- Shades of [http://bactra.org/weblog/770.html]]]></description>
<dc:subject>bad_data_analysis text_mining twitter_threads_that_should_be_blog_posts</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:ec52a8789029/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:text_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:twitter_threads_that_should_be_blog_posts"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.technologyreview.com/2021/07/07/1027916/we-tested-ai-interview-tools/?_hsmi=140522126&amp;_hsenc=p2ANqtz-8vjTOrf7wPGyBHzGvNM6HAsRT9_ivd6OAndeGlgd7q_DeKQzL8wzhIWUxeUBctrU37fL6-4CUvXLmfyBLOvUsDCnCCddi0lNRwTMkkbRP71Z9mu90">
    <title>We tested AI interview tools. Here’s what we found. | MIT Technology Review</title>
    <dc:date>2021-07-26T15:07:26+00:00</dc:date>
    <link>https://www.technologyreview.com/2021/07/07/1027916/we-tested-ai-interview-tools/?_hsmi=140522126&amp;_hsenc=p2ANqtz-8vjTOrf7wPGyBHzGvNM6HAsRT9_ivd6OAndeGlgd7q_DeKQzL8wzhIWUxeUBctrU37fL6-4CUvXLmfyBLOvUsDCnCCddi0lNRwTMkkbRP71Z9mu90</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Our candidate turned to MyInterview and repeated the experiment. She read the same Wikipedia entry aloud in German. The algorithm not only returned a personality assessment, but it also predicted our candidate to be a 73% match for the fake job, putting her in the top half of all the applicants we had asked to apply."]]></description>
<dc:subject>to_teach:data-mining utter_stupidity via:yorksranter bad_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:7a8fd8909696/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:utter_stupidity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:yorksranter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://mobile.twitter.com/HistDem/status/1395774558096039938">
    <title>Steven Ruggles on Twitter: &quot;/1. Yesterday at the ACS Data Users Conference, the Census Bureau described its plans to replace the American Community Survey (ACS) microdata with “fully synthetic” data over the next three years. https://t.co/8btLxiA3iM&quot; </title>
    <dc:date>2021-06-11T18:31:16+00:00</dc:date>
    <link>https://mobile.twitter.com/HistDem/status/1395774558096039938</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[--- As described, this seems ridiculous.  (See also [https://mobile.twitter.com/HistDem/status/1402712595707084805].)  I have always been suspicious of differential privacy [http://bactra.org/weblog/1138.html], but this would indeed be catastrophic for many, many users of Census data, _if_ it's as described.  Because it's Twitter, my figuring out whether this really is accurate would be a minor project in and of itself.  (Ruggles is a respected historian and demographer and it's implausible that he's just panicking, but in this day and age, who can say?)]]></description>
<dc:subject>census privacy data_sets bad_data_analysis twitter_threads_that_should_be_blog_posts statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:1ce8f24749a5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:census"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:privacy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:data_sets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:twitter_threads_that_should_be_blog_posts"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2105.03948">
    <title>[2105.03948] Trustworthiness of statistical inference</title>
    <dc:date>2021-05-12T18:32:54+00:00</dc:date>
    <link>https://arxiv.org/abs/2105.03948</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We examine the role of trustworthiness and trust in statistical inference, arguing that it is the extent of trustworthiness in inferential statistical tools which enables trust in the conclusions. Certain tools, such as the p-value and significance test, have recently come under renewed criticism, with some arguing that they damage trust in statistics. We argue the contrary, beginning from the position that the central role of these methods is to form the basis for trusted conclusions in the face of uncertainty in the data, and noting that it is the misuse and misunderstanding of these tools which damages trustworthiness and hence trust. We go on to argue that recent calls to ban these tools would tackle the symptom, not the cause, and themselves risk damaging the capability of science to advance, and feeding into public suspicion of the discipline of statistics. The consequence could be aggravated mistrust of our discipline and of science more generally. In short, the very proposals could work in quite the contrary direction from that intended. We make some alternative proposals for tackling the misuse and misunderstanding of these methods, and for how trust in our discipline might be promoted."]]></description>
<dc:subject>to:NB statistics trust science_as_a_social_process bad_data_analysis hand.david_j.</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c95c5506079b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:trust"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:science_as_a_social_process"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hand.david_j."/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3814191">
    <title>The Rise and Fall of 'Social Bot' Research by Florian Gallwitz, Michael Kreil :: SSRN</title>
    <dc:date>2021-04-20T13:22:14+00:00</dc:date>
    <link>https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3814191</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The idea that social media platforms like Twitter are inhabited by vast numbers of “social bots” has become widely accepted in recent years. “Social bots” are assumed to be automated social media accounts operated by malicious actors with the goal of manipulating public opinion. They are credited with the ability to produce content autonomously and to interact with human users. “Social bot” activity has been reported in many different political contexts, including Donald Trump’s election and the Brexit referendum in 2016. However, the relevant publications either use crude and questionable heuristics to discriminate between supposed “social bots” and humans or—in the vast majority of the cases—fully rely on the output of automatic bot detection tools, most commonly Botometer. We point out fundamental theoretical flaws of these approaches. Also, we closely and systematically inspected hundreds of accounts that had been counted or even presented as “social bots” in peer-reviewed studies. We were unable to find a single “social bot”. Instead, we found mostly accounts undoubtedly operated by human users, the vast majority of them using Twitter in an inconspicuous and unremarkable fashion without the slightest traces of automation. We conclude that studies claiming to investigate the prevalence or influence of “social bots” have, in reality, just investigated false positives and artifacts of the flawed detection methods employed."]]></description>
<dc:subject>to:NB social_media bad_data_analysis deceiving_us_has_become_an_industrial_process via:henry_farrell to_teach:baby-nets re:actually-dr-internet-is-the-name-of-the-monsters-creator</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:13bcfcda3191/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_media"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:deceiving_us_has_become_an_industrial_process"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:henry_farrell"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:baby-nets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:actually-dr-internet-is-the-name-of-the-monsters-creator"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://twitter.com/literalbanana/status/1380386461728522243">
    <title>Science Banana on Twitter: &quot;one last thing - if you thought preregistration and replication would save us, this one describes its own experiments as replications of each other ten times and describes itself as “pre-registered” five times&quot; / Twitter</title>
    <dc:date>2021-04-11T03:23:26+00:00</dc:date>
    <link>https://twitter.com/literalbanana/status/1380386461728522243</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>twitter_threads_that_should_be_blog_posts social_measurement bad_science text_mining bad_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:ae18a81bf461/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:twitter_threads_that_should_be_blog_posts"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_measurement"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:text_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://kieranhealy.org/blog/archives/2021/01/26/income-and-happiness/">
    <title>Income and Happiness - kieranhealy.org</title>
    <dc:date>2021-01-27T04:18:19+00:00</dc:date>
    <link>https://kieranhealy.org/blog/archives/2021/01/26/income-and-happiness/</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>bad_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c4c510e57206/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2101.07097">
    <title>[2101.07097] The Violating Assumptions Series: Simulated demonstrations to illustrate how assumptions can affect statistical estimates</title>
    <dc:date>2021-01-19T19:55:31+00:00</dc:date>
    <link>https://arxiv.org/abs/2101.07097</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["When teaching and discussing statistical assumptions, our focus is oftentimes placed on how to test and address potential violations rather than the effects of violating assumptions on the estimates produced by our statistical models. The latter represents a potential avenue to help us better understand the impact of researcher degrees of freedom on the statistical estimates we produce. The Violating Assumptions Series is an endeavor I have undertaken to demonstrate the effects of violating assumptions on the estimates produced across various statistical models. The series will review assumptions associated with estimating causal associations, as well as more complicated statistical models including, but not limited to, multilevel models, path models, structural equation models, and Bayesian models. In addition to the primary goal, the series of posts is designed to illustrate how simulations can be used to develop a comprehensive understanding of applied statistics."]]></description>
<dc:subject>to:NB causal_inference statistics bad_data_analysis teaching_statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:5350422ee66f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:causal_inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:teaching_statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.mcgill.ca/oss/article/critical-thinking/dunning-kruger-effect-probably-not-real">
    <title>The Dunning-Kruger Effect Is Probably Not Real | Office for Science and Society - McGill University</title>
    <dc:date>2021-01-03T20:51:26+00:00</dc:date>
    <link>https://www.mcgill.ca/oss/article/critical-thinking/dunning-kruger-effect-probably-not-real</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[--- It's rather annoying that there was no link to the R code, but I reproduced the basics with a little bit of work (incorporated below for reference).  Setting the measurement standard deviation (s) to 1, as in my code, gives a modest but perceptible over-estimation at low percentiles and under-estimation at high percentiles; set it to 5 and marvel.



# Simulate n people whose perceived score is their actual score plus
# independent Gaussian noise with standard deviation s.
n <- 1000
s <- 1
actual.raw <- rnorm(n)
perceived.raw <- actual.raw+rnorm(n,sd=s)

# Bin people into quartiles of actual score.  include.lowest=TRUE keeps the
# sample minimum in the first bin: cut() builds (a,b] intervals by default, so
# without it the smallest value (which equals the lowest break) gets an NA
# bucket and is silently dropped by the aggregate() calls below.
buckets <- cut(actual.raw,
               breaks=quantile(actual.raw,
                               probs=c(0:4)/4),
               include.lowest=TRUE)

# Convert raw scores to within-sample percentiles via the empirical CDF.
perceived <- 100*ecdf(perceived.raw)(perceived.raw)
actual <- 100*ecdf(actual.raw)(actual.raw)


# Scatter of percentiles, with per-quartile means (blue dots), the
# line y = x (grey), and the least-squares fit (blue line).
plot(x=actual, y=perceived, cex=0.1,
     xlab="Actual percentile", ylab="Perceived percentile")
points(y=aggregate(perceived~buckets,
                   FUN=mean)[,2],
       x=aggregate(actual~buckets,
                   FUN=mean)[,2],
       pch=16, col="blue")
abline(0,1, col="grey")
abline(lm(perceived~actual),col="blue")

# For contrast:
plot(x=actual.raw, y=perceived.raw, cex=0.1,
     xlab="Actual raw score", ylab="Perceived raw score")
abline(0,1, col="grey")
abline(lm(perceived.raw~actual.raw),col="blue")]]></description>
<dc:subject>debunking bad_data_analysis psychology dunning-kruger visual_display_of_quantitative_information to_teach:linear_models via:tsuomela</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:faac8efe5364/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:debunking"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:psychology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dunning-kruger"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:visual_display_of_quantitative_information"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:linear_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:tsuomela"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1707.06289">
    <title>[1707.06289] Meaningless comparisons lead to false optimism in medical machine learning</title>
    <dc:date>2020-12-13T20:21:17+00:00</dc:date>
    <link>https://arxiv.org/abs/1707.06289</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["A new trend in medicine is the use of algorithms to analyze big datasets, e.g. using everything your phone measures about you for diagnostics or monitoring. However, these algorithms are commonly compared against weak baselines, which may contribute to excessive optimism. To assess how well an algorithm works, scientists typically ask how well its output correlates with medically assigned scores. Here we perform a meta-analysis to quantify how the literature evaluates their algorithms for monitoring mental wellbeing. We find that the bulk of the literature (∼77%) uses meaningless comparisons that ignore patient baseline state. For example, having an algorithm that uses phone data to diagnose mood disorders would be useful. However, it is possible to [explain] over 80% of the variance of some mood measures in the population by simply guessing that each patient has their own average mood - the patient-specific baseline. Thus, an algorithm that just predicts that our mood is like it usually is can explain the majority of variance, but is, obviously, entirely useless. Comparing to the wrong (population) baseline has a massive effect on the perceived quality of algorithms and produces baseless optimism in the field. To solve this problem we propose "user lift" that reduces these systematic errors in the evaluation of personalized medical monitoring."]]></description>
<dc:subject>to:NB bad_data_analysis prediction recht.benjamin to_teach:data-mining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:0bfb6d2667fe/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:recht.benjamin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.npr.org/sections/ed/2018/08/27/640323347/the-school-shootings-that-werent">
    <title>The School Shootings That Weren't : NPR</title>
    <dc:date>2020-11-29T18:57:40+00:00</dc:date>
    <link>https://www.npr.org/sections/ed/2018/08/27/640323347/the-school-shootings-that-werent</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["How many times per year does a gun go off in an American school?
"We should know. But we don't.
"This spring the U.S. Education Department reported that in the 2015-2016 school year, "nearly 240 schools ... reported at least 1 incident involving a school-related shooting." The number is far higher than most other estimates.
"But NPR reached out to every one of those schools repeatedly over the course of three months and found that more than two-thirds of these reported incidents never happened. Child Trends, a nonpartisan nonprofit research organization, assisted NPR in analyzing data from the government's Civil Rights Data Collection.
"We were able to confirm just 11 reported incidents, either directly with schools or through media reports."]]></description>
<dc:subject>track_down_references mass_shootings violence bad_data_analysis social_measurement re:statistics_of_muckers</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:129a0c146def/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:track_down_references"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:mass_shootings"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:violence"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_measurement"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:statistics_of_muckers"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://projects.tampabay.com/projects/2020/investigations/police-pasco-sheriff-targeted/intelligence-led-policing/">
    <title>Pasco’s sheriff created a futuristic program to stop crime before it happens. It monitors and harasses families. | Investigations | Tampa Bay Times</title>
    <dc:date>2020-11-29T16:01:42+00:00</dc:date>
    <link>https://projects.tampabay.com/projects/2020/investigations/police-pasco-sheriff-targeted/intelligence-led-policing/</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Pasco County Sheriff Chris Nocco took office in 2011 with a bold plan: to create a cutting-edge intelligence program that could stop crime before it happened.
"What he actually built was a system to continuously monitor and harass Pasco County residents, a Tampa Bay Times investigation has found.
"First the Sheriff’s Office generates lists of people it considers likely to break the law, based on arrest histories, unspecified intelligence and arbitrary decisions by police analysts.
"Then it sends deputies to find and interrogate anyone whose name appears, often without probable cause, a search warrant or evidence of a specific crime.
"They swarm homes in the middle of the night, waking families and embarrassing people in front of their neighbors. They write tickets for missing mailbox numbers and overgrown grass, saddling residents with court dates and fines. They come again and again, making arrests for any reason they can.
"One former deputy described the directive like this: “Make their lives miserable until they move or sue.”"

]]></description>
<dc:subject>to_teach:data-mining algorithmic_fairness crime police bad_data_analysis have_read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:0dcb142cdda2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:algorithmic_fairness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:crime"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:police"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.bbc.com/worklife/article/20200123-how-your-twitter-feed-could-help-find-your-dream-job">
    <title>How your Twitter feed could help find your dream job - BBC Worklife</title>
    <dc:date>2020-11-27T06:15:01+00:00</dc:date>
    <link>https://www.bbc.com/worklife/article/20200123-how-your-twitter-feed-could-help-find-your-dream-job</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>twitter text_mining to_teach:data-mining bad_data_analysis to:blog trapped_in_plutos_republic re:career_advising_in_plutos_republic bad_science_journalism</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:b003b2327e6c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:twitter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:text_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:blog"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:trapped_in_plutos_republic"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:career_advising_in_plutos_republic"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_science_journalism"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://nautil.us/blog/scientists-can-predict-your-job-by-your-social_media-personality">
    <title>Twitter Can Help You Match Your Personality to a Career</title>
    <dc:date>2020-11-27T06:14:40+00:00</dc:date>
    <link>http://nautil.us/blog/scientists-can-predict-your-job-by-your-social_media-personality</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>text_mining twitter bad_data_analysis to_teach:data-mining to:blog trapped_in_plutos_republic re:career_advising_in_plutos_republic bad_science_journalism</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:f1c8a863e7dd/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:text_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:twitter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:blog"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:trapped_in_plutos_republic"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:career_advising_in_plutos_republic"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_science_journalism"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.sophieheloisebennett.com/posts/a-levels-2020/">
    <title>On A Levels, Ofqual and Algorithms · Sophie Bennett</title>
    <dc:date>2020-08-24T16:02:43+00:00</dc:date>
    <link>https://www.sophieheloisebennett.com/posts/a-levels-2020/</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>to_teach:data-mining education standardized_testing statistics bad_data_analysis prediction</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:78edd9d547bb/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:education"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:standardized_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.pnas.org/content/116/52/26459">
    <title>Social media-predicted personality traits and values can help match people to their ideal jobs | PNAS</title>
    <dc:date>2020-07-16T15:49:42+00:00</dc:date>
    <link>https://www.pnas.org/content/116/52/26459</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Work is thought to be more enjoyable and beneficial to individuals and society when there is congruence between one’s personality and one’s occupation. We provide large-scale evidence that occupations have distinctive psychological profiles, which can successfully be predicted from linguistic information unobtrusively collected through social media. Based on 128,279 Twitter users representing 3,513 occupations, we automatically assess user personalities and visually map the personality profiles of different professions. Similar occupations cluster together, pointing to specific sets of jobs that one might be well suited for. Observations that contradict existing classifications may point to emerging occupations relevant to the 21st century workplace. Findings illustrate how social media can be used to match people to their ideal occupation."

--- Some observations:
1. They did not actually measure people's personality traits; they _assumed_ that a commercial IBM product can map word usage to personality traits.
1a. In particular, they _assumed_ that this remains accurate for what people write on Twitter, as opposed to whatever context IBM developed their system in (not specified here).
2. They did not actually measure "ideal" occupations; they saw whether a classifier using the estimated personality traits could map people to their actual occupations.
2a. They artificially balance their 10 professions so that each has 955 members.  (I presume that they randomly sampled the occupations with more members, though I don't quite see them saying that; maybe I missed it.  Also, I presume they did _not_ go hunting for the best group of 10 occupations.)  So the baseline accuracy would be only 10%, and getting about 70% under CV does indeed mean that there's some signal here.
2b. It's good that they include error bars on their accuracy figures!
2c.  Since they include those error bars, we can see that the differences in classification accuracy between the different methods are both small and statistically insignificant.  In particular, good old fashioned logistic regression is pretty much on par with everything else.
2d. They don't seem to have actually tried the obvious classifier here, which would map each person to the occupation whose feature-vector center ("medoid") was closest to the person's feature-vector ("prototype method").  But they did at least use k-nearest-neighbors, which performed about as well as all the others.
3. Calling this evidence that we could go from analyzing Twitter word usage to "ideal" job recommendations presumes that most people are _already_ in their ideal jobs.
4. This was edited by Susan Fiske [https://statmodeling.stat.columbia.edu/2017/02/08/authority-figures-spread-happy-talk-still-dont-get-it/].

_Maybe_ people reveal their personalities, in the Big 5 sense, by what they write on Twitter.  (Operationally, "personality" in the Big 5 sense is pretty close to "what words would you use to describe yourself on a questionnaire?")  And _maybe_ the way people reveal their personalities in their word usage on Twitter is so context-independent that it can reliably generalize across all the different sub-cultures and sub-societies and self-organized genre conventions of Twitter, so there is one globally reliable mapping.  (I am not going to repeat all of [http://bactra.org/weblog/770.html], but I could.)  And _maybe_ IBM has provided that mapping with an API.  And _maybe_ people with different personalities select in to different professions.  (As an alternative: different occupations train people differently, which alters their personalities, or at least the verbal expressions thereof, and different occupations expose people to different situations, which alters what they say and maybe even shapes their personalities.)  And _maybe_ people select in to professions where they are happier.  And _maybe_ if we looked at how young people talk on Twitter, before they've chosen an occupation, and extract their personality from it, and map them to a profession with lots of similar personality vectors already in it, they'll be happier in that occupation than in others.  But this study provides at best very, very weak evidence for all this.  (I want to say "no evidence at all", but I also don't want to get into arguments about the theory of evidence.)  What the study does show is that people in different occupations use different words on Twitter, and that these differences are detectable through the filter of IBM's purported personality estimator.

]]></description>
<dc:subject>to:NB have_read bad_science bad_data_analysis classifiers text_mining personality_tests logistic_regression social_media psychology why_oh_why_cant_we_have_a_better_academic_publishing_system to_teach:data-mining forty_minutes_of_my_life_im_not_getting_back trapped_in_plutos_republic to:blog twitter re:career_advising_in_plutos_republic</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:780cca65f6d0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:text_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:personality_tests"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:logistic_regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_media"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:psychology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:why_oh_why_cant_we_have_a_better_academic_publishing_system"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:forty_minutes_of_my_life_im_not_getting_back"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:trapped_in_plutos_republic"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:blog"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:twitter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:career_advising_in_plutos_republic"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/andrewkern/gdp_pgs/blob/master/araThal.ipynb">
    <title>Polygenic Score prediction of Gross Domestic Product using Arabidopsis thaliana GWAS</title>
    <dc:date>2020-07-15T15:02:46+00:00</dc:date>
    <link>https://github.com/andrewkern/gdp_pgs/blob/master/araThal.ipynb</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This was born out of an undergraduate course on Computational Genetics, in which I teach about GWAS. As GWAS is a technology that is built on correlative relations I like to try to do bad things with it. As always-- correlation does not equal causation. Recently the use of Polygenic scores (PGS) has become a popular way of doing phenotypic prediction from genotype data. PGS, at their most basic, are a sum of GWAS estimated effect sizes for an individual's genotype.
"In this notebook I develop a PGS predictor of Gross Domestic Product (GDP) of the country of origin of Arabidopsis thaliana accessions. I do this by treating GDP as a phenotype and then performing GWAS for that phenotype using a set of Arabidopsis genomes. I will make no attempt to correct for population structure at first-- this is the point of this excercise. All of the data for this comes from the 1001 Genomes dataset, its associated metadata, and GDP data that I gleaned from here."

--- Note that even after "correcting" for the top 10 principal components of genetic variation, he gets an out-of-sample R^2 of 0.14.  
(To check: were the held-out samples accidentally included in calculating the PCs?)  Hence the last tag.]]></description>
<dc:subject>bad_data_analysis deliberately_bad_data_analysis deliberately_bad_data_analysis_indistinguishable_from_papers_in_science_and_nature genetics statistics regression economics development_economics funny:geeky funny:malicious funny:pointed to_teach:linear_models gwas galtons_problem_rules_everything_around_me</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:b2089e8a8b65/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:deliberately_bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:deliberately_bad_data_analysis_indistinguishable_from_papers_in_science_and_nature"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:genetics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:economics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:development_economics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:funny:geeky"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:funny:malicious"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:funny:pointed"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:linear_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:gwas"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:galtons_problem_rules_everything_around_me"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://unherd.com/2020/07/why-we-stopped-trusting-experts/">
    <title>Why we stopped trusting 'experts' - UnHerd</title>
    <dc:date>2020-07-15T14:40:32+00:00</dc:date>
    <link>https://unherd.com/2020/07/why-we-stopped-trusting-experts/</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Why might a published result fail to replicate — that is, why might an academic journal publish a result that’s ultimately false? Ritchie mentions four underlying causes: fraud, bias, negligence and hype."

--- I presume (?) Ritchie also considers "the effect is real, but requires additional conditions not specified, or even realized, by the original paper", i.e., Yarkoni's "generalizability crisis".]]></description>
<dc:subject>book_reviews science_as_a_social_process bad_data_analysis why_oh_why_cant_we_have_a_better_academic_publishing_system why_oh_why_cant_we_have_a_better_intelligentsia re:anti-nudging</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:3addc5f340c0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:book_reviews"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:science_as_a_social_process"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:why_oh_why_cant_we_have_a_better_academic_publishing_system"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:why_oh_why_cant_we_have_a_better_intelligentsia"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:anti-nudging"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.nber.org/papers/w26480">
    <title>Teacher Effects on Student Achievement and Height: A Cautionary Tale</title>
    <dc:date>2020-07-13T18:23:23+00:00</dc:date>
    <link>https://www.nber.org/papers/w26480</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Estimates of teacher “value-added” suggest teachers vary substantially in their ability to promote student learning. Prompted by this finding, many states and school districts have adopted value-added measures as indicators of teacher job performance. In this paper, we conduct a new test of the validity of value-added models. Using administrative student data from New York City, we apply commonly estimated value-added models to an outcome teachers cannot plausibly affect: student height. We find the standard deviation of teacher effects on height is nearly as large as that for math and reading achievement, raising obvious questions about validity. Subsequent analysis finds these “effects” are largely spurious variation (noise), rather than bias resulting from sorting on unobserved factors related to achievement. Given the difficulty of differentiating signal from noise in real-world teacher effect estimates, this paper serves as a cautionary tale for their use in practice."]]></description>
<dc:subject>to:NB value-added_measures statistics econometrics bad_data_analysis have_skimmed trapped_in_plutos_republic value-added_measurement_in_education</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:23cf195172d5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:value-added_measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:econometrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_skimmed"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:trapped_in_plutos_republic"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:value-added_measurement_in_education"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www-brookings-edu.cdn.ampproject.org/v/s/www.brookings.edu/blog/brown-center-chalkboard/2020/01/21/can-a-teacher-really-impact-student-height-a-cautionary-tale-on-value-added-models/amp/?usqp=mq331AQCKAE%3D&amp;amp_js_v=0.1#referrer=https%3A%2F%2Fwww.google.com&amp;amp_tf=From%20%251%24s&amp;ampshare=https%3A%2F%2Fwww.brookings.edu%2Fblog%2Fbrown-center-chalkboard%2F2020%2F01%2F21%2Fcan-a-teacher-really-impact-student-height-a-cautionary-tale-on-value-added-models%2F">
    <title>Can a teacher really impact student height? A cautionary tale on value-added models</title>
    <dc:date>2020-07-13T18:20:46+00:00</dc:date>
    <link>https://www-brookings-edu.cdn.ampproject.org/v/s/www.brookings.edu/blog/brown-center-chalkboard/2020/01/21/can-a-teacher-really-impact-student-height-a-cautionary-tale-on-value-added-models/amp/?usqp=mq331AQCKAE%3D&amp;amp_js_v=0.1#referrer=https%3A%2F%2Fwww.google.com&amp;amp_tf=From%20%251%24s&amp;ampshare=https%3A%2F%2Fwww.brookings.edu%2Fblog%2Fbrown-center-chalkboard%2F2020%2F01%2F21%2Fcan-a-teacher-really-impact-student-height-a-cautionary-tale-on-value-added-models%2F</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>bad_data_analysis value-added_measures track_down_references value-added_measurement_in_education</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:898643235592/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:value-added_measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:track_down_references"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:value-added_measurement_in_education"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://teaching.sociology.ul.ie/bhalpin/wordpress/?p=669">
    <title>Correlations, smoothed time-series and sewage sludge | Sociology, Statistics and Software</title>
    <dc:date>2020-07-13T17:59:50+00:00</dc:date>
    <link>http://teaching.sociology.ul.ie/bhalpin/wordpress/?p=669</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[Dr. Yule, Dr. Slutsky, please call your offices.]]></description>
<dc:subject>epidemiology coronavirus_pandemic_of_2019-- bad_data_analysis time_series to_teach:data_over_space_and_time via:?</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:838f89b04c2d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:epidemiology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:coronavirus_pandemic_of_2019--"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data_over_space_and_time"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:?"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0235318">
    <title>Systematic review of the use of “magnitude-based inference” in sports science and medicine</title>
    <dc:date>2020-07-13T16:43:55+00:00</dc:date>
    <link>https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0235318</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Magnitude-based inference (MBI) is a controversial statistical method that has been used in hundreds of papers in sports science despite criticism from statisticians. To better understand how this method has been applied in practice, we systematically reviewed 232 papers that used MBI. We extracted data on study design, sample size, and choice of MBI settings and parameters. Median sample size was 10 per group (interquartile range, IQR: 8–15) for multi-group studies and 14 (IQR: 10–24) for single-group studies; few studies reported a priori sample size calculations (15%). Authors predominantly applied MBI’s default settings and chose “mechanistic/non-clinical” rather than “clinical” MBI even when testing clinical interventions (only 16 studies out of 232 used clinical MBI). Using these data, we can estimate the Type I error rates for the typical MBI study. Authors frequently made dichotomous claims about effects based on the MBI criterion of a “likely” effect and sometimes based on the MBI criterion of a “possible” effect. When the sample size is n = 8 to 15 per group, these inferences have Type I error rates of 12%-22% and 22%-45%, respectively. High Type I error rates were compounded by multiple testing: Authors reported results from a median of 30 tests related to outcomes; and few studies specified a primary outcome (14%). We conclude that MBI has promoted small studies, promulgated a “black box” approach to statistics, and led to numerous papers where the conclusions are not supported by the data. Amidst debates over the role of p-values and significance testing in science, MBI also provides an important natural experiment: we find no evidence that moving researchers away from p-values or null hypothesis significance testing makes them less prone to dichotomization or over-interpretation of findings."

--- I hadn't heard of this particular little cult, but sheesh.  (The last sentence of the abstract is the key.)]]></description>
<dc:subject>to:NB have_read bad_data_analysis statistics why_oh_why_cant_we_have_a_better_academic_publishing_system hypothesis_testing estimation trapped_in_plutos_republic</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:47bc87dbe9e7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:why_oh_why_cant_we_have_a_better_academic_publishing_system"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:trapped_in_plutos_republic"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://medium.com/@jon.mummolo/prominent-claims-that-policing-is-not-racially-biased-rest-on-flawed-science-6f66535dc7e5">
    <title>Prominent Claims that Policing is Not Racially Biased Rest on Flawed Science | by Jonathan Mummolo | Jul, 2020 | Medium</title>
    <dc:date>2020-07-12T22:01:23+00:00</dc:date>
    <link>https://medium.com/@jon.mummolo/prominent-claims-that-policing-is-not-racially-biased-rest-on-flawed-science-6f66535dc7e5</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[I am very happy to be a co-signer.

(I think anyone who honestly cares about doing careful science to inform public debates, or even just avoiding elementary errors in quantitative reasoning, could also endorse this, whatever their politics.)
(And, because we've apparently collectively regressed to early adolescence: I'm sure that some of the 800-odd other co-signers endorse views I'd regard as harmful nonsense.  So what?)]]></description>
<dc:subject>police bad_data_analysis why_oh_why_cant_we_have_a_better_academic_publishing_system trapped_in_plutos_republic</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:3fc7f07cd13f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:police"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:why_oh_why_cant_we_have_a_better_academic_publishing_system"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:trapped_in_plutos_republic"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2006.03895">
    <title>[2006.03895] The Criminality From Face Illusion</title>
    <dc:date>2020-06-19T17:30:25+00:00</dc:date>
    <link>https://arxiv.org/abs/2006.03895</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The automatic analysis of face images can generate predictions about a person's gender, age, race, facial expression, body mass index, and various other indices and conditions. A few recent publications have claimed success in analyzing an image of a person's face in order to predict the person's status as Criminal / Non-Criminal. Predicting criminality from face may initially seem similar to other facial analytics, but we argue that attempts to create a criminality-from-face algorithm are necessarily doomed to fail, that apparently promising experimental results in recent publications are an illusion resulting from inadequate experimental design, and that there is potentially a large social cost to belief in the criminality from face illusion."]]></description>
<dc:subject>to:NB to_read prediction crime classifiers bad_data_analysis to_teach:data-mining to_teach:statistics_of_inequality_and_discrimination via:yorksranter trapped_in_plutos_republic</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:34b1929c5c7c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:crime"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:statistics_of_inequality_and_discrimination"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:yorksranter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:trapped_in_plutos_republic"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.washingtonpost.com/opinions/2020/01/28/it-took-us-months-contest-flawed-study-police-bias-heres-why-thats-dangerous/">
    <title>It took us months to contest a flawed study on police bias. Here’s why that’s dangerous. - The Washington Post</title>
    <dc:date>2020-06-12T19:25:46+00:00</dc:date>
    <link>https://www.washingtonpost.com/opinions/2020/01/28/it-took-us-months-contest-flawed-study-police-bias-heres-why-thats-dangerous/</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>racism bad_data_analysis to_teach:statistics_of_inequality_and_discrimination police why_oh_why_cant_we_have_a_better_academic_publishing_system trapped_in_plutos_republic</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:47370d13a63a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:racism"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:statistics_of_inequality_and_discrimination"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:police"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:why_oh_why_cant_we_have_a_better_academic_publishing_system"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:trapped_in_plutos_republic"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://twitter.com/WhiteHouseCEA/status/1257680258364555264">
    <title>CEA on Twitter: &quot;To better visualize observed data, we also continually update a curve-fitting exercise to summarize COVID-19's observed trajectory. Particularly with irregular data, curve fitting can improve data visualization. As shown, IHME's mortality</title>
    <dc:date>2020-05-05T18:17:46+00:00</dc:date>
    <link>https://twitter.com/WhiteHouseCEA/status/1257680258364555264</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[Saving this for "how not to fit and extrapolate trends" next time I teach time series.
(And, yes, this appears to be the legit Council of Economic Advisers, not some parody or hijacker.)
]]></description>
<dc:subject>coronavirus_pandemic_of_2019-- time_series prediction utter_stupidity bad_data_analysis to_teach:data_over_space_and_time our_decrepit_institutions blogged</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:938ec650f993/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:coronavirus_pandemic_of_2019--"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:utter_stupidity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data_over_space_and_time"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:our_decrepit_institutions"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:blogged"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://theconversation.com/three-charts-that-show-where-the-coronavirus-death-rate-is-heading-137103">
    <title>Three charts that show where the coronavirus death rate is heading</title>
    <dc:date>2020-04-27T17:34:51+00:00</dc:date>
    <link>https://theconversation.com/three-charts-that-show-where-the-coronavirus-death-rate-is-heading-137103</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[On the one hand: Figure 2 is really one of the worst statistical graphics I've ever seen, and I am bookmarking it largely to offer as comic relief the next time I teach spatio-temporal statistics.  (The horizontal axis is the first derivative of the vertical axis; everything here would be conveyed by a simple plot of quantity vs. time, or at most of 2nd derivative of quantity vs. time.)  _Of course_ it was invented by a graphic designer trying to pretty up the author's Excel charts.
On the other hand: this is the co-author of a co-author.  There but for the grace of God, etc.]]></description>
<dc:subject>visual_display_of_quantitative_information bad_data_analysis to_teach:data_over_space_and_time trapped_in_plutos_republic</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:ea86d8082f67/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:visual_display_of_quantitative_information"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data_over_space_and_time"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:trapped_in_plutos_republic"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://rexdouglass.github.io/TIGR/Douglass_2020_How_To_Be_Curious_Instead_of_Contrarian_About_Covid19.nb.html">
    <title>How to be Curious Instead of Contrarian About COVID-19: Eight Data Science Lessons From ‘Coronavirus Perspective’ (Epstein 2020)</title>
    <dc:date>2020-04-01T03:02:23+00:00</dc:date>
    <link>https://rexdouglass.github.io/TIGR/Douglass_2020_How_To_Be_Curious_Instead_of_Contrarian_About_Covid19.nb.html</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["It is an order of magnitude less effort to spam poorly constructed hypotheticals than it is to deconstruct them. This review took a substantial amount of time, and in the meantime the original piece was poorly revised, several interviews and a podcast were released, and a second post trying to cover for the first went live.15 More will no doubt soon continue to move the goal posts and argument. In a world where actual life or death policy analysis is being treated like a high school debate round, the only strategic move is to step back, slow down, and draw methodological lessons for our students and colleagues that will apply to a broad set of current and future analyses."]]></description>
<dc:subject>evisceration bad_data_analysis epidemiology epstein.richard utter_stupidity coronavirus_pandemic_of_2019-- have_read anti-contrarianism</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:96a3fdc53067/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:evisceration"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:epidemiology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:epstein.richard"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:utter_stupidity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:coronavirus_pandemic_of_2019--"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:anti-contrarianism"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://journals.sagepub.com/doi/abs/10.1207/s15327957pspr0203_4">
    <title>HARKing: Hypothesizing After the Results are Known - Norbert L. Kerr, 1998</title>
    <dc:date>2020-01-23T21:28:33+00:00</dc:date>
    <link>https://journals.sagepub.com/doi/abs/10.1207/s15327957pspr0203_4</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This article considers a practice in scientific communication termed HARKing (Hypothesizing After the Results are Known). HARKing is defined as presenting a post hoc hypothesis (i.e., one based on or informed by one's results) in one's research report as if it were, in fact, an a priori hypotheses. Several forms of HARKing are identified and survey data are presented that suggests that at least some forms of HARKing are widely practiced and widely seen as inappropriate. I identify several reasons why scientists might HARK. Then I discuss several reasons why scientists ought not to HARK. It is conceded that the question of whether HARKing's costs exceed its benefits is a complex one that ought to be addressed through research, open discussion, and debate. To help stimulate such discussion (and for those such as myself who suspect that HARKing's costs do exceed its benefits), I conclude the article with some suggestions for deterring HARKing."

--- As my mother used to say: First draw your curve, then plot your data.]]></description>
<dc:subject>to:NB to_read bad_data_analysis methodological_advice to_teach:data-mining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:ecc59190e01e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:methodological_advice"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://models.street-artists.org/2020/01/09/nothing-to-see-here-move-along-regression-discontinuity-edition/">
    <title>Nothing to see here… move along (regression discontinuity edition) | Models Of Reality</title>
    <dc:date>2020-01-12T22:06:57+00:00</dc:date>
    <link>http://models.street-artists.org/2020/01/09/nothing-to-see-here-move-along-regression-discontinuity-edition/</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[Interesting, but surely this just speaks to the magnitude of the jump one should expect to see under the null?  I.e., if one got the null distribution sensibly, by simulation, shouldn't this effect be incorporated?]]></description>
<dc:subject>approximation causal_inference bad_data_analysis statistics via:gelman regression regression_discontinuity</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:d70c2ae82bc4/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:approximation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:causal_inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:gelman"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression_discontinuity"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.nber.org/papers/w26480#fromrss">
    <title>Teacher Effects on Student Achievement and Height: A Cautionary Tale</title>
    <dc:date>2019-11-25T17:07:37+00:00</dc:date>
    <link>https://www.nber.org/papers/w26480#fromrss</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Estimates of teacher “value-added” suggest teachers vary substantially in their ability to promote student learning. Prompted by this finding, many states and school districts have adopted value-added measures as indicators of teacher job performance. In this paper, we conduct a new test of the validity of value-added models. Using administrative student data from New York City, we apply commonly estimated value-added models to an outcome teachers cannot plausibly affect: student height. We find the standard deviation of teacher effects on height is nearly as large as that for math and reading achievement, raising obvious questions about validity. Subsequent analysis finds these “effects” are largely spurious variation (noise), rather than bias resulting from sorting on unobserved factors related to achievement. Given the difficulty of differentiating signal from noise in real-world teacher effect estimates, this paper serves as a cautionary tale for their use in practice."]]></description>
<dc:subject>to:NB value-added_measures statistics education social_measurement bad_data_analysis value-added_measurement_in_education economistic_imperialism</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:8a4d2fd4b050/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:value-added_measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:education"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_measurement"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:value-added_measurement_in_education"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:economistic_imperialism"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1909.12475">
    <title>[1909.12475] Hidden Stratification Causes Clinically Meaningful Failures in Machine Learning for Medical Imaging</title>
    <dc:date>2019-10-01T17:19:57+00:00</dc:date>
    <link>https://arxiv.org/abs/1909.12475</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Machine learning models for medical image analysis often suffer from poor performance on important subsets of a population that are not identified during training or testing. For example, overall performance of a cancer detection model may be high, but the model still consistently misses a rare but aggressive cancer subtype. We refer to this problem as hidden stratification, and observe that it results from incompletely describing the meaningful variation in a dataset. While hidden stratification can substantially reduce the clinical efficacy of machine learning models, its effects remain difficult to measure. In this work, we assess the utility of several possible techniques for measuring and describing hidden stratification effects, and characterize these effects both on multiple medical imaging datasets and via synthetic experiments on the well-characterised CIFAR-100 benchmark dataset. We find evidence that hidden stratification can occur in unidentified imaging subsets with low prevalence, low label quality, subtle distinguishing features, or spurious correlates, and that it can result in relative performance differences of over 20% on clinically important subsets. Finally, we explore the clinical implications of our findings, and suggest that evaluation of hidden stratification should be a critical component of any machine learning deployment in medical imaging."]]></description>
<dc:subject>to:NB classifiers data_mining prediction bad_data_analysis statistics to_teach:data-mining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:48588e6ab9e2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:data_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1909.06539">
    <title>[1909.06539] Not again! Data Leakage in Digital Pathology</title>
    <dc:date>2019-10-01T17:12:30+00:00</dc:date>
    <link>https://arxiv.org/abs/1909.06539</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Bioinformatics of high throughput omics data (e.g. microarrays and proteomics) has been plagued by uncountable issues with reproducibility at the start of the century. Concerns have motivated international initiatives such as the FDA's led MAQC Consortium, addressing reproducibility of predictive biomarkers by means of appropriate Data Analysis Plans (DAPs). For instance, repreated cross-validation is a standard procedure meant at mitigating the risk that information from held-out validation data may be used during model selection. We prove here that, many years later, Data Leakage can still be a non-negligible overfitting source in deep learning models for digital pathology. In particular, we evaluate the impact of (i) the presence of multiple images for each subject in histology collections; (ii) the systematic adoption of training over collection of subregions (i.e. "tiles" or "patches") extracted for the same subject. We verify that accuracy scores may be inflated up to 41%, even if a well-designed 10x5 iterated cross-validation DAP is applied, unless all images from the same subject are kept together either in the internal training or validation splits. Results are replicated for 4 classification tasks in digital pathology on 3 datasets, for a total of 373 subjects, and 543 total slides (around 27, 000 tiles). Impact of applying transfer learning strategies with models pre-trained on general-purpose or digital pathology datasets is also discussed."]]></description>
<dc:subject>to:NB cross-validation statistics bad_data_analysis to_teach:undergrad-ADA to_teach:data-mining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:55f36f7dc31d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:cross-validation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:undergrad-ADA"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1908.08702">
    <title>[1908.08702] Economically rational sample-size choice and irreproducibility</title>
    <dc:date>2019-09-12T15:03:55+00:00</dc:date>
    <link>https://arxiv.org/abs/1908.08702</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Several systematic studies have suggested that a large fraction of published research is not reproducible. One probable reason for low reproducibility is insufficient sample size, resulting in low power and low positive predictive value. It has been suggested that insufficient sample-size choice is driven by a combination of scientific competition and 'positive publication bias'. Here we formalize this intuition in a simple model, in which scientists choose economically rational sample sizes, balancing the cost of experimentation with income from publication. Specifically, assuming that a scientist's income derives only from 'positive' findings (positive publication bias) and that individual samples cost a fixed amount, allows to leverage basic statistical formulas into an economic optimality prediction. We find that if effects have i) low base probability, ii) small effect size or iii) low grant income per publication, then the rational (economically optimal) sample size is small. Furthermore, for plausible distributions of these parameters we find a robust emergence of a bimodal distribution of obtained statistical power and low overall reproducibility rates, matching empirical findings. Overall, the model describes a simple mechanism explaining both the prevalence and the persistence of small sample sizes. It suggests economic rationality, or economic pressures, as a principal driver of irreproducibility."

--- To be clear, my skepticism here isn't about the basic idea, which has been articulated about a zillion times (back to Meehl at least...), but rather about whether mathing it up with dubious simplifying assumptions adds anything of value.]]></description>
<dc:subject>to:NB bad_data_analysis statistics sociology_of_science economics color_me_skeptical</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:94a29c313297/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sociology_of_science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:economics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:color_me_skeptical"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1909.04436">
    <title>[1909.04436] The Prevalence of Errors in Machine Learning Experiments</title>
    <dc:date>2019-09-12T15:01:54+00:00</dc:date>
    <link>https://arxiv.org/abs/1909.04436</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Context: Conducting experiments is central to research machine learning research to benchmark, evaluate and compare learning algorithms. Consequently it is important we conduct reliable, trustworthy experiments. Objective: We investigate the incidence of errors in a sample of machine learning experiments in the domain of software defect prediction. Our focus is simple arithmetical and statistical errors. Method: We analyse 49 papers describing 2456 individual experimental results from a previously undertaken systematic review comparing supervised and unsupervised defect prediction classifiers. We extract the confusion matrices and test for relevant constraints, e.g., the marginal probabilities must sum to one. We also check for multiple statistical significance testing errors. Results: We find that a total of 22 out of 49 papers contain demonstrable errors. Of these 7 were statistical and 16 related to confusion matrix inconsistency (one paper contained both classes of error). Conclusions: Whilst some errors may be of a relatively trivial nature, e.g., transcription errors their presence does not engender confidence. We strongly urge researchers to follow open science principles so errors can be more easily be detected and corrected, thus as a community reduce this worryingly high error rate with our computational experiments."]]></description>
<dc:subject>to:NB bad_data_analysis machine_learning to_teach:data-mining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c50569f7f6f8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:machine_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-1044-7">
    <title>Gene name errors are widespread in the scientific literature | Genome Biology | Full Text</title>
    <dc:date>2019-09-12T12:43:14+00:00</dc:date>
    <link>https://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-1044-7</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>bad_data_analysis genetics excel_considered_harmful</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:b296b5af427c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:genetics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:excel_considered_harmful"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1905.11052">
    <title>[1905.11052] Does the $h_α$ index reinforce the Matthew effect in science? Agent-based simulations using Stata and R</title>
    <dc:date>2019-05-28T16:51:36+00:00</dc:date>
    <link>https://arxiv.org/abs/1905.11052</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Recently, Hirsch (2019a) proposed a new variant of the h index called the hα index. He formulated as follows: "we define the hα index of a scientist as the number of papers in the h-core of the scientist (i.e. the set of papers that contribute to the h-index of the scientist) where this scientist is the α-author" (p. 673). The hα index was criticized by Leydesdorff, Bornmann, and Opthof (2019). One of their most important points is that the index reinforces the Matthew effect in science. We address this point in the current study using a recently developed Stata command (h_index) and R package (hindex), which can be used to simulate h index and hαindex applications in research evaluation. The user can investigate under which conditions hα reinforces the Matthew effect. The results of our study confirm what Leydesdorff et al. (2019) expected: the hα index reinforces the Matthew effect. This effect can be intensified if strategic behavior of the publishing scientists and cumulative advantage effects are additionally considered in the simulation."]]></description>
<dc:subject>to:NB bibliometry bad_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:d00a3643f600/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bibliometry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.annualreviews.org/doi/abs/10.1146/annurev-criminol-011518-024638">
    <title>Looking Through Broken Windows: The Impact of Neighborhood Disorder on Aggression and Fear of Crime Is an Artifact of Research Design | Annual Review of Criminology</title>
    <dc:date>2019-05-26T18:00:09+00:00</dc:date>
    <link>https://www.annualreviews.org/doi/abs/10.1146/annurev-criminol-011518-024638</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Broken windows theory (BWT) has heavily influenced social science and policy over the past 30 years. It posits that disorder in neighborhoods leads to elevated crime by inviting additional criminal activity and by discouraging the positive social behavior that prevents crime. Scholars have debated the veracity of BWT, and here we conduct a meta-analysis of 96 studies to examine the effects of disorder on residents’ (a) general proclivities for aggressive behavior and (b) perceptions of and attitudes toward their neighborhood (e.g., fear of crime), with particular attention to aspects of research design that might confound causal inference. We found no consistent evidence that disorder induces greater aggression or more negative attitudes toward the neighborhood. Studies that found such effects disproportionately utilized weaker research designs that omit key correlates or confound perceptions of disorder with other neighborhood attitudes. We explore implications for theory, research, and policy."]]></description>
<dc:subject>to:NB bad_data_analysis crime sociology broken_windows</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:3810cb68d629/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:crime"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sociology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:broken_windows"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.annualreviews.org/doi/abs/10.1146/annurev-soc-073117-041429">
    <title>Interpreting and Understanding Logits, Probits, and Other Nonlinear Probability Models | Annual Review of Sociology</title>
    <dc:date>2019-05-26T17:57:53+00:00</dc:date>
    <link>https://www.annualreviews.org/doi/abs/10.1146/annurev-soc-073117-041429</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Methods textbooks in sociology and other social sciences routinely recommend the use of the logit or probit model when an outcome variable is binary, an ordered logit or ordered probit when it is ordinal, and a multinomial logit when it has more than two categories. But these methodological guidelines take little or no account of a body of work that, over the past 30 years, has pointed to problematic aspects of these nonlinear probability models and, particularly, to difficulties in interpreting their parameters. In this review, we draw on that literature to explain the problems, show how they manifest themselves in research, discuss the strengths and weaknesses of alternatives that have been suggested, and point to lines of further analysis."]]></description>
<dc:subject>to:NB statistics classifiers bad_data_analysis to_teach:undergrad-ADA</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:fca0788514e6/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:undergrad-ADA"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arstechnica.com/cars/2019/02/in-2017-the-feds-said-tesla-autopilot-cut-crashes-40-that-was-bogus/">
    <title>In 2017, the feds said Tesla Autopilot cut crashes 40%—that was bogus | Ars Technica</title>
    <dc:date>2019-02-14T17:36:01+00:00</dc:date>
    <link>https://arstechnica.com/cars/2019/02/in-2017-the-feds-said-tesla-autopilot-cut-crashes-40-that-was-bogus/</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[Unfortunately, the mistake here is so bald that it'd be hard to turn into a good teaching example.]]></description>
<dc:subject>bad_data_analysis to_teach driverless_cars</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:48b7a848a430/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:driverless_cars"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.pnas.org/content/108/42/E833.full">
    <title>Overlooked factors in the analysis of parole decisions | PNAS</title>
    <dc:date>2018-09-26T15:53:14+00:00</dc:date>
    <link>http://www.pnas.org/content/108/42/E833.full</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Danziger et al. (1) concluded that meal breaks taken by Israeli parole boards influence the boards’ decisions. This conclusion depends on the order of cases being random or at least exogenous to the timing of meal breaks. We examined data provided by the authors and obtained additional data from 12 hearing days (n = 227 decisions).* We also interviewed three attorneys, a parole panel judge, and five personnel at Israeli Prison Services and Court Management, learning that case ordering is not random and that several factors contribute to the downward trend in prisoner success between meal breaks. The most important is that the board tries to complete all cases from one prison before it takes a break and to start with another prison after the break. Within each session, unrepresented prisoners usually go last and are less likely to be granted parole than prisoners with attorneys. Using the same decision rules as Danziger et al., our data indicate that unrepresented prisoners account for about one-third of all cases, but they prevail only 15% of the time, whereas prisoners with counsel prevail at a 35% rate.
"This nonrandom order of cases might have become apparent had the authors not limited their analysis. They lumped together decisions rejecting parole and cases that were deferred to a later date. Theoretically and in practice, deferrals are not comparable to rejections of parole.
"Excluding these deferred cases, our data indicate a success rate of 67% for prisoners with counsel and 39% for unrepresented prisoners. Excluding deferrals in the authors' data yields very similar success rates, beginning at about 75% and dropping to 42% at the end of a session. Thus, we strongly suspect that the pattern of declining success rates is a result of hearing represented prisoners first and unrepresented prisoners last...."]]></description>
<dc:subject>psychology via:? bad_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:7c616af63bbe/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:psychology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:?"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.pnas.org/content/115/10/2305">
    <title>Rainfall statistics, stationarity, and climate change | PNAS</title>
    <dc:date>2018-05-05T14:38:03+00:00</dc:date>
    <link>http://www.pnas.org/content/115/10/2305</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["There is a growing research interest in the detection of changes in hydrologic and climatic time series. Stationarity can be assessed using the autocorrelation function, but this is not yet common practice in hydrology and climate. Here, we use a global land-based gridded annual precipitation (hereafter P) database (1940–2009) and find that the lag 1 autocorrelation coefficient is statistically significant at around 14% of the global land surface, implying nonstationary behavior (90% confidence). In contrast, around 76% of the global land surface shows little or no change, implying stationary behavior. We use these results to assess change in the observed P over the most recent decade of the database. We find that the changes for most (84%) grid boxes are within the plausible bounds of no significant change at the 90% CI. The results emphasize the importance of adequately accounting for natural variability when assessing change."

--- They really do seem to be saying that because _independent, identically distributed_ random variables have 0 autocorrelation, all autocorrelated time series are non-stationary.  This is so unbelievably stupid that I am going to have to read it again very carefully before banging my head into my desk.]]></description>
<dc:subject>to:NB to_read bad_data_analysis time_series statistics to_teach:data_over_space_and_time color_me_skeptical</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:1ff9d4129bec/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data_over_space_and_time"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:color_me_skeptical"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://dx.doi.org/10.1111/ecoj.12461">
    <title>The Power of Bias in Economics Research - Ioannidis - 2017 - The Economic Journal - Wiley Online Library</title>
    <dc:date>2017-10-26T17:53:00+00:00</dc:date>
    <link>http://dx.doi.org/10.1111/ecoj.12461</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We investigate two critical dimensions of the credibility of empirical economics research: statistical power and bias. We survey 159 empirical economics literatures that draw upon 64,076 estimates of economic parameters reported in more than 6,700 empirical studies. Half of the research areas have nearly 90% of their results under-powered. The median statistical power is 18%, or less. A simple weighted average of those reported results that are adequately powered (power ≥ 80%) reveals that nearly 80% of the reported effects in these empirical economics literatures are exaggerated; typically, by a factor of two and with one-third inflated by a factor of four or more."]]></description>
<dc:subject>to:NB economics statistics hypothesis_testing bad_data_analysis bad_science_journalism re:neutral_model_of_inquiry via:d-squared to_read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c9ba72b6d2c0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:economics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_science_journalism"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:neutral_model_of_inquiry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:d-squared"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://scatter.wordpress.com/2014/06/10/the-hurricane-name-study-gets-worse/">
    <title>the hurricane name study gets worse – scatterplot</title>
    <dc:date>2017-09-22T17:34:41+00:00</dc:date>
    <link>https://scatter.wordpress.com/2014/06/10/the-hurricane-name-study-gets-worse/</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>to_teach:linear_models bad_data_analysis linear_regression</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:b3570dd906ec/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:linear_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:linear_regression"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arstechnica.com/science/2017/04/the-peer-reviewed-saga-of-mindless-eating-mindless-research-is-bad-too/">
    <title>“Mindless Eating,” or how to send an entire life of research into question | Ars Technica</title>
    <dc:date>2017-07-01T21:43:18+00:00</dc:date>
    <link>https://arstechnica.com/science/2017/04/the-peer-reviewed-saga-of-mindless-eating-mindless-research-is-bad-too/</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>statistics bad_data_analysis psychology bad_science trapped_in_plutos_republic</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:f89a9c43999d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:psychology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:trapped_in_plutos_republic"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://extranewsfeed.com/the-noise-miners-cffe6c14b626">
    <title>The Noise Miners – Extra Newsfeed</title>
    <dc:date>2017-03-29T15:20:07+00:00</dc:date>
    <link>https://extranewsfeed.com/the-noise-miners-cffe6c14b626</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>funny:geeky funny:malicious satire data_mining social_science_methodology bad_data_analysis to_teach:data-mining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:107e8d0cd2b6/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:funny:geeky"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:funny:malicious"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:satire"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:data_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_science_methodology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://sloanreview.mit.edu/article/moneyball-for-professors/">
    <title>‘Moneyball’ for Professors?</title>
    <dc:date>2016-12-19T19:21:54+00:00</dc:date>
    <link>http://sloanreview.mit.edu/article/moneyball-for-professors/</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[The key paragraph:

"Using a hand-curated data set of 54 scholars who obtained doctorates after 1995 and held assistant professorships at top-10 operations research programs in 2003 or earlier, these statistical models made different decisions than the tenure committees for 16 (30%) of the candidates. Specifically, these new criteria yielded a set of scholars who, in the future, produced more papers published in the top journals and research that was cited more often than the scholars who were actually selected by tenure committees"

--- In other words, "success" here is defined entirely through the worst sort of abuse of citation metrics, i.e., through doing the things which everyone who has seriously studied citation metrics says you should _not_ use them for.  (Cf. https://arxiv.org/abs/0910.3529 .)  If the objective was to make academic hiring decisions _even less_ sensitive to actual intellectual quality, one could hardly do better.
I am sure that this idea will, however, be widely adopted and go from strength to strength.]]></description>
<dc:subject>bad_data_analysis academia bibliometry social_networks network_data_analysis prediction utter_stupidity have_read via:jbdelong to:blog</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:5ae58096219e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:academia"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bibliometry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:utter_stupidity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:jbdelong"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:blog"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.washingtonpost.com/news/monkey-cage/wp/2016/12/05/that-viral-graph-about-millennials-declining-support-for-democracy-its-very-misleading/">
    <title>That viral graph about millennials’ declining support for democracy? It’s very misleading. - The Washington Post</title>
    <dc:date>2016-12-05T22:48:59+00:00</dc:date>
    <link>https://www.washingtonpost.com/news/monkey-cage/wp/2016/12/05/that-viral-graph-about-millennials-declining-support-for-democracy-its-very-misleading/</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>democracy surveys visual_display_of_quantitative_information bad_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c26873869ab3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:democracy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:surveys"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:visual_display_of_quantitative_information"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://baselinescenario.com/2016/10/17/you-cant-get-there-from-here/">
    <title>Economic Anxiety and the Limits of Data Journalism | The Baseline Scenario</title>
    <dc:date>2016-10-17T20:50:23+00:00</dc:date>
    <link>https://baselinescenario.com/2016/10/17/you-cant-get-there-from-here/</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>us_politics racism trump.donald bad_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:f91abeb234fd/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:us_politics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:racism"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:trump.donald"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://violentmetaphors.com/2014/05/21/nicholas-wade-and-race-building-a-scientific-facade/">
    <title>Nicholas Wade and race: building a scientific façade – Violent metaphors</title>
    <dc:date>2016-08-17T17:05:52+00:00</dc:date>
    <link>https://violentmetaphors.com/2014/05/21/nicholas-wade-and-race-building-a-scientific-facade/</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>racism racist_idiocy historical_genetics human_genetics statistics bad_data_analysis race wade.nicholas bad_science_journalism</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:55f0989f78cc/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:racism"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:racist_idiocy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:historical_genetics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:human_genetics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:race"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:wade.nicholas"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_science_journalism"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.pnas.org/content/early/2016/06/27/1602413113.long">
    <title>Cluster failure: Why fMRI inferences for spatial extent have inflated false-positive rates</title>
    <dc:date>2016-06-30T18:22:02+00:00</dc:date>
    <link>http://www.pnas.org/content/early/2016/06/27/1602413113.long</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Functional MRI (fMRI) is 25 years old, yet surprisingly its most common statistical methods have not been validated using real data. Here, we used resting-state fMRI data from 499 healthy controls to conduct 3 million task group analyses. Using this null data with different experimental designs, we estimate the incidence of significant results. In theory, we should find 5% false positives (for a significance threshold of 5%), but instead we found that the most common software packages for fMRI analysis (SPM, FSL, AFNI) can result in false-positive rates of up to 70%. These results question the validity of some 40,000 fMRI studies and may have a large impact on the interpretation of neuroimaging results."

--- Nichols is a serious guy (and co-author of one of the best fMRI textbooks I've seen).  This is pretty awful news for the field.]]></description>
<dc:subject>to:NB spatial_statistics hypothesis_testing fmri neural_data_analysis statistics bad_data_analysis nichols.thomas_e. have_read to:blog</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:1fd98f8c2f41/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:spatial_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:fmri"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neural_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:nichols.thomas_e."/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:blog"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arstechnica.co.uk/security/2016/02/the-nsas-skynet-program-may-be-killing-thousands-of-innocent-people/">
    <title>The NSA’s SKYNET program may be killing thousands of innocent people | Ars Technica UK</title>
    <dc:date>2016-02-16T17:56:29+00:00</dc:date>
    <link>http://arstechnica.co.uk/security/2016/02/the-nsas-skynet-program-may-be-killing-thousands-of-innocent-people/</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[We have much to answer for.]]></description>
<dc:subject>the_continuing_crises national_surveillance_state machine_learning classifiers cross-validation bad_data_analysis terrorism_fears drones decision_trees ensemble_methods to_teach:data-mining to:blog random_forests</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:1248c0c8cf03/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:the_continuing_crises"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:national_surveillance_state"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:machine_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:cross-validation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:terrorism_fears"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:drones"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:decision_trees"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:ensemble_methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:blog"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:random_forests"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.harrowell.org.uk/blog/2015/12/05/that-time-i-was-nearly-burned-alive-by-a-machine-learning-model-and-didnt-even-notice-for-33-years/">
    <title>That time I was nearly burned alive by a machine-learning model and didn’t even notice for 33 years | The Yorkshire Ranter</title>
    <dc:date>2015-12-09T00:49:20+00:00</dc:date>
    <link>http://www.harrowell.org.uk/blog/2015/12/05/that-time-i-was-nearly-burned-alive-by-a-machine-learning-model-and-didnt-even-notice-for-33-years/</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[This is so rich in morals for what I do and teach I hardly know where to start.  Beyond: holy shit.]]></description>
<dc:subject>nukes cold_war machine_learning prediction data_mining ussr bad_data_analysis the_nightmare_from_which_we_are_trying_to_awake or_perhaps_the_nightmare_into_which_we_are_slipping the_robo-nuclear_apocalypse_in_our_past_light_cone track_down_references to_teach:data-mining to_teach:statcomp via:james-nicoll to:blog intelligence_(spying)</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:6e6e84925fb1/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:nukes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:cold_war"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:machine_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:data_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:ussr"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:the_nightmare_from_which_we_are_trying_to_awake"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:or_perhaps_the_nightmare_into_which_we_are_slipping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:the_robo-nuclear_apocalypse_in_our_past_light_cone"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:track_down_references"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:statcomp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:james-nicoll"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:blog"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:intelligence_(spying)"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://medium.com/bull-market/digital-locability-and-interocular-trauma-973397192975">
    <title>Digital Locability and Interocular Trauma — Bull Market — Medium</title>
    <dc:date>2015-04-14T00:13:27+00:00</dc:date>
    <link>https://medium.com/bull-market/digital-locability-and-interocular-trauma-973397192975</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[Ah, the crushing intellectual superiority at the top of the financial & corporate heap...]]></description>
<dc:subject>have_read bad_data_analysis finance utter_stupidity dsquared dimon.jamie variance_estimation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:41c250f3a860/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:finance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:utter_stupidity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dsquared"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dimon.jamie"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variance_estimation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.statschat.org.nz/2015/01/20/ask-a-silly-question-get-a-silly-answer/">
    <title>Ask a silly question, get a silly answer | Stats Chat</title>
    <dc:date>2015-01-21T00:46:50+00:00</dc:date>
    <link>http://www.statschat.org.nz/2015/01/20/ask-a-silly-question-get-a-silly-answer/</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[Presumably there is a linguistic-pragmatics explanation of this --- people are interpreting the question so it makes sense as something asked for by an intelligent person, quite possibly more knowledgeable than they are.]]></description>
<dc:subject>bad_data_analysis bad_science_journalism surveys natural_history_of_truthiness blogged</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:25e8641902d4/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_science_journalism"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:surveys"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:natural_history_of_truthiness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:blogged"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1407.4240">
    <title>[1407.4240] Unconscious lie detection as an example of a widespread fallacy in the Neurosciences</title>
    <dc:date>2015-01-20T00:59:26+00:00</dc:date>
    <link>http://arxiv.org/abs/1407.4240</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Neuroscientists frequently use a certain statistical reasoning to establish the existence of distinct neuronal processes in the brain. We show that this reasoning is flawed and that the large corresponding literature needs reconsideration. We illustrate the fallacy with a recent study that received an enormous press coverage because it concluded that humans detect deceit better if they use unconscious processes instead of conscious deliberations. The study was published under a new open-data policy that enabled us to reanalyze the data with more appropriate methods. We found that unconscious performance was close to chance - just as the conscious performance. This illustrates the flaws of this widely used statistical reasoning, the benefits of open-data practices, and the need for careful reconsideration of studies using the same rationale."]]></description>
<dc:subject>to:NB to_read statistics to_teach:undergrad-ADA psychology experimental_psychology bad_data_analysis have_skimmed</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:6036d92d5703/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:undergrad-ADA"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:psychology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:experimental_psychology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_skimmed"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.chicagomag.com/Chicago-Magazine/June-2014/Chicago-crime-statistics/">
    <title>The Truth About Chicago’s Crime Rates: Part 2 | Chicago magazine | June 2014</title>
    <dc:date>2014-08-26T18:10:21+00:00</dc:date>
    <link>http://www.chicagomag.com/Chicago-Magazine/June-2014/Chicago-crime-statistics/</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>crime juking_the_stats evidence_based chicago social_measurement social_science_methodology bad_data_analysis to_teach to:blog</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:6238d6ca8d21/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:crime"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:juking_the_stats"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:evidence_based"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:chicago"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_measurement"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_science_methodology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:blog"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://news.sciencemag.org/biology/2014/08/ecology-explaining-less-and-less">
    <title>Is ecology explaining less and less? | Science/AAAS | News</title>
    <dc:date>2014-08-25T19:32:34+00:00</dc:date>
    <link>http://news.sciencemag.org/biology/2014/08/ecology-explaining-less-and-less</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[Repeat after me: R^2 is in no way a measure of explanation.]]></description>
<dc:subject>meta-analysis bad_data_analysis ecology statistics track_down_references</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:b9d828de11a7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:meta-analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:ecology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:track_down_references"/>
</rdf:Bag></taxo:topics>
</item>
</rdf:RDF>