<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (jm)</title>
    <link>https://pinboard.in/u:jm/public/</link>
    <description>recent bookmarks from jm</description>
    <items>
      <rdf:Seq>	<rdf:li rdf:resource="https://christinapagel.substack.com/p/where-are-we-with-covid-in-england"/>
	<rdf:li rdf:resource="https://filmmakermagazine.com/121867-joanne-mcneil-large-language-models-allison-parrish/"/>
	<rdf:li rdf:resource="https://jamanetwork.com/journals/jama/fullarticle/2797443"/>
	<rdf:li rdf:resource="https://twitter.com/President_MU/status/1410315246791802884"/>
	<rdf:li rdf:resource="https://twitter.com/hatr/status/1361756449802768387"/>
	<rdf:li rdf:resource="https://crypto.stanford.edu/prio/paper.pdf"/>
	<rdf:li rdf:resource="https://twitter.com/AdamJKucharski/status/1307958852248272898"/>
	<rdf:li rdf:resource="https://twitter.com/vincentglad/status/1303243869933404161/photo/1"/>
	<rdf:li rdf:resource="http://thaines.com/post/alevels2020"/>
	<rdf:li rdf:resource="https://medium.com/@Bob_Wachter/interpreting-covid-19-test-results-a-bayesian-approach-df058dad2ade"/>
	<rdf:li rdf:resource="https://twitter.com/Care2much18/status/1252819591090155523"/>
	<rdf:li rdf:resource="https://medium.com/@tomaspueyo/coronavirus-act-today-or-people-will-die-f4d3d9cd99ca"/>
	<rdf:li rdf:resource="https://www.fastcompany.com/90182112/want-to-make-money-build-a-business-on-a-bike-lane"/>
	<rdf:li rdf:resource="https://www.electricitymap.org/?page=country&amp;solar=false&amp;remote=true&amp;wind=false&amp;countryCode=IE"/>
	<rdf:li rdf:resource="https://developers.googleblog.com/2019/09/enabling-developers-and-organizations.html"/>
	<rdf:li rdf:resource="https://www.artificiallawyer.com/2019/06/04/france-bans-judge-analytics-5-years-in-prison-for-rule-breakers/"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1902.04023"/>
	<rdf:li rdf:resource="https://www.chrisstucchio.com/pubs/slides/crunchconf_2018/slides.pdf"/>
	<rdf:li rdf:resource="https://notesonthefront.typepad.com/politicaleconomy/2019/01/the-far-rights-problem-with-immigration-facts.html?fbclid=IwAR2MJkON4vAnWTuPsgFdop61--X0bndsmQd2SZz6ZIo8Jw2uWsGETGeP4Xc"/>
	<rdf:li rdf:resource="https://www.nytimes.com/2016/08/30/upshot/surprisingly-little-evidence-for-the-usual-wisdom-about-teeth.html"/>
	<rdf:li rdf:resource="https://www.reuters.com/investigates/special-report/usa-immigration-court/"/>
	<rdf:li rdf:resource="https://bigbrotherwatch.org.uk/2018/04/a-closer-look-at-experian-big-data-and-artificial-intelligence-in-durham-police/"/>
	<rdf:li rdf:resource="https://eev.ee/blog/2018/01/02/random-with-care/"/>
	<rdf:li rdf:resource="https://www.wired.com/story/trueallele-software-transforming-how-courts-treat-dna-evidence/"/>
	<rdf:li rdf:resource="https://theconversation.com/cycling-to-work-major-new-study-suggests-health-benefits-are-staggering-76292#link_time=1501254014"/>
	<rdf:li rdf:resource="https://cyclingindustry.news/physical-separation-of-cyclists-from-traffic-crucial-to-dropping-injury-rates-shows-u-s-study/"/>
	<rdf:li rdf:resource="https://erikbern.com/2017/03/15/the-eigenvector-of-why-we-moved-from-language-x-to-language-y.html"/>
	<rdf:li rdf:resource="https://github.com/tdunning/t-digest"/>
	<rdf:li rdf:resource="http://www.argmin.net/2016/11/14/fall-of-big-data/"/>
	<rdf:li rdf:resource="http://www.nytimes.com/2016/10/13/upshot/how-one-19-year-old-illinois-man-is-distorting-national-polling-averages.html?_r=0"/>
	<rdf:li rdf:resource="http://www.theregister.co.uk/2016/07/03/mri_software_bugs_could_upend_years_of_research/?mt=1467666616578"/>
	<rdf:li rdf:resource="http://rpubs.com/jrauser/percentiles"/>
	<rdf:li rdf:resource="http://blog.cryptographyengineering.com/2016/06/what-is-differential-privacy.html"/>
	<rdf:li rdf:resource="http://arstechnica.co.uk/security/2016/02/the-nsas-skynet-program-may-be-killing-thousands-of-innocent-people/"/>
	<rdf:li rdf:resource="http://www.johndcook.com/blog/2016/01/30/general-birthday-problem/"/>
	<rdf:li rdf:resource="http://priceonomics.com/the-guinness-brewer-who-revolutionized-statistics/"/>
	<rdf:li rdf:resource="http://www.dcscience.net/2015/12/11/placebo-effects-are-weak-regression-to-the-mean-is-the-main-reason-ineffective-treatments-appear-to-work/"/>
	<rdf:li rdf:resource="http://erikerlandson.github.io/blog/2015/11/20/very-fast-reservoir-sampling/"/>
	<rdf:li rdf:resource="https://www.vividcortex.com/blog/why-percentiles-dont-work-the-way-you-think"/>
	<rdf:li rdf:resource="http://googleresearch.blogspot.ie/2015/08/the-reusable-holdout-preserving.html"/>
	<rdf:li rdf:resource="http://www.dublincycling.com/cycling/bike-theft-survey-results"/>
	<rdf:li rdf:resource="http://america.aljazeera.com/articles/2014/10/29/sleep-study.html"/>
	<rdf:li rdf:resource="http://psy-lob-saw.blogspot.ie/2015/02/hdrhistogram-better-latency-capture.html"/>
	<rdf:li rdf:resource="http://www.quickmeme.com/scumbag-data-scientist"/>
	<rdf:li rdf:resource="http://fivethirtyeight.com/features/stop-playing-monopoly-with-your-kids-and-play-these-games-instead/"/>
	<rdf:li rdf:resource="https://www.schneier.com/essays/archives/2005/03/why_data_mining_wont.html"/>
	<rdf:li rdf:resource="https://blog.twitter.com/2015/introducing-practical-and-robust-anomaly-detection-in-a-time-series"/>
	<rdf:li rdf:resource="http://www.uncertml.org/"/>
	<rdf:li rdf:resource="http://www.cs.utexas.edu/users/mckinley/papers/uncertainty-asplos-2014.pdf"/>
	<rdf:li rdf:resource="http://www.codemesh.io/static/upload/media/141562653162935languagewars.pdf"/>
	<rdf:li rdf:resource="http://www.ssa.gov/history/lifeexpect.html"/>
	<rdf:li rdf:resource="https://github.com/FelixGV/tehuti"/>
	<rdf:li rdf:resource="https://groups.google.com/forum/#!msg/project-voldemort/Y52UyHQ8tBA/9Ei79_RvS3EJ"/>
	<rdf:li rdf:resource="http://godoc.org/github.com/codahale/tinystat/cmd/tinystat"/>
	<rdf:li rdf:resource="http://google-opensource.blogspot.ie/2014/09/causalimpact-new-open-source-package.html"/>
	<rdf:li rdf:resource="http://www.psmag.com/navigation/politics-and-law/punished-poor-problem-using-big-data-justice-system-88651/"/>
	<rdf:li rdf:resource="http://kamon.io/presentations/javacro14/#/"/>
	<rdf:li rdf:resource="http://www.irishtimes.com/news/health/daylight-saving-time-linked-to-heart-attacks-study-finds-1.1743441"/>
	<rdf:li rdf:resource="http://abe.is/analyzing-citibike-usage/"/>
	<rdf:li rdf:resource="http://www.bea.aero/fr/enquetes/vol.af.447/metron.search.analysis.pdf"/>
	<rdf:li rdf:resource="http://www.irishtimes.com/news/ireland/irish-news/sacked-google-worker-says-staff-ratings-fixed-to-fit-template-1.1721176"/>
	<rdf:li rdf:resource="https://twitter.com/jeremyjarvis/status/428848527226437632/photo/1"/>
	<rdf:li rdf:resource="http://www.edge.org/response-detail/25401"/>
	<rdf:li rdf:resource="http://cf.broadsheet.ie/wp-content/uploads/2013/11/20131106.jpg"/>
	<rdf:li rdf:resource="http://armon.github.io/statsite/"/>
	<rdf:li rdf:resource="http://www.cs.rutgers.edu/~muthu/bquant.pdf"/>
	<rdf:li rdf:resource="http://static.googleusercontent.com/external_content/untrusted_dlcp/research.google.com/en//pubs/archive/36737.pdf"/>
	<rdf:li rdf:resource="http://vudlab.com/fat-tails.html"/>
	<rdf:li rdf:resource="http://boundary.com/blog/2013/06/27/announcing-early-warnings/"/>
	<rdf:li rdf:resource="http://www.medicalindependent.ie/20844/news"/>
      </rdf:Seq>
    </items>
  </channel><item rdf:about="https://christinapagel.substack.com/p/where-are-we-with-covid-in-england">
    <title>UK COVID vaccination modelling was dependent on a single Pythonista</title>
    <dc:date>2024-02-12T16:11:24+00:00</dc:date>
    <link>https://christinapagel.substack.com/p/where-are-we-with-covid-in-england</link>
    <dc:creator>jm</dc:creator><description><![CDATA[The UKHSA Comptroller complained that they could not audit or stand over QA practices on the model: "One of the reasons given was that the main model was coded in [...] Python and that they had to stop using it because the staff member that knew Python had left."  Now they're using a backup model written in Excel.]]></description>
<dc:subject>excel python modelling statistics uk ukhsa qa covid-19 quality-control</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:67997cd0272c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:excel"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:modelling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:uk"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ukhsa"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:qa"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:covid-19"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:quality-control"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://filmmakermagazine.com/121867-joanne-mcneil-large-language-models-allison-parrish/">
    <title>Turning Poetry into Art: Joanne McNeil on Large Language Models and the Poetry of Allison Parrish | Filmmaker Magazine</title>
    <dc:date>2023-07-31T16:13:48+00:00</dc:date>
    <link>https://filmmakermagazine.com/121867-joanne-mcneil-large-language-models-allison-parrish/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Allison Parrish is making great work.

<blockquote>Parrish has long thought of her work in conversation with Oulipo and other avant-garde movements, “using randomness to produce juxtapositions of concepts to make you think more deeply about the language that you’re using.” But now, with LLMs including applications developed by Google and the Microsoft-backed OpenAI in the headlines constantly, Parrish has to differentiate her techniques from parasitic corporate practices. “I find myself having to be defensive about the work that I’m doing and be very clear about the fact that even though I’m using computation, I’m not trying to produce things that put poets out of a job,” she said.

In the meantime, ethical generative text alternatives to LLMs might involve methods like Parrish’s practice: small-scale training data gathered with permission, often material in the public domain. “Just because something’s in the public domain doesn’t necessarily mean that it’s ethical to use it, but it’s a good starting point,” Parrish told me. ...

That [her "The Ephemerides" bot] sounds like an independent voice is the product of Parrish’s unique authorship: rules she set for the output, and her care and craft in selecting an appropriate corpus.  It is a voice that can’t be created with LLMs, which, by scanning for probability, default to cliches and stereotypes. “They’re inherently conservative,” Parrish said. “They encode the past, literally. That’s what they’re doing with these data sets.”
</blockquote>

]]></description>
<dc:subject>ai poetry ml statistics alison-parrish art poems generative-art text randomness</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:7601debb0064/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:poetry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ml"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:alison-parrish"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:art"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:poems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:generative-art"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:randomness"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://jamanetwork.com/journals/jama/fullarticle/2797443">
    <title>Latest Long Covid estimates</title>
    <dc:date>2022-10-19T10:07:45+00:00</dc:date>
    <link>https://jamanetwork.com/journals/jama/fullarticle/2797443</link>
    <dc:creator>jm</dc:creator><description><![CDATA[tl;dr: 6.2% average rate, more women than men, 15% continued to suffer after 12 months.

<blockquote>A total of 1.2 million individuals who had symptomatic SARS-CoV-2 infection were included (mean age, 4-66 years; males, 26%-88%). In the modeled estimates, 6.2% (95% uncertainty interval [UI], 2.4%-13.3%) of individuals who had symptomatic SARS-CoV-2 infection experienced at least 1 of the 3 Long COVID symptom clusters in 2020 and 2021, including 3.2% (95% UI, 0.6%-10.0%) for persistent fatigue with bodily pain or mood swings, 3.7% (95% UI, 0.9%-9.6%) for ongoing respiratory problems, and 2.2% (95% UI, 0.3%-7.6%) for cognitive problems after adjusting for health status before COVID-19, comprising an estimated 51.0% (95% UI, 16.9%-92.4%), 60.4% (95% UI, 18.9%-89.1%), and 35.4% (95% UI, 9.4%-75.1%), respectively, of Long COVID cases. The Long COVID symptom clusters were more common in women aged 20 years or older (10.6% [95% UI, 4.3%-22.2%]) 3 months after symptomatic SARS-CoV-2 infection than in men aged 20 years or older (5.4% [95% UI, 2.2%-11.7%]). Both sexes younger than 20 years of age were estimated to be affected in 2.8% (95% UI, 0.9%-7.0%) of symptomatic SARS-CoV-2 infections. The estimated mean Long COVID symptom cluster duration was 9.0 months (95% UI, 7.0-12.0 months) among hospitalized individuals and 4.0 months (95% UI, 3.6-4.6 months) among nonhospitalized individuals. Among individuals with Long COVID symptoms 3 months after symptomatic SARS-CoV-2 infection, an estimated 15.1% (95% UI, 10.3%-21.1%) continued to experience symptoms at 12 months.</blockquote>

]]></description>
<dc:subject>long-covid statistics disease covid-19 papers jama disability</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:cf65bc10f43a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:long-covid"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:disease"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:covid-19"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:jama"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:disability"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://twitter.com/President_MU/status/1410315246791802884">
    <title>The model used to simulate the Irish COVID-19 response</title>
    <dc:date>2021-07-01T08:39:12+00:00</dc:date>
    <link>https://twitter.com/President_MU/status/1410315246791802884</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Detailed thread from Professor Philip Nolan on Twitter, on the scenario modelling used by NPHET to inform the government on likely COVID-19 infection trajectories; several models are used, including a basic SEIR model and an agent-based model, "where social structures and transmission are simulated in detail at the individual level; these show rapid spread in younger people with transmission into older groups, and  highlight uncertainty on the role of children and adolescents", and the role of super-spreader events.

tl;dr: "a variant with a transmission advantage [i.e., Delta] can do very significant damage if we let it spread in a partially vaccinated population, the scale of the damage depends on the transmission advantage, and it starts slowly and escalates rapidly."]]></description>
<dc:subject>modelling data-science statistics epidemiology pandemics covid-19 sars-cov-2 ireland philip-nolan seir</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:21e859682f26/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:modelling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:epidemiology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:pandemics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:covid-19"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sars-cov-2"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ireland"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:philip-nolan"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:seir"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://twitter.com/hatr/status/1361756449802768387">
    <title>want to ace an AI-based interview? add a bookshelf in the background</title>
    <dc:date>2021-02-22T10:59:18+00:00</dc:date>
    <link>https://twitter.com/hatr/status/1361756449802768387</link>
    <dc:creator>jm</dc:creator><description><![CDATA[This is comedy gold.  Turns out some digital phrenology software used for AI-aided interviewing will produce higher results for candidates who simply have a bookshelf as a background.

As Daniel Bilar puts it: it's the "Clever Hans" phenomenon, [...] 'spurious correlations, can occur when there is a feature in the data that is highly correlated with the correct outcome, but is not the cause for the answer being correct.']]></description>
<dc:subject>correlation clever-hans funny ai ml interviewing statistics phrenology</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:275724d41508/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:correlation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:clever-hans"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:funny"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ml"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:interviewing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:phrenology"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://crypto.stanford.edu/prio/paper.pdf">
    <title>Prio</title>
    <dc:date>2020-09-24T11:23:08+00:00</dc:date>
    <link>https://crypto.stanford.edu/prio/paper.pdf</link>
    <dc:creator>jm</dc:creator><description><![CDATA['Prio allows a set of servers to compute aggregate statistics over client-provided data while maintaining client privacy, defending against client misbehavior, and performing nearly as well as data-collection platforms that exhibit neither of these security properties.'

Aggregation operations include: integer sum and mean; variance and std dev; boolean OR/AND; min/max; sets; frequency count and percentiles/quantiles.]]></description>
<dc:subject>nizk zero-knowledge snark prio crypto privacy data-privacy statistics quantiles percentiles aggregation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:2aa675a6dbfc/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:nizk"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:zero-knowledge"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:snark"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:prio"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:crypto"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:privacy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-privacy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:quantiles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:percentiles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aggregation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://twitter.com/AdamJKucharski/status/1307958852248272898">
    <title>Sweden has the smallest average household size in Europe</title>
    <dc:date>2020-09-21T10:14:59+00:00</dc:date>
    <link>https://twitter.com/AdamJKucharski/status/1307958852248272898</link>
    <dc:creator>jm</dc:creator><description><![CDATA[This is an interesting factor regarding COVID-19 transmission -- the majority of Swedish households have a single occupant, unlike everywhere else in Europe (twice the rate of Ireland, for instance).]]></description>
<dc:subject>covid-19 sweden households europe statistics eu housing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:940ab75a2d35/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:covid-19"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sweden"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:households"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:europe"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:eu"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:housing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://twitter.com/vincentglad/status/1303243869933404161/photo/1">
    <title>illustration of how a rise in SARS-CoV-2 positivity in younger groups can soon become a rise in older groups</title>
    <dc:date>2020-09-08T09:26:06+00:00</dc:date>
    <link>https://twitter.com/vincentglad/status/1303243869933404161/photo/1</link>
    <dc:creator>jm</dc:creator><description><![CDATA[via Vincent Glad, on Twitter: the positivity rate stratified by age, in the Marseilles region]]></description>
<dc:subject>testing covid-19 age epidemiology dataviz statistics marseilles france</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:7ed9065e39e7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:covid-19"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:age"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:epidemiology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dataviz"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:marseilles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:france"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://thaines.com/post/alevels2020">
    <title>A-Levels: The Model is not the Student</title>
    <dc:date>2020-08-17T11:19:06+00:00</dc:date>
    <link>http://thaines.com/post/alevels2020</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Solid description of the many errors in the UK's attempt to estimate correct grades for their A-level students this year.  They really made a massive mess of it.

'Ultimately, the government can only receive, at best, a D for their efforts; they tried but failed. We can only hope they will now pull themselves up, bring in the experts, and construct an algorithm worthy of an A.']]></description>
<dc:subject>ofqual schools marks exams a-levels uk estimation statistics maths fail</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:f95b2089f16c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ofqual"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:schools"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:marks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:exams"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:a-levels"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:uk"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:maths"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:fail"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://medium.com/@Bob_Wachter/interpreting-covid-19-test-results-a-bayesian-approach-df058dad2ade">
    <title>Interpreting Covid-19 Test Results: A Bayesian Approach</title>
    <dc:date>2020-06-13T22:42:29+00:00</dc:date>
    <link>https://medium.com/@Bob_Wachter/interpreting-covid-19-test-results-a-bayesian-approach-df058dad2ade</link>
    <dc:creator>jm</dc:creator><description><![CDATA[This is very clever -- it hadn't occurred to me at all, but of course it makes sense. tl;dr: prevalence, the prevailing rate of infection in the community, is a key factor in Covid-19 testing.

<blockquote>a brief tutorial on Covid-19 testing, with an emphasis on a Bayesian approach. After presenting the basics, we’ll walk through four confusing Covid-19 testing scenarios, just to give you a feel for the kinds of pickles we often find ourselves in.</blockquote>

]]></description>
<dc:subject>prevalence covid-19 bayes bayesian statistics testing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:419f48573a05/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:prevalence"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:covid-19"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bayes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bayesian"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:testing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://twitter.com/Care2much18/status/1252819591090155523">
    <title>on COVID-19 death rate statistics</title>
    <dc:date>2020-04-22T12:01:00+00:00</dc:date>
    <link>https://twitter.com/Care2much18/status/1252819591090155523</link>
    <dc:creator>jm</dc:creator><description><![CDATA[illuminating Twitter thread. tl;dr: most countries are juking the numbers by ignoring COVID deaths in elderly care homes (where a massive death toll is occurring), or by ignoring suspected COVID cases in favour of confirmed post-mortem cases, or by ignoring comorbidity.]]></description>
<dc:subject>covid-19 statistics lies-damn-lies death-rates comorbidity diseases europe deaths</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:a34779538059/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:covid-19"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:lies-damn-lies"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:death-rates"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:comorbidity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:diseases"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:europe"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:deaths"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://medium.com/@tomaspueyo/coronavirus-act-today-or-people-will-die-f4d3d9cd99ca">
    <title>Coronavirus: Why You Must Act Now - Tomas Pueyo - Medium</title>
    <dc:date>2020-03-11T10:30:15+00:00</dc:date>
    <link>https://medium.com/@tomaspueyo/coronavirus-act-today-or-people-will-die-f4d3d9cd99ca</link>
    <dc:creator>jm</dc:creator><description><![CDATA[some good estimates of current COVID-19 epidemiology]]></description>
<dc:subject>coronavirus covid19 healthcare epidemiology diseases statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:afc303b28ce7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:coronavirus"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:covid19"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:healthcare"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:epidemiology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:diseases"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.fastcompany.com/90182112/want-to-make-money-build-a-business-on-a-bike-lane">
    <title>Want To Make Money? Build A Business On A Bike Lane</title>
    <dc:date>2019-11-25T11:30:25+00:00</dc:date>
    <link>https://www.fastcompany.com/90182112/want-to-make-money-build-a-business-on-a-bike-lane</link>
    <dc:creator>jm</dc:creator><description><![CDATA[“Local stores next to the protected bike lane have seen a 49% increase in sales, compared to an average of 3% for Manhattan as a whole.”]]></description>
<dc:subject>numbers statistics cycling bike-lanes shops</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:4a8cc7b40f48/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:numbers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cycling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bike-lanes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:shops"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.electricitymap.org/?page=country&amp;solar=false&amp;remote=true&amp;wind=false&amp;countryCode=IE">
    <title>electricityMap</title>
    <dc:date>2019-10-07T16:33:35+00:00</dc:date>
    <link>https://www.electricitymap.org/?page=country&amp;solar=false&amp;remote=true&amp;wind=false&amp;countryCode=IE</link>
    <dc:creator>jm</dc:creator><description><![CDATA[This is fascinating! 'a live visualization of where your electricity comes from and how much CO2 was emitted to produce it.' (via ClimateAction.tech)]]></description>
<dc:subject>electricity statistics graphs data energy climate renewables carbon co2</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:4a92fe5beab6/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:electricity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:graphs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:energy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:climate"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:renewables"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:carbon"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:co2"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://developers.googleblog.com/2019/09/enabling-developers-and-organizations.html">
    <title>Google release an open-source differential-privacy lib</title>
    <dc:date>2019-09-09T14:24:03+00:00</dc:date>
    <link>https://developers.googleblog.com/2019/09/enabling-developers-and-organizations.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>Differentially-private data analysis is a principled approach that enables organizations to learn from the majority of their data while simultaneously ensuring that those results do not allow any individual's data to be distinguished or re-identified. This type of analysis can be implemented in a wide variety of ways and for many different purposes. For example, if you are a health researcher, you may want to compare the average amount of time patients remain admitted across various hospitals in order to determine if there are differences in care. Differential privacy is a high-assurance, analytic means of ensuring that use cases like this are addressed in a privacy-preserving manner.

Currently, we provide algorithms to compute the following:

Count
Sum
Mean
Variance
Standard deviation
Order statistics (including min, max, and median)</blockquote>

]]></description>
<dc:subject>analytics google ml privacy differential-privacy aggregation statistics obfuscation approximation algorithms</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:98439e468432/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:analytics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:google"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ml"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:privacy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:differential-privacy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aggregation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:obfuscation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:approximation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.artificiallawyer.com/2019/06/04/france-bans-judge-analytics-5-years-in-prison-for-rule-breakers/">
    <title>France Bans Judge Analytics, 5 Years In Prison For Rule Breakers</title>
    <dc:date>2019-06-05T10:56:24+00:00</dc:date>
    <link>https://www.artificiallawyer.com/2019/06/04/france-bans-judge-analytics-5-years-in-prison-for-rule-breakers/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>
‘The identity data of magistrates and members of the judiciary cannot be reused with the purpose or effect of evaluating, analysing, comparing or predicting their actual or alleged professional practices.’

As far as Artificial Lawyer understands, this is the very first example of such a ban anywhere in the world. Insiders in France told Artificial Lawyer that the new law is a direct result of an earlier effort to make all case law easily accessible to the general public, which was seen at the time as improving access to justice and a big step forward for transparency in the justice sector.

However, judges in France had not reckoned on NLP and machine learning companies taking the public data and using it to model how certain judges behave in relation to particular types of legal matter or argument, or how they compare to other judges.

In short, they didn’t like how the pattern of their decisions – now relatively easy to model – were potentially open for all to see.
</blockquote>

]]></description>
<dc:subject>censorship france analytics judgements legal judges statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:70c20892a220/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:censorship"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:france"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:analytics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:judgements"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:legal"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:judges"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1902.04023">
    <title>[1902.04023] Computing Extremely Accurate Quantiles Using t-Digests</title>
    <dc:date>2019-02-18T11:05:52+00:00</dc:date>
    <link>https://arxiv.org/abs/1902.04023</link>
    <dc:creator>jm</dc:creator><description><![CDATA['We present on-line algorithms for computing approximations of rank-based statistics that give high accuracy, particularly near the tails of a distribution, with very small sketches. Notably, the method allows a quantile q to be computed with an accuracy relative to max(q,1−q) rather than absolute accuracy as with most other methods. This new algorithm is robust with respect to skewed distributions or ordered datasets and allows separately computed summaries to be combined with no loss in accuracy.  An open-source Java implementation of this algorithm is available from the author. Independent implementations in Go and Python are also available.'

(via Tony Finch)]]></description>
<dc:subject>java go python open-source quantiles percentiles approximation statistics sketching algorithms via:fanf</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:6c84ec8a0947/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:java"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:go"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:open-source"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:quantiles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:percentiles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:approximation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sketching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:fanf"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.chrisstucchio.com/pubs/slides/crunchconf_2018/slides.pdf">
    <title>_AI Ethics, Impossibility Theorems and Tradeoffs_</title>
    <dc:date>2019-01-28T16:14:07+00:00</dc:date>
    <link>https://www.chrisstucchio.com/pubs/slides/crunchconf_2018/slides.pdf</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Great slides by Chris Stucchio, Director of Data Science at Simpl, discussing the various ethical strategies of utilitarianism, procedural fairness, allocative fairness, and representational fairness, and how they can be implemented (or at least acknowledged) in machine learning/statistical systems.

'one meta-ethical prescription: formalize your ethical principles as terms in your utility function or as constraints. It is nearly certain that tradeoffs between these principles exist, and if we don’t acknowledge this, we run the risk of unknowingly engaging in bad actions.']]></description>
<dc:subject>discrimination ethics racism race ai statistics compas machine-learning</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:bce0c1fb31f5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:discrimination"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ethics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:racism"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:race"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:compas"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:machine-learning"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://notesonthefront.typepad.com/politicaleconomy/2019/01/the-far-rights-problem-with-immigration-facts.html?fbclid=IwAR2MJkON4vAnWTuPsgFdop61--X0bndsmQd2SZz6ZIo8Jw2uWsGETGeP4Xc">
    <title>Some facts on immigration to Ireland</title>
    <dc:date>2019-01-15T22:15:29+00:00</dc:date>
    <link>https://notesonthefront.typepad.com/politicaleconomy/2019/01/the-far-rights-problem-with-immigration-facts.html?fbclid=IwAR2MJkON4vAnWTuPsgFdop61--X0bndsmQd2SZz6ZIo8Jw2uWsGETGeP4Xc</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Handy to have to hand next time right-wing talking points emerge:

<blockquote>Let’s summarise:

Ireland has a relatively high level of non-citizens in its population. But this is down to the high level of UK citizens and citizens from other English-speaking countries (US, Canada, Australia and New Zealand).
Ireland has significantly fewer non-citizens from outside the English-speaking world than high-income EU countries.
The proportion of non-citizens has remained stable over the last 10 years (i.e. there is no ‘surge’).
Non-citizens in Ireland are more integrated into the labour market than any other high-income EU country – that is, there is lower unemployment among non-citizens. So much for the ‘sponging-off-the-state’ argument.
We have had far fewer asylum-seekers and we grant asylum to far fewer than most other high-income EU countries.
The claims of the Far Right and their allies collapse when we look to reality. </blockquote>

]]></description>
<dc:subject>immigration facts statistics ireland asylum-seekers</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:8657155c24f0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:immigration"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:facts"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ireland"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:asylum-seekers"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.nytimes.com/2016/08/30/upshot/surprisingly-little-evidence-for-the-usual-wisdom-about-teeth.html">
    <title>Surprisingly Little Evidence for the Accepted Wisdom About Teeth - The New York Times</title>
    <dc:date>2018-09-14T13:55:42+00:00</dc:date>
    <link>https://www.nytimes.com/2016/08/30/upshot/surprisingly-little-evidence-for-the-usual-wisdom-about-teeth.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Turns out there is little evidence for many dental practices:

<blockquote>A systematic review in 2011 concluded that, in adults, toothbrushing with flossing versus toothbrushing alone most likely reduced gingivitis, or inflammation of the gums. But there was really weak evidence that it reduced plaque in the short term. There was no evidence that it reduced cavities. That’s pretty much what we learned recently.</blockquote>

]]></description>
<dc:subject>teeth dentistry dental health medicine statistics science</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:79ed3cc85695/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:teeth"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dentistry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dental"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:health"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:medicine"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:science"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.reuters.com/investigates/special-report/usa-immigration-court/">
    <title>ICE's Risk Classification Assessment turned into a digital rubber stamp</title>
    <dc:date>2018-06-26T22:39:40+00:00</dc:date>
    <link>https://www.reuters.com/investigates/special-report/usa-immigration-court/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[If this report is correct, this "statistics-based" risk classification tool is just a cruel joke:

<blockquote>To conform to Trump’s policies, Reuters has learned, ICE modified a tool officers have been using since 2013 when deciding whether an immigrant should be detained or released on bond. The computer-based Risk Classification Assessment uses statistics to determine an immigrant’s flight risk and danger to society.
Previously, the tool automatically recommended either “detain” or “release.” Last year, ICE spokesman Bourke said, the agency removed the “release” recommendation.</blockquote>

More: https://motherboard.vice.com/en_us/article/evk3kw/ice-modified-its-risk-assessment-software-so-it-automatically-recommends-detention
]]></description>
<dc:subject>immigration statistics machine-learning rubber-stamping fake-algorithms whitewashing ice us-politics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:52d02fbdf434/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:immigration"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:rubber-stamping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:fake-algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:whitewashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ice"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:us-politics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://bigbrotherwatch.org.uk/2018/04/a-closer-look-at-experian-big-data-and-artificial-intelligence-in-durham-police/">
    <title>A Closer Look at Experian Big Data and Artificial Intelligence in Durham Police</title>
    <dc:date>2018-04-09T10:27:17+00:00</dc:date>
    <link>https://bigbrotherwatch.org.uk/2018/04/a-closer-look-at-experian-big-data-and-artificial-intelligence-in-durham-police/</link>
    <dc:creator>jm</dc:creator><description><![CDATA['UK police bought profiling data for their artificial intelligence (AI) system, deciding whether to hold suspects in custody, from ... Experian.'

'The AI tool uses 34 data categories including the offender’s criminal history, combined with their age, gender and two types of residential postcode. The use of postcode data is problematic in predictive software of this kind as it carries a risk of perpetuating bias towards areas marked by community deprivation.']]></description>
<dc:subject>experian marketing credit-score data policing uk durham ai statistics crime hart</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:888686c06181/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:experian"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:marketing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:credit-score"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:policing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:uk"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:durham"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:crime"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hart"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://eev.ee/blog/2018/01/02/random-with-care/">
    <title>Random with care</title>
    <dc:date>2018-01-05T22:14:12+00:00</dc:date>
    <link>https://eev.ee/blog/2018/01/02/random-with-care/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Some tips about RNGs and their usage

(via Tony Finch)]]></description>
<dc:subject>coding random math rngs prngs statistics distributions</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:2f07c1bad73e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:coding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:random"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:math"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:rngs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:prngs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:distributions"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.wired.com/story/trueallele-software-transforming-how-courts-treat-dna-evidence/">
    <title>The Impenetrable Program Transforming How Courts Treat DNA Evidence | WIRED</title>
    <dc:date>2017-11-30T11:45:46+00:00</dc:date>
    <link>https://www.wired.com/story/trueallele-software-transforming-how-courts-treat-dna-evidence/</link>
    <dc:creator>jm</dc:creator><description><![CDATA['So the lab turned to TrueAllele, a program sold by Cybergenetics, a small company dedicated to helping law enforcement analyze DNA where regular lab tests fail. They do it with something called probabilistic genotyping, which uses complex mathematical formulas to examine the statistical likelihood that a certain genotype comes from one individual over another. It’s a type of DNA testing that’s becoming increasingly popular in courtrooms. '

[...] 'But now legal experts, along with Johnson’s advocates, are joining forces to argue to a California court that TrueAllele—the seemingly magic software that helped law enforcement analyze the evidence that tied Johnson to the crimes—should be forced to reveal the code that sent Johnson to prison. This code, they say, is necessary in order to properly evaluate the technology. In fact, they say, justice from an unknown algorithm is no justice at all.']]></description>
<dc:subject>law justice trueallele software dna evidence statistics probability code-review auditing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:8b677ee89f88/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:law"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:justice"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:trueallele"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:software"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dna"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:evidence"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:probability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:code-review"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:auditing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://theconversation.com/cycling-to-work-major-new-study-suggests-health-benefits-are-staggering-76292#link_time=1501254014">
    <title>Cycling to work: major new study suggests health benefits are staggering</title>
    <dc:date>2017-08-20T21:17:27+00:00</dc:date>
    <link>https://theconversation.com/cycling-to-work-major-new-study-suggests-health-benefits-are-staggering-76292#link_time=1501254014</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>We found that cycling to work was associated with a 41% lower risk of dying overall compared to commuting by car or public transport. Cycle commuters had a 52% lower risk of dying from heart disease and a 40% lower risk of dying from cancer. They also had 46% lower risk of developing heart disease and a 45% lower risk of developing cancer at all.</blockquote>

]]></description>
<dc:subject>cycling transport health medicine science commuting life statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:ce8e076e456b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cycling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:transport"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:health"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:medicine"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:commuting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:life"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://cyclingindustry.news/physical-separation-of-cyclists-from-traffic-crucial-to-dropping-injury-rates-shows-u-s-study/">
    <title>Physical separation of cyclists from traffic “crucial” to dropping injury rates, shows U.S. study</title>
    <dc:date>2017-05-13T21:01:51+00:00</dc:date>
    <link>https://cyclingindustry.news/physical-separation-of-cyclists-from-traffic-crucial-to-dropping-injury-rates-shows-u-s-study/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>Citing a further study of differing types of cycling infrastructure in Canada, the editorial writes that an 89% increase in safety was noted on streets with physical separation over streets where no such infrastructure existed. Unprotected cycling space was found to be 53% safer.

In 2014 there were 902 recorded cyclist fatalities in America and 35,206 serious injuries. Per kilometre cycled fatalities per 100 million kilometres cycled sat at 4.7. In the Netherlands and Denmark those rates sit at 1 and 1.1, respectively.</blockquote>

]]></description>
<dc:subject>cycling infrastructure roads safety accidents cars statistics us canada</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:c9fca3759498/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cycling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:infrastructure"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:roads"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:safety"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:accidents"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cars"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:us"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:canada"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://erikbern.com/2017/03/15/the-eigenvector-of-why-we-moved-from-language-x-to-language-y.html">
    <title>The eigenvector of &quot;Why we moved from language X to language Y&quot;</title>
    <dc:date>2017-03-16T23:18:20+00:00</dc:date>
    <link>https://erikbern.com/2017/03/15/the-eigenvector-of-why-we-moved-from-language-x-to-language-y.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[this is actually quite interesting data]]></description>
<dc:subject>statistics programming languages golang go mysql coding</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:bc481ec8d1b8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:languages"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:golang"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:go"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:mysql"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:coding"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/tdunning/t-digest">
    <title>tdunning/t-digest</title>
    <dc:date>2016-12-12T12:28:16+00:00</dc:date>
    <link>https://github.com/tdunning/t-digest</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>A new data structure for accurate on-line accumulation of rank-based statistics such as quantiles and trimmed means. The t-digest algorithm is also very parallel friendly making it useful in map-reduce and parallel streaming applications.

The t-digest construction algorithm uses a variant of 1-dimensional k-means clustering to produce a data structure that is related to the Q-digest. This t-digest data structure can be used to estimate quantiles or compute other rank statistics. The advantage of the t-digest over the Q-digest is that the t-digest can handle floating point values while the Q-digest is limited to integers. With small changes, the t-digest can handle any values from any ordered set that has something akin to a mean. The accuracy of quantile estimates produced by t-digests can be orders of magnitude more accurate than those produced by Q-digests in spite of the fact that t-digests are more compact when stored on disk.</blockquote>

Super-nice feature is that it's mergeable, so amenable to parallel usage across multiple hosts if required.  Java implementation, ASL licensing.]]></description>
<dc:subject>data-structures algorithms java t-digest statistics quantiles percentiles aggregation digests estimation ranking</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:aaf9fb613f21/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-structures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:java"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:t-digest"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:quantiles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:percentiles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aggregation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:digests"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ranking"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.argmin.net/2016/11/14/fall-of-big-data/">
    <title>The Fall of BIG DATA – arg min blog</title>
    <dc:date>2016-11-14T22:01:12+00:00</dc:date>
    <link>http://www.argmin.net/2016/11/14/fall-of-big-data/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Strongly agreed with this -- particularly the second of the three major failures, specifically:

<blockquote>Our community has developed remarkably effective tools to microtarget advertisements. But if you use ad models to deliver news, that’s propaganda. And just because we didn’t intend to spread rampant misinformation doesn’t mean we are not responsible.</blockquote>

]]></description>
<dc:subject>big-data analytics data-science statistics us-politics trump data science propaganda facebook silicon-valley</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:1b56e66fcb3a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:analytics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:us-politics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:trump"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:propaganda"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:facebook"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:silicon-valley"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.nytimes.com/2016/10/13/upshot/how-one-19-year-old-illinois-man-is-distorting-national-polling-averages.html?_r=0">
    <title>How One 19-Year-Old Illinois Man Is Distorting National Polling Averages - The New York Times</title>
    <dc:date>2016-10-13T10:57:27+00:00</dc:date>
    <link>http://www.nytimes.com/2016/10/13/upshot/how-one-19-year-old-illinois-man-is-distorting-national-polling-averages.html?_r=0</link>
    <dc:creator>jm</dc:creator><description><![CDATA[One "outlier" voter—a 19-year-old black Trump supporter—was weighted so heavily that it shifted the whole poll significantly.  Stats fail]]></description>
<dc:subject>statistics nytimes politics via:reddit donald-trump hilary-clinton polling panels polls</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:dd1769070f5d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:nytimes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:politics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:reddit"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:donald-trump"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hilary-clinton"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:polling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:panels"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:polls"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.theregister.co.uk/2016/07/03/mri_software_bugs_could_upend_years_of_research/?mt=1467666616578">
    <title>MRI software bugs could upend years of research - The Register</title>
    <dc:date>2016-07-05T10:40:25+00:00</dc:date>
    <link>http://www.theregister.co.uk/2016/07/03/mri_software_bugs_could_upend_years_of_research/?mt=1467666616578</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>In their paper at PNAS, they write: “the most common software packages for fMRI analysis (SPM, FSL, AFNI) can result in false-positive rates of up to 70%. These results question the validity of some 40,000 fMRI studies and may have a large impact on the interpretation of neuroimaging results.”

For example, a bug that's been sitting in a package called 3dClustSim for 15 years, fixed in May 2015, produced bad results (3dClustSim is part of the AFNI suite; the others are SPM and FSL).  That's not a gentle nudge that some results might be overstated: it's more like making a bonfire of thousands of scientific papers.

Further: “Our results suggest that the principal cause of the invalid cluster inferences is spatial autocorrelation functions that do not follow the assumed Gaussian shape”.

The researchers used published fMRI results, and along the way they swipe the fMRI community for their “lamentable archiving and data-sharing practices” that prevent most of the discipline's body of work being re-analysed. ®</blockquote>

]]></description>
<dc:subject>fmri science mri statistics cluster-inference autocorrelation data papers medicine false-positives fps neuroimaging</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:44448561ba5c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:fmri"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:mri"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cluster-inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:autocorrelation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:medicine"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:false-positives"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:fps"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:neuroimaging"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://rpubs.com/jrauser/percentiles">
    <title>You CAN Average Percentiles</title>
    <dc:date>2016-07-05T10:18:14+00:00</dc:date>
    <link>http://rpubs.com/jrauser/percentiles</link>
    <dc:creator>jm</dc:creator><description><![CDATA[John Rauser on this oft-cited dictum of percentile usage in monitoring, and when it's wrong and it's actually possible to reason with averaged percentiles, and when it breaks down.]]></description>
<dc:subject>statistics percentiles quantiles john-rauser histograms averaging mean p99</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:e2f019aeecad/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:percentiles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:quantiles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:john-rauser"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:histograms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:averaging"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:mean"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:p99"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://blog.cryptographyengineering.com/2016/06/what-is-differential-privacy.html">
    <title>Differential Privacy</title>
    <dc:date>2016-06-15T10:48:04+00:00</dc:date>
    <link>http://blog.cryptographyengineering.com/2016/06/what-is-differential-privacy.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Apple have announced they plan to use it; Google use a DP algorithm called RAPPOR in Chrome usage statistics.  In summary: "novel privacy technology that allows inferring statistics about populations while preserving the privacy of individual users".]]></description>
<dc:subject>apple privacy anonymization google rappor algorithms sampling populations statistics differential-privacy</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:664a8dab51a2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:apple"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:privacy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:anonymization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:google"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:rappor"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sampling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:populations"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:differential-privacy"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arstechnica.co.uk/security/2016/02/the-nsas-skynet-program-may-be-killing-thousands-of-innocent-people/">
    <title>The NSA’s SKYNET program may be killing thousands of innocent people</title>
    <dc:date>2016-02-16T14:55:33+00:00</dc:date>
    <link>http://arstechnica.co.uk/security/2016/02/the-nsas-skynet-program-may-be-killing-thousands-of-innocent-people/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Death by Random Forest: this project is a horrible misapplication of machine learning. Truly appalling, when a false positive means death:

<blockquote>
The NSA evaluates the SKYNET program using a subset of 100,000 randomly selected people (identified by their MSIDN/MSI pairs of their mobile phones), and a known group of seven terrorists. The NSA then trained the learning algorithm by feeding it six of the terrorists and tasking SKYNET to find the seventh. This data provides the percentages for false positives in the slide above.

"First, there are very few 'known terrorists' to use to train and test the model," Ball said. "If they are using the same records to train the model as they are using to test the model, their assessment of the fit is completely bullshit. The usual practice is to hold some of the data out of the training process so that the test includes records the model has never seen before. Without this step, their classification fit assessment is ridiculously optimistic."

The reason is that the 100,000 citizens were selected at random, while the seven terrorists are from a known cluster. Under the random selection of a tiny subset of less than 0.1 percent of the total population, the density of the social graph of the citizens is massively reduced, while the "terrorist" cluster remains strongly interconnected. Scientifically-sound statistical analysis would have required the NSA to mix the terrorists into the population set before random selection of a subset—but this is not practical due to their tiny number.

This may sound like a mere academic problem, but, Ball said, is in fact highly damaging to the quality of the results, and thus ultimately to the accuracy of the classification and assassination of people as "terrorists." A quality evaluation is especially important in this case, as the random forest method is known to overfit its training sets, producing results that are overly optimistic. The NSA's analysis thus does not provide a good indicator of the quality of the method.
</blockquote>]]></description>
<dc:subject>terrorism surveillance nsa security ai machine-learning random-forests horror false-positives classification statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:4442c0f23ed8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:terrorism"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:surveillance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:nsa"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:security"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:random-forests"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:horror"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:false-positives"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:classification"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.johndcook.com/blog/2016/01/30/general-birthday-problem/">
    <title>The general birthday problem</title>
    <dc:date>2016-02-01T11:03:25+00:00</dc:date>
    <link>http://www.johndcook.com/blog/2016/01/30/general-birthday-problem/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Good explanation and scipy code for the birthday paradox and hash collisions]]></description>
<dc:subject>hashing hashes collisions birthday-problem birthday-paradox coding probability statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:5e19813a6fb5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:collisions"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:birthday-problem"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:birthday-paradox"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:coding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:probability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://priceonomics.com/the-guinness-brewer-who-revolutionized-statistics/">
    <title>The Guinness Brewer Who Revolutionized Statistics</title>
    <dc:date>2016-01-04T12:37:26+00:00</dc:date>
    <link>http://priceonomics.com/the-guinness-brewer-who-revolutionized-statistics/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[William S. Gosset, discoverer of the Student's T-Test.  Amazon should have taken note of this trick:

<blockquote>Upon completing his work on the t-distribution, Gosset was eager to make his work public. It was an important finding, and one he wanted to share with the wider world. The managers of Guinness were not so keen on this. They realized they had an advantage over the competition by using this method, and were not excited about relinquishing that leg up. If Gosset were to publish the paper, other breweries would be on to them. So they came to a compromise. Guinness agreed to allow Gosset to publish the finding, as long as he used a pseudonym. This way, competitors would not be able to realize that someone on Guinness’s payroll was doing such research, and figure out that the company’s scientifically enlightened approach was key to their success.</blockquote>

]]></description>
<dc:subject>statistics william-gosset history guinness brewing t-test pseudonyms dublin</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:2209fbbfcbd5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:william-gosset"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:history"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:guinness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:brewing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:t-test"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:pseudonyms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dublin"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.dcscience.net/2015/12/11/placebo-effects-are-weak-regression-to-the-mean-is-the-main-reason-ineffective-treatments-appear-to-work/">
    <title>Placebo effects are weak: regression to the mean is the main reason ineffective treatments appear to work</title>
    <dc:date>2015-12-16T14:17:36+00:00</dc:date>
    <link>http://www.dcscience.net/2015/12/11/placebo-effects-are-weak-regression-to-the-mean-is-the-main-reason-ineffective-treatments-appear-to-work/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>“Statistical regression to the mean predicts that patients selected for abnormalcy will, on the average, tend to improve. We argue that most improvements attributed to the placebo effect are actually instances of statistical regression.”</blockquote>

]]></description>
<dc:subject>medicine science statistics placebo evidence via:hn regression-to-the-mean</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:71de2897190c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:medicine"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:placebo"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:evidence"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:hn"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:regression-to-the-mean"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://erikerlandson.github.io/blog/2015/11/20/very-fast-reservoir-sampling/">
    <title>Very Fast Reservoir Sampling</title>
    <dc:date>2015-12-15T12:03:18+00:00</dc:date>
    <link>http://erikerlandson.github.io/blog/2015/11/20/very-fast-reservoir-sampling/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[via Tony Finch.  'In this post I will demonstrate how to do reservoir sampling orders of magnitude faster than the traditional “naive” reservoir sampling algorithm, using a fast high-fidelity approximation to the reservoir sampling-gap distribution.']]></description>
<dc:subject>statistics reservoir-sampling sampling algorithms poisson bernoulli performance</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:c4fe345c5f6b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:reservoir-sampling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sampling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:poisson"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bernoulli"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.vividcortex.com/blog/why-percentiles-dont-work-the-way-you-think">
    <title>Why Percentiles Don’t Work the Way you Think</title>
    <dc:date>2015-12-09T11:26:47+00:00</dc:date>
    <link>https://www.vividcortex.com/blog/why-percentiles-dont-work-the-way-you-think</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Baron Schwartz on metrics, percentiles, and aggregation. +1, although as a HN commenter noted, quantile digests are probably the better fix]]></description>
<dc:subject>performance percentiles quantiles statistics metrics monitoring baron-schwartz vividcortex</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:c441c328a979/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:percentiles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:quantiles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:metrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:monitoring"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:baron-schwartz"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:vividcortex"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://googleresearch.blogspot.ie/2015/08/the-reusable-holdout-preserving.html">
    <title>The reusable holdout: Preserving validity in adaptive data analysis</title>
    <dc:date>2015-08-18T13:21:04+00:00</dc:date>
    <link>http://googleresearch.blogspot.ie/2015/08/the-reusable-holdout-preserving.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Useful stats hack from Google: "We show how to safely reuse a holdout data set many times to validate the results of adaptively chosen analyses."]]></description>
<dc:subject>statistics google reusable-holdout training ml machine-learning data-analysis holdout corpus sampling</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:0b87ef283056/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:google"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:reusable-holdout"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:training"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ml"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:holdout"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:corpus"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sampling"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.dublincycling.com/cycling/bike-theft-survey-results">
    <title>Dublin Bike Theft Survey Results</title>
    <dc:date>2015-05-08T08:56:17+00:00</dc:date>
    <link>http://www.dublincycling.com/cycling/bike-theft-survey-results</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Dublin Cycling Campaign's survey results: estimated 20,000 bikes stolen per year in Dublin; only 1% of thefts results in a conviction]]></description>
<dc:subject>dublin bikes cycling theft crime statistics infographics dcc</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:d3eead7d37e0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dublin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bikes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cycling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:theft"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:crime"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:infographics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dcc"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://america.aljazeera.com/articles/2014/10/29/sleep-study.html">
    <title>Ask the Decoder: Did I sign up for a global sleep study?</title>
    <dc:date>2015-03-09T17:34:21+00:00</dc:date>
    <link>http://america.aljazeera.com/articles/2014/10/29/sleep-study.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>How meaningful is this corporate data science, anyway?  Given the tech-savvy people in the Bay Area, Jawbone likely had a very dense sample of Jawbone wearers to draw from for its Napa earthquake analysis. That allowed it to look at proximity to the epicenter of the earthquake from location information.

Jawbone boasts its sample population of roughly “1 million Up wearers who track their sleep using Up by Jawbone.” But when looking into patterns county by county in the U.S., Jawbone states, it takes certain statistical liberties to show granularity while accounting for places where there may not be many Jawbone users.

So while Jawbone data can show us interesting things about sleep patterns across a very large population, we have to remember how selective that population is. Jawbone wearers are people who can afford a $129 wearable fitness gadget and the smartphone or computer to interact with the output from the device.

Jawbone is sharing what it learns with the public, but think of all the public health interests or other third parties that might be interested in other research questions from a large scale data set. Yet this data is not collected with scientific processes and controls and is not treated with the rigor and scrutiny that a scientific study requires.

Jawbone and other fitness trackers don’t give us the option to use their devices while opting out of contributing to the anonymous data sets they publish. Maybe that ought to change.
</blockquote>

]]></description>
<dc:subject>jawbone privacy data-protection anonymization aggregation data medicine health earthquakes statistics iot wearables</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:b2cff21e8284/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:jawbone"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:privacy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-protection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:anonymization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aggregation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:medicine"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:health"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:earthquakes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:iot"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:wearables"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://psy-lob-saw.blogspot.ie/2015/02/hdrhistogram-better-latency-capture.html">
    <title>HdrHistogram: A better latency capture method</title>
    <dc:date>2015-02-16T11:27:54+00:00</dc:date>
    <link>http://psy-lob-saw.blogspot.ie/2015/02/hdrhistogram-better-latency-capture.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[An excellent intro to HdrHistogram usage]]></description>
<dc:subject>hdrhistogram hdr histograms statistics latency measurement metrics percentiles quantiles gil-tene nitsan-wakart</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:fe1e9f2ecc3d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hdrhistogram"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hdr"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:histograms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:latency"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:measurement"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:metrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:percentiles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:quantiles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:gil-tene"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:nitsan-wakart"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.quickmeme.com/scumbag-data-scientist">
    <title>scumbag data scientist memes</title>
    <dc:date>2015-01-31T11:10:18+00:00</dc:date>
    <link>http://www.quickmeme.com/scumbag-data-scientist</link>
    <dc:creator>jm</dc:creator><description><![CDATA[lol.]]></description>
<dc:subject>funny data-science statistics machine-learning hadoop bayes memes image-macros</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:c2158da0bb75/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:funny"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hadoop"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bayes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:memes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:image-macros"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://fivethirtyeight.com/features/stop-playing-monopoly-with-your-kids-and-play-these-games-instead/">
    <title>Stop Playing Monopoly With Your Kids (And Play These Games Instead) | FiveThirtyEight</title>
    <dc:date>2015-01-26T16:16:07+00:00</dc:date>
    <link>http://fivethirtyeight.com/features/stop-playing-monopoly-with-your-kids-and-play-these-games-instead/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[538 apply their numbercrunching skills to the BoardGameGeek ratings index]]></description>
<dc:subject>boardgames games kids children 538 statistics ratings</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:72d6c2d53a07/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:boardgames"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:games"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:kids"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:children"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:538"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ratings"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.schneier.com/essays/archives/2005/03/why_data_mining_wont.html">
    <title>Schneier on Security: Why Data Mining Won't Stop Terror</title>
    <dc:date>2015-01-12T15:07:56+00:00</dc:date>
    <link>https://www.schneier.com/essays/archives/2005/03/why_data_mining_wont.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[A good reference URL to cut-and-paste when "scanning internet traffic for terrorist plots" rears its head:

<blockquote>This unrealistically accurate system will generate 1 billion false alarms for every real terrorist plot it uncovers. Every day of every year, the police will have to investigate 27 million potential plots in order to find the one real terrorist plot per month. Raise that false-positive accuracy to an absurd 99.9999 percent and you're still chasing 2,750 false alarms per day -- but that will inevitably raise your false negatives, and you're going to miss some of those 10 real plots.</blockquote>

Also, Ben Goldacre saying the same thing: http://www.badscience.net/2009/02/datamining-would-be-lovely-if-it-worked/]]></description>
<dc:subject>internet scanning filtering specificity statistics data-mining terrorism law nsa gchq false-positives false-negatives</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:40691f3d07b8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:internet"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scanning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:filtering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:specificity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:terrorism"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:law"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:nsa"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:gchq"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:false-positives"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:false-negatives"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://blog.twitter.com/2015/introducing-practical-and-robust-anomaly-detection-in-a-time-series">
    <title>Introducing practical and robust anomaly detection in a time series</title>
    <dc:date>2015-01-07T22:42:32+00:00</dc:date>
    <link>https://blog.twitter.com/2015/introducing-practical-and-robust-anomaly-detection-in-a-time-series</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Twitter open-sources an anomaly-spotting R package: <blockquote>Early detection of anomalies plays a key role in ensuring high-fidelity data is available to our own product teams and those of our data partners. This package helps us monitor spikes in user engagement on the platform surrounding holidays, major sporting events or during breaking news. Beyond surges in social engagement, exogenic factors – such as bots or spammers – may cause an anomaly in number of favorites or followers. The package can be used to find such bots or spam, as well as detect anomalies in system metrics after a new software release. We’re open-sourcing AnomalyDetection because we’d like the public community to evolve the package and learn from it as we have.</blockquote>

]]></description>
<dc:subject>statistics twitter r anomaly-detection outliers metrics time-series spikes holt-winters</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:569f792516a5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:twitter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:r"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:anomaly-detection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:outliers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:metrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:time-series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:spikes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:holt-winters"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.uncertml.org/">
    <title>UncertML</title>
    <dc:date>2015-01-07T00:17:32+00:00</dc:date>
    <link>http://www.uncertml.org/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>a conceptual model, with accompanying XML schema, that may be used to quantify and exchange complex uncertainties in data. The interoperable model can be used to describe uncertainty in a variety of ways including:

Samples
Statistics including mean, variance, standard deviation and quantile
Probability distributions including marginal and joint distributions and mixture models</blockquote>

]]></description>
<dc:subject>via:conor uncertainty statistics xml formats</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:d25e81904ab8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:conor"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:uncertainty"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:xml"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:formats"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.cs.utexas.edu/users/mckinley/papers/uncertainty-asplos-2014.pdf">
    <title>'Uncertain&lt;T&gt;: A First-Order Type for Uncertain Data' [paper, PDF]</title>
    <dc:date>2014-12-28T23:20:32+00:00</dc:date>
    <link>http://www.cs.utexas.edu/users/mckinley/papers/uncertainty-asplos-2014.pdf</link>
    <dc:creator>jm</dc:creator><description><![CDATA['Emerging applications increasingly use estimates such as sensor
data (GPS), probabilistic models, machine learning, big
data, and human data. Unfortunately, representing this uncertain
data with discrete types (floats, integers, and booleans)
encourages developers to pretend it is not probabilistic, which
causes three types of uncertainty bugs. (1) Using estimates
as facts ignores random error in estimates. (2) Computation
compounds that error. (3) Boolean questions on probabilistic
data induce false positives and negatives.
This paper introduces Uncertain<T>, a new programming
language abstraction for uncertain data. We implement a
Bayesian network semantics for computation and conditionals
that improves program correctness. The runtime uses sampling
and hypothesis tests to evaluate computation and conditionals
lazily and efficiently. We illustrate with sensor and
machine learning applications that Uncertain<T> improves
expressiveness and accuracy.'

(via Tony Finch)]]></description>
<dc:subject>uncertainty estimation types strong-typing coding probability statistics machine-learning sampling via:fanf</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:4e691997eba0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:uncertainty"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:types"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:strong-typing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:coding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:probability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sampling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:fanf"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.codemesh.io/static/upload/media/141562653162935languagewars.pdf">
    <title>&quot;The Programming Language Wars - Questions And Responsibilities for the Programming Language Community&quot;</title>
    <dc:date>2014-12-04T11:16:06+00:00</dc:date>
    <link>http://www.codemesh.io/static/upload/media/141562653162935languagewars.pdf</link>
    <dc:creator>jm</dc:creator><description><![CDATA[actual stats and data on how programming languages affect coding work]]></description>
<dc:subject>statistics data coding languages static-typing dynamic</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:87eae3c5fd2f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:coding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:languages"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:static-typing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dynamic"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.ssa.gov/history/lifeexpect.html">
    <title>Life expectancy increases are due mainly to healthier children, not longer old age</title>
    <dc:date>2014-11-11T10:35:33+00:00</dc:date>
    <link>http://www.ssa.gov/history/lifeexpect.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Interesting -- I hadn't expected this.

'Life expectancy at birth [in the US] in 1930 was indeed only 58 for men and 62 for women, and the retirement age was 65. But life expectancy at birth in the early decades of the 20th century was low due mainly to high infant mortality, and someone who died as a child would never have worked and paid into Social Security. A more appropriate measure is probably life expectancy after attainment of adulthood.' .... 'Men who attained age 65 could expect to collect Social Security benefits for almost 13 years (and the numbers are even higher for women).'

In Ireland, life expectancy at birth has increased 18.4 years since 1926 -- but life expectancy for men aged 65 (the pension age) has only increased by 3.8 years.  This means that increased life expectancy figures are not particularly relevant to the "pension crunch" story.

Via Fred Logue: https://twitter.com/fplogue/status/532093184646873089]]></description>
<dc:subject>via:fplogue statistics taxes life-expectancy pensions infant-mortality health 1930s</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:ee24e0e95004/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:fplogue"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:taxes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:life-expectancy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:pensions"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:infant-mortality"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:health"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:1930s"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/FelixGV/tehuti">
    <title>FelixGV/tehuti</title>
    <dc:date>2014-10-09T10:53:00+00:00</dc:date>
    <link>https://github.com/FelixGV/tehuti</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Felix says: 

'Like I said, I'd like to move it to a more general / non-personal repo in the future, but haven't had the time yet. Anyway, you can still browse the code there for now. It is not a big code base so not that hard to wrap one's mind around it.

It is Apache licensed and both Kafka and Voldemort are using it so I would say it is pretty self-contained (although Kafka has not moved to Tehuti proper, it is essentially the same code they're using, minus a few small fixes missing that we added).

Tehuti is a bit lower level than CodaHale (i.e.: you need to choose exactly which stats you want to measure and the boundaries of your histograms), but this is the type of stuff you would build a wrapper for and then re-use within your code base. For example: the Voldemort RequestCounter class.']]></description>
<dc:subject>asl2 apache open-source tehuti metrics percentiles quantiles statistics measurement latency kafka voldemort linkedin</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:a2f55ebce7bb/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:asl2"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:apache"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:open-source"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tehuti"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:metrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:percentiles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:quantiles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:measurement"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:latency"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:kafka"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:voldemort"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:linkedin"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://groups.google.com/forum/#!msg/project-voldemort/Y52UyHQ8tBA/9Ei79_RvS3EJ">
    <title>Tehuti</title>
    <dc:date>2014-10-08T09:45:50+00:00</dc:date>
    <link>https://groups.google.com/forum/#!msg/project-voldemort/Y52UyHQ8tBA/9Ei79_RvS3EJ</link>
    <dc:creator>jm</dc:creator><description><![CDATA[An embryonic metrics library for Java/Scala from Felix GV at LinkedIn, extracted from Kafka's metric implementation and in the new Voldemort release.  It fixes the major known problems with the Meter/Timer implementations in Coda-Hale/Dropwizard/Yammer Metrics.

'Regarding Tehuti: it has been extracted from Kafka's metric implementation. The code was originally written by Jay Kreps, and then maintained improved by some Kafka and Voldemort devs, so it definitely is not the work of just one person. It is in my repo at the moment but I'd like to put it in a more generally available (git and maven) repo in the future. I just haven't had the time yet...

As for comparing with CodaHale/Yammer, there were a few concerns with it, but the main one was that we didn't like the exponentially decaying histogram implementation. While that implementation is very appealing in terms of (low) memory usage, it has several misleading characteristics (a lack of incoming data points makes old measurements linger longer than they should, and there's also a fairly high possibility of losing interesting outlier data points). This makes the exp decaying implementation robust in high throughput fairly constant workloads, but unreliable in sparse or spiky workloads. The Tehuti implementation provides semantics that we find easier to reason with and with a small code footprint (which we consider a plus in terms of maintainability). Of course, it is still a fairly young project, so it could be improved further.'

More background at the kafka-dev thread: http://mail-archives.apache.org/mod_mbox/kafka-dev/201402.mbox/%3C131A7649-ED57-45CB-B4D6-F34063267664@linkedin.com%3E]]></description>
<dc:subject>kafka metrics dropwizard java scala jvm timers ewma statistics measurement latency sampling tehuti voldemort linkedin jay-kreps</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:b56664c1a098/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:kafka"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:metrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dropwizard"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:java"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scala"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:jvm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:timers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ewma"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:measurement"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:latency"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sampling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tehuti"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:voldemort"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:linkedin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:jay-kreps"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://godoc.org/github.com/codahale/tinystat/cmd/tinystat">
    <title>tinystat - GoDoc</title>
    <dc:date>2014-09-21T22:20:09+00:00</dc:date>
    <link>http://godoc.org/github.com/codahale/tinystat/cmd/tinystat</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>tinystat is used to compare two or more sets of measurements (e.g., runs of a multiple runs of benchmarks of two possible implementations) and determine if they are statistically different, using Student's t-test. It's inspired largely by FreeBSD's ministat (written by Poul-Henning Kamp).</blockquote>

]]></description>
<dc:subject>t-test student statistics go coda-hale tinystat stats tools command-line unix</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:4f23dc3f0640/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:t-test"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:student"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:go"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:coda-hale"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tinystat"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:stats"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tools"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:command-line"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:unix"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://google-opensource.blogspot.ie/2014/09/causalimpact-new-open-source-package.html">
    <title>CausalImpact: A new open-source package for estimating causal effects in time series</title>
    <dc:date>2014-09-15T10:50:48+00:00</dc:date>
    <link>http://google-opensource.blogspot.ie/2014/09/causalimpact-new-open-source-package.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>How can we measure the number of additional clicks or sales that an AdWords campaign generated? How can we estimate the impact of a new feature on app downloads? How do we compare the effectiveness of publicity across countries?

In principle, all of these questions can be answered through causal inference.

In practice, estimating a causal effect accurately is hard, especially when a randomised experiment is not available. One approach we've been developing at Google is based on Bayesian structural time-series models. We use these models to construct a synthetic control — what would have happened to our outcome metric in the absence of the intervention. This approach makes it possible to estimate the causal effect that can be attributed to the intervention, as well as its evolution over time.

We've been testing and applying structural time-series models for some time at Google. For example, we've used them to better understand the effectiveness of advertising campaigns and work out their return on investment. We've also applied the models to settings where a randomised experiment was available, to check how similar our effect estimates would have been without an experimental control.

Today, we're excited to announce the release of CausalImpact, an open-source R package that makes causal analyses simple and fast. With its release, all of our advertisers and users will be able to use the same powerful methods for estimating causal effects that we've been using ourselves.

Our main motivation behind creating the package has been to find a better way of measuring the impact of ad campaigns on outcomes. However, the CausalImpact package could be used for many other applications involving causal inference. Examples include problems found in economics, epidemiology, or the political and social sciences.
</blockquote>

]]></description>
<dc:subject>causal-inference r google time-series models bayes adwords advertising statistics estimation metrics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:a62b2f300071/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:causal-inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:r"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:google"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:time-series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bayes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:adwords"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:advertising"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:metrics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.psmag.com/navigation/politics-and-law/punished-poor-problem-using-big-data-justice-system-88651/">
    <title>Punished for Being Poor: Big Data in the Justice System</title>
    <dc:date>2014-08-19T13:20:55+00:00</dc:date>
    <link>http://www.psmag.com/navigation/politics-and-law/punished-poor-problem-using-big-data-justice-system-88651/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[This is awful.  Totally the wrong tool for the job -- a false positive rate which is minuscule for something like spam filtering could translate to a really horrible outcome for a human life.

<blockquote>Currently, over 20 states use data-crunching risk-assessment programs for sentencing decisions, usually consisting of proprietary software whose exact methods are unknown, to determine which individuals are most likely to re-offend. The Senate and House are also considering similar tools for federal sentencing. These data programs look at a variety of factors, many of them relatively static, like criminal and employment history, age, gender, education, finances, family background, and residence. Indiana, for example, uses the LSI-R, the legality of which was upheld by the state’s supreme court in 2010. Other states use a model called COMPAS, which uses many of the same variables as LSI-R and even includes high school grades. Others are currently considering the practice as a way to reduce the number of inmates and ensure public safety. (Many more states use or endorse similar assessments when sentencing sex offenders, and the programs have been used in parole hearings for years.) Even the American Law Institute has embraced the practice, adding it to the Model Penal Code, attesting to the tool’s legitimacy.</blockquote>


(via stroan)]]></description>
<dc:subject>via:stroan statistics false-positives big-data law law-enforcement penal-code risk sentencing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:26eaf70354e0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:stroan"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:false-positives"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:law"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:law-enforcement"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:penal-code"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:risk"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sentencing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://kamon.io/presentations/javacro14/#/">
    <title>Monitoring Reactive Applications with Kamon</title>
    <dc:date>2014-05-19T10:41:38+00:00</dc:date>
    <link>http://kamon.io/presentations/javacro14/#/</link>
    <dc:creator>jm</dc:creator><description><![CDATA["quality monitoring tools for apps built in Akka, Spray and Play!".  Uses Gil Tene's HDRHistogram and dropwizard Metrics under the hood.]]></description>
<dc:subject>metrics dropwizard hdrhistogram gil-tene kamon akka spray play reactive statistics java scala percentiles latency</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:dd1798009833/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:metrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dropwizard"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hdrhistogram"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:gil-tene"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:kamon"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:akka"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:spray"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:play"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:reactive"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:java"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scala"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:percentiles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:latency"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.irishtimes.com/news/health/daylight-saving-time-linked-to-heart-attacks-study-finds-1.1743441">
    <title>Daylight saving time linked to heart attacks, study finds</title>
    <dc:date>2014-03-31T08:41:24+00:00</dc:date>
    <link>http://www.irishtimes.com/news/health/daylight-saving-time-linked-to-heart-attacks-study-finds-1.1743441</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>Switching over to daylight saving time, and losing one hour of sleep, raised the risk of having a heart attack the following Monday by 25 per cent, compared to other Mondays during the year, according to a new US study released today. [...] The study found that heart attack risk fell 21 per cent later in the year, on the Tuesday after the clock was returned to standard time, and people got an extra hour’s sleep.</blockquote>

One clear answer: we need 25-hour days.

More details: http://www.sciencedaily.com/releases/2014/03/140329175108.htm --

<blockquote>Researchers used Michigan's BMC2 database, which collects data from all non-federal hospitals across the state, to identify admissions for heart attacks requiring percutaneous coronary intervention from Jan. 1, 2010 through Sept. 15, 2013. A total of 42,060 hospital admissions occurring over 1,354 days were included in the analysis. Total daily admissions were adjusted for seasonal and weekday variation, as the rate of heart attacks peaks in the winter and is lowest in the summer and is also greater on Mondays and lower over the weekend. The hospitals included in this study admit an average of 32 patients having a heart attack on any given Monday. But on the Monday immediately after springing ahead there were on average an additional eight heart attacks. There was no difference in the total weekly number of percutaneous coronary interventions performed for either the fall or spring time changes compared to the weeks before and after the time change.
</blockquote>]]></description>
<dc:subject>daylight dst daylight-savings time dates calendar science health heart-attacks michigan hospitals statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:1e3bf8d8c6b0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:daylight"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dst"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:daylight-savings"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:time"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dates"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:calendar"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:health"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:heart-attacks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:michigan"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hospitals"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://abe.is/analyzing-citibike-usage/">
    <title>Analyzing Citibike Usage</title>
    <dc:date>2014-03-18T14:51:29+00:00</dc:date>
    <link>http://abe.is/analyzing-citibike-usage/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Abe Stanway crunches the stats on Citibike usage in NYC, compared to the weather data from Wunderground.]]></description>
<dc:subject>data correlation statistics citibike cycling nyc data-science weather</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:5a3ec788c5c3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:correlation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:citibike"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cycling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:nyc"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:weather"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.bea.aero/fr/enquetes/vol.af.447/metron.search.analysis.pdf">
    <title>How the search for flight AF447 used Bayesian inference</title>
    <dc:date>2014-03-12T15:33:10+00:00</dc:date>
    <link>http://www.bea.aero/fr/enquetes/vol.af.447/metron.search.analysis.pdf</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Via jgc, the search for the downed Air France flight was optimized using this technique:

'Metron’s approach to this search planning problem is rooted in classical Bayesian inference, 
which allows organization of available data with associated uncertainties and computation of the 
Probability Distribution Function (PDF) for target location given these data. In following this 
approach, the first step was to gather the available information about the location of the impact site 
of the aircraft. This information was sometimes contradictory and filled with ambiguities and 
uncertainties. Using a Bayesian approach we organized this material into consistent scenarios, 
quantified the uncertainties with probability distributions, weighted the relative likelihood of each 
scenario, and performed a simulation to produce a prior PDF for the location of the wreck.']]></description>
<dc:subject>metron bayes bayesian-inference machine-learning statistics via:jgc air-france disasters probability inference searching</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:e7c127ca54da/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:metron"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bayes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bayesian-inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:jgc"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:air-france"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:disasters"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:probability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:searching"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.irishtimes.com/news/ireland/irish-news/sacked-google-worker-says-staff-ratings-fixed-to-fit-template-1.1721176">
    <title>Sacked Google worker says staff ratings fixed to fit template</title>
    <dc:date>2014-03-12T10:59:00+00:00</dc:date>
    <link>http://www.irishtimes.com/news/ireland/irish-news/sacked-google-worker-says-staff-ratings-fixed-to-fit-template-1.1721176</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Allegations of fixing to fit the stack-ranking curve: 'someone at Google always had to get a low score “of 2.9”, so the unit could match the bell curve. She said senior staff “calibrated” the ratings supplied by line managers to ensure conformity with the template and these calibrations could reduce a line manager’s assessment of an employee, in effect giving them the poisoned score of less than three.']]></description>
<dc:subject>stack-ranking google ireland employment work bell-curve statistics eric-schmidt</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:49351fe530da/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:stack-ranking"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:google"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ireland"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:employment"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:work"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bell-curve"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:eric-schmidt"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://twitter.com/jeremyjarvis/status/428848527226437632/photo/1">
    <title>&quot;A data scientist is a ...&quot;</title>
    <dc:date>2014-02-01T21:01:57+00:00</dc:date>
    <link>https://twitter.com/jeremyjarvis/status/428848527226437632/photo/1</link>
    <dc:creator>jm</dc:creator><description><![CDATA["A data scientist is a statistician who lives in San Francisco" - slide from Monkigras this year.  lols

]]></description>
<dc:subject>data-scientist statistics statistician funny jokes san-francisco tech monkigras</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:2030d373aa62/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-scientist"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistician"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:funny"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:jokes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:san-francisco"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tech"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:monkigras"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.edge.org/response-detail/25401">
    <title>Nassim Taleb: retire Standard Deviation</title>
    <dc:date>2014-01-15T21:32:15+00:00</dc:date>
    <link>http://www.edge.org/response-detail/25401</link>
    <dc:creator>jm</dc:creator><description><![CDATA['Use the mean absolute deviation [...] it corresponds to "real life" much better than the first—and to reality. In fact, whenever people make decisions after being supplied with the standard deviation number, they act as if it were the expected mean deviation.'

Graydon Hoare in turn recommends the median absolute deviation.  I prefer percentiles, anyway ;)]]></description>
<dc:subject>statistics standard-deviation stddev maths nassim-taleb deviation volatility rmse distributions</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:f9906de54f2e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:standard-deviation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:stddev"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:maths"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:nassim-taleb"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:deviation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:volatility"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:rmse"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:distributions"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://cf.broadsheet.ie/wp-content/uploads/2013/11/20131106.jpg">
    <title>&quot;The Top 6 Reasons This Infographic Is Just Wrong Enough To Sound Convincing&quot;</title>
    <dc:date>2013-11-06T17:10:13+00:00</dc:date>
    <link>http://cf.broadsheet.ie/wp-content/uploads/2013/11/20131106.jpg</link>
    <dc:creator>jm</dc:creator><description><![CDATA[+1 to all of this, but especially #5 (polar area diagrams).]]></description>
<dc:subject>diagrams infographics infoviz visualisation data fail statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:e8bf0f36332a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:diagrams"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:infographics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:infoviz"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:visualisation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:fail"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://armon.github.io/statsite/">
    <title>Statsite</title>
    <dc:date>2013-11-01T16:58:31+00:00</dc:date>
    <link>http://armon.github.io/statsite/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[A C reimplementation of Etsy's statsd, with some interesting memory optimizations.

<blockquote>Statsite is designed to be both highly performant, and very flexible. To achieve this, it implements the stats collection and aggregation in pure C, using libev to be extremely fast. This allows it to handle hundreds of connections, and millions of metrics. After each flush interval expires, statsite performs a fork/exec to start a new stream handler invoking a specified application. Statsite then streams the aggregated metrics over stdin to the application, which is free to handle the metrics as it sees fit.  This allows statsite to aggregate metrics and then ship metrics to any number of sinks (Graphite, SQL databases, etc). There is an included Python script that ships metrics to graphite.</blockquote>

]]></description>
<dc:subject>statsd graphite statsite performance statistics service-metrics metrics ops</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:98f01ffaa9cc/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statsd"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:graphite"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statsite"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:service-metrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:metrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.cs.rutgers.edu/~muthu/bquant.pdf">
    <title>&quot;Effective Computation of Biased Quantiles over Data Streams&quot; [paper]</title>
    <dc:date>2013-11-01T16:57:06+00:00</dc:date>
    <link>http://www.cs.rutgers.edu/~muthu/bquant.pdf</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>
Skew is prevalent in many data sources such as IP traffic streams. To continually summarize the distribution of such data, a high-biased set of quantiles (e.g., 50th, 90th and 99th percentiles) with finer error guarantees at higher ranks (e.g., errors of 5, 1 and 0.1 percent, respectively) is more useful than uniformly distributed quantiles (e.g., 25th, 50th and 75th percentiles) with uniform error guarantees. In this paper, we address the following two problems. First, can we compute quantiles with finer error guarantees for the higher ranks of the data distribution effectively, using less space and computation time than computing all quantiles uniformly at the finest error? Second, if specific quantiles and their error bounds are requested a priori, can the necessary space usage and computation time be reduced?  We answer both questions in the affirmative by formalizing them as the “high-biased” quantiles and the “targeted” quantiles problems, respectively, and presenting algorithms with provable guarantees, that perform significantly better than previously known solutions for these problems. We implemented our algorithms in the Gigascope data stream management system, and evaluated alternate approaches for maintaining the relevant summary structures. Our experimental results on real and synthetic IP data streams complement our theoretical analyses, and highlight the importance of lightweight, non-blocking implementations when maintaining summary structures over high-speed data streams.
</blockquote>

Implemented as a timer-histogram storage system in http://armon.github.io/statsite/ .]]></description>
<dc:subject>statistics quantiles percentiles stream-processing skew papers histograms latency algorithms</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:54346b7d58f0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:quantiles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:percentiles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:stream-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:skew"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:histograms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:latency"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://static.googleusercontent.com/external_content/untrusted_dlcp/research.google.com/en//pubs/archive/36737.pdf">
    <title>_Availability in Globally Distributed Storage Systems_ [pdf]</title>
    <dc:date>2013-09-24T22:08:06+00:00</dc:date>
    <link>http://static.googleusercontent.com/external_content/untrusted_dlcp/research.google.com/en//pubs/archive/36737.pdf</link>
    <dc:creator>jm</dc:creator><description><![CDATA[empirical BigTable and GFS failure numbers from Google are orders of magnitude higher than naïve independent-failure models. (via kragen)]]></description>
<dc:subject>via:kragen failure bigtable gfs statistics outages reliability</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:bb7d3593288e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:kragen"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:failure"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bigtable"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:gfs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:outages"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:reliability"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://vudlab.com/fat-tails.html">
    <title>Fat Tails</title>
    <dc:date>2013-07-02T20:44:37+00:00</dc:date>
    <link>http://vudlab.com/fat-tails.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Nice d3.js demo of the fat-tailed distribution:

<blockquote>A fat-tailed distribution looks normal but the parts far away from the average are thicker, meaning a higher chance of huge deviations. [...] Fat tails don't mean more variance; just different variance. For a given variance, a higher chance of extreme deviations implies a lower chance of medium ones.</blockquote>

]]></description>
<dc:subject>dataviz via:hn statistics visualization distributions fat-tailed kurtosis d3.js javascript variance deviation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:ccd01496776d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dataviz"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:hn"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:visualization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:distributions"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:fat-tailed"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:kurtosis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:d3.js"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:javascript"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:variance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:deviation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://boundary.com/blog/2013/06/27/announcing-early-warnings/">
    <title>Boundary's Early Warnings alarm</title>
    <dc:date>2013-06-27T21:09:22+00:00</dc:date>
    <link>http://boundary.com/blog/2013/06/27/announcing-early-warnings/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Anomaly detection on network throughput metrics, alarming if throughputs on selected flows deviate by 1, 2, or 3 standard deviations from a historical baseline.]]></description>
<dc:subject>network-monitoring throughput boundary service-metrics alarming ops statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:d20187298612/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:network-monitoring"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:throughput"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:boundary"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:service-metrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:alarming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.medicalindependent.ie/20844/news">
    <title>Not the ‘best in the world’ - The Medical Independent</title>
    <dc:date>2013-04-18T13:50:37+00:00</dc:date>
    <link>http://www.medicalindependent.ie/20844/news</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Debunking this prolife talking point:

<blockquote>‘Our maternity services are amongst the best in the world’. This phrase has been much hackneyed since the heartbreaking death of Savita Halappanavar was revealed in mid October. James Reilly and other senior politicians are particularly guilty of citing this inaccurate position. So what is the state of Irish maternity services and how do our figures compare with other comparable countries? Let’s start with the statistics.</blockquote>

The bottom line:

<blockquote>Eight deaths per 100,000 is not bad, but it ranks our maternity services far from the best in world and below countries such as Slovakia and Poland.</blockquote>]]></description>
<dc:subject>pro-choice ireland savita medicine health maternity morbidity statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:a517efcd5326/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:pro-choice"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ireland"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:savita"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:medicine"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:health"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:maternity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:morbidity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
</rdf:RDF>