<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (arthegall)</title>
    <link>https://pinboard.in/u:arthegall/public/</link>
    <description>recent bookmarks from arthegall</description>
    <items>
      <rdf:Seq>	<rdf:li rdf:resource="http://arxiv.org/abs/1412.4869"/>
	<rdf:li rdf:resource="https://orgtheory.wordpress.com/2016/03/14/algorithms-in-society-comments-on-a-talk-by-jure-leskovec/"/>
	<rdf:li rdf:resource="http://biorxiv.org/content/early/2015/06/05/020453"/>
	<rdf:li rdf:resource="http://blog.thegrandlocus.com/static/misc/is_the_scientific_paper_fraudulent.pdf"/>
	<rdf:li rdf:resource="http://continuum.io/blog/blaze"/>
	<rdf:li rdf:resource="http://www.harvardlawreview.org/symposium/papers2012/cohen.pdf"/>
	<rdf:li rdf:resource="http://illposed.net/boston_r_meetup_2012.pdf"/>
	<rdf:li rdf:resource="https://cloudant.com/blog/cloudant-labs-on-google-spanner/"/>
	<rdf:li rdf:resource="https://code.google.com/p/graphchi/wiki/IntroductionToGraphChi"/>
	<rdf:li rdf:resource="https://github.com/twitter/cassovary"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1206.0594"/>
	<rdf:li rdf:resource="http://en.scientificcommons.org/53598026"/>
	<rdf:li rdf:resource="http://queue.acm.org/detail.cfm?id=2169076"/>
	<rdf:li rdf:resource="http://www.cs.berkeley.edu/~alekh/"/>
	<rdf:li rdf:resource="http://db.cs.berkeley.edu/jmh/papers/cleaning-unece.pdf"/>
	<rdf:li rdf:resource="http://www.wired.com/magazine/2010/06/ff_sergeys_search/all/1"/>
	<rdf:li rdf:resource="http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.19.8013"/>
	<rdf:li rdf:resource="http://www.sciencemag.org/cgi/content/full/323/5919/1297#ref3"/>
	<rdf:li rdf:resource="http://www.springer.com/statistics/statistical+theory+and+methods/book/978-0-387-32906-2"/>
      </rdf:Seq>
    </items>
  </channel><item rdf:about="http://arxiv.org/abs/1412.4869">
    <title>[1412.4869] Expectation propagation as a way of life</title>
    <dc:date>2016-09-20T14:39:40+00:00</dc:date>
    <link>http://arxiv.org/abs/1412.4869</link>
    <dc:creator>arthegall</dc:creator><description><![CDATA[Andrew Gelman has "expectation" and "propagation" tattooed across his knuckles]]></description>
<dc:subject>arxiv machinelearning approximation big-data research-article message-passing expectation-propagation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:arthegall/b:1dcd87d9d0f1/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:arxiv"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:machinelearning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:approximation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:research-article"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:message-passing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:expectation-propagation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://orgtheory.wordpress.com/2016/03/14/algorithms-in-society-comments-on-a-talk-by-jure-leskovec/">
    <title>algorithms in society – comments on a talk by jure leskovec | orgtheory.net</title>
    <dc:date>2016-03-20T18:27:23+00:00</dc:date>
    <link>https://orgtheory.wordpress.com/2016/03/14/algorithms-in-society-comments-on-a-talk-by-jure-leskovec/</link>
    <dc:creator>arthegall</dc:creator><dc:subject>to-watch sociology big-data regression-trees machinelearning</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:arthegall/b:2f742ab63efc/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:to-watch"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:sociology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:regression-trees"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:machinelearning"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://biorxiv.org/content/early/2015/06/05/020453">
    <title>DISSECT: A new tool for analyzing extremely large genomic datasets | bioRxiv</title>
    <dc:date>2015-06-08T10:10:56+00:00</dc:date>
    <link>http://biorxiv.org/content/early/2015/06/05/020453</link>
    <dc:creator>arthegall</dc:creator><dc:subject>phenotypes mixed-linear-models genomics big-data bioarxiv preprint research-article</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:arthegall/b:8b941a13dcbf/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:phenotypes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:mixed-linear-models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:genomics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:bioarxiv"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:preprint"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:research-article"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://blog.thegrandlocus.com/static/misc/is_the_scientific_paper_fraudulent.pdf">
    <title>Peter Medawar, &quot;Is the Scientific Paper Fraudulent?&quot; (1964)</title>
    <dc:date>2014-10-08T14:55:35+00:00</dc:date>
    <link>http://blog.thegrandlocus.com/static/misc/is_the_scientific_paper_fraudulent.pdf</link>
    <dc:creator>arthegall</dc:creator><description><![CDATA["The simplest application of the Millsian process of induction to sociology came in a rather strange movement called Mass Observation. The belief underlying Mass Observation was apparently this: that if one could only record and set down the actual raw facts about what people do and what people say in pubs, in trains, when they make love to each other, when they are playing games, and so on, then somehow, from this wealth of information, a great generalization would inevitably emerge."  

Dudes (and lady dudes), John Stuart Mill was the ULTIMATE BIG DATA HIPSTER, he was doing it before it was cool.  ]]></description>
<dc:subject>publishing science big-data awesome peter-medawar induction</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:arthegall/b:43e1dc41a639/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:publishing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:awesome"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:peter-medawar"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:induction"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://continuum.io/blog/blaze">
    <title>A Python Compiler for Big Data</title>
    <dc:date>2012-12-18T10:58:53+00:00</dc:date>
    <link>http://continuum.io/blog/blaze</link>
    <dc:creator>arthegall</dc:creator><dc:subject>via:chl via:arsyed data python big-data compiler blaze numpy</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:arthegall/b:6fa682ef858f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:via:chl"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:via:arsyed"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:compiler"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:blaze"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:numpy"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.harvardlawreview.org/symposium/papers2012/cohen.pdf">
    <title>Julie Cohen, &quot;What Privacy Is For&quot; (Harvard Law Review, forthcoming) **DRAFT**</title>
    <dc:date>2012-11-14T11:14:39+00:00</dc:date>
    <link>http://www.harvardlawreview.org/symposium/papers2012/cohen.pdf</link>
    <dc:creator>arthegall</dc:creator><description><![CDATA[Via Julian Sanchez, via Evgeny Morozov.  To read and criticize -- in particular, Section IV seems like kind of a mess.  Isn't there more to "data" than marketing?  Isn't there more to privacy than just the "construction of selfhood?"  Morozov describes it (https://twitter.com/normative/status/268606367714123776) as "the best paper on privacy you'll read all year," but I really hope that's not true.  ]]></description>
<dc:subject>to-be-shot-after-a-fair-trial evgeny-morozov julian-sanchez privacy paper legal politics culture big-data</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:arthegall/b:7ec40ebe02fe/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:to-be-shot-after-a-fair-trial"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:evgeny-morozov"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:julian-sanchez"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:privacy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:paper"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:legal"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:politics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:culture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:big-data"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://illposed.net/boston_r_meetup_2012.pdf">
    <title>Large-Scale Linear Algebra with R</title>
    <dc:date>2012-10-29T02:17:14+00:00</dc:date>
    <link>http://illposed.net/boston_r_meetup_2012.pdf</link>
    <dc:creator>arthegall</dc:creator><description><![CDATA[Bryan Lewis's slides on using R and SciDB. ]]></description>
<dc:subject>scidb bryan-lewis slides R linear-algebra big-data</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:arthegall/b:912316d11b21/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:scidb"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:bryan-lewis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:slides"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:R"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:linear-algebra"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:big-data"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://cloudant.com/blog/cloudant-labs-on-google-spanner/">
    <title>Cloudant Labs: On Google Spanner | Cloudant</title>
    <dc:date>2012-09-24T20:56:36+00:00</dc:date>
    <link>https://cloudant.com/blog/cloudant-labs-on-google-spanner/</link>
    <dc:creator>arthegall</dc:creator><description><![CDATA[Mike Miller writes about the Spanner paper.]]></description>
<dc:subject>cloudant mike-miller database spanner google architecture nosql opinion big-data</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:arthegall/b:694a64d62f1b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:cloudant"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:mike-miller"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:database"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:spanner"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:google"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:nosql"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:opinion"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:big-data"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://code.google.com/p/graphchi/wiki/IntroductionToGraphChi">
    <title>IntroductionToGraphChi - graphchi - How GraphChi works - Big Data - small machine - Google Project Hosting</title>
    <dc:date>2012-08-23T12:56:00+00:00</dc:date>
    <link>https://code.google.com/p/graphchi/wiki/IntroductionToGraphChi</link>
    <dc:creator>arthegall</dc:creator><description><![CDATA["TODO: write better and link the paper." <-- funny.
I need to show this to Dan S. when he gets out of the hospital.  I think that the idea of sliding-window computations on large graphs, and "vertex-centric" graph computations, is close to the vague idea I was sketching out to him on the whiteboard.  The only thing that was different was the idea of deciding how the nodes themselves were laid out, to optimize the use of small windows in a sequential (linear) scan, using probabilistic graph embedding techniques.  Maybe just computing the graph's (or graph Laplacian's) Fiedler vector...?  I forget.]]></description>
<dc:subject>graph graph-computation graphchi streaming data computation big-data</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:arthegall/b:5274d6a8d7e9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:graph"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:graph-computation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:graphchi"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:streaming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:computation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:big-data"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/twitter/cassovary">
    <title>twitter/cassovary</title>
    <dc:date>2012-07-03T19:13:35+00:00</dc:date>
    <link>https://github.com/twitter/cassovary</link>
    <dc:creator>arthegall</dc:creator><description><![CDATA["Cassovary is a simple "big graph" processing library for the JVM. Most JVM-hosted graph libraries are flexible but not space efficient. Cassovary is designed from the ground up to first be able to efficiently handle graphs with billions of nodes and edges."  --- Hmmmm.]]></description>
<dc:subject>graphs twitter github scala java jvm library big-data</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:arthegall/b:e4a250515319/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:graphs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:twitter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:github"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:scala"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:java"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:jvm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:library"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:big-data"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1206.0594">
    <title>[1206.0594] Simple and Deterministic Matrix Sketching</title>
    <dc:date>2012-06-09T14:02:15+00:00</dc:date>
    <link>http://arxiv.org/abs/1206.0594</link>
    <dc:creator>arthegall</dc:creator><dc:subject>via:vaguery sketches data big-data arxiv research-article approximation stream-algorithms</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:arthegall/b:d1753b6c5087/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:via:vaguery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:sketches"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:arxiv"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:research-article"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:approximation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:stream-algorithms"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://en.scientificcommons.org/53598026">
    <title>Van Durme, Lall, &quot;Probabilistic Counting with Randomized Storage&quot; IJCAI, 2009.</title>
    <dc:date>2012-05-23T13:51:50+00:00</dc:date>
    <link>http://en.scientificcommons.org/53598026</link>
    <dc:creator>arthegall</dc:creator><description><![CDATA[I like the "TOMB Counter" name -- this is a reasonably important technique, and this is the first place I've found it referenced in the literature.]]></description>
<dc:subject>probabilistic-methods morris-counter bloom-filters research-article big-data to-re-read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:arthegall/b:6d76b16231cd/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:probabilistic-methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:morris-counter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:bloom-filters"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:research-article"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:to-re-read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://queue.acm.org/detail.cfm?id=2169076">
    <title>Your Mouse is a Database - ACM Queue</title>
    <dc:date>2012-04-02T17:07:36+00:00</dc:date>
    <link>http://queue.acm.org/detail.cfm?id=2169076</link>
    <dc:creator>arthegall</dc:creator><dc:subject>streaming-data monads programming database big-data acm</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:arthegall/b:30056ada1904/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:streaming-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:monads"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:database"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:acm"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.cs.berkeley.edu/~alekh/">
    <title>Alekh Agarwal</title>
    <dc:date>2012-02-13T13:10:44+00:00</dc:date>
    <link>http://www.cs.berkeley.edu/~alekh/</link>
    <dc:creator>arthegall</dc:creator><dc:subject>via:cshalizi researcher homepage big-data statistics online-optimization learning machinelearning convex-optimization</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:arthegall/b:02a7218d9572/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:via:cshalizi"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:researcher"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:homepage"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:online-optimization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:machinelearning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:convex-optimization"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://db.cs.berkeley.edu/jmh/papers/cleaning-unece.pdf">
    <title>Hellerstein, &quot;Quantitative Data Cleaning for Large Databases&quot;</title>
    <dc:date>2012-01-10T18:57:39+00:00</dc:date>
    <link>http://db.cs.berkeley.edu/jmh/papers/cleaning-unece.pdf</link>
    <dc:creator>arthegall</dc:creator><description><![CDATA[Via Adam M.  To read.]]></description>
<dc:subject>to-read database data big-data data-cleaning pdf</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:arthegall/b:db7871071aa7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:to-read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:database"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:data-cleaning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:pdf"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.wired.com/magazine/2010/06/ff_sergeys_search/all/1">
    <title>&quot;Sergey Brin’s Search for a Parkinson’s Cure&quot; (Wired)</title>
    <dc:date>2010-06-28T19:56:54+00:00</dc:date>
    <link>http://www.wired.com/magazine/2010/06/ff_sergeys_search/all/1</link>
    <dc:creator>arthegall</dc:creator><description><![CDATA["Grove disagrees somewhat with Brin’s emphasis on patterns over hypothesis. “You have to be looking for something,” he says. But the two compare notes on the disease from time to time; both are enthusiastic and active investors in the Michael J. Fox Foundation. (Grove is even known to show up on the online discussion forums.)" --- Hmm.   ("All Wired articles are wrong, and increasingly...." etc etc).
]]></description>
<dc:subject>wired sergey-brin michael-j-fox-foundation parkinsons health medicine science big-data obscurely-referential</dc:subject>
<dc:identifier>https://pinboard.in/u:arthegall/b:787060554df2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:wired"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:sergey-brin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:michael-j-fox-foundation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:parkinsons"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:health"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:medicine"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:obscurely-referential"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.19.8013">
    <title>Banko and Brill, &quot;Scaling to Very Very Large Corpora for Natural Language Disambiguation&quot;</title>
    <dc:date>2009-09-24T02:59:52+00:00</dc:date>
    <link>http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.19.8013</link>
    <dc:creator>arthegall</dc:creator><description><![CDATA[To read.
]]></description>
<dc:subject>nlp data citeseer research-article big-data</dc:subject>
<dc:identifier>https://pinboard.in/u:arthegall/b:c6ea6f44c984/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:citeseer"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:research-article"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:big-data"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.sciencemag.org/cgi/content/full/323/5919/1297#ref3">
    <title>&quot;Beyond the Data Deluge&quot; -- Bell et al. 323 (5919): 1297 -- Science</title>
    <dc:date>2009-03-09T21:07:00+00:00</dc:date>
    <link>http://www.sciencemag.org/cgi/content/full/323/5919/1297#ref3</link>
    <dc:creator>arthegall</dc:creator><description><![CDATA[A better take (overview) on "Google Science" and "Big Data" -- "The urgency for new tools and technologies to enable data-intensive research has been building for a decade or more (2, 7). In 2007, Jim Gray laid out his vision for a fourth research paradigm--data-intensive science--which he described as collaborative, networked, and data-driven (1, 10). He defined eScience as the synthesis of information technology and science that enables challenges on previously unimaginable scales to be tackled.

Despite the enormous potential of this approach, data-intensive science has been slow to develop due to the subtleties of databases, schemas, and ontologies, and a general lack of understanding of these topics by the scientific community. ... Indeed, many areas of science lag commercial use and understanding of data analytics by at least a decade."
]]></description>
<dc:subject>science via:creeder big-data jim-gray</dc:subject>
<dc:identifier>https://pinboard.in/u:arthegall/b:33071a355474/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:via:creeder"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:jim-gray"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.springer.com/statistics/statistical+theory+and+methods/book/978-0-387-32906-2">
    <title>Unwin, Theus, and Hofmann, &quot;Graphics of Large Datasets: Visualizing a Million&quot;</title>
    <dc:date>2009-01-11T13:47:03+00:00</dc:date>
    <link>http://www.springer.com/statistics/statistical+theory+and+methods/book/978-0-387-32906-2</link>
    <dc:creator>arthegall</dc:creator><description><![CDATA[Via Andrew Gelman's blog... I probably should pick up a copy of this book.  ($90, gah.)
]]></description>
<dc:subject>visualization book graphics big-data</dc:subject>
<dc:identifier>https://pinboard.in/u:arthegall/b:020b6959ffba/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:visualization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:book"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:graphics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:arthegall/t:big-data"/>
</rdf:Bag></taxo:topics>
</item>
</rdf:RDF>