<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (Zardoz)</title>
    <link>https://pinboard.in/u:Zardoz/public/</link>
    <description>recent bookmarks from Zardoz</description>
    <items>
      <rdf:Seq>	<rdf:li rdf:resource="https://webarchive.jira.com/wiki/display/Heritrix/Heritrix"/>
	<rdf:li rdf:resource="http://geocommons.com/"/>
	<rdf:li rdf:resource="http://mybyteofcode.blogspot.com.au/2010/02/parse-csv-file-with-boost-tokenizer-in.html"/>
	<rdf:li rdf:resource="http://www.qtrac.eu/diffpdf.html"/>
	<rdf:li rdf:resource="http://www.antlr.org/wiki/dashboard.action"/>
	<rdf:li rdf:resource="http://tika.apache.org/1.1/formats.html"/>
	<rdf:li rdf:resource="https://bigml.com/"/>
	<rdf:li rdf:resource="http://highlyscalable.wordpress.com/2012/02/01/mapreduce-patterns/"/>
	<rdf:li rdf:resource="http://page.mi.fu-berlin.de/muehleis/ccrdf/"/>
	<rdf:li rdf:resource="http://www.commoncrawl.org/data/"/>
	<rdf:li rdf:resource="http://csvkit.readthedocs.org/en/latest/index.html"/>
	<rdf:li rdf:resource="https://scraperwiki.com/"/>
	<rdf:li rdf:resource="http://pandas.sourceforge.net/"/>
	<rdf:li rdf:resource="http://www.feedsapi.com/"/>
	<rdf:li rdf:resource="http://mydatamine.com/?p=1100"/>
	<rdf:li rdf:resource="http://www.readwriteweb.com/hack/2011/03/text-extraction.php"/>
	<rdf:li rdf:resource="http://datapatterns.org/"/>
	<rdf:li rdf:resource="http://nrabinowitz.github.com/pjscrape/"/>
	<rdf:li rdf:resource="http://www.alchemyapi.com/"/>
	<rdf:li rdf:resource="http://www.wordfrequency.info/"/>
	<rdf:li rdf:resource="https://code.google.com/apis/predict/"/>
	<rdf:li rdf:resource="http://txt2re.com/"/>
	<rdf:li rdf:resource="http://rubular.com/"/>
	<rdf:li rdf:resource="http://tomazkovacic.com/blog/56/list-of-resources-article-text-extraction-from-html-documents/"/>
	<rdf:li rdf:resource="http://code.google.com/p/google-refine/"/>
	<rdf:li rdf:resource="http://tomazkovacic.com/blog/14/extracting-article-text-from-html-documents/"/>
	<rdf:li rdf:resource="http://unpaper.berlios.de/"/>
	<rdf:li rdf:resource="http://www.propublica.org/nerds/item/doc-dollars-guides-collecting-the-data"/>
	<rdf:li rdf:resource="http://scrapy.org/"/>
      </rdf:Seq>
    </items>
  </channel><item rdf:about="https://webarchive.jira.com/wiki/display/Heritrix/Heritrix">
    <title>Heritrix - Heritrix - IA Webteam Confluence</title>
    <dc:date>2012-08-28T05:01:18+00:00</dc:date>
    <link>https://webarchive.jira.com/wiki/display/Heritrix/Heritrix</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>opensource web scraping datamining html</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Zardoz/b:26175d9bd4a6/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:opensource"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:web"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:html"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://geocommons.com/">
    <title>GeoCommons</title>
    <dc:date>2012-06-26T01:11:00+00:00</dc:date>
    <link>http://geocommons.com/</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>maps visualization opensource cloud webdesign datamining scraping</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Zardoz/b:f11b0ebaaa26/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:maps"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:visualization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:opensource"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:cloud"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:webdesign"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://mybyteofcode.blogspot.com.au/2010/02/parse-csv-file-with-boost-tokenizer-in.html">
    <title>Parse CSV File With Boost Tokenizer In C++ - My Byte of Code</title>
    <dc:date>2012-06-12T05:04:47+00:00</dc:date>
    <link>http://mybyteofcode.blogspot.com.au/2010/02/parse-csv-file-with-boost-tokenizer-in.html</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>c++ tutorials csv parsing scraping datamining databases</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Zardoz/b:c7defe5dc05d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:c++"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:tutorials"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:csv"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:parsing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:databases"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.qtrac.eu/diffpdf.html">
    <title>DiffPDF</title>
    <dc:date>2012-06-12T00:08:28+00:00</dc:date>
    <link>http://www.qtrac.eu/diffpdf.html</link>
    <dc:creator>Zardoz</dc:creator><description><![CDATA[PDF file differ]]></description>
<dc:subject>pdf fileformats editor comparison pc_configuration text scraping datamining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Zardoz/b:12cd989b7362/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:fileformats"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:editor"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:comparison"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:pc_configuration"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.antlr.org/wiki/dashboard.action">
    <title>Dashboard - ANTLR Project</title>
    <dc:date>2012-05-25T02:53:39+00:00</dc:date>
    <link>http://www.antlr.org/wiki/dashboard.action</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>parsing c++ java programming text scraping</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Zardoz/b:e2eb4a404fe9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:parsing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:c++"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:java"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://tika.apache.org/1.1/formats.html">
    <title>Apache Tika - Supported Document Formats</title>
    <dc:date>2012-05-11T01:52:07+00:00</dc:date>
    <link>http://tika.apache.org/1.1/formats.html</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>programming web scraping text formatconverter datamining javascript</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Zardoz/b:704319a61260/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:web"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:formatconverter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:javascript"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://bigml.com/">
    <title>BigML - Machine Learning Made Easy</title>
    <dc:date>2012-02-27T23:04:09+00:00</dc:date>
    <link>https://bigml.com/</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>datamining ai scraping statistics databases cloud</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Zardoz/b:0ed7bc2c2b16/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:databases"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:cloud"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://highlyscalable.wordpress.com/2012/02/01/mapreduce-patterns/">
    <title>MapReduce Patterns, Algorithms, and Use Cases</title>
    <dc:date>2012-02-14T00:00:58+00:00</dc:date>
    <link>http://highlyscalable.wordpress.com/2012/02/01/mapreduce-patterns/</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>datamining databases non-sql algorithms text scraping visualization</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Zardoz/b:9181dfa51357/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:databases"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:non-sql"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:visualization"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://page.mi.fu-berlin.de/muehleis/ccrdf/">
    <title>Web Data Commons</title>
    <dc:date>2012-01-30T00:07:45+00:00</dc:date>
    <link>http://page.mi.fu-berlin.de/muehleis/ccrdf/</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>cloud datamining web scraping statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Zardoz/b:1744e6cb87af/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:cloud"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:web"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.commoncrawl.org/data/">
    <title>Data | CommonCrawl</title>
    <dc:date>2012-01-30T00:07:17+00:00</dc:date>
    <link>http://www.commoncrawl.org/data/</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>cloud datamining web scraping statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Zardoz/b:c1e311d17984/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:cloud"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:web"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://csvkit.readthedocs.org/en/latest/index.html">
    <title>csvkit 0.4.2 (beta) — csvkit 0.4.2 (beta) documentation</title>
    <dc:date>2012-01-23T23:07:56+00:00</dc:date>
    <link>http://csvkit.readthedocs.org/en/latest/index.html</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>databases visualization text datamining cloud scraping programming csv</dc:subject>
<dc:identifier>https://pinboard.in/u:Zardoz/b:70e7b9405d49/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:databases"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:visualization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:cloud"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:csv"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://scraperwiki.com/">
    <title>ScraperWiki</title>
    <dc:date>2011-10-27T23:29:55+00:00</dc:date>
    <link>https://scraperwiki.com/</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>scraping web html datamining databases text</dc:subject>
<dc:identifier>https://pinboard.in/u:Zardoz/b:a54e7400e4bb/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:web"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:html"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:databases"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:text"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://pandas.sourceforge.net/">
    <title>pandas: powerful Python data analysis toolkit</title>
    <dc:date>2011-09-19T23:15:01+00:00</dc:date>
    <link>http://pandas.sourceforge.net/</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>programming datamining python statistics framework opensource databases scraping text visualization</dc:subject>
<dc:identifier>https://pinboard.in/u:Zardoz/b:c97de4786023/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:framework"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:opensource"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:databases"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:visualization"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.feedsapi.com/">
    <title>Feeds API full text rss feed information extraction - content retrieval | Create fulltextrss</title>
    <dc:date>2011-09-13T00:20:14+00:00</dc:date>
    <link>http://www.feedsapi.com/</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>rss cloud scraping text html datamining</dc:subject>
<dc:identifier>https://pinboard.in/u:Zardoz/b:c7ee64166f1b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:rss"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:cloud"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:html"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://mydatamine.com/?p=1100">
    <title>Python Data Mining Resources</title>
    <dc:date>2011-08-27T09:52:31+00:00</dc:date>
    <link>http://mydatamine.com/?p=1100</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>python scraping text visualization datamining databases</dc:subject>
<dc:identifier>https://pinboard.in/u:Zardoz/b:c17be6a7b96b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:visualization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:databases"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.readwriteweb.com/hack/2011/03/text-extraction.php">
    <title>Overview of Text Extraction Algorithms</title>
    <dc:date>2011-08-23T06:37:55+00:00</dc:date>
    <link>http://www.readwriteweb.com/hack/2011/03/text-extraction.php</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>datamining html algorithms scraping databases text reference</dc:subject>
<dc:identifier>https://pinboard.in/u:Zardoz/b:020d1005ab75/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:html"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:databases"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:reference"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://datapatterns.org/">
    <title>Datapatterns v0.1 documentation</title>
    <dc:date>2011-08-23T06:36:56+00:00</dc:date>
    <link>http://datapatterns.org/</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>scraping text html xml web cloud datamining databases visualization reference tutorials</dc:subject>
<dc:identifier>https://pinboard.in/u:Zardoz/b:12fc1bb202c1/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:html"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:xml"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:web"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:cloud"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:databases"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:visualization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:reference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:tutorials"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://nrabinowitz.github.com/pjscrape/">
    <title>pjscrape: A web-scraping framework written in Javascript, using PhantomJS and jQuery</title>
    <dc:date>2011-08-20T12:58:13+00:00</dc:date>
    <link>http://nrabinowitz.github.com/pjscrape/</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>javascript scraping datamining databases web framework</dc:subject>
<dc:identifier>https://pinboard.in/u:Zardoz/b:84f813eb0054/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:javascript"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:databases"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:web"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:framework"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.alchemyapi.com/">
    <title>AlchemyAPI - Transforming Text Into Knowledge</title>
    <dc:date>2011-05-19T01:37:45+00:00</dc:date>
    <link>http://www.alchemyapi.com/</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>datamining scraping text visualization search algorithms framework free</dc:subject>
<dc:identifier>https://pinboard.in/u:Zardoz/b:1cc60dbdb553/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:visualization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:search"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:framework"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:free"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.wordfrequency.info/">
    <title>Corpus-based word frequency lists, collocates, and n-grams</title>
    <dc:date>2011-05-04T00:26:07+00:00</dc:date>
    <link>http://www.wordfrequency.info/</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>language text reference datamining algorithms statistics scraping visualization</dc:subject>
<dc:identifier>https://pinboard.in/u:Zardoz/b:752f01aeb858/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:language"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:reference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:visualization"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://code.google.com/apis/predict/">
    <title>Google Prediction API - Google Code</title>
    <dc:date>2011-05-01T11:00:20+00:00</dc:date>
    <link>https://code.google.com/apis/predict/</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>datamining google statistics algorithms scraping html cloud</dc:subject>
<dc:identifier>https://pinboard.in/u:Zardoz/b:810ae1ed1c15/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:google"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:html"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:cloud"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://txt2re.com/">
    <title>regular expression generator</title>
    <dc:date>2011-05-01T10:49:58+00:00</dc:date>
    <link>http://txt2re.com/</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>programming scraping datamining regex</dc:subject>
<dc:identifier>https://pinboard.in/u:Zardoz/b:18d734bcd322/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:regex"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://rubular.com/">
    <title>Rubular: a Ruby regular expression editor and tester</title>
    <dc:date>2011-04-22T10:18:44+00:00</dc:date>
    <link>http://rubular.com/</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>programming reference utilities regex datamining scraping web</dc:subject>
<dc:identifier>https://pinboard.in/u:Zardoz/b:136ec2be9d23/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:reference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:utilities"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:regex"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:web"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://tomazkovacic.com/blog/56/list-of-resources-article-text-extraction-from-html-documents/">
    <title>List of resources: Article text extraction from HTML documents | My tech blog.</title>
    <dc:date>2011-04-07T00:14:24+00:00</dc:date>
    <link>http://tomazkovacic.com/blog/56/list-of-resources-article-text-extraction-from-html-documents/</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>html text scraping algorithms datamining reference meta</dc:subject>
<dc:identifier>https://pinboard.in/u:Zardoz/b:10137c35e53f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:html"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:reference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:meta"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://code.google.com/p/google-refine/">
    <title>google-refine - Google Refine, a power tool for working with messy data (formerly Freebase Gridworks) - Google Project Hosting</title>
    <dc:date>2011-03-30T00:00:47+00:00</dc:date>
    <link>http://code.google.com/p/google-refine/</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>google datamining databases opensource scraping spider cloud</dc:subject>
<dc:identifier>https://pinboard.in/u:Zardoz/b:aec790610923/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:google"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:databases"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:opensource"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:spider"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:cloud"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://tomazkovacic.com/blog/14/extracting-article-text-from-html-documents/">
    <title>Extracting article text from HTML documents</title>
    <dc:date>2011-03-26T02:33:47+00:00</dc:date>
    <link>http://tomazkovacic.com/blog/14/extracting-article-text-from-html-documents/</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>html text datamining scraping algorithms spider formatconverter</dc:subject>
<dc:identifier>https://pinboard.in/u:Zardoz/b:d7cc1843c516/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:html"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:spider"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:formatconverter"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://unpaper.berlios.de/">
    <title>unpaper 0.3</title>
    <dc:date>2011-01-17T23:41:48+00:00</dc:date>
    <link>http://unpaper.berlios.de/</link>
    <dc:creator>Zardoz</dc:creator><description><![CDATA[Scanned page cleanup before ocr-ing
]]></description>
<dc:subject>datamining text graphics ocr images scraping software formatconverter</dc:subject>
<dc:identifier>https://pinboard.in/u:Zardoz/b:4e4b417bcf35/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:graphics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:ocr"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:images"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:software"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:formatconverter"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.propublica.org/nerds/item/doc-dollars-guides-collecting-the-data">
    <title>Scraping for Journalism: A Guide for Collecting Data - ProPublica</title>
    <dc:date>2011-01-17T23:02:25+00:00</dc:date>
    <link>http://www.propublica.org/nerds/item/doc-dollars-guides-collecting-the-data</link>
    <dc:creator>Zardoz</dc:creator><description><![CDATA[Tutorials for data scraping and format conversion from websites.
]]></description>
<dc:subject>reference datamining html pdf text flash spider ocr graphics tiff fileformats scraping tutorials formatconverter</dc:subject>
<dc:identifier>https://pinboard.in/u:Zardoz/b:ee6393731e39/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:reference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:html"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:flash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:spider"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:ocr"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:graphics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:tiff"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:fileformats"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:tutorials"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:formatconverter"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://scrapy.org/">
    <title>Scrapy | An open source web scraping framework for Python</title>
    <dc:date>2010-10-19T00:43:24+00:00</dc:date>
    <link>http://scrapy.org/</link>
    <dc:creator>Zardoz</dc:creator><dc:subject>datamining openSource programming software spider scraping python</dc:subject>
<dc:identifier>https://pinboard.in/u:Zardoz/b:aff2bb4d30a9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:openSource"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:software"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:spider"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:scraping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Zardoz/t:python"/>
</rdf:Bag></taxo:topics>
</item>
</rdf:RDF>