<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <!-- Channel header: feed title, link and description, followed by the
       ordered table of contents that lists the URI of every item below. -->
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (heyitsnoah)</title>
    <link>https://pinboard.in/u:heyitsnoah/public/</link>
    <description>recent bookmarks from heyitsnoah</description>
    <items>
      <rdf:Seq>
        <rdf:li rdf:resource="https://github.com/apexdodge/NASCAR-Screen-Scraper"/>
        <rdf:li rdf:resource="http://tomazkovacic.com/blog/56/list-of-resources-article-text-extraction-from-html-documents/"/>
        <rdf:li rdf:resource="http://code.google.com/p/boilerpipe/"/>
        <rdf:li rdf:resource="http://tomazkovacic.com/blog/14/extracting-article-text-from-html-documents/"/>
      </rdf:Seq>
    </items>
  </channel>

<!-- Bookmark entry: apexdodge/NASCAR-Screen-Scraper on GitHub. -->
<item rdf:about="https://github.com/apexdodge/NASCAR-Screen-Scraper">
  <title>apexdodge/NASCAR-Screen-Scraper</title>
  <dc:date>2012-01-29T00:21:36+00:00</dc:date>
  <link>https://github.com/apexdodge/NASCAR-Screen-Scraper</link>
  <dc:creator>heyitsnoah</dc:creator>
  <description><![CDATA["NASCAR.com, to my knowledge, does not provide an API for acquiring driver stats. Here is a screen scraper for NASCAR.com to acquire all the relevant stats and races."]]></description>
  <dc:subject>nascar scraping code python</dc:subject>
  <dc:source>https://pinboard.in/</dc:source>
  <dc:identifier>https://pinboard.in/u:heyitsnoah/b:c11b06f88051/</dc:identifier>
  <!-- One topic resource per space-separated tag in dc:subject. -->
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:heyitsnoah/t:nascar"/>
      <rdf:li rdf:resource="https://pinboard.in/u:heyitsnoah/t:scraping"/>
      <rdf:li rdf:resource="https://pinboard.in/u:heyitsnoah/t:code"/>
      <rdf:li rdf:resource="https://pinboard.in/u:heyitsnoah/t:python"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark entry: list of resources on article text extraction. -->
<item rdf:about="http://tomazkovacic.com/blog/56/list-of-resources-article-text-extraction-from-html-documents/">
  <title>List of resources: Article text extraction from HTML documents</title>
  <dc:date>2011-03-20T20:36:14+00:00</dc:date>
  <link>http://tomazkovacic.com/blog/56/list-of-resources-article-text-extraction-from-html-documents/</link>
  <dc:creator>heyitsnoah</dc:creator>
  <!-- The CDATA payload ends with a newline; the closing marker stays at
       column 0 so no extra whitespace becomes part of the text. -->
  <description><![CDATA["Following up to my overview of article text extractors, I’ll try to compile a list of research papers, articles, web APIs, libraries and other software that I encountered during my research."
]]></description>
  <dc:subject>scraping</dc:subject>
  <dc:identifier>https://pinboard.in/u:heyitsnoah/b:e480d731559c/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:heyitsnoah/t:scraping"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark entry: the boilerpipe project page. -->
<item rdf:about="http://code.google.com/p/boilerpipe/">
  <title>boilerpipe</title>
  <dc:date>2011-03-19T19:32:10+00:00</dc:date>
  <link>http://code.google.com/p/boilerpipe/</link>
  <dc:creator>heyitsnoah</dc:creator>
  <!-- The CDATA payload ends with a newline; the closing marker stays at
       column 0 so no extra whitespace becomes part of the text. -->
  <description><![CDATA["The boilerpipe library provides algorithms to detect and remove the surplus "clutter" (boilerplate, templates) around the main textual content of a web page."
]]></description>
  <dc:subject>code opensource scraping</dc:subject>
  <dc:identifier>https://pinboard.in/u:heyitsnoah/b:e364abfd0b26/</dc:identifier>
  <!-- One topic resource per space-separated tag in dc:subject. -->
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:heyitsnoah/t:code"/>
      <rdf:li rdf:resource="https://pinboard.in/u:heyitsnoah/t:opensource"/>
      <rdf:li rdf:resource="https://pinboard.in/u:heyitsnoah/t:scraping"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark entry: overview of extracting article text from HTML. -->
<item rdf:about="http://tomazkovacic.com/blog/14/extracting-article-text-from-html-documents/">
  <title>Overview: Extracting article text from HTML documents</title>
  <dc:date>2011-03-19T19:28:32+00:00</dc:date>
  <link>http://tomazkovacic.com/blog/14/extracting-article-text-from-html-documents/</link>
  <dc:creator>heyitsnoah</dc:creator>
  <!-- The CDATA payload ends with a newline; the closing marker stays at
       column 0 so no extra whitespace becomes part of the text. -->
  <description><![CDATA["In the world of web scraping, text mining and article reading utilities (readability bookmarklet) there is an ever growing demand for utilities that are capable of distinguishing parts of a HTML document which represent an article apart from other common website building blocks like menus, headers, footers, ads etc."
]]></description>
  <dc:subject>scraping</dc:subject>
  <dc:identifier>https://pinboard.in/u:heyitsnoah/b:239f61490585/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:heyitsnoah/t:scraping"/>
    </rdf:Bag>
  </taxo:topics>
</item>
</rdf:RDF>