<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <!-- Channel metadata. The rdf:Seq lists the rdf:about URI of every item in this feed. -->
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (kwbr)</title>
    <link>https://pinboard.in/u:kwbr/public/</link>
    <description>recent bookmarks from kwbr</description>
    <items>
      <rdf:Seq>
        <rdf:li rdf:resource="https://github.com/cullenwatson/jobspy"/>
        <rdf:li rdf:resource="https://www.zenrows.com/blog/mastering-web-scraping-in-python-scaling-to-distributed-crawling#get-html-headless-browsers"/>
        <rdf:li rdf:resource="https://github.com/twintproject/twint"/>
        <rdf:li rdf:resource="https://github.com/clips/pattern"/>
        <rdf:li rdf:resource="https://github.com/my8100/scrapydweb"/>
        <rdf:li rdf:resource="https://github.com/codelucas/newspaper"/>
        <rdf:li rdf:resource="https://github.com/emadehsan/thal"/>
        <rdf:li rdf:resource="http://tabula.technology/"/>
        <rdf:li rdf:resource="https://github.com/ssteuteville/scrapyz"/>
        <rdf:li rdf:resource="https://github.com/jmcarp/robobrowser"/>
        <rdf:li rdf:resource="https://github.com/binux/pyspider"/>
        <rdf:li rdf:resource="http://jakeaustwick.me/python-web-scraping-resource/?mc_list=python"/>
        <rdf:li rdf:resource="http://scrapy.org/"/>
        <rdf:li rdf:resource="http://stockrt.github.com/p/emulating-a-browser-in-python-with-mechanize/"/>
        <rdf:li rdf:resource="http://nrabinowitz.github.com/pjscrape/"/>
        <rdf:li rdf:resource="http://scraperwiki.com/"/>
        <rdf:li rdf:resource="http://www.selectorgadget.com/"/>
      </rdf:Seq>
    </items>
  </channel>
<!-- Bookmark item: JobSpy. Each tag in dc:subject is repeated as one rdf:li under taxo:topics. -->
<item rdf:about="https://github.com/cullenwatson/jobspy">
  <title>cullenwatson/JobSpy: Jobs scraper library for LinkedIn, Indeed &amp; ZipRecruiter - github.com/</title>
  <dc:date>2023-09-15T22:17:50+00:00</dc:date>
  <link>https://github.com/cullenwatson/jobspy</link>
  <dc:creator>kwbr</dc:creator>
  <description><![CDATA[<blockquote>Jobs scraper library for LinkedIn, Indeed & ZipRecruiter - cullenwatson/JobSpy: Jobs scraper library for LinkedIn, Indeed & ZipRecruiter</blockquote>]]></description>
  <dc:subject>career scraping python jobsearch jobs</dc:subject>
  <dc:identifier>https://pinboard.in/u:kwbr/b:508817408b94/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:career"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:scraping"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:python"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:jobsearch"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:jobs"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark item: ZenRows distributed crawling article. Tags in dc:subject are mirrored under taxo:topics. -->
<item rdf:about="https://www.zenrows.com/blog/mastering-web-scraping-in-python-scaling-to-distributed-crawling#get-html-headless-browsers">
  <title>Mastering Web Scraping in Python: Scaling to Distributed Crawling - ZenRows</title>
  <dc:date>2021-09-10T21:20:21+00:00</dc:date>
  <link>https://www.zenrows.com/blog/mastering-web-scraping-in-python-scaling-to-distributed-crawling#get-html-headless-browsers</link>
  <dc:creator>kwbr</dc:creator>
  <description><![CDATA[<blockquote>Build your own distributed crawler with custom parsers per domain. Discover new pages and store the exact content you need — all in less than 300 LOC.</blockquote>]]></description>
  <dc:subject>redis celery python scraping</dc:subject>
  <dc:identifier>https://pinboard.in/u:kwbr/b:071fc59bdbbc/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:redis"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:celery"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:python"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:scraping"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark item: twint. Tags in dc:subject are mirrored under taxo:topics. -->
<item rdf:about="https://github.com/twintproject/twint">
  <title>twintproject/twint: An advanced Twitter scraping &amp; OSINT tool written in Python that doesn't use Twitter's API, allowing you to scrape a user's followers, following, Tweets and more while evading most API limitations.</title>
  <dc:date>2021-04-20T14:39:30+00:00</dc:date>
  <link>https://github.com/twintproject/twint</link>
  <dc:creator>kwbr</dc:creator>
  <description><![CDATA[<blockquote>An advanced Twitter scraping & OSINT tool written in Python that doesn't use Twitter's API, allowing you to scrape a user's followers, following, Tweets and more while evading most API limitations. - twintproject/twint</blockquote>]]></description>
  <dc:subject>twitter scraping python</dc:subject>
  <dc:identifier>https://pinboard.in/u:kwbr/b:b9408f62a7fe/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:twitter"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:scraping"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:python"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark item: clips/pattern. The description here is plain text inside CDATA, with no blockquote wrapper. -->
<item rdf:about="https://github.com/clips/pattern">
  <title>clips/pattern: Web mining module for Python, with tools for scraping, natural language processing, machine learning, network analysis and visualization.</title>
  <dc:date>2021-03-04T20:20:48+00:00</dc:date>
  <link>https://github.com/clips/pattern</link>
  <dc:creator>kwbr</dc:creator>
  <description><![CDATA[Web mining module for Python, with tools for scraping, natural language processing, machine learning, network analysis and visualization. - clips/pattern]]></description>
  <dc:subject>python datamining scraping</dc:subject>
  <dc:identifier>https://pinboard.in/u:kwbr/b:f5bb4928b223/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:python"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:datamining"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:scraping"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark item: scrapydweb. Tags in dc:subject are mirrored under taxo:topics. -->
<item rdf:about="https://github.com/my8100/scrapydweb">
  <title>my8100/scrapydweb: Scrapyd cluster management, Scrapy log analysis &amp; visualization, Basic auth, Auto eggifying, Email notice and Mobile UI. GIF DEMO</title>
  <dc:date>2019-02-01T14:52:25+00:00</dc:date>
  <link>https://github.com/my8100/scrapydweb</link>
  <dc:creator>kwbr</dc:creator>
  <description><![CDATA[<blockquote>Scrapyd cluster management, Scrapy log analysis & visualization, Basic auth, Auto eggifying, Email notice and Mobile UI. :film_strip: GIF DEMO :point_right: - my8100/scrapydweb</blockquote>]]></description>
  <dc:subject>python scraping scrapy webscraping</dc:subject>
  <dc:identifier>https://pinboard.in/u:kwbr/b:b8d6cf0150b4/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:python"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:scraping"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:scrapy"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:webscraping"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark item: newspaper. Tags in dc:subject are mirrored under taxo:topics. -->
<item rdf:about="https://github.com/codelucas/newspaper">
  <title>codelucas/newspaper: News, full-text, and article metadata extraction in Python 3. Advanced docs:</title>
  <dc:date>2019-01-13T11:57:00+00:00</dc:date>
  <link>https://github.com/codelucas/newspaper</link>
  <dc:creator>kwbr</dc:creator>
  <description><![CDATA[<blockquote>News, full-text, and article metadata extraction in Python 3. Advanced docs: - codelucas/newspaper</blockquote>]]></description>
  <dc:subject>python scraping nlp crawler news-aggregator</dc:subject>
  <dc:identifier>https://pinboard.in/u:kwbr/b:d26b71fff9b6/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:python"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:scraping"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:nlp"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:crawler"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:news-aggregator"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark item: thal. Tags in dc:subject are mirrored under taxo:topics. -->
<item rdf:about="https://github.com/emadehsan/thal">
  <title>emadehsan/thal: Getting started with Puppeteer and Chrome Headless for Web Scraping</title>
  <dc:date>2017-09-06T11:51:40+00:00</dc:date>
  <link>https://github.com/emadehsan/thal</link>
  <dc:creator>kwbr</dc:creator>
  <description><![CDATA[<blockquote>thal - Getting started with Puppeteer and Chrome Headless for Web Scraping</blockquote>]]></description>
  <dc:subject>javascript scraping chrome headless automation</dc:subject>
  <dc:identifier>https://pinboard.in/u:kwbr/b:6ee581b685af/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:javascript"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:scraping"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:chrome"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:headless"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:automation"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark item: Tabula. This entry carries no description element. -->
<item rdf:about="http://tabula.technology/">
  <title>Tabula: Extract Tables from PDFs</title>
  <dc:date>2016-03-09T20:00:05+00:00</dc:date>
  <link>http://tabula.technology/</link>
  <dc:creator>kwbr</dc:creator>
  <dc:subject>pdf scraping csv excel</dc:subject>
  <dc:identifier>https://pinboard.in/u:kwbr/b:79b069fa61e3/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:pdf"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:scraping"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:csv"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:excel"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark item: scrapyz. Tags in dc:subject are mirrored under taxo:topics. -->
<item rdf:about="https://github.com/ssteuteville/scrapyz">
  <title>ssteuteville/scrapyz</title>
  <dc:date>2015-07-28T14:30:47+00:00</dc:date>
  <link>https://github.com/ssteuteville/scrapyz</link>
  <dc:creator>kwbr</dc:creator>
  <description><![CDATA[<blockquote>scrapyz - "Scrape Easy" - an extension of the Scrapy framework.</blockquote>]]></description>
  <dc:subject>python crawler scraping webscraping</dc:subject>
  <dc:identifier>https://pinboard.in/u:kwbr/b:3eb67903129d/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:python"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:crawler"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:scraping"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:webscraping"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark item: robobrowser. Tags in dc:subject are mirrored under taxo:topics. -->
<item rdf:about="https://github.com/jmcarp/robobrowser">
  <title>jmcarp/robobrowser</title>
  <dc:date>2015-01-09T09:24:33+00:00</dc:date>
  <link>https://github.com/jmcarp/robobrowser</link>
  <dc:creator>kwbr</dc:creator>
  <description><![CDATA[<blockquote>RoboBrowser is a simple, Pythonic library for browsing the web without a standalone web browser. RoboBrowser can fetch a page, click on links and buttons, and fill out and submit forms. If you need to interact with web services that don't have APIs, RoboBrowser can help.</blockquote>]]></description>
  <dc:subject>python scraping web mechanize crawler browser</dc:subject>
  <dc:identifier>https://pinboard.in/u:kwbr/b:ee491765e927/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:python"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:scraping"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:web"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:mechanize"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:crawler"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:browser"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark item: pyspider. Tags in dc:subject are mirrored under taxo:topics. -->
<item rdf:about="https://github.com/binux/pyspider">
  <title>binux/pyspider</title>
  <dc:date>2014-12-26T13:50:21+00:00</dc:date>
  <link>https://github.com/binux/pyspider</link>
  <dc:creator>kwbr</dc:creator>
  <description><![CDATA[<blockquote>pyspider - A Powerful Spider System with Web UI</blockquote>]]></description>
  <dc:subject>crawler python scraping</dc:subject>
  <dc:identifier>https://pinboard.in/u:kwbr/b:c6f0cdd54f0a/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:crawler"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:python"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:scraping"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark item: Python web scraping resource. This entry carries no description element. -->
<item rdf:about="http://jakeaustwick.me/python-web-scraping-resource/?mc_list=python">
  <title>Python web scraping resource</title>
  <dc:date>2014-08-13T18:32:45+00:00</dc:date>
  <link>http://jakeaustwick.me/python-web-scraping-resource/?mc_list=python</link>
  <dc:creator>kwbr</dc:creator>
  <dc:subject>python scraping web crawler</dc:subject>
  <dc:identifier>https://pinboard.in/u:kwbr/b:4050cd158d81/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:python"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:scraping"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:web"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:crawler"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark item: Scrapy. The description is plain text inside CDATA. -->
<item rdf:about="http://scrapy.org/">
  <title>Scrapy | An open source web scraping framework for Python</title>
  <dc:date>2012-05-01T20:58:44+00:00</dc:date>
  <link>http://scrapy.org/</link>
  <dc:creator>kwbr</dc:creator>
  <description><![CDATA[Scrapy is a fast high-level screen scraping and web crawling framework, used to crawl websites and extract structured data from their pages. It can be used for a wide range of purposes, from data mining to monitoring and automated testing.]]></description>
  <dc:subject>web scraping python</dc:subject>
  <dc:identifier>https://pinboard.in/u:kwbr/b:4025f33ef277/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:web"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:scraping"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:python"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark item: mechanize browser emulation article. This entry carries no description element. -->
<item rdf:about="http://stockrt.github.com/p/emulating-a-browser-in-python-with-mechanize/">
  <title>Emulating a Browser in Python with mechanize</title>
  <dc:date>2011-11-06T22:58:42+00:00</dc:date>
  <link>http://stockrt.github.com/p/emulating-a-browser-in-python-with-mechanize/</link>
  <dc:creator>kwbr</dc:creator>
  <dc:subject>browser python html automation scraping</dc:subject>
  <dc:identifier>https://pinboard.in/u:kwbr/b:3d871e4a664b/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:browser"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:python"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:html"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:automation"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:scraping"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark item: pjscrape. Unlike most entries in this feed, it also carries a dc:source element. -->
<item rdf:about="http://nrabinowitz.github.com/pjscrape/">
  <title>pjscrape: A web-scraping framework written in Javascript, using PhantomJS and jQuery</title>
  <dc:date>2011-08-16T20:25:47+00:00</dc:date>
  <link>http://nrabinowitz.github.com/pjscrape/</link>
  <dc:creator>kwbr</dc:creator>
  <description><![CDATA[pjscrape is a framework for anyone who's ever wanted a command-line tool for web scraping using Javascript and jQuery. Built to run with PhantomJS, it allows you to scrape pages in a fully rendered, Javascript-enabled context from the command line, no browser required.]]></description>
  <dc:subject>jquery javascript scraping phantomjs webkit</dc:subject>
  <dc:source>https://pinboard.in/</dc:source>
  <dc:identifier>https://pinboard.in/u:kwbr/b:9fb769955d8d/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:jquery"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:javascript"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:scraping"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:phantomjs"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:webkit"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark item: ScraperWiki. Unlike most entries in this feed, it also carries a dc:source element. -->
<item rdf:about="http://scraperwiki.com/">
  <title>Welcome | ScraperWiki</title>
  <dc:date>2011-04-17T18:09:13+00:00</dc:date>
  <link>http://scraperwiki.com/</link>
  <dc:creator>kwbr</dc:creator>
  <description><![CDATA[ScraperWiki is all the tools you need for Screen Scraping, Data Mining & visualisation]]></description>
  <dc:subject>api data python scraping datamining</dc:subject>
  <dc:source>https://pinboard.in/</dc:source>
  <dc:identifier>https://pinboard.in/u:kwbr/b:8b43a091e1f8/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:api"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:data"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:python"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:scraping"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:datamining"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark item: SelectorGadget. The description CDATA ends with a line break, which is text content and is kept as is. -->
<item rdf:about="http://www.selectorgadget.com/">
  <title>Introducing SelectorGadget: point and click CSS selectors</title>
  <dc:date>2009-02-27T15:38:34+00:00</dc:date>
  <link>http://www.selectorgadget.com/</link>
  <dc:creator>kwbr</dc:creator>
  <description><![CDATA[SelectorGadget is an open source bookmarklet that makes CSS selector generation and discovery on complicated sites a breeze.
]]></description>
  <dc:subject>javascript css jquery bookmarklet selector scraping</dc:subject>
  <dc:identifier>https://pinboard.in/u:kwbr/b:3f6460d55668/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:javascript"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:css"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:jquery"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:bookmarklet"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:selector"/>
      <rdf:li rdf:resource="https://pinboard.in/u:kwbr/t:scraping"/>
    </rdf:Bag>
  </taxo:topics>
</item>
</rdf:RDF>