<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (dwillis)</title>
    <link>https://pinboard.in/u:dwillis/public/</link>
    <description>recent bookmarks from dwillis</description>
    <items>
      <rdf:Seq>	<rdf:li rdf:resource="https://jsoma.github.io/natural-pdf-workshop/"/>
	<rdf:li rdf:resource="https://mattwaite.github.io/posts/2025-11-24-parsing-pdfs-with-antigravity/"/>
	<rdf:li rdf:resource="https://arstechnica.com/ai/2025/03/why-extracting-data-from-pdfs-is-still-a-nightmare-for-data-experts/"/>
	<rdf:li rdf:resource="https://github.com/freedmand/semantra"/>
	<rdf:li rdf:resource="https://olmocr.allenai.org/"/>
	<rdf:li rdf:resource="https://wangari.substack.com/p/stop-copy-pasting-turn-pdfs-into"/>
	<rdf:li rdf:resource="https://generative-ai-newsroom.com/structured-outputs-making-llms-reliable-for-document-processing-c3b6b2baed36"/>
	<rdf:li rdf:resource="https://simonwillison.net/2024/Nov/3/docling/"/>
	<rdf:li rdf:resource="https://ds4sd.github.io/docling/"/>
	<rdf:li rdf:resource="https://github.com/VikParuchuri/surya"/>
	<rdf:li rdf:resource="https://chunkr.ai/"/>
	<rdf:li rdf:resource="https://unstract.com/blog/pdf-hell-and-practical-rag-applications/"/>
	<rdf:li rdf:resource="https://pdf-to-podcast.com/"/>
	<rdf:li rdf:resource="https://www.armytimes.com/news/your-army/2024/03/11/broken-track-suicides-suffering-in-armys-exhausted-armor-community/"/>
	<rdf:li rdf:resource="https://bunkum.us/2024/03/22/snakemake-text-extraction.html"/>
	<rdf:li rdf:resource="https://dangerzone.rocks/"/>
	<rdf:li rdf:resource="https://fleuret.org/public/lbdl.pdf"/>
	<rdf:li rdf:resource="https://www.nytimes.com/2023/05/15/insider/finding-order-in-a-thicket-of-nonprofit-data.html?unlocked_article_code=SDZwc-NwggX6sn_rT3zbWazjLheR9hhH4q20fg3p5EMELdJnjmYkvm4cqMwEuPjNY3jHYqtOd5AaKD4vuMcxzt1wZ1yF8vPzHKR4X3WD4QCaEPrVVvZ-fmrhFD_T46DdEsHCIZw0j8esEbs4i100tzBA6dOLzK7TI-ex_f3-wvPZJ8kGD6JS7-k7FVoxaGybAjg_d-4uKWXm-I8LYgznk0a50IdwSzu3Wua4le-TwbqHUZyairY_XIP2Py52UFDu_SSRNhkufmyLxpt8nV4VO5IvLnM6dg36af6jwiKLKcgCokYyGYSbIJ0xG5f8hCHKvwQtgFXbpnQ9B4H4nerfLWdLPf4WdFrJX-v7TZjGpKp6Xg&amp;smid=url-share"/>
	<rdf:li rdf:resource="https://colab.research.google.com/github/jina-ai/workshops/blob/main/pdf_search/pdf_search.ipynb"/>
	<rdf:li rdf:resource="https://til.simonwillison.net/aws/ocr-pdf-textract"/>
	<rdf:li rdf:resource="https://www.computerworld.com/article/3660643/pdf-to-excel-conversion-ultimate-guide-best-tools.html"/>
	<rdf:li rdf:resource="https://source.opennews.org/articles/so-many-ocr-options/"/>
	<rdf:li rdf:resource="https://www.filingdb.com/pdf-text-extraction"/>
	<rdf:li rdf:resource="http://blog.law.cornell.edu/tbruce/2013/06/14/pdf-re-sewing-the-blanket/"/>
	<rdf:li rdf:resource="http://source.mozillaopennews.org/en-US/articles/introducing-tabula/"/>
	<rdf:li rdf:resource="http://documentcloud.github.com/docsplit/"/>
	<rdf:li rdf:resource="http://siadapp.dmdc.osd.mil/personnel/MILITARY/miltop.htm"/>
	<rdf:li rdf:resource="http://clerk.house.gov/public_disc/financial.html"/>
	<rdf:li rdf:resource="http://www.unixuser.org/~euske/python/pdfminer/index.html"/>
	<rdf:li rdf:resource="http://skim-app.sourceforge.net/"/>
	<rdf:li rdf:resource="http://pybrary.net/pyPdf/"/>
	<rdf:li rdf:resource="http://www.pdfstore.com/details.asp?ProdID=130&amp;nl=ps"/>
	<rdf:li rdf:resource="http://blogs.adobe.com/acrolaw/2005/10/batch_ocr_using_1.html"/>
	<rdf:li rdf:resource="http://www.pdfforlawyers.com/2004/04/ocr_tutorial_fo.html"/>
      </rdf:Seq>
    </items>
  </channel><item rdf:about="https://jsoma.github.io/natural-pdf-workshop/">
    <title>Modern PDF processing with Natural PDF</title>
    <dc:date>2026-03-07T16:57:53+00:00</dc:date>
    <link>https://jsoma.github.io/natural-pdf-workshop/</link>
    <dc:creator>dwillis</dc:creator><dc:subject>pdf teaching</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:0ae6795ddf68/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:teaching"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://mattwaite.github.io/posts/2025-11-24-parsing-pdfs-with-antigravity/">
    <title>Parsing PDFs with Antigravity – Matt Waite’s Collection of Miscellany</title>
    <dc:date>2025-11-25T20:12:00+00:00</dc:date>
    <link>https://mattwaite.github.io/posts/2025-11-24-parsing-pdfs-with-antigravity/</link>
    <dc:creator>dwillis</dc:creator><dc:subject>ai teaching data pdf</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:c4e7eb758a90/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:teaching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arstechnica.com/ai/2025/03/why-extracting-data-from-pdfs-is-still-a-nightmare-for-data-experts/">
    <title>Why extracting data from PDFs is still a nightmare for data experts - Ars Technica</title>
    <dc:date>2025-03-11T13:40:56+00:00</dc:date>
    <link>https://arstechnica.com/ai/2025/03/why-extracting-data-from-pdfs-is-still-a-nightmare-for-data-experts/</link>
    <dc:creator>dwillis</dc:creator><dc:subject>pdf egosurf</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:1f022a0fed89/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:egosurf"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/freedmand/semantra">
    <title>freedmand/semantra: Multi-tool for semantic search</title>
    <dc:date>2025-03-07T03:36:26+00:00</dc:date>
    <link>https://github.com/freedmand/semantra</link>
    <dc:creator>dwillis</dc:creator><dc:subject>pdf newsapps</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:06d75b1d54f7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:newsapps"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://olmocr.allenai.org/">
    <title>olmOCR – Open-Source OCR for Accurate Document Conversion</title>
    <dc:date>2025-02-26T19:11:08+00:00</dc:date>
    <link>https://olmocr.allenai.org/</link>
    <dc:creator>dwillis</dc:creator><description><![CDATA[olmOCR]]></description>
<dc:subject>ocr pdf ai teaching nicar25</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:5c6160f4809a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:ocr"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:teaching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:nicar25"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://wangari.substack.com/p/stop-copy-pasting-turn-pdfs-into">
    <title>Stop Copy-Pasting. Turn PDFs into Data in Seconds</title>
    <dc:date>2025-02-25T13:03:03+00:00</dc:date>
    <link>https://wangari.substack.com/p/stop-copy-pasting-turn-pdfs-into</link>
    <dc:creator>dwillis</dc:creator><dc:subject>pdf teaching</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:145f5dda9be3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:teaching"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://generative-ai-newsroom.com/structured-outputs-making-llms-reliable-for-document-processing-c3b6b2baed36">
    <title>Structured Outputs: Making LLMs Reliable for Document Processing | by Nick Hagar | Dec, 2024 | Generative AI in the Newsroom</title>
    <dc:date>2024-12-05T15:01:55+00:00</dc:date>
    <link>https://generative-ai-newsroom.com/structured-outputs-making-llms-reliable-for-document-processing-c3b6b2baed36</link>
    <dc:creator>dwillis</dc:creator><dc:subject>ai teaching pdf</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:6c68c92c9ee2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:teaching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://simonwillison.net/2024/Nov/3/docling/">
    <title>Docling</title>
    <dc:date>2024-11-03T12:01:19+00:00</dc:date>
    <link>https://simonwillison.net/2024/Nov/3/docling/</link>
    <dc:creator>dwillis</dc:creator><dc:subject>python pdf ocr</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:ce85c7ecb38a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:ocr"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://ds4sd.github.io/docling/">
    <title>Home - Docling</title>
    <dc:date>2024-11-03T02:33:31+00:00</dc:date>
    <link>https://ds4sd.github.io/docling/</link>
    <dc:creator>dwillis</dc:creator><dc:subject>pdf ocr docs</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:8f5a97a7f020/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:ocr"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:docs"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/VikParuchuri/surya">
    <title>VikParuchuri/surya: OCR, layout analysis, reading order, table recognition in 90+ languages</title>
    <dc:date>2024-10-17T21:58:46+00:00</dc:date>
    <link>https://github.com/VikParuchuri/surya</link>
    <dc:creator>dwillis</dc:creator><dc:subject>pdf ocr</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:d0c9b129a57e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:ocr"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://chunkr.ai/">
    <title>Chunkr | Open Source Data Ingestion</title>
    <dc:date>2024-10-17T21:55:38+00:00</dc:date>
    <link>https://chunkr.ai/</link>
    <dc:creator>dwillis</dc:creator><dc:subject>ocr pdf teaching</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:302b6307e3f4/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:ocr"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:teaching"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://unstract.com/blog/pdf-hell-and-practical-rag-applications/">
    <title>Extracting Data from PDFs | Challenges in RAG/LLM Applications</title>
    <dc:date>2024-07-05T00:10:32+00:00</dc:date>
    <link>https://unstract.com/blog/pdf-hell-and-practical-rag-applications/</link>
    <dc:creator>dwillis</dc:creator><dc:subject>ai teaching pdf</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:0a067a6f62a1/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:teaching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://pdf-to-podcast.com/">
    <title>PDF to Podcast</title>
    <dc:date>2024-06-13T10:40:31+00:00</dc:date>
    <link>https://pdf-to-podcast.com/</link>
    <dc:creator>dwillis</dc:creator><dc:subject>ai teaching pdf</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:e2d834850ca4/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:teaching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.armytimes.com/news/your-army/2024/03/11/broken-track-suicides-suffering-in-armys-exhausted-armor-community/">
    <title>BROKEN TRACK: Suicides &amp; suffering in Army’s exhausted armor community</title>
    <dc:date>2024-03-26T18:53:42+00:00</dc:date>
    <link>https://www.armytimes.com/news/your-army/2024/03/11/broken-track-suicides-suffering-in-armys-exhausted-armor-community/</link>
    <dc:creator>dwillis</dc:creator><dc:subject>data teaching pdf</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:926f9510a97d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:teaching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://bunkum.us/2024/03/22/snakemake-text-extraction.html">
    <title>Snakemake for PDF text extraction is pretty pleasant</title>
    <dc:date>2024-03-22T15:53:32+00:00</dc:date>
    <link>https://bunkum.us/2024/03/22/snakemake-text-extraction.html</link>
    <dc:creator>dwillis</dc:creator><dc:subject>newsapps teaching ocr pdf</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:a5c2f8fc84c7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:newsapps"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:teaching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:ocr"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://dangerzone.rocks/">
    <title>Dangerzone: Convert potentially dangerous documents into safe PDFs</title>
    <dc:date>2024-02-27T14:03:28+00:00</dc:date>
    <link>https://dangerzone.rocks/</link>
    <dc:creator>dwillis</dc:creator><dc:subject>pdf utilities</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:10140e513102/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:utilities"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://fleuret.org/public/lbdl.pdf">
    <title>The Little Book of Deep Learning</title>
    <dc:date>2023-05-22T00:18:33+00:00</dc:date>
    <link>https://fleuret.org/public/lbdl.pdf</link>
    <dc:creator>dwillis</dc:creator><dc:subject>machinelearning pdf</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:b8a11d1918fc/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:machinelearning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.nytimes.com/2023/05/15/insider/finding-order-in-a-thicket-of-nonprofit-data.html?unlocked_article_code=SDZwc-NwggX6sn_rT3zbWazjLheR9hhH4q20fg3p5EMELdJnjmYkvm4cqMwEuPjNY3jHYqtOd5AaKD4vuMcxzt1wZ1yF8vPzHKR4X3WD4QCaEPrVVvZ-fmrhFD_T46DdEsHCIZw0j8esEbs4i100tzBA6dOLzK7TI-ex_f3-wvPZJ8kGD6JS7-k7FVoxaGybAjg_d-4uKWXm-I8LYgznk0a50IdwSzu3Wua4le-TwbqHUZyairY_XIP2Py52UFDu_SSRNhkufmyLxpt8nV4VO5IvLnM6dg36af6jwiKLKcgCokYyGYSbIJ0xG5f8hCHKvwQtgFXbpnQ9B4H4nerfLWdLPf4WdFrJX-v7TZjGpKp6Xg&amp;smid=url-share">
    <title>Finding Order in a Thicket of Nonprofit Data - The New York Times</title>
    <dc:date>2023-05-16T00:26:00+00:00</dc:date>
    <link>https://www.nytimes.com/2023/05/15/insider/finding-order-in-a-thicket-of-nonprofit-data.html?unlocked_article_code=SDZwc-NwggX6sn_rT3zbWazjLheR9hhH4q20fg3p5EMELdJnjmYkvm4cqMwEuPjNY3jHYqtOd5AaKD4vuMcxzt1wZ1yF8vPzHKR4X3WD4QCaEPrVVvZ-fmrhFD_T46DdEsHCIZw0j8esEbs4i100tzBA6dOLzK7TI-ex_f3-wvPZJ8kGD6JS7-k7FVoxaGybAjg_d-4uKWXm-I8LYgznk0a50IdwSzu3Wua4le-TwbqHUZyairY_XIP2Py52UFDu_SSRNhkufmyLxpt8nV4VO5IvLnM6dg36af6jwiKLKcgCokYyGYSbIJ0xG5f8hCHKvwQtgFXbpnQ9B4H4nerfLWdLPf4WdFrJX-v7TZjGpKp6Xg&amp;smid=url-share</link>
    <dc:creator>dwillis</dc:creator><dc:subject>teaching pdf data</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:91d7fb2aefea/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:teaching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:data"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://colab.research.google.com/github/jina-ai/workshops/blob/main/pdf_search/pdf_search.ipynb">
    <title>pdf_search.ipynb - Colaboratory</title>
    <dc:date>2022-07-30T00:31:30+00:00</dc:date>
    <link>https://colab.research.google.com/github/jina-ai/workshops/blob/main/pdf_search/pdf_search.ipynb</link>
    <dc:creator>dwillis</dc:creator><dc:subject>pdf python search</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:87096a9529a2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:search"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://til.simonwillison.net/aws/ocr-pdf-textract">
    <title>Running OCR against a PDF file with AWS Textract | Simon Willison’s TILs</title>
    <dc:date>2022-06-28T20:30:05+00:00</dc:date>
    <link>https://til.simonwillison.net/aws/ocr-pdf-textract</link>
    <dc:creator>dwillis</dc:creator><dc:subject>pdf teaching</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:45acb83cb5d8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:teaching"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.computerworld.com/article/3660643/pdf-to-excel-conversion-ultimate-guide-best-tools.html">
    <title>PDF to Excel conversion: Your ultimate guide to the best tools | Computerworld</title>
    <dc:date>2022-05-24T18:50:58+00:00</dc:date>
    <link>https://www.computerworld.com/article/3660643/pdf-to-excel-conversion-ultimate-guide-best-tools.html</link>
    <dc:creator>dwillis</dc:creator><dc:subject>teaching pdf</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:d1556c59b5d5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:teaching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://source.opennews.org/articles/so-many-ocr-options/">
    <title>Our Search for the Best OCR Tool, and What We Found - Features - Source: An OpenNews project</title>
    <dc:date>2021-08-27T17:22:37+00:00</dc:date>
    <link>https://source.opennews.org/articles/so-many-ocr-options/</link>
    <dc:creator>dwillis</dc:creator><dc:subject>pdf ocr teaching</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:e6681b96b893/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:ocr"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:teaching"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.filingdb.com/pdf-text-extraction">
    <title>PDF text extraction | FilingDB</title>
    <dc:date>2020-03-09T14:55:39+00:00</dc:date>
    <link>https://www.filingdb.com/pdf-text-extraction</link>
    <dc:creator>dwillis</dc:creator><dc:subject>teaching pdf</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:fcc24cef445b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:teaching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://blog.law.cornell.edu/tbruce/2013/06/14/pdf-re-sewing-the-blanket/">
    <title>PDF: re-stitching the blanket » b-screeds</title>
    <dc:date>2013-06-14T16:10:59+00:00</dc:date>
    <link>http://blog.law.cornell.edu/tbruce/2013/06/14/pdf-re-sewing-the-blanket/</link>
    <dc:creator>dwillis</dc:creator><description><![CDATA[RT @trbruce: New blog post exorcising old PDF-demons that possess legislation, at ]]></description>
<dc:subject>pdf data congress</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:470f93f4ba15/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:congress"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://source.mozillaopennews.org/en-US/articles/introducing-tabula/">
    <title>Introducing Tabula - Features - Source: An OpenNews project</title>
    <dc:date>2013-04-03T17:09:12+00:00</dc:date>
    <link>http://source.mozillaopennews.org/en-US/articles/introducing-tabula/</link>
    <dc:creator>dwillis</dc:creator><dc:subject>teaching pdf duke</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:dwillis/b:ea6e7a550e59/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:teaching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:duke"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://documentcloud.github.com/docsplit/">
    <title>Doc⚡split</title>
    <dc:date>2009-12-07T15:43:50+00:00</dc:date>
    <link>http://documentcloud.github.com/docsplit/</link>
    <dc:creator>dwillis</dc:creator><dc:subject>ruby pdf</dc:subject>
<dc:identifier>https://pinboard.in/u:dwillis/b:4b1e3dd10a70/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:ruby"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://siadapp.dmdc.osd.mil/personnel/MILITARY/miltop.htm">
    <title>Military Personnel Statistics</title>
    <dc:date>2009-10-22T17:46:41+00:00</dc:date>
    <link>http://siadapp.dmdc.osd.mil/personnel/MILITARY/miltop.htm</link>
    <dc:creator>dwillis</dc:creator><dc:subject>military statistics pdf</dc:subject>
<dc:identifier>https://pinboard.in/u:dwillis/b:5b5dd72f1b89/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:military"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://clerk.house.gov/public_disc/financial.html">
    <title>Office of the Clerk</title>
    <dc:date>2009-06-08T16:05:37+00:00</dc:date>
    <link>http://clerk.house.gov/public_disc/financial.html</link>
    <dc:creator>dwillis</dc:creator><description><![CDATA[financial disclosures now available by member on House site.
]]></description>
<dc:subject>congress disclosures pdf</dc:subject>
<dc:identifier>https://pinboard.in/u:dwillis/b:66a0ce3c5933/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:congress"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:disclosures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.unixuser.org/~euske/python/pdfminer/index.html">
    <title>PDFMiner</title>
    <dc:date>2008-07-28T15:38:59+00:00</dc:date>
    <link>http://www.unixuser.org/~euske/python/pdfminer/index.html</link>
    <dc:creator>dwillis</dc:creator><dc:subject>python pdf utilities</dc:subject>
<dc:identifier>https://pinboard.in/u:dwillis/b:f64e4e2b5954/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:utilities"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://skim-app.sourceforge.net/">
    <title>Skim | Home</title>
    <dc:date>2007-04-02T19:55:08+00:00</dc:date>
    <link>http://skim-app.sourceforge.net/</link>
    <dc:creator>dwillis</dc:creator><dc:subject>pdf osx</dc:subject>
<dc:identifier>https://pinboard.in/u:dwillis/b:130d7f2b5c9e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:osx"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://pybrary.net/pyPdf/">
    <title>pyPdf</title>
    <dc:date>2006-11-15T17:01:08+00:00</dc:date>
    <link>http://pybrary.net/pyPdf/</link>
    <dc:creator>dwillis</dc:creator><dc:subject>python pdf</dc:subject>
<dc:identifier>https://pinboard.in/u:dwillis/b:d5c466f41884/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.pdfstore.com/details.asp?ProdID=130&amp;nl=ps">
    <title>PDF Store - Gemini - Multi-format PDF content extraction plug-in for Adobe Acrobat.</title>
    <dc:date>2006-03-15T02:40:12+00:00</dc:date>
    <link>http://www.pdfstore.com/details.asp?ProdID=130&amp;nl=ps</link>
    <dc:creator>dwillis</dc:creator><dc:subject>pdf ocr</dc:subject>
<dc:identifier>https://pinboard.in/u:dwillis/b:63d643e42318/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:ocr"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://blogs.adobe.com/acrolaw/2005/10/batch_ocr_using_1.html">
    <title>Acrobat for Legal Professionals: Batch OCR using Acrobat Professional</title>
    <dc:date>2006-03-01T22:56:40+00:00</dc:date>
    <link>http://blogs.adobe.com/acrolaw/2005/10/batch_ocr_using_1.html</link>
    <dc:creator>dwillis</dc:creator><dc:subject>pdf ocr</dc:subject>
<dc:identifier>https://pinboard.in/u:dwillis/b:1bd2d415c8da/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:ocr"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.pdfforlawyers.com/2004/04/ocr_tutorial_fo.html">
    <title>PDF for Lawyers: OCR Tutorial for Acrobat 6</title>
    <dc:date>2006-03-01T22:56:14+00:00</dc:date>
    <link>http://www.pdfforlawyers.com/2004/04/ocr_tutorial_fo.html</link>
    <dc:creator>dwillis</dc:creator><dc:subject>pdf ocr</dc:subject>
<dc:identifier>https://pinboard.in/u:dwillis/b:761a6bce9c7f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:dwillis/t:ocr"/>
</rdf:Bag></taxo:topics>
</item>
</rdf:RDF>