<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <!-- Channel header for the "recent bookmarks from jonty" feed. The rdf:Seq
       lists the URL of each item that is described after the channel. -->
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (jonty)</title>
    <link>https://pinboard.in/u:jonty/public/</link>
    <description>recent bookmarks from jonty</description>
    <items>
      <rdf:Seq>
        <rdf:li rdf:resource="https://x.com/dylfreed/status/1831075759747723709"/>
        <rdf:li rdf:resource="https://github.com/Dicklesworthstone/llm_aided_ocr"/>
        <rdf:li rdf:resource="http://documentcloud.github.com/docsplit/"/>
      </rdf:Seq>
    </items>
  </channel>
<!-- Bookmark of an x.com post about Qwen2-VL-7B handwriting OCR. Each tag
     appears twice: once in the space-separated dc:subject value and once as
     an rdf:li entry inside taxo:topics. The description CDATA holds HTML and
     its line breaks are part of the data, so it is not re-indented. -->
<item rdf:about="https://x.com/dylfreed/status/1831075759747723709">
  <title>Qwen2-VL-7B Instruct model gets *100%* accuracy extracting text from this handwritten document</title>
  <dc:date>2024-09-04T12:31:59+00:00</dc:date>
  <link>https://x.com/dylfreed/status/1831075759747723709</link>
  <dc:creator>jonty</dc:creator>
  <description><![CDATA[<blockquote>The new Qwen2-VL-7B Instruct model gets *100%* accuracy extracting text from this handwritten document. This is the first open weights model (Apache 2.0) that I've seen OCR this accurately. (Thank you @fdaudens for the tip!)

https://t.co/AB9r3bKDF0</blockquote>]]></description>
  <dc:subject>extraction text ai recognition image writing transcription ocr</dc:subject>
  <dc:identifier>https://pinboard.in/u:jonty/b:ad6c7e7db9da/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:extraction"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:text"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:ai"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:recognition"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:image"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:writing"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:transcription"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:ocr"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark of the llm_aided_ocr GitHub repository. Each tag appears twice:
     once in the space-separated dc:subject value and once as an rdf:li entry
     inside taxo:topics. The description CDATA holds an HTML blockquote. -->
<item rdf:about="https://github.com/Dicklesworthstone/llm_aided_ocr">
  <title>Dicklesworthstone/llm_aided_ocr: Enhance Tesseract OCR output for scanned PDFs by applying Large Language Model (LLM) corrections.</title>
  <dc:date>2024-08-10T12:15:16+00:00</dc:date>
  <link>https://github.com/Dicklesworthstone/llm_aided_ocr</link>
  <dc:creator>jonty</dc:creator>
  <description><![CDATA[<blockquote>Enhance Tesseract OCR output for scanned PDFs by applying Large Language Model (LLM) corrections. - Dicklesworthstone/llm_aided_ocr</blockquote>]]></description>
  <dc:subject>ocr text scanning recognition llm model corrections</dc:subject>
  <dc:identifier>https://pinboard.in/u:jonty/b:591d15829c59/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:ocr"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:text"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:scanning"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:recognition"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:llm"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:model"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:corrections"/>
    </rdf:Bag>
  </taxo:topics>
</item>
<!-- Bookmark of the Docsplit project page. Each tag appears twice: once in
     the space-separated dc:subject value and once as an rdf:li entry inside
     taxo:topics. The description CDATA holds a plain quoted sentence. -->
<item rdf:about="http://documentcloud.github.com/docsplit/">
  <title>Doc⚡split</title>
  <dc:date>2010-12-22T13:45:07+00:00</dc:date>
  <link>http://documentcloud.github.com/docsplit/</link>
  <dc:creator>jonty</dc:creator>
  <description><![CDATA["Docsplit is a command-line utility and Ruby library for splitting apart documents into their component parts: searchable UTF-8 plain text via OCR if necessary, page images or thumbnails in any format, PDFs, single pages, and document metadata (title, author, number of pages...)"]]></description>
  <dc:subject>ruby pdf document parsing ocr documents data processing split</dc:subject>
  <dc:identifier>https://pinboard.in/u:jonty/b:eb51e92dfec7/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:ruby"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:pdf"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:document"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:parsing"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:ocr"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:documents"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:data"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:processing"/>
      <rdf:li rdf:resource="https://pinboard.in/u:jonty/t:split"/>
    </rdf:Bag>
  </taxo:topics>
</item>
</rdf:RDF>