<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (jm)</title>
    <link>https://pinboard.in/u:jm/public/</link>
    <description>recent bookmarks from jm</description>
    <items>
      <rdf:Seq>	<rdf:li rdf:resource="https://github.com/mautrix/whatsapp?tab=readme-ov-file"/>
	<rdf:li rdf:resource="https://crowdview.ai/"/>
	<rdf:li rdf:resource="https://aws.amazon.com/about-aws/whats-new/2020/03/build-k-nearest-neighbor-similarity-search-engine-with-amazon-elasticsearch/"/>
	<rdf:li rdf:resource="https://www.theguardian.com/technology/2017/jul/09/everybody-lies-how-google-reveals-darkest-secrets-seth-stephens-davidowitz?CMP=fb_gu"/>
	<rdf:li rdf:resource="https://arstechnica.com/security/2016/05/google-dorking-when-pii-and-exploitable-bugs-are-only-a-search-away/"/>
	<rdf:li rdf:resource="https://medium.com/@nickgerleman/the-bkd-tree-da19cf9493fb#.2z8fzib60"/>
	<rdf:li rdf:resource="http://blog.phusion.nl/2010/12/06/efficient-substring-searching/"/>
	<rdf:li rdf:resource="http://www.bea.aero/fr/enquetes/vol.af.447/metron.search.analysis.pdf"/>
	<rdf:li rdf:resource="http://petermblair.com/fbl-n-gram-analyzer/"/>
	<rdf:li rdf:resource="http://www.irishtimes.com/newspaper/world/2011/0910/1224303851410.html"/>
      </rdf:Seq>
    </items>
  </channel><item rdf:about="https://github.com/mautrix/whatsapp?tab=readme-ov-file">
    <title>GitHub - mautrix/whatsapp: A Matrix-WhatsApp puppeting bridge</title>
    <dc:date>2026-03-23T11:11:10+00:00</dc:date>
    <link>https://github.com/mautrix/whatsapp?tab=readme-ov-file</link>
    <dc:creator>jm</dc:creator><description><![CDATA[I've been investigating how I can back up my WhatsApp chat history and make it searchable (since WhatsApp's own built in search is not great).  Turns out you can bridge WhatsApp into Matrix, and gateway your chats over to a self-hosted Matrix.org server.  https://github.com/osteele/matrix-archive may then be a viable way to export those into a searchable format... or possibly this one? https://github.com/cameronaaron/matrix-archive/tree/master]]></description>
<dc:subject>matrix whatsapp messaging chat interop searching self-hosted</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:014855f8a243/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:matrix"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:whatsapp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:messaging"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:chat"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:interop"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:searching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:self-hosted"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://crowdview.ai/">
    <title>CrowdView</title>
    <dc:date>2023-07-31T16:27:10+00:00</dc:date>
    <link>https://crowdview.ai/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[via Waxy, a search engine that exclusively searches discussion forums]]></description>
<dc:subject>search forums searching</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:ef9a25fcca15/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:search"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:forums"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:searching"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://aws.amazon.com/about-aws/whats-new/2020/03/build-k-nearest-neighbor-similarity-search-engine-with-amazon-elasticsearch/">
    <title>k-Nearest Neighbor (k-NN) similarity search engine with Amazon Elasticsearch</title>
    <dc:date>2020-03-04T11:48:22+00:00</dc:date>
    <link>https://aws.amazon.com/about-aws/whats-new/2020/03/build-k-nearest-neighbor-similarity-search-engine-with-amazon-elasticsearch/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Well, that's handy:

<blockquote>Amazon Elasticsearch Service now offers k-Nearest Neighbor (k-NN) search which can enhance search by similarity use cases like product recommendations, fraud detection, and image, video and semantic document retrieval. Built using the lightweight and efficient Non-Metric Space Library (NMSLIB), k-NN enables high scale, low latency nearest neighbor search on billions of documents across thousands of dimensions with the same ease as running any regular Elasticsearch query.  </blockquote>

]]></description>
<dc:subject>elasticsearch aws knn algorithms similarity searching search nmslib</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:1ac9f1d202a0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:elasticsearch"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aws"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:knn"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:similarity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:searching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:search"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:nmslib"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.theguardian.com/technology/2017/jul/09/everybody-lies-how-google-reveals-darkest-secrets-seth-stephens-davidowitz?CMP=fb_gu">
    <title>Everybody lies: how Google search reveals our darkest secrets | Technology | The Guardian</title>
    <dc:date>2017-07-10T13:00:53+00:00</dc:date>
    <link>https://www.theguardian.com/technology/2017/jul/09/everybody-lies-how-google-reveals-darkest-secrets-seth-stephens-davidowitz?CMP=fb_gu</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>What can we learn about ourselves from the things we ask online? US data scientist Seth Stephens‑Davidowitz analysed anonymous Google search results, uncovering disturbing truths about [America's] desires, beliefs and prejudices</blockquote>

Fascinating.  I find it equally interesting how flawed the existing methodologies for polling and surveying are, compared to Google's data, according to this]]></description>
<dc:subject>science big-data google lying surveys polling secrets data-science america racism searching</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:368c1e21158e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:google"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:lying"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:surveys"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:polling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:secrets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:america"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:racism"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:searching"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arstechnica.com/security/2016/05/google-dorking-when-pii-and-exploitable-bugs-are-only-a-search-away/">
    <title>At the cost of security everywhere, Google dorking is still a thing | Ars Technica</title>
    <dc:date>2017-02-24T14:46:20+00:00</dc:date>
    <link>https://arstechnica.com/security/2016/05/google-dorking-when-pii-and-exploitable-bugs-are-only-a-search-away/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[I'd never heard of this term!]]></description>
<dc:subject>dorking google security searching web</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:7fa259dd7da1/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dorking"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:google"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:security"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:searching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:web"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://medium.com/@nickgerleman/the-bkd-tree-da19cf9493fb#.2z8fzib60">
    <title>The Bkd Tree</title>
    <dc:date>2016-01-04T10:44:17+00:00</dc:date>
    <link>https://medium.com/@nickgerleman/the-bkd-tree-da19cf9493fb#.2z8fzib60</link>
    <dc:creator>jm</dc:creator><description><![CDATA[good explanation of this new data structure for searching multidimensional data]]></description>
<dc:subject>search lucene bkd-trees searching data-structures</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:d4a21d001bf0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:search"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:lucene"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bkd-trees"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:searching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-structures"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://blog.phusion.nl/2010/12/06/efficient-substring-searching/">
    <title>Efficient substring searching</title>
    <dc:date>2014-03-31T13:44:45+00:00</dc:date>
    <link>http://blog.phusion.nl/2010/12/06/efficient-substring-searching/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[This is a couple of years old, but I like this:

<blockquote>Turbo Boyer-Moore is disappointing, its name doesn’t do it justice. In academia constant overhead doesn’t matter, but here we see that it matters a lot in practice. Turbo Boyer-Moore’s inner loop is so complex that we think we’re better off using the original Boyer-Moore.</blockquote>

A good demo of how large values of O(n) can be slower than small values of O(mn).]]></description>
<dc:subject>algorithms search strings coding big-o string-search searching</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:cad2a9fdecec/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:search"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:strings"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:coding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:big-o"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:string-search"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:searching"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.bea.aero/fr/enquetes/vol.af.447/metron.search.analysis.pdf">
    <title>How the search for flight AF447 used Bayesian inference</title>
    <dc:date>2014-03-12T15:33:10+00:00</dc:date>
    <link>http://www.bea.aero/fr/enquetes/vol.af.447/metron.search.analysis.pdf</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Via jgc, the search for the downed Air France flight was optimized using this technique:

'Metron’s approach to this search planning problem is rooted in classical Bayesian inference, 
which allows organization of available data with associated uncertainties and computation of the 
Probability Distribution Function (PDF) for target location given these data. In following this 
approach, the first step was to gather the available information about the location of the impact site 
of the aircraft. This information was sometimes contradictory and filled with ambiguities and 
uncertainties. Using a Bayesian approach we organized this material into consistent scenarios, 
quantified the uncertainties with probability distributions, weighted the relative likelihood of each 
scenario, and performed a simulation to produce a prior PDF for the location of the wreck.']]></description>
<dc:subject>metron bayes bayesian-inference machine-learning statistics via:jgc air-france disasters probability inference searching</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:e7c127ca54da/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:metron"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bayes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bayesian-inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:jgc"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:air-france"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:disasters"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:probability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:searching"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://petermblair.com/fbl-n-gram-analyzer/">
    <title>feedback loop n-gram analyzer</title>
    <dc:date>2011-09-29T21:10:15+00:00</dc:date>
    <link>http://petermblair.com/fbl-n-gram-analyzer/</link>
    <dc:creator>jm</dc:creator><description><![CDATA['a simple parser of ARF compliant FBL complaints, which normalizes the email complaints and generates a 6-tuple n-gram version of the message. These n-grams are stored in a Redis database, keyed by the file in which they can be found. An inverse index also exists that allow you to find all messages containing a particular n-gram word.'
]]></description>
<dc:subject>anti-spam spam fbl feedback filtering n-grams similarity hashing redis searching</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:00bea3b79665/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:anti-spam"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:spam"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:fbl"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:feedback"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:filtering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:n-grams"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:similarity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:redis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:searching"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.irishtimes.com/newspaper/world/2011/0910/1224303851410.html">
    <title>Dutch grepping Facebook for welfare fraud</title>
    <dc:date>2011-09-10T13:34:07+00:00</dc:date>
    <link>http://www.irishtimes.com/newspaper/world/2011/0910/1224303851410.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA['The [Dutch] councils are working with a specialist Amsterdam research firm, using the type of computer software previously deployed only in counterterrorism, monitoring [LinkedIn, Facebook and Twitter] traffic for keywords and cross-referencing any suspicious information with digital lists of social welfare recipients.

Among the giveaway terms, apparently, are “holiday” and “new car”. If the automated software finds a match between one of these terms and a person claiming social welfare payments, the information is passed on to investigators to gather real-life evidence.'  With a 30% false positive rate, apparently -- let's hope those investigations aren't too intrusive!]]></description>
<dc:subject>grep dutch holland via:tjmcintyre privacy facebook twitter linkedin welfare dole fraud false-positives searching</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:6616dc33ebe2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:grep"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dutch"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:holland"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:tjmcintyre"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:privacy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:facebook"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:twitter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:linkedin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:welfare"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dole"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:fraud"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:false-positives"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:searching"/>
</rdf:Bag></taxo:topics>
</item>
</rdf:RDF>