<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (jm)</title>
    <link>https://pinboard.in/u:jm/public/</link>
    <description>recent bookmarks from jm</description>
    <items>
      <rdf:Seq>	<rdf:li rdf:resource="https://mahmoud-salem.net/the-invisible-shield"/>
	<rdf:li rdf:resource="https://orlp.net/blog/breaking-hash-functions/"/>
	<rdf:li rdf:resource="https://nochlin.com/blog/how-the-new-sqlite3_rsync-utility-works"/>
	<rdf:li rdf:resource="https://github.com/Cyan4973/xxHash/"/>
	<rdf:li rdf:resource="https://blog.centminmod.com/2021/01/30/2214/fast-tar-and-rsync-transfer-speed-for-linux-backups-using-zstd-compression/"/>
	<rdf:li rdf:resource="https://github.com/dropbox/setsum"/>
	<rdf:li rdf:resource="https://blog.cloudflare.com/when-bloom-filters-dont-bloom/"/>
	<rdf:li rdf:resource="https://www.infoq.com/news/2020/01/blake3-fast-crypto-hash/"/>
	<rdf:li rdf:resource="https://forums.aws.amazon.com/thread.jspa?threadID=22709"/>
	<rdf:li rdf:resource="https://eprint.iacr.org/2020/014"/>
	<rdf:li rdf:resource="http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html"/>
	<rdf:li rdf:resource="https://github.com/multiformats/multihash"/>
	<rdf:li rdf:resource="https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/"/>
	<rdf:li rdf:resource="http://hpc.ac.upc.edu/PDFs/dir05/file004529.pdf"/>
	<rdf:li rdf:resource="https://github.com/google/highwayhash"/>
	<rdf:li rdf:resource="https://www.engadget.com/2017/11/10/the-naked-truth-about-facebook-s-revenge-porn-tool/"/>
	<rdf:li rdf:resource="https://www.theguardian.com/technology/2017/nov/07/facebook-revenge-porn-nude-photos"/>
	<rdf:li rdf:resource="http://stackoverflow.com/questions/843972/image-comparison-fast-algorithm/844113#844113"/>
	<rdf:li rdf:resource="http://accidentallyquadratic.tumblr.com/post/153545455987/rust-hash-iteration-reinsertion"/>
	<rdf:li rdf:resource="http://gwolf.org/node/4070"/>
	<rdf:li rdf:resource="https://en.m.wikipedia.org/wiki/Rendezvous_hashing"/>
	<rdf:li rdf:resource="https://blake2.net/blake2.pdf"/>
	<rdf:li rdf:resource="http://www.johndcook.com/blog/2016/01/30/general-birthday-problem/"/>
	<rdf:li rdf:resource="http://blog.silentsignal.eu/2015/06/10/poisonous-md5-wolves-among-the-sheep/"/>
	<rdf:li rdf:resource="https://github.com/trendmicro/tlsh"/>
	<rdf:li rdf:resource="http://www.pdl.cmu.edu/PDL-FTP/FS/cuckoo-conext2014.pdf"/>
	<rdf:li rdf:resource="http://davidjohnstone.net/pages/hash-collision-probability"/>
	<rdf:li rdf:resource="http://natmchugh.blogspot.co.uk/2014/10/how-i-created-two-images-with-same-md5.html"/>
	<rdf:li rdf:resource="http://corte.si/%2Fposts/code/bloom-filter-rules-of-thumb/index.html"/>
	<rdf:li rdf:resource="http://matthewcasperson.blogspot.ie/2013/11/minhash-for-dummies.html"/>
	<rdf:li rdf:resource="https://medium.com/@vijayp/f6bc289679a1"/>
	<rdf:li rdf:resource="http://arxiv.org/pdf/1406.2294v1.pdf"/>
	<rdf:li rdf:resource="http://www.awsarchitectureblog.com/2014/04/shuffle-sharding.html"/>
	<rdf:li rdf:resource="https://news.ycombinator.com/item?id=7506774"/>
	<rdf:li rdf:resource="http://www.eecs.harvard.edu/~michaelm/postscripts/esa2006b.pdf"/>
	<rdf:li rdf:resource="https://github.com/cscotta/recordinality"/>
	<rdf:li rdf:resource="http://courses.csail.mit.edu/6.851/spring12/lectures/"/>
	<rdf:li rdf:resource="http://jeremydhoon.github.com/2013/03/19/abusing-hash-kernels-for-wildly-unprincipled-machine-learning/"/>
	<rdf:li rdf:resource="http://stackoverflow.com/questions/14010906/given-that-hashmaps-in-jdk1-6-and-above-cause-problems-with-multi-threading-how"/>
	<rdf:li rdf:resource="http://fail0verflow.com/blog/2013/megafail.html"/>
	<rdf:li rdf:resource="https://www.131002.net/siphash/"/>
	<rdf:li rdf:resource="https://github.com/cloudera/impala/blob/master/be/src/experiments/hashing/cache-hash-table.h"/>
	<rdf:li rdf:resource="http://blog.headius.com/2012/09/avoiding-hash-lookups-in-ruby.html"/>
	<rdf:li rdf:resource="http://www.trailofbits.com/resources/flame-md5.pdf"/>
	<rdf:li rdf:resource="http://petermblair.com/fbl-n-gram-analyzer/"/>
	<rdf:li rdf:resource="http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html"/>
	<rdf:li rdf:resource="http://code.google.com/p/deeptoad/"/>
	<rdf:li rdf:resource="http://corte.si/posts/code/bloom-filter-rules-of-thumb/index.html"/>
	<rdf:li rdf:resource="http://rdist.root.org/2009/10/29/stop-using-unsafe-keyed-hashes-use-hmac/"/>
      </rdf:Seq>
    </items>
  </channel><item rdf:about="https://mahmoud-salem.net/the-invisible-shield">
    <title>How Do You Find an Illegal Image Without Looking at It?</title>
    <dc:date>2026-04-07T10:18:58+00:00</dc:date>
    <link>https://mahmoud-salem.net/the-invisible-shield</link>
    <dc:creator>jm</dc:creator><description><![CDATA[A very good writeup of how illegal-image detection algorithms like PhotoDNA and PDQ work, and the Hasher-Matcher-Actioner three stage pattern

(via Erin Kissane)]]></description>
<dc:subject>csam detection filtering photodna pdq classifiers photos videos classification hashing fuzzy-hashing via:erin-kissane</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:b9d795d6a889/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:csam"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:detection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:filtering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:photodna"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:pdq"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:photos"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:videos"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:classification"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:fuzzy-hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:erin-kissane"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://orlp.net/blog/breaking-hash-functions/">
    <title>Breaking CityHash64, MurmurHash2/3, wyhash, and more</title>
    <dc:date>2025-05-01T11:53:49+00:00</dc:date>
    <link>https://orlp.net/blog/breaking-hash-functions/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[A bunch of new-to-me hash collision attacks on cityhash64, murmurhash2, murmurhash3, farmhash64, and wyhash]]></description>
<dc:subject>hashing security infosec hashdos collisions cityhash murmurhash farmhash wyhash</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:b2090029dc7d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:security"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:infosec"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashdos"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:collisions"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cityhash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:murmurhash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:farmhash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:wyhash"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://nochlin.com/blog/how-the-new-sqlite3_rsync-utility-works">
    <title>How the New sqlite3_rsync Utility Works</title>
    <dc:date>2024-11-06T17:05:58+00:00</dc:date>
    <link>https://nochlin.com/blog/how-the-new-sqlite3_rsync-utility-works</link>
    <dc:creator>jm</dc:creator><description><![CDATA["I've enjoyed following the development of the new sqlite3_rsync utility in the SQLite project. The utility employs a bandwidth-efficient algorithm to synchronize new and modified pages from an origin SQLite database to a replica. You can learn more about the new utility here and try it out by following the instructions here.  Curious about its workings, I reviewed the code"

Interesting use of a truncated SHA-3 as the hash() implementation, for speed.]]></description>
<dc:subject>sqlite hashing rsync synchronization replication databases storage algorithms</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:7f69d3bfe7d1/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sqlite"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:rsync"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:synchronization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:replication"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:databases"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:storage"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/Cyan4973/xxHash/">
    <title>Cyan4973/xxHash: Extremely fast non-cryptographic hash algorithm</title>
    <dc:date>2021-02-01T11:48:46+00:00</dc:date>
    <link>https://github.com/Cyan4973/xxHash/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[significantly faster than Murmur3 and City32; SSE code is even faster than sequential RAM reads :)]]></description>
<dc:subject>hashing hash xxhash performance coding speed algorithms</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:57cfebc0c1ce/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:xxhash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:coding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:speed"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://blog.centminmod.com/2021/01/30/2214/fast-tar-and-rsync-transfer-speed-for-linux-backups-using-zstd-compression/">
    <title>Fast Tar And Rsync Transfer Speed For Linux Backups Using Zstd Compression</title>
    <dc:date>2021-02-01T11:45:50+00:00</dc:date>
    <link>https://blog.centminmod.com/2021/01/30/2214/fast-tar-and-rsync-transfer-speed-for-linux-backups-using-zstd-compression/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[I can confirm, zstd is awesome -- must use xxHash more, too.

<blockquote>Newer Tar 1.32+ and Rsync 3.2.3 versions have added Facebook’s zstd compression algorithm and Rsync has added lz4 and xxHash checksum algorithms which give Tar and Rsync a tremendous boost in transfer speed.</blockquote>

]]></description>
<dc:subject>tar rsync backups xxhash hashing performance speed zstd compression lz4</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:93b4075d656b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tar"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:rsync"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:backups"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:xxhash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:speed"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:zstd"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:compression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:lz4"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/dropbox/setsum">
    <title>dropbox/setsum</title>
    <dc:date>2020-12-08T22:33:57+00:00</dc:date>
    <link>https://github.com/dropbox/setsum</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Via Robert Escriva - 'the set-based checksum algorithm we made. Add items in any order and still get the same checksum. Union two independently created sets and get the same result as having done it as one iteration.']]></description>
<dc:subject>checksums hashing dropbox sums summarising algorithms streaming</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:82bacd312ca4/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:checksums"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dropbox"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sums"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:summarising"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:streaming"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://blog.cloudflare.com/when-bloom-filters-dont-bloom/">
    <title>When Bloom filters don't bloom</title>
    <dc:date>2020-03-03T14:46:36+00:00</dc:date>
    <link>https://blog.cloudflare.com/when-bloom-filters-dont-bloom/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[A good exploration into modern CPU/memory performance behaviour, and profiling same on Linux using "perf stat -d" and "google-perftools":

<blockquote>Modern CPUs are really good at sequential memory access when it's possible to predict memory fetch patterns (see Cache prefetching). Random memory access on the other hand is very costly.

Advanced data structures are very interesting, but beware. Modern computers require cache-optimized algorithms. When working with large datasets, not fitting L3, prefer optimizing for reduced number loads, over optimizing the amount of memory used.

I guess it's fair to say that Bloom filters are great, as long as they fit into the L3 cache. The moment this assumption is broken, they are terrible. This is not news, Bloom filters optimize for memory usage, not for memory access. For example, see the Cuckoo Filters paper.</blockquote>

]]></description>
<dc:subject>cloudflare bloom-filters performance data-structures cpu cache l3 hashing perf perftools</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:0d0316cd680e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cloudflare"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bloom-filters"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-structures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cpu"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cache"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:l3"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:perf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:perftools"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.infoq.com/news/2020/01/blake3-fast-crypto-hash/">
    <title>BLAKE3</title>
    <dc:date>2020-02-24T17:28:59+00:00</dc:date>
    <link>https://www.infoq.com/news/2020/01/blake3-fast-crypto-hash/</link>
    <dc:creator>jm</dc:creator><description><![CDATA['an Extremely Fast, Parallel Cryptographic Hash': BLAKE3's authors published a benchmark on an Intel Cascade Lake-SP 8275CL processor showing it to be 5x faster than BLAKE2 and 15x faster than SHA3-256.]]></description>
<dc:subject>blake3 blake hashing hashes algorithms speed performance optimization sha</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:386b7e9bbdfd/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:blake3"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:blake"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:speed"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:optimization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sha"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://forums.aws.amazon.com/thread.jspa?threadID=22709">
    <title>Historic S3 data corruption due to a faulty load balancer</title>
    <dc:date>2020-01-22T14:05:12+00:00</dc:date>
    <link>https://forums.aws.amazon.com/thread.jspa?threadID=22709</link>
    <dc:creator>jm</dc:creator><description><![CDATA[This came up in a discussion of using hashes for end-to-end data resiliency on the og-aws slack.  Turns out AWS support staff wrote it up at the time:

<blockquote>We've isolated this issue to a single load balancer that was brought into service at 10:55pm PDT on Friday, 6/20 [2008].  It was taken out of service at 11am PDT Sunday, 6/22.  While it was in service it handled a small fraction of Amazon S3's total requests in the US.  Intermittently, under load, it was corrupting single bytes in the byte stream.  When the requests reached Amazon S3, if the Content-MD5 header was specified, Amazon S3 returned an error indicating the object did not match the MD5 supplied.  When no MD5 is specified, we are unable to determine if transmission errors occurred, and Amazon S3 must assume that the object has been correctly transmitted. Based on our investigation with both internal and external customers, the small amount of traffic received by this particular load balancer, and the intermittent nature of the above issue on this one load balancer, this appears to have impacted a very small portion of PUTs during this time frame.

One of the things we'll do is improve our logging of requests with MD5s, so that we can look for anomalies in their 400 error rates.  Doing this will allow us to provide more proactive notification on potential transmission issues in the future, for customers who use MD5s and those who do not. In addition to taking the actions noted above, we encourage all of our customers to take advantage of mechanisms designed to protect their applications from incorrect data transmission.  For all PUT requests, Amazon S3 computes its own MD5, stores it with the object, and then returns the computed MD5 as part of the PUT response code in the ETag.  By validating the ETag returned in the response, customers can verify that Amazon S3 received the correct bytes even if the Content MD5 header wasn't specified in the PUT request.  Because network transmission errors can occur at any point between the customer and Amazon S3, we recommend that all customers use the Content-MD5 header and/or validate the ETag returned on a PUT request to ensure that the object was correctly transmitted.  This is a best practice that we'll emphasize more heavily in our documentation to help customers build applications that can handle this situation.</blockquote>

]]></description>
<dc:subject>aws s3 outages postmortems load-balancing data-corruption corruption failure md5 hashing hashes</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:7067b5a9a1e4/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aws"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:s3"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:outages"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:postmortems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:load-balancing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-corruption"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:corruption"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:failure"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:md5"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashes"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://eprint.iacr.org/2020/014">
    <title>SHA-1 is a Shambles - First Chosen-Prefix Collision on SHA-1 and Application to the PGP Web of Trust</title>
    <dc:date>2020-01-07T15:08:10+00:00</dc:date>
    <link>https://eprint.iacr.org/2020/014</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>
Abstract: The SHA-1 hash function was designed in 1995 and has been widely used during two decades. A theoretical collision attack was first proposed in 2004 [WYY05], but due to its high complexity it was only implemented in practice in 2017, using a large GPU cluster [SBK+17]. More recently, an almost practical chosen-prefix collision attack against SHA-1 has been proposed [LP19]. This more powerful attack allows to build colliding messages with two arbitrary prefixes, which is much more threatening for real protocols.

In this paper, we report the first practical implementation of this attack, and its impact on real-world security with a PGP/GnuPG impersonation attack. We managed to significantly reduce the complexity of collisions attack against SHA-1: on an Nvidia GTX 970, identical-prefix collisions can now be computed with a complexity of 2^61.2 rather than 2^64.7, and chosen-prefix collisions with a complexity of 2^63.4 rather than 2^67.1. When renting cheap GPUs, this translates to a cost of 11k US$ for a collision, and 45k US$ for a chosen-prefix collision, within the means of academic researchers. Our actual attack required two months of computations using 900 Nvidia GTX 1060 GPUs (we paid 75k US$ because GPU prices were higher, and we wasted some time preparing the attack).

Therefore, the same attacks that have been practical on MD5 since 2009 are now practical on SHA-1. In particular, chosen-prefix collisions can break signature schemes and handshake security in secure channel protocols (TLS, SSH). We strongly advise to remove SHA-1 from those type of applications as soon as possible. We exemplify our cryptanalysis by creating a pair of PGP/GnuPG keys with different identities, but colliding SHA-1 certificates. A SHA-1 certification of the first key can therefore be transferred to the second key, leading to a forgery. This proves that SHA-1 signatures now offers virtually no security in practice. The legacy branch of GnuPG still uses SHA-1 by default for identity certifications, but after notifying the authors, the modern branch now rejects SHA-1 signatures (the issue is tracked as CVE-2019-14855).</blockquote>

(Via Tony Finch)]]></description>
<dc:subject>via:fanf security sha sha-1 crypto hashes hashing pgp gpg collisions</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:468127bda2ca/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:fanf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:security"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sha"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sha-1"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:crypto"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:pgp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:gpg"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:collisions"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html">
    <title>XXH3</title>
    <dc:date>2019-03-19T10:51:56+00:00</dc:date>
    <link>http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA['a cross-over inspired by many other great hash algorithms, which proves substantially faster than existing variants of xxHash, across basically all dimensions.'
]]></description>
<dc:subject>hashing algorithms xxhash xxh3 checksums performance</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:abc914fca9e9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:xxhash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:xxh3"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:checksums"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/multiformats/multihash">
    <title>multiformats/multihash: Self describing hashes - for future proofing</title>
    <dc:date>2018-09-17T16:29:24+00:00</dc:date>
    <link>https://github.com/multiformats/multihash</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Prepend a hash ID to the hash output, as used in ipfs]]></description>
<dc:subject>ipfs hashing multihash crypto hashes sha</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:c9c746b195ae/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ipfs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:multihash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:crypto"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sha"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/">
    <title>Fibonacci Hashing: The Optimization that the World Forgot (or: a Better Alternative to Integer Modulo)</title>
    <dc:date>2018-06-18T10:23:24+00:00</dc:date>
    <link>https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>Turns out I was wrong. This is a big one. And everyone should be using it. Hash tables should not be prime number sized and they should not use an integer modulo to map hashes into slots. Fibonacci hashing is just better. Yet somehow nobody is using it and lots of big hash tables (including all the big implementations of std::unordered_map) are much slower than they should be because they don’t use Fibonacci Hashing.</blockquote>

Apparently this is binary multiplicative hashing, and Google's brotli, webp, and Snappy libs all use a constant derived heuristically from a compression test corpus along the same lines (see comments).

(Via Michael Fogleman)]]></description>
<dc:subject>algorithms hashing hash fibonacci golden-ratio coding hacks brotli webp snappy hash-tables hashmaps load-distribution</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:9fbbdd34c27e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:fibonacci"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:golden-ratio"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:coding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hacks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:brotli"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:webp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:snappy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hash-tables"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashmaps"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:load-distribution"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://hpc.ac.upc.edu/PDFs/dir05/file004529.pdf">
    <title>_Random Slicing: Efficient and Scalable Data Placement for Large-Scale Storage Systems_, ACM Transactions on Storage, July 2014</title>
    <dc:date>2018-05-29T09:53:48+00:00</dc:date>
    <link>http://hpc.ac.upc.edu/PDFs/dir05/file004529.pdf</link>
    <dc:creator>jm</dc:creator><description><![CDATA['The ever-growing amount of data requires highly scalable storage solutions. The most flexible approach is to use storage pools that can be expanded and scaled down by adding or removing storage devices. To make this approach usable, it is necessary to provide a solution to locate data items in such a dynamic environment. This article presents and evaluates the Random Slicing strategy, which incorporates lessons learned from table-based, rule-based, and pseudo-randomized hashing strategies and is able to provide a simple and efficient strategy that scales up to handle exascale data. Random Slicing keeps a small table with information about previous storage system insert and remove operations, drastically reducing the required amount of randomness while delivering a perfect load distribution.']]></description>
<dc:subject>randomness architecture algorithms storage hashing slicing scaling</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:8a9fa65f6c59/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:randomness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:storage"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:slicing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/google/highwayhash">
    <title>google/highwayhash: Fast strong hash functions: SipHash/HighwayHash</title>
    <dc:date>2018-01-12T13:43:51+00:00</dc:date>
    <link>https://github.com/google/highwayhash</link>
    <dc:creator>jm</dc:creator><description><![CDATA[HighwayHash: 'We have devised a new way of mixing inputs with AVX2 multiply and permute instructions. The multiplications are 32x32 -> 64 bits and therefore infeasible to reverse. Permuting equalizes the distribution of the resulting bytes. The internal state occupies four 256-bit AVX2 registers. Due to limitations of the instruction set, the registers are partitioned into two 512-bit halves that remain independent until the reduce phase. The algorithm outputs 64 bit digests or up to 256 bits at no extra cost. In addition to high throughput, the algorithm is designed for low finalization cost. The result is more than twice as fast as SipTreeHash.

We also provide an SSE4.1 version (80% as fast for large inputs and 95% as fast for short inputs), an implementation for VSX on POWER and a portable version (10% as fast). A third-party ARM implementation is referenced below.

Statistical analyses and preliminary cryptanalysis are given in https://arxiv.org/abs/1612.06257.'

(via Tony Finch)]]></description>
<dc:subject>siphash highwayhash via:fanf hashing hashes algorithms mac google hash</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:c96748eca1a7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:siphash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:highwayhash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:fanf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:mac"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:google"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hash"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.engadget.com/2017/11/10/the-naked-truth-about-facebook-s-revenge-porn-tool/">
    <title>The naked truth about Facebook’s revenge porn tool</title>
    <dc:date>2017-11-10T21:27:42+00:00</dc:date>
    <link>https://www.engadget.com/2017/11/10/the-naked-truth-about-facebook-s-revenge-porn-tool/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[This is absolutely spot on.

<blockquote>
If Facebook wanted to implement a truly trusted system for revenge porn victims, they could put the photo hashing on the user side of things -- so only the hash is transferred to Facebook. To verify the claim that the image is truly a revenge porn issue, the victim could have the images verified through a trusted revenge porn advocacy organization. Theoretically, the victim then would have a verified, privacy-safe version of the photo, and a hash that could be also sent to Google and other sites.
</blockquote>

]]></description>
<dc:subject>facebook privacy hashing pictures images revenge-porn abuse via:jwz</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:e9bd8d39864c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:facebook"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:privacy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:pictures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:images"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:revenge-porn"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:abuse"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:jwz"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.theguardian.com/technology/2017/nov/07/facebook-revenge-porn-nude-photos">
    <title>Facebook asks users for nude photos in project to combat revenge porn</title>
    <dc:date>2017-11-08T09:44:37+00:00</dc:date>
    <link>https://www.theguardian.com/technology/2017/nov/07/facebook-revenge-porn-nude-photos</link>
    <dc:creator>jm</dc:creator><description><![CDATA[The photos are hashed, server-side, using the PhotoDNA hashing algorithm.  This would have been way way better if it ran locally, on users' phones, instead though.  Interesting to note that PhotoDNA claims to have a "1 in 10 billion" false positive rate according to https://www.itu.int/en/cop/case-studies/Documents/ICMEC_PhotoDNA.PDF]]></description>
<dc:subject>photodna hashing images facebook revenge-porn messenger nudes photos</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:f9f8eec39539/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:photodna"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:images"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:facebook"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:revenge-porn"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:messenger"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:nudes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:photos"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://stackoverflow.com/questions/843972/image-comparison-fast-algorithm/844113#844113">
    <title>Image comparison algorithms</title>
    <dc:date>2017-04-12T21:22:06+00:00</dc:date>
    <link>http://stackoverflow.com/questions/843972/image-comparison-fast-algorithm/844113#844113</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Awesome StackOverflow answer for detecting "similar" images -- promising approach to reimplement ffffound's similarity feature in mltshp, maybe]]></description>
<dc:subject>algorithms hashing comparison diff images similarity search ffffound mltshp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:7cb94c5de107/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:comparison"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:diff"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:images"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:similarity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:search"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ffffound"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:mltshp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://accidentallyquadratic.tumblr.com/post/153545455987/rust-hash-iteration-reinsertion">
    <title>Accidentally Quadratic — Rust hash iteration+reinsertion</title>
    <dc:date>2016-11-24T20:47:47+00:00</dc:date>
    <link>http://accidentallyquadratic.tumblr.com/post/153545455987/rust-hash-iteration-reinsertion</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>It was recently discovered that some surprising operations on Rust’s standard hash table types could go quadratic.</blockquote>

Quite a nice unexpected accidental detour into O(n^2)
]]></description>
<dc:subject>big-o hashing robin-hood-hashing siphash algorithms hashtables rust</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:927f9d69dc79/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:big-o"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:robin-hood-hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:siphash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashtables"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:rust"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://gwolf.org/node/4070">
    <title>Stop it with short PGP key IDs!</title>
    <dc:date>2016-06-08T11:03:44+00:00</dc:date>
    <link>http://gwolf.org/node/4070</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>What happened today? We still don't really know, but it seems we found a first potentially malicious collision — that is, the first "nonacademic" case. Enrico found two keys sharing the 9F6C6333 short ID, apparently belonging to the same person (as would be the case of Asheesh, mentioned above). After contacting Gustavo, though, he does not know about the second — That is, it can be clearly regarded as an impersonation attempt. Besides, what gave away this attempt are the signatures it has: Both keys are signed by what appears to be the same three keys: B29B232A, F2C850CA and 789038F2. Those three keys are not (yet?) uploaded to the keyservers, though... But we can expect them to appear at any point in the future. We don't know who is behind this, or what his purpose is. We just know this looks very evil.
Now, don't panic: Gustavo's key is safe. Same for his certifiers, Marga, Agustín and Maxy. It's just a 32-bit collision. So, in principle, the only parties that could be cheated to trust the attacker are humans, right? Nope.
Enrico tested on the PGP pathfinder & key statistics service, a keyserver that finds trust paths between any two arbitrary keys in the strong set. Surprise: The pathfinder works on the short key IDs, even when supplied full fingerprints. So, it turns out I have three faked trust paths into our impostor.</blockquote>

]]></description>
<dc:subject>pgp gpg keys collisions hashing security debian</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:67ea6e3fe421/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:pgp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:gpg"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:keys"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:collisions"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:security"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:debian"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://en.m.wikipedia.org/wiki/Rendezvous_hashing">
    <title>Rendezvous hashing - Wikipedia, the free encyclopedia</title>
    <dc:date>2016-04-13T14:01:11+00:00</dc:date>
    <link>https://en.m.wikipedia.org/wiki/Rendezvous_hashing</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>
Rendezvous or Highest Random Weight (HRW) hashing[1][2] is an algorithm that allows clients to achieve distributed agreement on a set of k options out of a possible set of n options. A typical application is when clients need to agree on which sites (or proxies) objects are assigned to. When k is 1, it subsumes the goals of consistent hashing, using an entirely different method.</blockquote>

]]></description>
<dc:subject>hrw hashing hashes consistent-hashing rendezvous-hashing algorithms discovery distributed-computing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:8be4d585c6d4/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hrw"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:consistent-hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:rendezvous-hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:distributed-computing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://blake2.net/blake2.pdf">
    <title>BLAKE2: simpler, smaller, fast as MD5</title>
    <dc:date>2016-04-07T22:51:11+00:00</dc:date>
    <link>https://blake2.net/blake2.pdf</link>
    <dc:creator>jm</dc:creator><description><![CDATA['We present the cryptographic hash function BLAKE2, an improved version
of the SHA-3 finalist BLAKE optimized for speed in software. Target applications include
cloud storage, intrusion detection, or version control systems. BLAKE2 comes
in two main flavors: BLAKE2b is optimized for 64-bit platforms, and BLAKE2s for
smaller architectures. On 64-bit platforms, BLAKE2 is often faster than MD5, yet provides
security similar to that of SHA-3. We specify parallel versions BLAKE2bp and
BLAKE2sp that are up to 4 and 8 times faster, by taking advantage of SIMD and/or
multiple cores. BLAKE2 has more benefits than just speed: BLAKE2 uses up to 32%
less RAM than BLAKE, and comes with a comprehensive tree-hashing mode as well
as an efficient MAC mode.']]></description>
<dc:subject>crypto hash blake2 hashing blake algorithms sha1 sha3 simd performance mac</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:33cb0a51f577/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:crypto"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:blake2"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:blake"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sha1"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sha3"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:simd"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:mac"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.johndcook.com/blog/2016/01/30/general-birthday-problem/">
    <title>The general birthday problem</title>
    <dc:date>2016-02-01T11:03:25+00:00</dc:date>
    <link>http://www.johndcook.com/blog/2016/01/30/general-birthday-problem/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Good explanation and scipy code for the birthday paradox and hash collisions]]></description>
<dc:subject>hashing hashes collisions birthday-problem birthday-paradox coding probability statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:5e19813a6fb5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:collisions"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:birthday-problem"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:birthday-paradox"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:coding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:probability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://blog.silentsignal.eu/2015/06/10/poisonous-md5-wolves-among-the-sheep/">
    <title>AV vendors still relying on MD5 to identify malware</title>
    <dc:date>2015-06-10T15:07:42+00:00</dc:date>
    <link>http://blog.silentsignal.eu/2015/06/10/poisonous-md5-wolves-among-the-sheep/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[oh dear. I can see how this happened -- in many cases they may not still have samples to derive new sums from :(]]></description>
<dc:subject>md5 hashing antivirus malware security via:fanf bugs</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:11ef4e54eeb8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:md5"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:antivirus"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:malware"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:security"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:fanf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bugs"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/trendmicro/tlsh">
    <title>Trend Micro Locality Sensitive Hash</title>
    <dc:date>2015-05-18T12:59:31+00:00</dc:date>
    <link>https://github.com/trendmicro/tlsh</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>a fuzzy matching library. Given a byte stream with a minimum length 
of 512 bytes, TLSH generates a hash value which can be used for similarity 
comparisons.  Similar objects will have similar hash values which allows for 
the detection of similar objects by comparing their hash values.  Note that 
the byte stream should have a sufficient amount of complexity.  For example, 
a byte stream of identical bytes will not generate a hash value.</blockquote>

Paper here: https://drive.google.com/file/d/0B6FS3SVQ1i0GTXk5eDl3Y29QWlk/edit

via adulau]]></description>
<dc:subject>nilsimsa sdhash ssdeep locality-sensitive hashing algorithm hashes trend-micro tlsh hash fuzzy-matching via:adulau</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:35798e024e53/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:nilsimsa"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sdhash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ssdeep"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:locality-sensitive"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:trend-micro"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tlsh"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:fuzzy-matching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:adulau"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.pdl.cmu.edu/PDL-FTP/FS/cuckoo-conext2014.pdf">
    <title>&quot;Cuckoo Filter: Practically Better Than Bloom&quot;</title>
    <dc:date>2015-03-09T14:29:55+00:00</dc:date>
    <link>http://www.pdl.cmu.edu/PDL-FTP/FS/cuckoo-conext2014.pdf</link>
    <dc:creator>jm</dc:creator><description><![CDATA['We propose a new data structure called the cuckoo filter that can replace Bloom filters for approximate set membership
tests. Cuckoo filters support adding and removing items dynamically while achieving even higher performance than
Bloom filters. For applications that store many items and target moderately low false positive rates, cuckoo filters have
lower space overhead than space-optimized Bloom filters. Our experimental results also show that cuckoo filters outperform previous data structures that extend Bloom filters to support deletions substantially in both time and space.']]></description>
<dc:subject>algorithms paper bloom-filters cuckoo-filters cuckoo-hashing data-structures false-positives big-data probabilistic hashing set-membership approximation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:a7df31b55f43/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:paper"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bloom-filters"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cuckoo-filters"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cuckoo-hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-structures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:false-positives"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:probabilistic"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:set-membership"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:approximation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://davidjohnstone.net/pages/hash-collision-probability">
    <title>What's the probability of a hash collision?</title>
    <dc:date>2014-11-18T11:50:47+00:00</dc:date>
    <link>http://davidjohnstone.net/pages/hash-collision-probability</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Handy calculator]]></description>
<dc:subject>probability hashing hashes collision risk md5 sha sha1 calculators</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:7941face31b6/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:probability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:collision"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:risk"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:md5"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sha"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sha1"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:calculators"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://natmchugh.blogspot.co.uk/2014/10/how-i-created-two-images-with-same-md5.html">
    <title>How I created two images with the same MD5 hash</title>
    <dc:date>2014-11-04T18:14:08+00:00</dc:date>
    <link>http://natmchugh.blogspot.co.uk/2014/10/how-i-created-two-images-with-same-md5.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>I found that I was able to run the algorithm in about 10 hours on an AWS large GPU instance bringing it in at about $0.65 plus tax.</blockquote>

Bottom line: MD5 is feasibly attackable by pretty much anyone now.]]></description>
<dc:subject>crypto images md5 security hashing collisions ec2 via:hn</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:3b301b6423b9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:crypto"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:images"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:md5"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:security"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:collisions"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ec2"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:hn"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://corte.si/%2Fposts/code/bloom-filter-rules-of-thumb/index.html">
    <title>3 Rules of thumb for Bloom Filters</title>
    <dc:date>2014-08-25T21:06:48+00:00</dc:date>
    <link>http://corte.si/%2Fposts/code/bloom-filter-rules-of-thumb/index.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>I often need to do rough back-of-the-envelope reasoning about things, and I find that doing a bit of work to develop an intuition for how a new technique performs is usually worthwhile. So, here are three broad rules of thumb to remember when discussing Bloom filters down the pub:

1 - One byte per item in the input set gives about a 2% false positive rate.

2 - The optimal number of hash functions is about 0.7 times the number of bits per item.

3 - The number of hashes dominates performance.
</blockquote>

But see also http://stackoverflow.com/a/9554448 , http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf (thanks Tony Finch!)
]]></description>
<dc:subject>bloom-filters algorithm probabilistic rules reasoning via:norman-maurer false-positives hashing coding</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:b369d6a01322/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bloom-filters"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:probabilistic"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:rules"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:reasoning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:norman-maurer"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:false-positives"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:coding"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://matthewcasperson.blogspot.ie/2013/11/minhash-for-dummies.html">
    <title>MinHash for dummies</title>
    <dc:date>2014-08-05T10:28:58+00:00</dc:date>
    <link>http://matthewcasperson.blogspot.ie/2013/11/minhash-for-dummies.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[A Java-oriented practical intro to the MinHash duplicate-detection shingling algo

]]></description>
<dc:subject>shingling algorithms minhash hashing duplicates duplicate-detection fuzzy-matching java</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:37541529ed34/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:shingling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:minhash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:duplicates"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:duplicate-detection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:fuzzy-matching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:java"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://medium.com/@vijayp/f6bc289679a1">
    <title>NYC generates hash-anonymised data dump, which gets reversed</title>
    <dc:date>2014-06-25T15:36:55+00:00</dc:date>
    <link>https://medium.com/@vijayp/f6bc289679a1</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>There are about 1000*26**3 = 21952000 or 22M possible medallion numbers. So, by calculating the md5 hashes of all these numbers (only 24M!), one can completely deanonymise the entire data. Modern computers are fast: so fast that computing the 24M hashes took less than 2 minutes.</blockquote>

(via Bruce Schneier)

The better fix is a HMAC (see http://benlog.com/2008/06/19/dont-hash-secrets/ ), or just to assign opaque IDs instead of hashing.]]></description>
<dc:subject>hashing sha1 md5 bruce-schneier anonymization deanonymization security new-york nyc taxis data big-data hmac keyed-hashing salting</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:86f2bc539afe/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sha1"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:md5"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bruce-schneier"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:anonymization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:deanonymization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:security"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:new-york"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:nyc"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:taxis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hmac"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:keyed-hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:salting"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/pdf/1406.2294v1.pdf">
    <title>Jump Consistent Hash: A Fast, Minimal Memory, Consistent Hash Algorithm</title>
    <dc:date>2014-06-17T14:19:13+00:00</dc:date>
    <link>http://arxiv.org/pdf/1406.2294v1.pdf</link>
    <dc:creator>jm</dc:creator><description><![CDATA['a fast, minimal memory, consistent hash algorithm that can be expressed in about 5 lines of code. In comparison to the algorithm of Karger et al., jump consistent hash requires no storage, is faster, and does a better job of evenly dividing the key space among the buckets and of evenly dividing the workload when the number of buckets changes. Its main limitation is that the buckets must be numbered sequentially, which makes it more suitable for data storage applications than for distributed web caching.'

Implemented in Guava.  This is also noteworthy:

'Google has not applied for patent protection for this algorithm, and, as of this writing, has no plans to. Rather, it wishes to contribute this algorithm to the community.']]></description>
<dc:subject>hashing consistent-hashing google guava memory algorithms sharding</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:7990efcb5b77/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:consistent-hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:google"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:guava"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:memory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sharding"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.awsarchitectureblog.com/2014/04/shuffle-sharding.html">
    <title>Shuffle Sharding</title>
    <dc:date>2014-04-15T10:59:26+00:00</dc:date>
    <link>http://www.awsarchitectureblog.com/2014/04/shuffle-sharding.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Colm MacCarthaigh writes about a simple sharding/load-balancing algorithm which uses randomized instance selection and optional additional compartmentalization.  See also: consistent hashing, and http://aphyr.com/posts/278-timelike-2-everything-fails-all-the-time]]></description>
<dc:subject>hashing load-balancing sharding partitions dist-sys distcomp architecture coding</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:22d91731447c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:load-balancing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sharding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:partitions"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dist-sys"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:distcomp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:coding"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://news.ycombinator.com/item?id=7506774">
    <title>Redis adds support for HyperLogLog</title>
    <dc:date>2014-04-02T10:38:41+00:00</dc:date>
    <link>https://news.ycombinator.com/item?id=7506774</link>
    <dc:creator>jm</dc:creator><description><![CDATA[good comment thread on HN, discussing hlld and bloomd as well]]></description>
<dc:subject>hll bloom-filters hyperloglog redis data-structures estimation cardinality probabilistic probability hashing random</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:1231febb74e0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hll"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bloom-filters"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hyperloglog"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:redis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-structures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cardinality"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:probabilistic"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:probability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:random"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.eecs.harvard.edu/~michaelm/postscripts/esa2006b.pdf">
    <title>_An Improved Construction For Counting Bloom Filters_</title>
    <dc:date>2013-09-18T21:43:29+00:00</dc:date>
    <link>http://www.eecs.harvard.edu/~michaelm/postscripts/esa2006b.pdf</link>
    <dc:creator>jm</dc:creator><description><![CDATA['A counting Bloom filter (CBF) generalizes a Bloom filter data structure so as to allow membership queries on a set that can be changing dynamically via insertions and deletions. As with a Bloom filter, a CBF obtains space savings by allowing false positives. We provide a simple hashing-based alternative based on d-left hashing called a d-left CBF (dlCBF). The dlCBF offers the same functionality as a CBF, but uses less space, generally saving a factor of two or more. We describe the construction of dlCBFs, provide an analysis, and demonstrate their effectiveness experimentally']]></description>
<dc:subject>bloom-filter data-structures algorithms counting cbf storage false-positives d-left-hashing hashing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:77b7dfebb1ae/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bloom-filter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-structures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:counting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cbf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:storage"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:false-positives"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:d-left-hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/cscotta/recordinality">
    <title>Recordinality</title>
    <dc:date>2013-08-20T20:41:05+00:00</dc:date>
    <link>https://github.com/cscotta/recordinality</link>
    <dc:creator>jm</dc:creator><description><![CDATA[a new, and interesting, sketching algorithm, with a Java implementation:

<blockquote>Recordinality is unique in that it provides cardinality estimation like HLL, but also offers "distinct value sampling." This means that Recordinality can allow us to fetch a random sample of distinct elements in a stream, invariant to cardinality. Put more succinctly, given a stream of elements containing 1,000,000 occurrences of 'A' and one occurrence each of 'B' - 'Z', the probability of any letter appearing in our sample is equal. Moreover, we can also efficiently store the number of times elements in our distinct sample have been observed. This can help us to understand the distribution of occurrences of elements in our stream. With it, we can answer questions like "do the elements we've sampled present in a power law-like pattern, or is the distribution of occurrences relatively even across the set?"
</blockquote>

]]></description>
<dc:subject>sketching coding algorithms recordinality cardinality estimation hll hashing murmurhash java</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:56d75229aca1/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sketching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:coding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:recordinality"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cardinality"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hll"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:murmurhash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:java"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://courses.csail.mit.edu/6.851/spring12/lectures/">
    <title>Lectures in Advanced Data Structures (6.851)</title>
    <dc:date>2013-04-29T10:32:24+00:00</dc:date>
    <link>http://courses.csail.mit.edu/6.851/spring12/lectures/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Good lecture notes on the current state of the art in data structure research.

<blockquote>Data structures play a central role in modern computer science. You interact with data structures even more often than with algorithms (think Google, your mail server, and even your network routers). In addition, data structures are essential building blocks in obtaining efficient algorithms. This course covers major results and current directions of research in data structures:

TIME TRAVEL We can remember the past efficiently (a technique called persistence), but in general it's difficult to change the past and see the outcomes on the present (retroactivity). So alas, Back To The Future isn't really possible.
GEOMETRY When data has more than one dimension (e.g. maps, database tables).
DYNAMIC OPTIMALITY Is there one binary search tree that's as good as all others? We still don't know, but we're close.
MEMORY HIERARCHY Real computers have multiple levels of caches. We can optimize the number of cache misses, often without even knowing the size of the cache.
HASHING Hashing is the most used data structure in computer science. And it's still an active area of research.
INTEGERS Logarithmic time is too easy. By careful analysis of the information you're dealing with, you can often reduce the operation times substantially, sometimes even to constant. We will also cover lower bounds that illustrate when this is not possible.
DYNAMIC GRAPHS A network link went down, or you just added or deleted a friend in a social network. We can still maintain essential information about the connectivity as it changes.
STRINGS Searching for phrases in giant text (think Google or DNA).
SUCCINCT Most “linear size” data structures you know are much larger than they need to be, often by an order of magnitude. Some data structures require almost no space beyond the raw data but are still fast (think heaps, but much cooler).
</blockquote>

(via Tim Freeman)]]></description>
<dc:subject>data-structures lectures mit video data algorithms coding csail strings integers hashing sorting bst memory</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:5c72d87f4ea4/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-structures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:lectures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:mit"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:video"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:coding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:csail"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:strings"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:integers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sorting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bst"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:memory"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://jeremydhoon.github.com/2013/03/19/abusing-hash-kernels-for-wildly-unprincipled-machine-learning/">
    <title>Abusing hash kernels for wildly unprincipled machine learning</title>
    <dc:date>2013-04-04T23:01:51+00:00</dc:date>
    <link>http://jeremydhoon.github.com/2013/03/19/abusing-hash-kernels-for-wildly-unprincipled-machine-learning/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[what, is this the first time our spam filtering approach of hashing a giant feature space is hitting mainstream machine learning?  that can't be right!]]></description>
<dc:subject>ai machine-learning python data hashing features feature-selection anti-spam spamassassin</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:28d641a0b96e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:features"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:feature-selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:anti-spam"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:spamassassin"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://stackoverflow.com/questions/14010906/given-that-hashmaps-in-jdk1-6-and-above-cause-problems-with-multi-threading-how">
    <title>java - Given that HashMaps in jdk1.6 and above cause problems with multi-threading, how should I fix my code - Stack Overflow</title>
    <dc:date>2013-02-01T11:49:23+00:00</dc:date>
    <link>http://stackoverflow.com/questions/14010906/given-that-hashmaps-in-jdk1-6-and-above-cause-problems-with-multi-threading-how</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Massive Java concurrency fail in recent 1.6 and 1.7 JDK releases -- the java.util.HashMap type now spin-locks on an AtomicLong in its constructor.

Here's the response from the author: 'I'll acknowledge right up front that the initialization of hashSeed is a bottleneck but it is not one we expected to be a problem since it only happens once per Hash Map instance. For this code to be a bottleneck you would have to be creating hundreds or thousands of hash maps per second. This is certainly not typical. Is there really a valid reason for your application to be doing this? How long do these hash maps live?'

Oh dear.  Assumptions of "typical" like this are not how you design a fundamental data structure.  fail.   For now there is a hacky reflection-based workaround, but this is lame and needs to be fixed as soon as possible. (Via cscotta)]]></description>
<dc:subject>java hashmap concurrency bugs fail security hashing jdk via:cscotta</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:8b7f56ad583d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:java"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashmap"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:concurrency"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bugs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:fail"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:security"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:jdk"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:cscotta"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://fail0verflow.com/blog/2013/megafail.html">
    <title>fail0verflow ::</title>
    <dc:date>2013-01-23T09:37:29+00:00</dc:date>
    <link>http://fail0verflow.com/blog/2013/megafail.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Excellent demo of how use of a block cipher with a known secret key makes an insecure MAC.  "In short, CBC-MAC is a Message Authentication Code, not a strong hash function. While MACs can be built out of hash functions (e.g. HMAC), and hash functions can be built out of block ciphers like AES, not all MACs are also hash functions. CBC-MAC in particular is completely unsuitable for use as a hash function, because it only allows two parties with knowledge of a particular secret key to securely transmit messages between each other. Anyone with knowledge of that key can forge the messages in a way that keeps the MAC (“hash value”) the same. All you have to do is run the forged message through CBC-MAC as usual, then use the AES decryption operation on the original hash value to find the last intermediate state. XORing this state with the CBC-MAC for the forged message yields a new block of data which, when appended to the forged message, will cause it to have the original hash value. Because the input is taken backwards, you can either modify the first block of the file, or just run the hash function backwards until you reach the block that you want to modify. You can make a forged file pass the hash check as long as you can modify an arbitrary aligned 16-byte block in it."]]></description>
<dc:subject>crypto hashing security cbc mac sha1 aes</dc:subject>
<dc:identifier>https://pinboard.in/u:jm/b:dd79c7b9bdc3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:crypto"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:security"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cbc"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:mac"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sha1"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aes"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.131002.net/siphash/">
    <title>SipHash: a fast short-input PRF</title>
    <dc:date>2012-10-28T21:33:51+00:00</dc:date>
    <link>https://www.131002.net/siphash/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[a family of pseudorandom functions optimized for short inputs. Target applications include network traffic authentication and hash-table lookups protected against hash-flooding denials-of-service attacks. 

SipHash is simpler than MACs based on universal hashing, and faster on short inputs. 

Compared to dedicated designs for hash-table lookup, SipHash has well-defined security goals and competitive performance. For example, SipHash processes a 16-byte input with a fresh key in 140 cycles on an AMD FX-8150 processor, which is much faster than state-of-the-art MACs.]]></description>
<dc:subject>hashing siphash djb security algorithms</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:ed75c7d5a6ba/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:siphash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:djb"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:security"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/cloudera/impala/blob/master/be/src/experiments/hashing/cache-hash-table.h">
    <title>experimental CPU-cache-aware hash table implementations in Cloudera's Impala</title>
    <dc:date>2012-10-24T16:38:11+00:00</dc:date>
    <link>https://github.com/cloudera/impala/blob/master/be/src/experiments/hashing/cache-hash-table.h</link>
    <dc:creator>jm</dc:creator><description><![CDATA[via Todd Lipcon -- https://twitter.com/tlipcon/status/261113382642532352

'another cool piece of cloudera impala source: cpu-cache-aware hash table implementations by @jackowayed'.  'L1-sized hash table that hopes to use cache well. Each bucket is a chunk list of tuples. Each chunk is a cache line.']]></description>
<dc:subject>hashing hash-tables data-structures performance c++ l1 cache cpu</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:1122a37fd9d7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hash-tables"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-structures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:c++"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:l1"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cache"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cpu"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://blog.headius.com/2012/09/avoiding-hash-lookups-in-ruby.html">
    <title>Avoiding Hash Lookups in a Ruby Implementation</title>
    <dc:date>2012-09-05T09:13:05+00:00</dc:date>
    <link>http://blog.headius.com/2012/09/avoiding-hash-lookups-in-ruby.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA['If I were to sum up the past 6 years I've spent optimizing JRuby it would be with the following phrase: Get Rid Of Hash Lookups.'

This has been a particular theme of some recent optimization hacks I've been working on. Hashes may be O(1) to read, on average, but that doesn't necessarily mean they're the right tool for performance...

(via Declan McGrath)]]></description>
<dc:subject>via:declanmcgrath hash optimization ruby performance jruby hashing data-structures big-o optimisation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:f9de450427ec/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:declanmcgrath"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:optimization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ruby"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:jruby"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-structures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:big-o"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:optimisation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.trailofbits.com/resources/flame-md5.pdf">
    <title>Analyzing Flame's MD5 Collision Attack [slides, PDF]</title>
    <dc:date>2012-06-11T23:36:36+00:00</dc:date>
    <link>http://www.trailofbits.com/resources/flame-md5.pdf</link>
    <dc:creator>jm</dc:creator><description><![CDATA[really detailed slide deck by Alex Sotirov, Co-Founder and Chief Scientist, Trail of Bits, Inc.  (via Tony Finch)  Plenty of security fail by MS, and also: PKI is clearly too hard]]></description>
<dc:subject>via:fanf flame security malware md5 collisions hashing pki tls ssl microsoft</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:1e484697f020/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:fanf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:flame"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:security"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:malware"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:md5"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:collisions"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:pki"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tls"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ssl"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:microsoft"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://petermblair.com/fbl-n-gram-analyzer/">
    <title>feedback loop n-gram analyzer</title>
    <dc:date>2011-09-29T21:10:15+00:00</dc:date>
    <link>http://petermblair.com/fbl-n-gram-analyzer/</link>
    <dc:creator>jm</dc:creator><description><![CDATA['a simple parser of ARF compliant FBL complaints, which normalizes the email complaints and generates a 6-tuple n-gram version of the message. These n-grams are stored in a Redis database, keyed by the file in which they can be found. An inverse index also exists that allow you to find all messages containing a particular n-gram word.'
]]></description>
<dc:subject>anti-spam spam fbl feedback filtering n-grams similarity hashing redis searching</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:00bea3b79665/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:anti-spam"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:spam"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:fbl"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:feedback"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:filtering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:n-grams"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:similarity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:redis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:searching"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html">
    <title>Dr. Neal Krawetz explains perceptual hashing</title>
    <dc:date>2011-06-07T22:42:12+00:00</dc:date>
    <link>http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[ie. TinEye and other "images like this one" search engines.  nice explanation]]></description>
<dc:subject>algorithm images analysis programming dct hashing perceptual-hash tineye via:hn image</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:f0804de861e3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:images"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dct"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:perceptual-hash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tineye"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:hn"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:image"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://code.google.com/p/deeptoad/">
    <title>deeptoad - Project Hosting on Google Code</title>
    <dc:date>2010-11-30T23:29:17+00:00</dc:date>
    <link>http://code.google.com/p/deeptoad/</link>
    <dc:creator>jm</dc:creator><description><![CDATA['a (python) library and a tool to clusterize similar files using fuzzy hashing techniques. This project is inspired by the well known tool ssdeep.' Via Nelson]]></description>
<dc:subject>via:nelson deeptoad software open-source fuzzy hashing</dc:subject>
<dc:identifier>https://pinboard.in/u:jm/b:4b09934a1883/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:nelson"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:deeptoad"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:software"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:open-source"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:fuzzy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://corte.si/posts/code/bloom-filter-rules-of-thumb/index.html">
    <title>3 Rules of thumb for Bloom Filters</title>
    <dc:date>2010-11-09T00:08:21+00:00</dc:date>
    <link>http://corte.si/posts/code/bloom-filter-rules-of-thumb/index.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[good to know (via Jeremy)]]></description>
<dc:subject>via:jzawodny bloom-filters hashing algorithms coding tips false-positives</dc:subject>
<dc:identifier>https://pinboard.in/u:jm/b:a6801bafe8ec/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:jzawodny"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bloom-filters"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:coding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tips"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:false-positives"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://rdist.root.org/2009/10/29/stop-using-unsafe-keyed-hashes-use-hmac/">
    <title>Stop using unsafe keyed hashes, use HMAC</title>
    <dc:date>2009-10-30T22:23:02+00:00</dc:date>
    <link>http://rdist.root.org/2009/10/29/stop-using-unsafe-keyed-hashes-use-hmac/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[why HMAC is more secure than secret-suffix and secret-prefix keyed hashing.  good to know]]></description>
<dc:subject>hmac security crypto hashing md5 hashes sha256 sha1</dc:subject>
<dc:identifier>https://pinboard.in/u:jm/b:e18fe54cec21/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hmac"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:security"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:crypto"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:md5"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sha256"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sha1"/>
</rdf:Bag></taxo:topics>
</item>
</rdf:RDF>