<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (jm)</title>
    <link>https://pinboard.in/u:jm/public/</link>
    <description>recent bookmarks from jm</description>
    <items>
      <rdf:Seq>	<rdf:li rdf:resource="https://wthhyb.sacha.house/"/>
	<rdf:li rdf:resource="https://www.infoq.com/presentations/optimizing-java-app-kubernetes/"/>
	<rdf:li rdf:resource="https://www.citusdata.com/blog/2022/09/12/distributed-postgres-goes-full-open-source-with-citus/"/>
	<rdf:li rdf:resource="https://www.alexdebrie.com/posts/dynamodb-paper/"/>
	<rdf:li rdf:resource="https://github.com/apache/helix"/>
	<rdf:li rdf:resource="https://talawah.io/blog/extreme-http-performance-tuning-one-point-two-million/"/>
	<rdf:li rdf:resource="https://eng.lyft.com/operating-apache-kafka-clusters-24-7-without-a-global-ops-team-417813a5ce70"/>
	<rdf:li rdf:resource="http://perfdynamics.blogspot.com/2007/11/modeling-mythical-man-month.html"/>
	<rdf:li rdf:resource="https://apenwarr.ca/log/20190216"/>
	<rdf:li rdf:resource="https://www.dropbox.com/s/47xbjrkni9bx0g3/aurora2.pdf?dl=0"/>
	<rdf:li rdf:resource="https://github.com/awslabs/amazon-kinesis-scaling-utils"/>
	<rdf:li rdf:resource="https://aws.amazon.com/blogs/compute/running-high-scale-web-on-spot-instances/"/>
	<rdf:li rdf:resource="https://medium.com/netflix-techblog/netflix-edge-load-balancing-695308b5548c"/>
	<rdf:li rdf:resource="https://hackernoon.com/the-problems-with-dynamodb-auto-scaling-and-how-it-might-be-improved-a92029c8c10b"/>
	<rdf:li rdf:resource="https://groups.google.com/forum/#!msg/mechanical-sympathy/gchG_oQ_kQM/59BDMOdUAwAJ"/>
	<rdf:li rdf:resource="https://codahale.com/usl4j-and-you/"/>
	<rdf:li rdf:resource="https://engineering.ticketea.com/scaling-amazon-aurora-at-ticketea/?__s=gf36pf8g1gjugcqh6ppo"/>
	<rdf:li rdf:resource="http://tech.trivago.com/2017/01/25/learn-redis-the-hard-way-in-production/"/>
	<rdf:li rdf:resource="https://eng.uber.com/cherami/"/>
	<rdf:li rdf:resource="https://engineering.pinterest.com/blog/auto-scaling-pinterest"/>
	<rdf:li rdf:resource="https://twitter.com/frontstack/status/800889593855737856"/>
	<rdf:li rdf:resource="https://stripe.com/blog/service-discovery-at-stripe"/>
	<rdf:li rdf:resource="http://aseigneurin.github.io/2016/10/07/kafka-streams-scaling-up-or-down.html"/>
	<rdf:li rdf:resource="http://www.perfdynamics.com/Manifesto/USLscalability.html"/>
	<rdf:li rdf:resource="https://github.com/ifesdjeen/hashed-wheel-timer"/>
	<rdf:li rdf:resource="https://www.informatica.com/downloads/1568_high_perf_messaging_wp/Topics-in-High-Performance-Messaging.htm"/>
	<rdf:li rdf:resource="https://news.ycombinator.com/item?id=10608356"/>
	<rdf:li rdf:resource="http://blog.librato.com/posts/superchief"/>
	<rdf:li rdf:resource="http://highscalability.com/blog/2015/9/21/uber-goes-unconventional-using-driver-phones-as-a-backup-dat.html"/>
	<rdf:li rdf:resource="https://msol.io/blog/tech/2015/09/05/youre-probably-wrong-about-caching/"/>
	<rdf:li rdf:resource="https://docs.google.com/presentation/d/1OvJStE8aohGeI3y5BcYX8bBHwoHYCPu99A3KTTZElr0/edit#slide=id.gb74341dde_1_31"/>
	<rdf:li rdf:resource="https://www.youtube.com/watch?v=MKgJeqF1DHw"/>
	<rdf:li rdf:resource="http://blog.acolyer.org/2015/06/19/discretized-streams-fault-tolerant-stream-computing-at-scale/"/>
	<rdf:li rdf:resource="http://highscalability.com/blog/2015/6/8/leveraging-aws-to-build-a-scalable-data-pipeline.html"/>
	<rdf:li rdf:resource="http://www.benstopford.com/2015/04/28/elements-of-scale-composing-and-scaling-data-platforms/"/>
	<rdf:li rdf:resource="http://www.developer-tech.com/news/2014/jun/10/why-loggly-loves-apache-kafka-how-unbreakable-infinitely-scalable-messaging-makes-log-management-better/"/>
	<rdf:li rdf:resource="http://ferd.ca/lessons-learned-while-working-on-large-scale-server-software.html"/>
	<rdf:li rdf:resource="http://highscalability.com/blog/2015/3/30/how-we-scale-vividcortexs-backend-systems.html"/>
	<rdf:li rdf:resource="https://github.com/mmcgrana/services-engineering"/>
	<rdf:li rdf:resource="http://www.frankmcsherry.org/graph/scalability/cost/2015/01/15/COST.html"/>
	<rdf:li rdf:resource="http://www.awsarchitectureblog.com/2014/06/constant-work.html"/>
	<rdf:li rdf:resource="https://github.com/graphite-project/carbon/issues/235"/>
	<rdf:li rdf:resource="http://artsy.github.io/blog/2012/07/10/on-demand-jenkins-slaves-with-amazon-ec2/"/>
	<rdf:li rdf:resource="http://aws.amazon.com/blogs/aws/auto-scale-dynamodb-with-dynamic-dynamodb/"/>
	<rdf:li rdf:resource="http://googleresearch.blogspot.ie/2014/06/influential-papers-for-2013.html"/>
	<rdf:li rdf:resource="http://www.datacenterknowledge.com/archives/2014/06/25/google-dumps-mapreduce-favor-new-hyper-scale-analytics-system/"/>
	<rdf:li rdf:resource="http://bits.shutterstock.com/2014/05/22/stop-buying-load-balancers-and-start-controlling-your-traffic-flow-with-software/"/>
	<rdf:li rdf:resource="http://spark.apache.org/docs/latest/streaming-programming-guide.html#overview"/>
	<rdf:li rdf:resource="https://news.ycombinator.com/item?id=7711974"/>
	<rdf:li rdf:resource="https://code.facebook.com/posts/220956754772273/an-analysis-of-facebook-photo-caching/"/>
	<rdf:li rdf:resource="http://www.bailis.org/blog/scalable-atomic-visibility-with-ramp-transactions/"/>
	<rdf:li rdf:resource="http://www.erlang-factory.com/upload/presentations/558/efsf2012-whatsapp-scaling.pdf"/>
	<rdf:li rdf:resource="http://highscalability.com/blog/2014/2/5/littles-law-scalability-and-fault-tolerance-the-os-is-your-b.html"/>
	<rdf:li rdf:resource="http://www.inmobi.com/blog/2014/01/24/extending-graphites-mileage"/>
	<rdf:li rdf:resource="http://sigops.org/sosp/sosp13/papers/p33-david.pdf"/>
	<rdf:li rdf:resource="http://www.bailis.org/blog/non-blocking-transactional-atomicity/"/>
	<rdf:li rdf:resource="http://attentionshard.wordpress.com/2013/09/30/why-tellybug-moved-from-cassandra-to-amazon-dynamodb/"/>
	<rdf:li rdf:resource="http://www.loggly.com/behind-the-screens/"/>
	<rdf:li rdf:resource="http://db.disi.unitn.eu/pages/VLDBProgram/pdf/industry/p734-akidau.pdf"/>
	<rdf:li rdf:resource="https://blog.twitter.com/2013/new-tweets-per-second-record-and-how"/>
	<rdf:li rdf:resource="http://arstechnica.com/information-technology/2013/08/building-a-panopticon-the-evolution-of-the-nsas-xkeyscore/"/>
	<rdf:li rdf:resource="http://highscalability.com/blog/2013/7/8/the-architecture-twitter-uses-to-deal-with-150m-active-users.html"/>
	<rdf:li rdf:resource="https://www.facebook.com/notes/facebook-engineering/wormhole-pubsub-system-moving-data-through-space-and-time/10151504075843920"/>
	<rdf:li rdf:resource="http://www.slideshare.net/r39132/q-con-ny2013modernwebsitescalabilityfinal-22989785"/>
	<rdf:li rdf:resource="https://groups.google.com/forum/#!topic/mechanical-sympathy/ao44gonVdAY"/>
	<rdf:li rdf:resource="http://blog.cloudera.com/blog/2010/04/cap-confusion-problems-with-partition-tolerance/"/>
	<rdf:li rdf:resource="https://news.ycombinator.com/item?id=5653266"/>
	<rdf:li rdf:resource="http://highscalability.com/blog/2011/11/29/datasift-architecture-realtime-datamining-at-120000-tweets-p.html"/>
	<rdf:li rdf:resource="https://speakerdeck.com/mza/latencys-worst-nightmare-performance-tuning-tips-and-tricks"/>
	<rdf:li rdf:resource="http://highscalability.com/blog/2013/4/15/scaling-pinterest-from-0-to-10s-of-billions-of-page-views-a.html"/>
      </rdf:Seq>
    </items>
  </channel><item rdf:about="https://wthhyb.sacha.house/">
    <title>What the hell have you built.</title>
    <dc:date>2025-11-06T10:38:19+00:00</dc:date>
    <link>https://wthhyb.sacha.house/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[▪ Did you just pick things at random?
▪ Why is Redis talking to MongoDB?
▪ Why do you even use MongoDB?

A single-use-site update for the classic, now-12-year-old architecture shitpost]]></description>
<dc:subject>shitposting funny architecture riak redis mongodb ouch scalability</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:0420d47b34a3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:shitposting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:funny"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:riak"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:redis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:mongodb"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ouch"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.infoq.com/presentations/optimizing-java-app-kubernetes/">
    <title>Optimizing Java Apps on Kubernetes</title>
    <dc:date>2025-01-23T14:45:35+00:00</dc:date>
    <link>https://www.infoq.com/presentations/optimizing-java-app-kubernetes/</link>
    <dc:creator>jm</dc:creator><description><![CDATA["Optimizing Java Applications on Kubernetes: beyond the Basics": Bruno Borges, at the InfoQ Dev Summit Boston, discusses the strategies for enhancing Java application performance on Kubernetes, focusing on leveraging JVM ergonomics, and managing garbage collection processes.  Some interesting tips here.]]></description>
<dc:subject>kubernetes java eks resources ops scaling scalability gc optimization jvm</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:2c9522ab8d41/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:kubernetes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:java"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:eks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:resources"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:gc"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:optimization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:jvm"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.citusdata.com/blog/2022/09/12/distributed-postgres-goes-full-open-source-with-citus/">
    <title>Distributed Postgres goes full open source with Citus</title>
    <dc:date>2022-09-14T11:42:00+00:00</dc:date>
    <link>https://www.citusdata.com/blog/2022/09/12/distributed-postgres-goes-full-open-source-with-citus/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Postgres distributed scaler software now fully OSS]]></description>
<dc:subject>postgres citus oss scalability infrastructure</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:1f345ede6c20/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:postgres"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:citus"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:oss"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:infrastructure"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.alexdebrie.com/posts/dynamodb-paper/">
    <title>Key Takeaways from the DynamoDB Paper</title>
    <dc:date>2022-08-08T10:21:17+00:00</dc:date>
    <link>https://www.alexdebrie.com/posts/dynamodb-paper/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Alex DeBrie's commentary on the "10 years of DynamoDB" paper published recently by AWS.  Together with Marc Brooker's commentary (at https://brooker.co.za/blog/2022/07/12/dynamodb.html), this is a good review.]]></description>
<dc:subject>scalability scaling dynamodb aws storage services architecture</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:3f183f470778/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dynamodb"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aws"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:storage"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:services"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/apache/helix">
    <title>Apache Helix</title>
    <dc:date>2021-07-30T09:37:21+00:00</dc:date>
    <link>https://github.com/apache/helix</link>
    <dc:creator>jm</dc:creator><description><![CDATA[@KishoreBytes notes: "Helix [is] not well known but widely used at LinkedIn, Airbnb, Pinterest, Uber, Yahoo to build distributed systems. Helix is probably managing hundreds of thousands of servers today!"

It is "a generic cluster management framework used for automatic management of partitioned, replicated and distributed resources hosted on a cluster of nodes, [providing] the following features:

Automatic assignment of resource/partition to nodes;

Node failure detection and recovery;

Dynamic addition of Resources;

Dynamic addition of nodes to the cluster;

Pluggable distributed state machine to manage the state of a resource via state transitions;

Automatic load balancing and throttling of transitions"

Sounds handy for automatic shard-based scaling. Built on Zookeeper.]]></description>
<dc:subject>zookeeper helix sharding scalability scaling via:kishorebytes partitioning architecture</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:558ffe51f061/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:zookeeper"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:helix"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sharding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:kishorebytes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:partitioning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://talawah.io/blog/extreme-http-performance-tuning-one-point-two-million/">
    <title>Extreme HTTP Performance Tuning: 1.2M API req/s on a 4 vCPU EC2 Instance | talawah.io</title>
    <dc:date>2021-05-21T08:54:01+00:00</dc:date>
    <link>https://talawah.io/blog/extreme-http-performance-tuning-one-point-two-million/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[This is very cool. Updating the old "C10K" problem space to C1.2M -- the current state of Linux userspace networking -- using libreactor and a whole load of up-to-date tweaks.

Interesting to note that this scale is feasible to run in Docker (using --network=host, of course).
]]></description>
<dc:subject>http servers c10k linux performance scalability ops tuning libreactor networking tcp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:022774113a4b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:http"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:servers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:c10k"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:linux"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tuning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:libreactor"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:networking"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tcp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://eng.lyft.com/operating-apache-kafka-clusters-24-7-without-a-global-ops-team-417813a5ce70">
    <title>Operating Apache Kafka Clusters 24/7 Without A Global Ops Team</title>
    <dc:date>2019-10-02T10:00:01+00:00</dc:date>
    <link>https://eng.lyft.com/operating-apache-kafka-clusters-24-7-without-a-global-ops-team-417813a5ce70</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Lyft built an autoremediation system and apparently it works :)   Good to get a detailed writeup on such an elusive beast]]></description>
<dc:subject>autoremediation failures ops kafka scalability automation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:457b3e9b4a93/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:autoremediation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:failures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:kafka"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:automation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://perfdynamics.blogspot.com/2007/11/modeling-mythical-man-month.html">
    <title>Modeling the Mythical Man-Month using the Universal Scalability Law</title>
    <dc:date>2019-07-16T09:45:03+00:00</dc:date>
    <link>http://perfdynamics.blogspot.com/2007/11/modeling-mythical-man-month.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[turns out the USL can apply]]></description>
<dc:subject>usl scalability scaling brooks teams mythical-man-month estimation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:45d2bc17ca47/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:usl"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:brooks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:teams"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:mythical-man-month"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:estimation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://apenwarr.ca/log/20190216">
    <title>The log/event processing pipeline you can't have - apenwarr</title>
    <dc:date>2019-02-18T00:19:49+00:00</dc:date>
    <link>https://apenwarr.ca/log/20190216</link>
    <dc:creator>jm</dc:creator><description><![CDATA[So good. Apenwarr knows how to design a system.

<blockquote>Simple things don't break. Our friends on the "let's use structured events to make metrics" team streamed those events straight into a database, and it broke all the time, because databases have configuration options and you inevitably set those options wrong, and it'll fall over under heavy load, and you won't find out until you're right in the middle of an emergency and you really want to see those logs. Or events.</blockquote>

]]></description>
<dc:subject>logging scalability klog kernel log-processing events embedded ops</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:dbdd8c40549f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:logging"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:klog"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:kernel"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:log-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:events"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:embedded"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.dropbox.com/s/47xbjrkni9bx0g3/aurora2.pdf?dl=0">
    <title>_Amazon Aurora: On Avoiding Distributed Consensus for I/Os, Commits, and Membership Changes_, SIGMOD '18</title>
    <dc:date>2019-01-16T10:08:18+00:00</dc:date>
    <link>https://www.dropbox.com/s/47xbjrkni9bx0g3/aurora2.pdf?dl=0</link>
    <dc:creator>jm</dc:creator><description><![CDATA[

<blockquote>One of the more novel differences between Aurora and other relational databases is how it pushes redo processing to a multi-tenant scale-out storage service, purpose-built for Aurora. Doing so reduces networking traffic, avoids checkpoints and crash recovery, enables failovers to replicas without loss of data, and enables fault-tolerant storage that heals without database involvement. Traditional implementations that leverage distributed storage would use distributed consensus algorithms for commits, reads, replication, and membership changes and amplify cost of underlying storage. In this paper, we describe how Aurora avoids distributed consensus under most circumstances by establishing invariants and leveraging local transient state. Doing so improves performance, reduces variability, and lowers costs.</blockquote>

]]></description>
<dc:subject>papers toread aurora amazon aws pdf scalability distcomp state sql mysql postgresql distributed-consensus</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:db64811fffaf/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:toread"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aurora"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:amazon"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aws"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:pdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:distcomp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:state"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sql"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:mysql"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:postgresql"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:distributed-consensus"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/awslabs/amazon-kinesis-scaling-utils">
    <title>awslabs/amazon-kinesis-scaling-utils</title>
    <dc:date>2018-12-18T12:16:24+00:00</dc:date>
    <link>https://github.com/awslabs/amazon-kinesis-scaling-utils</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>The Kinesis Scaling Utility is designed to give you the ability to scale Amazon Kinesis Streams in the same way that you scale EC2 Auto Scaling groups – up or down by a count or as a percentage of the total fleet. You can also simply scale to an exact number of Shards. There is no requirement for you to manage the allocation of the keyspace to Shards when using this API, as it is done automatically.

You can also deploy the Web Archive to a Java Application Server, and allow Scaling Utils to automatically manage the number of Shards in the Stream based on the observed PUT or GET rate of the stream.</blockquote>

]]></description>
<dc:subject>kinesis scaling scalability shards sharding ops</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:f4fd1b252af9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:kinesis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:shards"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sharding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://aws.amazon.com/blogs/compute/running-high-scale-web-on-spot-instances/">
    <title>Running high-scale web applications on Amazon EC2 Spot Instances</title>
    <dc:date>2018-10-03T16:09:14+00:00</dc:date>
    <link>https://aws.amazon.com/blogs/compute/running-high-scale-web-on-spot-instances/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[AppNext's setup looks like quite good practice for a CPU-bound fleet]]></description>
<dc:subject>appnext spot-instances ec2 scalability aws ops architecture</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:e099fd008834/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:appnext"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:spot-instances"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ec2"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aws"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://medium.com/netflix-techblog/netflix-edge-load-balancing-695308b5548c">
    <title>Rethinking Netflix’s Edge Load Balancing – Netflix TechBlog</title>
    <dc:date>2018-10-02T11:43:13+00:00</dc:date>
    <link>https://medium.com/netflix-techblog/netflix-edge-load-balancing-695308b5548c</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Using Server-Reported Utilization data to improve JSQ load balancing
]]></description>
<dc:subject>netflix scaling scalability jsq load-balancing load-balancers algorithms distributed-systems architecture ops</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:5e6299bd9bb6/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:netflix"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:jsq"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:load-balancing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:load-balancers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:distributed-systems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://hackernoon.com/the-problems-with-dynamodb-auto-scaling-and-how-it-might-be-improved-a92029c8c10b">
    <title>The problems with DynamoDB Auto Scaling and how it might be improved</title>
    <dc:date>2018-07-12T11:56:27+00:00</dc:date>
    <link>https://hackernoon.com/the-problems-with-dynamodb-auto-scaling-and-how-it-might-be-improved-a92029c8c10b</link>
    <dc:creator>jm</dc:creator><description><![CDATA['Based on these observations, we hypothesize that you can make two modifications to the system to improve its effectiveness:

trigger scaling up after 1 threshold breach instead of 5, which is in-line with the mantra of “scale up early, scale down slowly”;
trigger scaling activity based on actual request count instead of consumed capacity units, and calculate the new provisioned capacity units using actual request count as well.

As part of this experiment, we also prototyped these changes (by hijacking the CloudWatch alarms) to demonstrate their improvement.']]></description>
<dc:subject>dynamodb autoscaling ops scalability aws scaling capacity</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:6d475063c650/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dynamodb"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:autoscaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aws"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:capacity"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://groups.google.com/forum/#!msg/mechanical-sympathy/gchG_oQ_kQM/59BDMOdUAwAJ">
    <title>Locking, Little's Law, and the USL</title>
    <dc:date>2017-09-20T14:36:55+00:00</dc:date>
    <link>https://groups.google.com/forum/#!msg/mechanical-sympathy/gchG_oQ_kQM/59BDMOdUAwAJ</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Excellent explanatory mailing list post by Martin Thompson to the mechanical-sympathy group, discussing Little's Law vs the USL:

<blockquote>Little's law can be used to describe a system in steady state from a queuing perspective, i.e. arrival and leaving rates are balanced. In this case it is a crude way of modelling a system with a contention percentage of 100% under Amdahl's law, in that throughput is one over latency.

However this is an inaccurate way to model a system with locks. Amdahl's law does not account for coherence costs. For example, if you wrote a microbenchmark with a single thread to measure the lock cost then it is much lower than in a multi-threaded environment where cache coherence, other OS costs such as scheduling, and lock implementations need to be considered.

Universal Scalability Law (USL) accounts for both the contention and the coherence costs.
http://www.perfdynamics.com/Manifesto/USLscalability.html

When modelling locks it is necessary to consider how contention and coherence costs vary given how they can be implemented. Consider in Java how we have biased locking, thin locks, fat locks, inflation, and revoking biases which can cause safe points that bring all threads in the JVM to a stop with a significant coherence component.</blockquote>

]]></description>
<dc:subject>usl scaling scalability performance locking locks java jvm amdahls-law littles-law system-dynamics modelling systems caching threads schedulers contention</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:d64fb1279a0b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:usl"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:locking"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:locks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:java"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:jvm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:amdahls-law"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:littles-law"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:system-dynamics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:modelling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:systems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:caching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:threads"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:schedulers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:contention"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://codahale.com/usl4j-and-you/">
    <title>usl4j And You | codahale.com</title>
    <dc:date>2017-06-01T10:08:29+00:00</dc:date>
    <link>https://codahale.com/usl4j-and-you/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Coda Hale wrote a handy java library implementing a USL solver]]></description>
<dc:subject>usl scalability java performance optimization benchmarking measurement ops coda-hale</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:c184c035e80a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:usl"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:java"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:optimization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:benchmarking"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:measurement"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:coda-hale"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://engineering.ticketea.com/scaling-amazon-aurora-at-ticketea/?__s=gf36pf8g1gjugcqh6ppo">
    <title>Scaling Amazon Aurora at ticketea</title>
    <dc:date>2017-05-29T16:20:46+00:00</dc:date>
    <link>https://engineering.ticketea.com/scaling-amazon-aurora-at-ticketea/?__s=gf36pf8g1gjugcqh6ppo</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>Ticketing is a business in which extreme traffic spikes are the norm, rather than the exception. For Ticketea, this means that our traffic can increase by a factor of 60x in a matter of seconds. This usually happens when big events (which have a fixed, pre-announced 'sale start time') go on sale.</blockquote>

]]></description>
<dc:subject>scaling scalability ops aws aurora autoscaling asg</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:78ee8d992f0b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aws"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aurora"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:autoscaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:asg"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://tech.trivago.com/2017/01/25/learn-redis-the-hard-way-in-production/">
    <title>Learn redis the hard way (in production) · trivago techblog</title>
    <dc:date>2017-03-30T10:01:30+00:00</dc:date>
    <link>http://tech.trivago.com/2017/01/25/learn-redis-the-hard-way-in-production/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[oh god this is pretty awful.  this just reads like "don't try to use Redis at scale" to me]]></description>
<dc:subject>redis scalability ops architecture horror trivago php</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:86839e5457c2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:redis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:horror"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:trivago"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:php"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://eng.uber.com/cherami/">
    <title>Cherami: Uber Engineering’s Durable and Scalable Task Queue in Go - Uber Engineering Blog</title>
    <dc:date>2016-12-14T11:21:39+00:00</dc:date>
    <link>https://eng.uber.com/cherami/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>
a competing-consumer messaging queue that is durable, fault-tolerant, highly available and scalable. We achieve durability and fault-tolerance by replicating messages across storage hosts, and high availability by leveraging the append-only property of messaging queues and choosing eventual consistency as our basic model. Cherami is also scalable, as the design does not have single bottleneck. [...]
Cherami is completely written in Go, a language that makes building highly performant and concurrent system software a lot of fun. Additionally, Cherami uses several libraries that Uber has already open sourced: TChannel for RPC and Ringpop for health checking and group membership. Cherami depends on several third-party open source technologies: Cassandra for metadata storage, RocksDB for message storage, and many other third-party Go packages that are available on GitHub. We plan to open source Cherami in the near future.
</blockquote>]]></description>
<dc:subject>cherami uber queueing tasks queues architecture scalability go cassandra rocksdb</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:08be41dbc892/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cherami"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:uber"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:queueing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tasks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:queues"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:go"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cassandra"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:rocksdb"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://engineering.pinterest.com/blog/auto-scaling-pinterest">
    <title>Auto scaling Pinterest</title>
    <dc:date>2016-12-02T17:44:44+00:00</dc:date>
    <link>https://engineering.pinterest.com/blog/auto-scaling-pinterest</link>
    <dc:creator>jm</dc:creator><description><![CDATA[notes on a second-system take on autoscaling -- Pinterest tried it once, it didn't take, and this is the rerun.  I like the tandem ASG approach (spots and nonspots)]]></description>
<dc:subject>spot-instances scaling aws scalability ops architecture pinterest via:highscalability</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:f049baec271a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:spot-instances"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aws"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:pinterest"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:highscalability"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://twitter.com/frontstack/status/800889593855737856">
    <title>&quot;Solving Imaginary Scaling Issues At Scale — Getting the wrong idea from that conference talk you attended&quot;</title>
    <dc:date>2016-11-22T21:28:57+00:00</dc:date>
    <link>https://twitter.com/frontstack/status/800889593855737856</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Amazing virtuoso performance:

<blockquote>
Chapter 1: Databases with cool-sounding names.
Chapter 2: using BitTorrent for everything.
Chapter 3: forget Torrents. Use the blockchain for everything.
Chapter 4: sharding the database before adding any indexes.
Chapter 5: upgrading to faster processors without checking if you're limited by disk I/O.
Chapter 6: rewriting APIs in C for speed without compressing data on the wire.
Chapter 7: putting large blobs of binary data into SQL databases for fun and profit.
Chapter 8: using protobufs to poll 300 times per second.
Chapter 9: diagnose scaling issues by grepping 10 lines of code and guessing.
Chapter 10: putting Varnish in front of everything just in case.
Chapter 11: buying boxes with gigantic amounts of RAM.
Chapter 12: realizing your HAProxy box is still a micro instance.
Chapter 13: rewriting 3 of 10 features in Go and declaring victory.
Chapter 14: split everything into 35 microservices all maintained by 1 person.
Chapter 15: 300% performance boosts by deleting data validity checks.
Chapter 16: minifying the JS of your O(n^3) to-do list.
Chapter 17: Fuck It, Let's Try Erlang.
Chapter 18: Blaming Everything On The Last Person To Quit.
Chapter 19: A Bloom Filter Will Definitely Fix This.
Chapter 20: Move all client-side processing to the server and/or vice-versa.
Chapter 21: Putting A Node.js Proxy In Front Of Our COBOL Backend Will Definitely Improve Matters.
Chapter 22: A Type-Checked Transpilation Step Will Surely Speed Things Up.
Chapter 23: Writing A New Language Almost The Same As Your Old Language But Faster (guest chapter by Facebook).
Chapter 24: Replacing an SQL DB with a NoSQL DB then implementing SQL in your ORM.
Chapter 25: Migrating From Bare Metal To The Cloud Or Vice-Versa, Whichever You're Not Currently Doing.
Chapter 26: Putting everything behind a CDN except the slow, complicated parts.
Chapter 27: Applying distributed map-reduce to less than 1 gigabyte of data.
Chapter 28: Running exactly the same software, but in Docker.
Chapter 29: Machine learning: how it will magically fix your crappy code.
Chapter 30: Blaming your package manager for slow run-time performance.
Chapter 31: Moving processing from the CPU to the GPU without changing the algorithm.
Chapter 32: Switching To Heroku Or Away From Heroku Or A Hybrid Heroku-AWS model, whichever sounds the most fun.
Chapter 33: Loading all your dependencies from somebody else's github repo.
Chapter 34: optimizing your PNGs while hosting 300MB video ads.
Chapter 35: hosting your database in memory and your images on S3.
</blockquote>]]></description>
<dc:subject>scalability funny lol twitter oreilly</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:d6174c4d603d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:funny"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:lol"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:twitter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:oreilly"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://stripe.com/blog/service-discovery-at-stripe">
    <title>Service discovery at Stripe</title>
    <dc:date>2016-11-01T12:20:50+00:00</dc:date>
    <link>https://stripe.com/blog/service-discovery-at-stripe</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Writeup of their Consul-based service discovery system, a bit similar to smartstack.  Good description of the production problems that they saw with Consul too, and also they figured out that strong consistency isn't actually what you want in a service discovery system ;)

HN comments are good too: https://news.ycombinator.com/item?id=12840803]]></description>
<dc:subject>consul api microservices service-discovery dns load-balancing l7 tcp distcomp smartstack stripe cap-theorem scalability</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:c7313c149028/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:consul"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:api"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:microservices"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:service-discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dns"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:load-balancing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:l7"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tcp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:distcomp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:smartstack"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:stripe"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cap-theorem"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://aseigneurin.github.io/2016/10/07/kafka-streams-scaling-up-or-down.html">
    <title>Kafka Streams - Scaling up or down</title>
    <dc:date>2016-10-13T10:58:32+00:00</dc:date>
    <link>http://aseigneurin.github.io/2016/10/07/kafka-streams-scaling-up-or-down.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[this is a nice zero-config scaling story -- good work Kafka Streams]]></description>
<dc:subject>scaling scalability architecture kafka streams ops</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:dc796f3ac598/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:kafka"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:streams"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.perfdynamics.com/Manifesto/USLscalability.html">
    <title>How to Quantify Scalability</title>
    <dc:date>2016-09-26T10:00:52+00:00</dc:date>
    <link>http://www.perfdynamics.com/Manifesto/USLscalability.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[good page on the Universal Scalability Law and how to apply it]]></description>
<dc:subject>usl performance scalability concurrency capacity measurement excel equations metrics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:ab7e86dd0bb0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:usl"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:concurrency"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:capacity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:measurement"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:excel"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:equations"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:metrics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/ifesdjeen/hashed-wheel-timer">
    <title>Hashed Wheel Timer</title>
    <dc:date>2016-03-29T12:02:43+00:00</dc:date>
    <link>https://github.com/ifesdjeen/hashed-wheel-timer</link>
    <dc:creator>jm</dc:creator><description><![CDATA[nice java impl of this efficient data structure, broken out from Project Reactor
]]></description>
<dc:subject>scalability java timers hashed-wheel-timers algorithms data-structures</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:6139ec69af2a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:java"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:timers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hashed-wheel-timers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-structures"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.informatica.com/downloads/1568_high_perf_messaging_wp/Topics-in-High-Performance-Messaging.htm">
    <title>Topics in High-Performance Messaging</title>
    <dc:date>2015-12-02T15:53:00+00:00</dc:date>
    <link>https://www.informatica.com/downloads/1568_high_perf_messaging_wp/Topics-in-High-Performance-Messaging.htm</link>
    <dc:creator>jm</dc:creator><description><![CDATA['We have worked together in the field of high-performance messaging for many years, and in that time, have seen some messaging systems that worked well and some that didn't. Successful deployment of a messaging system requires background information that is not easily available; most of what we know, we had to learn in the school of hard knocks. To save others a knock or two, we have collected here the essential background information and commentary on some of the issues involved in successful deployments. This information is organized as a series of topics around which there seems to be confusion or uncertainty. Please contact us if you have questions or comments.']]></description>
<dc:subject>messaging scalability scaling performance udp tcp protocols multicast latency</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:aef1848d9376/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:messaging"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:udp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tcp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:protocols"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:multicast"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:latency"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://news.ycombinator.com/item?id=10608356">
    <title>John Nagle on delayed ACKs and his algorithm</title>
    <dc:date>2015-11-22T22:18:18+00:00</dc:date>
    <link>https://news.ycombinator.com/item?id=10608356</link>
    <dc:creator>jm</dc:creator><description><![CDATA[love it when things like this show up]]></description>
<dc:subject>networking performance scalability nagle tcp ip</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:fe312c2af198/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:networking"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:nagle"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tcp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ip"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://blog.librato.com/posts/superchief">
    <title>SuperChief: From Apache Storm to In-House Distributed Stream Processing</title>
    <dc:date>2015-10-12T09:29:22+00:00</dc:date>
    <link>http://blog.librato.com/posts/superchief</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Another sorry tale of Storm issues:

<blockquote>Storm has been successful at Librato, but we experienced many of the limitations cited in the Twitter Heron: Stream Processing at Scale paper and outlined here by Adrian Colyer, including:
Inability to isolate, reason about, or debug performance issues due to the worker/executor/task paradigm. This led to building and configuring clusters specifically designed to attempt to mitigate these problems (i.e., separate clusters per topology, only running a worker per server.), which added additional complexity to development and operations and also led to over-provisioning.
Ability of tasks to move around led to difficult to trace performance problems.
Storm’s work provisioning logic led to some tasks serving more Kafka partitions than others. This in turn created latency and performance issues that were difficult to reason about. The initial solution was to over-provision in an attempt to get a better hashing/balancing of work, but eventually we just replaced the work allocation logic.
Due to Storm’s architecture, it was very difficult to get a stack trace or heap dump because the processes that managed workers (Storm supervisor) would often forcefully kill a Java process while it was being investigated in this way.
The propensity for unexpected and subsequently unhandled exceptions to take down an entire worker led to additional defensive verbose error handling everywhere.
This nasty bug STORM-404 coupled with the aforementioned fact that a single exception can take down a worker led to several cascading failures in production, taking down entire topologies until we upgraded to 0.9.4.
Additionally, we found the performance we were getting from Storm for the amount of money we were spending on infrastructure was not in line with our expectations. Much of this is due to the fact that, depending upon how your topology is designed, a single tuple may make multiple hops across JVMs, and this is very expensive. For example, in our time series aggregation topologies a single tuple may be serialized/deserialized and shipped across the wire 3-4 times as it progresses through the processing pipeline.</blockquote>

]]></description>
<dc:subject>scalability storm kafka librato architecture heron ops</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:b175a6749098/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:storm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:kafka"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:librato"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:heron"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://highscalability.com/blog/2015/9/21/uber-goes-unconventional-using-driver-phones-as-a-backup-dat.html">
    <title>Uber Goes Unconventional: Using Driver Phones as a Backup Datacenter - High Scalability</title>
    <dc:date>2015-09-23T21:54:42+00:00</dc:date>
    <link>http://highscalability.com/blog/2015/9/21/uber-goes-unconventional-using-driver-phones-as-a-backup-dat.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Initially I thought they were just tracking client state on the phone, but it actually sounds like they're replicating other users' state, too.  Mad stuff!  Must cost a fortune in additional data transfer costs...]]></description>
<dc:subject>scalability failover multi-dc uber replication state crdts</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:400d153ebfed/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:failover"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:multi-dc"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:uber"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:replication"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:state"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:crdts"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://msol.io/blog/tech/2015/09/05/youre-probably-wrong-about-caching/">
    <title>You're probably wrong about caching</title>
    <dc:date>2015-09-07T10:49:36+00:00</dc:date>
    <link>https://msol.io/blog/tech/2015/09/05/youre-probably-wrong-about-caching/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Excellent cut-out-and-keep guide to why you should avoid adding a caching layer.  I've been following this practice for the past few years, after I realised that #6 (recovering from a failed cache is hard) is a killer -- I've seen a few large-scale outages where a production system had gained enough scale that it required a cache to operate, and once that cache was damaged, bringing the system back online required a painful rewarming protocol.  Better to design for the non-cached case if possible.]]></description>
<dc:subject>architecture caching coding design caches ops production scalability</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:580ce012d9e2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:caching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:coding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:design"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:caches"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:production"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://docs.google.com/presentation/d/1OvJStE8aohGeI3y5BcYX8bBHwoHYCPu99A3KTTZElr0/edit#slide=id.gb74341dde_1_31">
    <title>What does it take to make Google work at scale? [slides]</title>
    <dc:date>2015-08-31T11:29:51+00:00</dc:date>
    <link>https://docs.google.com/presentation/d/1OvJStE8aohGeI3y5BcYX8bBHwoHYCPu99A3KTTZElr0/edit#slide=id.gb74341dde_1_31</link>
    <dc:creator>jm</dc:creator><description><![CDATA[50-slide summary of Google's stack, compared vs Facebook, Yahoo!, and open-source-land, with the odd interesting architectural insight]]></description>
<dc:subject>google architecture slides scalability bigtable spanner facebook gfs storage</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:ee623f8402ef/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:google"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:slides"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bigtable"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:spanner"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:facebook"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:gfs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:storage"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.youtube.com/watch?v=MKgJeqF1DHw">
    <title>Patrick Shuff - Building A Billion User Load Balancer - SCALE 13x - YouTube</title>
    <dc:date>2015-06-22T09:50:27+00:00</dc:date>
    <link>https://www.youtube.com/watch?v=MKgJeqF1DHw</link>
    <dc:creator>jm</dc:creator><description><![CDATA['Want to learn how Facebook scales their load balancing infrastructure to support more than 1.3 billion users? We will be revealing the technologies and methods we use to route and balance Facebook's traffic. The Traffic team at Facebook has built several systems for managing and balancing our site traffic, including both a DNS load balancer and a software load balancer capable of handling several protocols. This talk will focus on these technologies and how they have helped improve user performance, manage capacity, and increase reliability.'

Can't find the standalone slides, unfortunately.]]></description>
<dc:subject>facebook video talks lbs load-balancing http https scalability scale linux</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:2ad80cce86ff/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:facebook"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:video"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:talks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:lbs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:load-balancing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:http"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:https"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scale"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:linux"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://blog.acolyer.org/2015/06/19/discretized-streams-fault-tolerant-stream-computing-at-scale/">
    <title>Discretized Streams: Fault Tolerant Stream Computing at Scale</title>
    <dc:date>2015-06-19T07:47:04+00:00</dc:date>
    <link>http://blog.acolyer.org/2015/06/19/discretized-streams-fault-tolerant-stream-computing-at-scale/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[The paper describing the innards of Spark Streaming and its RDD-based recomputation algorithm:

<blockquote>we use a data structure called Resilient Distributed Datasets (RDDs), which keeps data in memory and can recover it without replication by tracking the lineage graph of operations that were used to build it. With RDDs, we show that we can attain sub-second end-to-end latencies. We believe that this is sufficient for many real-world big data applications, where the timescale of the events tracked (e.g., trends in social media) is much higher.</blockquote>

]]></description>
<dc:subject>rdd spark streaming fault-tolerance batch distcomp papers big-data scalability</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:561d8372a2de/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:rdd"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:spark"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:streaming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:fault-tolerance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:batch"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:distcomp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://highscalability.com/blog/2015/6/8/leveraging-aws-to-build-a-scalable-data-pipeline.html">
    <title>Leveraging AWS to Build a Scalable Data Pipeline</title>
    <dc:date>2015-06-14T21:22:02+00:00</dc:date>
    <link>http://highscalability.com/blog/2015/6/8/leveraging-aws-to-build-a-scalable-data-pipeline.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Nice detailed description of an auto-scaled SQS worker pool]]></description>
<dc:subject>sqs aws ec2 auto-scaling asg worker-pools architecture scalability</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:83cb65158dca/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sqs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aws"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ec2"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:auto-scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:asg"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:worker-pools"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.benstopford.com/2015/04/28/elements-of-scale-composing-and-scaling-data-platforms/">
    <title>Elements of Scale: Composing and Scaling Data Platforms</title>
    <dc:date>2015-05-25T15:58:46+00:00</dc:date>
    <link>http://www.benstopford.com/2015/04/28/elements-of-scale-composing-and-scaling-data-platforms/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Great, encyclopedic blog post rounding up common architectural and algorithmic patterns used in scalable data platforms.  Cut out and keep!]]></description>
<dc:subject>architecture storage databases data big-data scaling scalability ben-stopford cqrs druid parquet columnar-stores lambda-architecture</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:50954c7dd941/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:storage"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:databases"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ben-stopford"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cqrs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:druid"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:parquet"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:columnar-stores"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:lambda-architecture"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.developer-tech.com/news/2014/jun/10/why-loggly-loves-apache-kafka-how-unbreakable-infinitely-scalable-messaging-makes-log-management-better/">
    <title>Why Loggly loves Apache Kafka</title>
    <dc:date>2015-05-06T11:19:20+00:00</dc:date>
    <link>http://www.developer-tech.com/news/2014/jun/10/why-loggly-loves-apache-kafka-how-unbreakable-infinitely-scalable-messaging-makes-log-management-better/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Some good factoids about Loggly's Kafka usage and scale]]></description>
<dc:subject>scalability logging loggly kafka queueing ops reliabilty</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:61df5b7109a3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:logging"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:loggly"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:kafka"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:queueing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:reliabilty"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://ferd.ca/lessons-learned-while-working-on-large-scale-server-software.html">
    <title>ferd.ca -&gt; Lessons Learned while Working on Large-Scale Server Software</title>
    <dc:date>2015-04-22T15:26:07+00:00</dc:date>
    <link>http://ferd.ca/lessons-learned-while-working-on-large-scale-server-software.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Good advice]]></description>
<dc:subject>distributed scalability systems coding server-side erlang devops networking reliability</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:4b4817db08ed/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:distributed"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:systems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:coding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:server-side"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:erlang"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:devops"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:networking"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:reliability"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://highscalability.com/blog/2015/3/30/how-we-scale-vividcortexs-backend-systems.html">
    <title>How We Scale VividCortex's Backend Systems - High Scalability</title>
    <dc:date>2015-03-30T16:55:14+00:00</dc:date>
    <link>http://highscalability.com/blog/2015/3/30/how-we-scale-vividcortexs-backend-systems.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Excellent post from Baron Schwartz about their large-scale, 1-second-granularity time series database storage system]]></description>
<dc:subject>time-series tsd storage mysql sql baron-schwartz ops performance scalability scaling go</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:fe014fc1ee1b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:time-series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tsd"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:storage"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:mysql"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sql"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:baron-schwartz"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:go"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/mmcgrana/services-engineering">
    <title>Services Engineering Reading List</title>
    <dc:date>2015-03-03T10:37:29+00:00</dc:date>
    <link>https://github.com/mmcgrana/services-engineering</link>
    <dc:creator>jm</dc:creator><description><![CDATA[good list of papers/articles for fans of scalability etc.]]></description>
<dc:subject>architecture papers reading reliability scalability articles to-read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:2db5b491b523/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:reading"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:reliability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:articles"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:to-read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.frankmcsherry.org/graph/scalability/cost/2015/01/15/COST.html">
    <title>Are you better off running your big-data batch system off your laptop?</title>
    <dc:date>2015-01-17T21:33:33+00:00</dc:date>
    <link>http://www.frankmcsherry.org/graph/scalability/cost/2015/01/15/COST.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Heh, nice trolling.<blockquote>Here are two helpful guidelines (for largely disjoint populations):

If you are going to use a big data system for yourself, see if it is faster than your laptop.
If you are going to build a big data system for others, see that it is faster than my laptop. [...]

We think everyone should have to do this, because it leads to better systems and better research.</blockquote>

]]></description>
<dc:subject>graph coding hadoop spark giraph graph-processing hardware scalability big-data batch algorithms pagerank</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:229db78fb862/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:graph"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:coding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hadoop"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:spark"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:giraph"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:graph-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hardware"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:batch"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:pagerank"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.awsarchitectureblog.com/2014/06/constant-work.html">
    <title>Doing Constant Work to Avoid Failures</title>
    <dc:date>2014-11-07T15:09:13+00:00</dc:date>
    <link>http://www.awsarchitectureblog.com/2014/06/constant-work.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[A good example of a design pattern -- by performing a relatively constant amount of work regardless of the input, we can predict scalability and reduce the risk of overload when something unexpected changes in that input]]></description>
<dc:subject>scalability scaling architecture aws route53 via:brianscanlan overload constant-load loading</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:b94dca788ad3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aws"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:route53"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:brianscanlan"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:overload"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:constant-load"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:loading"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/graphite-project/carbon/issues/235">
    <title>Carbon vs Megacarbon and Roadmap ? · Issue #235 · graphite-project/carbon</title>
    <dc:date>2014-10-29T11:59:07+00:00</dc:date>
    <link>https://github.com/graphite-project/carbon/issues/235</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>Carbon is a great idea, but fundamentally, twisted doesn't do what carbon-relay or carbon-aggregator were built to do when hit with sustained and heavy throughput. Much to my chagrin, concurrency isn't one of python's core competencies.</blockquote>

+1, sadly.  We are patching around the edges with half-released third-party C rewrites in our graphite setup, as we exceed the scale Carbon can support.]]></description>
<dc:subject>carbon graphite metrics ops python twisted scalability</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:c81f1dde8791/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:carbon"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:graphite"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:metrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:twisted"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://artsy.github.io/blog/2012/07/10/on-demand-jenkins-slaves-with-amazon-ec2/">
    <title>On-Demand Jenkins Slaves With Amazon EC2</title>
    <dc:date>2014-08-29T23:01:15+00:00</dc:date>
    <link>http://artsy.github.io/blog/2012/07/10/on-demand-jenkins-slaves-with-amazon-ec2/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[This is very likely where we'll be going for our acceptance tests in Swrve]]></description>
<dc:subject>testing jenkins ec2 spot-instances scalability auto-scaling ops build</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:49ce775cc829/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:jenkins"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ec2"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:spot-instances"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:auto-scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ops"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:build"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://aws.amazon.com/blogs/aws/auto-scale-dynamodb-with-dynamic-dynamodb/">
    <title>Auto Scale DynamoDB With Dynamic DynamoDB</title>
    <dc:date>2014-07-22T13:20:58+00:00</dc:date>
    <link>http://aws.amazon.com/blogs/aws/auto-scale-dynamodb-with-dynamic-dynamodb/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Nicely-packaged auto-scaler for DynamoDB]]></description>
<dc:subject>dynamodb autoscaling scalability provisioning aws ec2 cloudformation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:c15cbca57e7c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dynamodb"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:autoscaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:provisioning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aws"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ec2"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cloudformation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://googleresearch.blogspot.ie/2014/06/influential-papers-for-2013.html">
    <title>Google's Influential Papers for 2013</title>
    <dc:date>2014-07-09T16:40:48+00:00</dc:date>
    <link>http://googleresearch.blogspot.ie/2014/06/influential-papers-for-2013.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>Googlers across the company actively engage with the scientific community by publishing technical papers, contributing open-source packages, working on standards, introducing new APIs and tools, giving talks and presentations, participating in ongoing technical debates, and much more. Our publications offer technical and algorithmic advances, feature aspects we learn as we develop novel products and services, and shed light on some of the technical challenges we face at Google. Below are some of the especially influential papers co-authored by Googlers in 2013.</blockquote>

]]></description>
<dc:subject>google papers toread reading 2013 scalability machine-learning algorithms</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:c2e0e542b7ca/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:google"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:toread"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:reading"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:2013"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.datacenterknowledge.com/archives/2014/06/25/google-dumps-mapreduce-favor-new-hyper-scale-analytics-system/">
    <title>Google Replaces MapReduce With New Hyper-Scale Cloud Analytics System</title>
    <dc:date>2014-06-26T12:42:20+00:00</dc:date>
    <link>http://www.datacenterknowledge.com/archives/2014/06/25/google-dumps-mapreduce-favor-new-hyper-scale-analytics-system/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[MR no more:

<blockquote>“We don’t really use MapReduce anymore,” [Urs] Hölzle said in his keynote presentation at the Google I/O conference in San Francisco Wednesday. The company stopped using the system “years ago.”

Cloud Dataflow, which Google will also offer as a service for developers using its cloud platform, does not have the scaling restrictions of MapReduce. “Cloud Dataflow is the result of over a decade of experience in analytics,” Hölzle said. “It will run faster and scale better than pretty much any other system out there.”</blockquote>

Gossip on the mech-sympathy list says that 'seems that the new platform taking over is a combination of FlumeJava and MillWheel: http://pages.cs.wisc.edu/~akella/CS838/F12/838-CloudPapers/FlumeJava.pdf , 
http://static.googleusercontent.com/media/research.google.com/en/us/pubs/archive/41378.pdf']]></description>
<dc:subject>map-reduce google hadoop cloud-dataflow scalability big-data urs-holzle google-io</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:a9ddb55ae3e1/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:map-reduce"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:google"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hadoop"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cloud-dataflow"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:big-data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:urs-holzle"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:google-io"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://bits.shutterstock.com/2014/05/22/stop-buying-load-balancers-and-start-controlling-your-traffic-flow-with-software/">
    <title>Shutterbits replacing hardware load balancers with local BGP daemons and anycast</title>
    <dc:date>2014-05-29T10:07:07+00:00</dc:date>
    <link>http://bits.shutterstock.com/2014/05/22/stop-buying-load-balancers-and-start-controlling-your-traffic-flow-with-software/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Interesting approach.  Potentially risky, though -- heavy use of anycast on a large-scale datacenter network could increase the scale of the OSPF graph, which scales exponentially.  This can have major side effects on OSPF reconvergence time, which creates an interesting class of network outage in the event of OSPF flapping.

Having said that, an active/passive failover LB pair will already announce a single anycast virtual IP anyway, so, assuming there are a similar number of anycast IPs in the end, it may not have any negative side effects.

There's also the inherent limitation noted in the second-to-last paragraph; 'It comes down to what your hardware router can handle for ECMP. I know a Juniper MX240 can handle 16 next-hops, and have heard rumors that a software update will bump this to 64, but again this is something to keep in mind'.  Taking a leaf from the LB design, and using BGP to load-balance across a smaller set of haproxy instances, would seem like a good approach to scale up.]]></description>
<dc:subject>scalability networking performance load-balancing bgp exabgp ospf anycast routing datacenters scaling vips juniper haproxy shutterstock</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:55674ebcedb2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:networking"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:load-balancing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:bgp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:exabgp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ospf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:anycast"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:routing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:datacenters"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:vips"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:juniper"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:haproxy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:shutterstock"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://spark.apache.org/docs/latest/streaming-programming-guide.html#overview">
    <title>Spark Streaming</title>
    <dc:date>2014-05-16T21:35:38+00:00</dc:date>
    <link>http://spark.apache.org/docs/latest/streaming-programming-guide.html#overview</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>an extension of the core Spark API that allows enables high-throughput, fault-tolerant stream processing of live data streams. Data can be ingested from many sources like Kafka, Flume, Twitter, ZeroMQ or plain old TCP sockets and be processed using complex algorithms expressed with high-level functions like map, reduce, join and window. Finally, processed data can be pushed out to filesystems, databases, and live dashboards. In fact, you can apply Spark’s in-built machine learning algorithms, and graph processing algorithms on data streams.</blockquote>

]]></description>
<dc:subject>spark streams stream-processing cep scalability apache machine-learning graphs</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:62c1e3c0e756/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:spark"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:streams"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:stream-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cep"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:apache"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:graphs"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://news.ycombinator.com/item?id=7711974">
    <title>Why Disqus made the Python-&gt;Go switchover</title>
    <dc:date>2014-05-08T13:34:52+00:00</dc:date>
    <link>https://news.ycombinator.com/item?id=7711974</link>
    <dc:creator>jm</dc:creator><description><![CDATA[for their realtime component, from the horse's mouth:

<blockquote>at higher contention, the CPU was choking everything. Switching over to Go removed that contention for us, which was the primary issue that we were seeing.</blockquote>

]]></description>
<dc:subject>python languages concurrency go threading gevent scalability disqus realtime hn</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:7735324765d8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:languages"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:concurrency"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:go"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:threading"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:gevent"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:disqus"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:realtime"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hn"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://code.facebook.com/posts/220956754772273/an-analysis-of-facebook-photo-caching/">
    <title>An analysis of Facebook photo caching</title>
    <dc:date>2014-05-07T12:53:52+00:00</dc:date>
    <link>https://code.facebook.com/posts/220956754772273/an-analysis-of-facebook-photo-caching/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[excellent analysis of caching behaviour at scale, from the FB engineering blog (via Tony Finch)]]></description>
<dc:subject>via:fanf caching facebook architecture photos images cache fifo lru scalability</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:3b5a7ad7f689/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:fanf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:caching"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:facebook"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:photos"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:images"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cache"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:fifo"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:lru"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.bailis.org/blog/scalable-atomic-visibility-with-ramp-transactions/">
    <title>Scalable Atomic Visibility with RAMP Transactions</title>
    <dc:date>2014-04-10T20:55:17+00:00</dc:date>
    <link>http://www.bailis.org/blog/scalable-atomic-visibility-with-ramp-transactions/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Great new distcomp protocol work from Peter Bailis et al:

<blockquote>We’ve developed three new algorithms—called Read Atomic Multi-Partition (RAMP) Transactions—for ensuring atomic visibility in partitioned (sharded) databases: either all of a transaction’s updates are observed, or none are. [...]

How they work: RAMP transactions allow readers and writers to proceed concurrently. Operations race, but readers autonomously detect the races and repair any non-atomic reads. The write protocol ensures readers never stall waiting for writes to arrive.

Why they scale: Clients can’t cause other clients to stall (via synchronization independence) and clients only have to contact the servers responsible for items in their transactions (via partition independence). As a consequence, there’s no mutual exclusion or synchronous coordination across servers.

The end result: RAMP transactions outperform existing approaches across a variety of workloads, and, for a workload of 95% reads, RAMP transactions scale to over 7 million ops/second on 100 servers at less than 5% overhead.</blockquote>

]]></description>
<dc:subject>scale synchronization databases distcomp distributed ramp transactions scalability peter-bailis protocols sharding concurrency atomic partitions</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:bb652343d9e6/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scale"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:synchronization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:databases"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:distcomp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:distributed"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ramp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:transactions"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:peter-bailis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:protocols"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sharding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:concurrency"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:atomic"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:partitions"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.erlang-factory.com/upload/presentations/558/efsf2012-whatsapp-scaling.pdf">
    <title>'Scaling to Millions of Simultaneous Connections' [pdf]</title>
    <dc:date>2014-02-20T14:24:00+00:00</dc:date>
    <link>http://www.erlang-factory.com/upload/presentations/558/efsf2012-whatsapp-scaling.pdf</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Presentation by Rick Reed of WhatsApp on the large-scale Erlang cluster backing the WhatsApp API, delivered at Erlang Factory SF, March 30 2012. lots of juicy innards here]]></description>
<dc:subject>erlang scaling scalability performance whatsapp freebsd presentations</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:67245dcffadb/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:erlang"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:whatsapp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:freebsd"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:presentations"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://highscalability.com/blog/2014/2/5/littles-law-scalability-and-fault-tolerance-the-os-is-your-b.html">
    <title>Little’s Law, Scalability and Fault Tolerance: The OS is your bottleneck. What you can do?</title>
    <dc:date>2014-02-05T17:35:26+00:00</dc:date>
    <link>http://highscalability.com/blog/2014/2/5/littles-law-scalability-and-fault-tolerance-the-os-is-your-b.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[good blog post on Little's Law, plugging quasar, pulsar, and comsat, 3 new open-source libs offering Erlang-like lightweight threads on the JVM]]></description>
<dc:subject>jvm java quasar pulsar comsat littles-law scalability async erlang</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:bb3e77510a90/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:jvm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:java"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:quasar"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:pulsar"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:comsat"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:littles-law"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:async"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:erlang"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.inmobi.com/blog/2014/01/24/extending-graphites-mileage">
    <title>Extending graphite’s mileage</title>
    <dc:date>2014-01-27T10:35:04+00:00</dc:date>
    <link>http://www.inmobi.com/blog/2014/01/24/extending-graphites-mileage</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Ad company InMobi are using graphite heavily (albeit not as heavily as $work are), ran into the usual scaling issues, and chose to fix it in code by switching from a filesystem full of whisper files to a LevelDB per carbon-cache:

<blockquote>The carbon server is now able to run without breaking a sweat even when 500K metrics per minute is being pumped into it. This has been in production since late August 2013 in every datacenter that we operate from.</blockquote>

Very nice.  I hope this gets merged/supported.]]></description>
<dc:subject>graphite scalability metrics leveldb storage inmobi whisper carbon open-source</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:8df674ec27ce/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:graphite"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:metrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:leveldb"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:storage"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:inmobi"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:whisper"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:carbon"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:open-source"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://sigops.org/sosp/sosp13/papers/p33-david.pdf">
    <title>Everything You Always Wanted to Know About Synchronization but Were Afraid to Ask</title>
    <dc:date>2013-10-21T16:32:15+00:00</dc:date>
    <link>http://sigops.org/sosp/sosp13/papers/p33-david.pdf</link>
    <dc:creator>jm</dc:creator><description><![CDATA['the most exhaustive study of [multi-core] synchronization to date']]></description>
<dc:subject>synchronization scalability cpus hardware papers via:fanf multicore cas</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:f3c2f37df2b0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:synchronization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cpus"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hardware"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:fanf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:multicore"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cas"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.bailis.org/blog/non-blocking-transactional-atomicity/">
    <title>Non-blocking transactional atomicity</title>
    <dc:date>2013-10-07T21:01:01+00:00</dc:date>
    <link>http://www.bailis.org/blog/non-blocking-transactional-atomicity/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[interesting new distributed atomic transaction algorithm from Peter Bailis]]></description>
<dc:subject>algorithms database distributed scalability storage peter-bailis distcomp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:b97a35baf620/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:database"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:distributed"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:storage"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:peter-bailis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:distcomp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://attentionshard.wordpress.com/2013/09/30/why-tellybug-moved-from-cassandra-to-amazon-dynamodb/">
    <title>Why Tellybug moved from Cassandra to Amazon DynamoDB</title>
    <dc:date>2013-10-02T12:55:23+00:00</dc:date>
    <link>http://attentionshard.wordpress.com/2013/09/30/why-tellybug-moved-from-cassandra-to-amazon-dynamodb/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Summary: poor reliability with Cassandra; DynamoDB gave better latencies, and was cheaper (!)]]></description>
<dc:subject>aws dynamodb cassandra nosql storage tellybug counters scalability reliability latency</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:8b0a38474b92/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aws"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dynamodb"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cassandra"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:nosql"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:storage"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tellybug"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:counters"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:reliability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:latency"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.loggly.com/behind-the-screens/">
    <title>Behind the Screens at Loggly</title>
    <dc:date>2013-09-09T21:11:34+00:00</dc:date>
    <link>http://www.loggly.com/behind-the-screens/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Boost ASIO at the front end (!), Kafka 0.8, Storm, and ElasticSearch]]></description>
<dc:subject>boost scalability loggly logging ingestion cep stream-processing kafka storm architecture elasticsearch</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:6dca6cd9245d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:boost"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:loggly"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:logging"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ingestion"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cep"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:stream-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:kafka"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:storm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:elasticsearch"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://db.disi.unitn.eu/pages/VLDBProgram/pdf/industry/p734-akidau.pdf">
    <title>_MillWheel: Fault-Tolerant Stream Processing at Internet Scale_ [paper, pdf]</title>
    <dc:date>2013-08-29T23:13:55+00:00</dc:date>
    <link>http://db.disi.unitn.eu/pages/VLDBProgram/pdf/industry/p734-akidau.pdf</link>
    <dc:creator>jm</dc:creator><description><![CDATA[from VLDB 2013:

<blockquote>
MillWheel is a framework for building low-latency data-processing applications that is widely used at Google. Users specify a directed computation graph and application code for individual nodes, and the system manages persistent state and the continuous flow of records, all within the envelope of the framework’s fault-tolerance guarantees.

This paper describes MillWheel’s programming model as well as its implementation. The case study of a continuous anomaly detector in use at Google serves to motivate how many of MillWheel’s features are used. MillWheel’s programming model provides a notion of logical time, making it simple to write time-based aggregations. MillWheel was designed from the outset with fault tolerance and scalability in mind. In practice, we find that MillWheel’s unique combination of scalability, fault tolerance, and a versatile programming model lends itself to a wide variety of problems at Google.
</blockquote>]]></description>
<dc:subject>millwheel google data-processing cep low-latency fault-tolerance scalability papers event-processing stream-processing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:a3c789df54bc/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:millwheel"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:google"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cep"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:low-latency"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:fault-tolerance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:event-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:stream-processing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://blog.twitter.com/2013/new-tweets-per-second-record-and-how">
    <title>New Tweets per second record, and how | Twitter Blog</title>
    <dc:date>2013-08-17T08:21:13+00:00</dc:date>
    <link>https://blog.twitter.com/2013/new-tweets-per-second-record-and-how</link>
    <dc:creator>jm</dc:creator><description><![CDATA[How Twitter scaled up massively in 3 years -- replacing Ruby with the JVM, adopting SOA and custom sharding.  Good summary post, looking forward to more techie details soon]]></description>
<dc:subject>twitter performance scalability jvm ruby soa scaling</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:61fa933c4f21/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:twitter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:performance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:jvm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ruby"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:soa"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arstechnica.com/information-technology/2013/08/building-a-panopticon-the-evolution-of-the-nsas-xkeyscore/">
    <title>Building a panopticon: The evolution of the NSA’s XKeyscore</title>
    <dc:date>2013-08-09T14:10:18+00:00</dc:date>
    <link>http://arstechnica.com/information-technology/2013/08/building-a-panopticon-the-evolution-of-the-nsas-xkeyscore/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[This is an amazing behind-the-scenes look at the architecture of XKeyscore, and how it evolved from an earlier large-scale packet interception system, Narus' Semantic Traffic Analyzer.

XKeyscore is a federated, distributed system, with distributed packet-capture agents running on Linux, built with protocol-specific plugins, which write 3 days of raw packet data, and 30 days of intercept metadata, to local buffer stores.  Central queries are then 'distributed across all of the XKeyscore tap sites, and any results are returned and aggregated'.

Dunno about you, but this is pretty much how I would have built something like this, IMO....]]></description>
<dc:subject>panopticon xkeyscore nsa architecture scalability packet-capture narus sniffing snooping interception lawful-interception li tapping</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:dd0ed3afe027/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:panopticon"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:xkeyscore"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:nsa"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:packet-capture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:narus"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sniffing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:snooping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:interception"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:lawful-interception"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:li"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tapping"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://highscalability.com/blog/2013/7/8/the-architecture-twitter-uses-to-deal-with-150m-active-users.html">
    <title>The Architecture Twitter Uses to Deal with 150M Active Users, 300K QPS, a 22 MB/S Firehose, and Send Tweets in Under 5 Seconds</title>
    <dc:date>2013-07-09T09:01:05+00:00</dc:date>
    <link>http://highscalability.com/blog/2013/7/8/the-architecture-twitter-uses-to-deal-with-150m-active-users.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Good read.

<blockquote>Twitter is primarily a consumption mechanism, not a production mechanism. 300K QPS are spent reading timelines and only 6000 requests per second are spent on writes.</blockquote>

* their approach of precomputing the timeline for the non-search case is a good example of optimizing for the more frequently-exercised path.

* MySQL and Redis are the underlying stores.  Redis is acting as a front-line in-RAM cache.  they're pretty happy with it: https://news.ycombinator.com/item?id=6011254

* these further talks go into more detail, apparently (haven't watched them yet):

http://www.infoq.com/presentations/Real-Time-Delivery-Twitter
http://www.infoq.com/presentations/Twitter-Timeline-Scalability
http://www.infoq.com/presentations/Timelines-Twitter

* funny thread of comments on HN, from a big-iron fan: https://news.ycombinator.com/item?id=6008228]]></description>
<dc:subject>scale architecture scalability twitter high-scalability redis mysql</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:5bddc42e545c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scale"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:twitter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:high-scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:redis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:mysql"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.facebook.com/notes/facebook-engineering/wormhole-pubsub-system-moving-data-through-space-and-time/10151504075843920">
    <title>Facebook announce Wormhole</title>
    <dc:date>2013-06-26T09:38:27+00:00</dc:date>
    <link>https://www.facebook.com/notes/facebook-engineering/wormhole-pubsub-system-moving-data-through-space-and-time/10151504075843920</link>
    <dc:creator>jm</dc:creator><description><![CDATA[<blockquote>Over the last couple of years, we have built and deployed a reliable publish-subscribe system called Wormhole. Wormhole has become a critical part of Facebook's software infrastructure. At a high level, Wormhole propagates changes issued in one system to all systems that need to reflect those changes – within and across data centers. </blockquote>

Facebook's Kafka-alike, basically, although with some additional low-latency guarantees.  FB appear to be using it for multi-region and multi-AZ replication. Proprietary.]]></description>
<dc:subject>pub-sub scalability facebook realtime low-latency multi-region replication multi-az wormhole</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:c16235547374/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:pub-sub"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:facebook"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:realtime"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:low-latency"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:multi-region"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:replication"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:multi-az"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:wormhole"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.slideshare.net/r39132/q-con-ny2013modernwebsitescalabilityfinal-22989785">
    <title>Building a Modern Website for Scale (QCon NY 2013) [slides]</title>
    <dc:date>2013-06-17T10:37:00+00:00</dc:date>
    <link>http://www.slideshare.net/r39132/q-con-ny2013modernwebsitescalabilityfinal-22989785</link>
    <dc:creator>jm</dc:creator><description><![CDATA[some great scalability ideas from LinkedIn.  Particularly interesting are the best practices suggested for scaling web services:

1. store client-call timeouts and SLAs in Zookeeper for each REST endpoint;
2. isolate backend calls using async/threadpools;
3. cancel work on failures;
4. avoid sending requests to GC'ing hosts;
5. rate limits on the server.

#4 is particularly cool.  They do this using a "GC scout" request before every "real" request; a cheap TCP request to a dedicated "scout" Netty port, which replies near-instantly.  If it comes back with a 1-packet response within 1 millisecond, send the real request, else fail over immediately to the next host in the failover set.

There's still a potential race condition where the "GC scout" request completes quickly, then a GC starts just before the "real" request is issued.  But the incidence of GC-blocking-request is probably massively reduced.

It also helps against packet loss on the rack or server host, since packet loss will cause the drop of one of the TCP packets, and the TCP retransmit timeout will certainly be higher than 1ms, causing the deadline to be missed.  (UDP would probably work just as well, for this reason.)  However, in the case of packet loss in the client's network vicinity, it will be vital to still attempt to send the request to the final host in the failover set regardless of a GC-scout failure, otherwise all requests may be skipped.

The GC-scout system also helps balance request load off heavily-loaded hosts, or hosts with poor performance for other reasons; they'll fail to achieve their 1 msec deadline and the request will be shunted off elsewhere.

For service APIs with real low-latency requirements, this is a great idea.]]></description>
<dc:subject>gc-scout gc java scaling scalability linkedin qcon async threadpools rest slas timeouts networking distcomp netty tcp udp failover fault-tolerance packet-loss</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:8766348f43f5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:gc-scout"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:gc"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:java"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:linkedin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:qcon"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:async"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:threadpools"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:rest"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:slas"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:timeouts"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:networking"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:distcomp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:netty"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tcp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:udp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:failover"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:fault-tolerance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:packet-loss"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://groups.google.com/forum/#!topic/mechanical-sympathy/ao44gonVdAY">
    <title>Martin Thompson, Luke &quot;Snabb Switch&quot; Gorrie etc. review the C10M presentation from ShmooCon</title>
    <dc:date>2013-05-15T09:56:39+00:00</dc:date>
    <link>https://groups.google.com/forum/#!topic/mechanical-sympathy/ao44gonVdAY</link>
    <dc:creator>jm</dc:creator><description><![CDATA[on the mechanical-sympathy mailing list.  Some really interesting discussion on handling insane quantities of TCP connections using low volumes of hardware:

<blockquote>This talk has some good points and I think the subject is really interesting.  I would take the suggested approach with serious caution.  For starters the Linux kernel is nowhere near as bad as it made out.  Last year I worked with a client and we scaled a single server to 1 million concurrent connections with async programming in Java and some sensible kernel tuning.  I've heard they have since taken this to over 5 million concurrent connections.

BTW Open Onload is an open source implementation.  Writing a network stack is a serious undertaking.  In a previous life I wrote a network probe and had to reassemble TCP streams and kept getting tripped up by edge cases.  It is a great exercise in data structures and lock-free programming.  If you need very high-end performance I'd talk to the Solarflare or Mellanox guys before writing my own.

There are some errors and omissions in this talk.  For example, his range of ephemeral ports is not quite right, and atomic operations are only 15 cycles on Sandy Bridge when hitting local cache.  A big issue for me is when he defined C10M he did not mention the TIME_WAIT issue with closing connections.  Creating and destroying 1 million connections per second is a major issue.  A protocol like HTTP is very broken in that the server closes the socket and therefore has to retain the TCB until the specified timeout occurs to ensure no older packet is delivered to a new socket connection.</blockquote>

]]></description>
<dc:subject>mechanical-sympathy hardware scaling c10m tcp http scalability snabb-switch martin-thompson</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:dfe0a86b2ec0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:mechanical-sympathy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hardware"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:c10m"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:tcp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:http"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:snabb-switch"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:martin-thompson"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://blog.cloudera.com/blog/2010/04/cap-confusion-problems-with-partition-tolerance/">
    <title>CAP Confusion: Problems with ‘partition tolerance’</title>
    <dc:date>2013-05-14T20:18:13+00:00</dc:date>
    <link>http://blog.cloudera.com/blog/2010/04/cap-confusion-problems-with-partition-tolerance/</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Another good clarification about CAP which resurfaced during last week's discussion:

<blockquote>So what causes partitions? Two things, really. The first is obvious – a network failure, for example due to a faulty switch, can cause the network to partition. The other is less obvious, but fits with the definition [...]: machine failures, either hard or soft. In an asynchronous network, i.e. one where processing a message could take unbounded time, it is impossible to distinguish between machine failures and lost messages. Therefore a single machine failure partitions it from the rest of the network. A correlated failure of several machines partitions them all from the network. Not being able to receive a message is the same as the network not delivering it. In the face of sufficiently many machine failures, it is still impossible to maintain availability and consistency, not because two writes may go to separate partitions, but because the failure of an entire ‘quorum’ of servers may render some recent writes unreadable.
</blockquote>

(sorry, catching up on old interesting things posted last week...)]]></description>
<dc:subject>failure scalability network partitions cap quorum distributed-databases fault-tolerance</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:aa948fa8adc0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:failure"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:network"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:partitions"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cap"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:quorum"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:distributed-databases"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:fault-tolerance"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://news.ycombinator.com/item?id=5653266">
    <title>Alex Feinberg's response to Damien Katz' anti-Dynamoish/pro-Couchbase blog post</title>
    <dc:date>2013-05-14T20:16:19+00:00</dc:date>
    <link>https://news.ycombinator.com/item?id=5653266</link>
    <dc:creator>jm</dc:creator><description><![CDATA[Insightful response, worth bookmarking.  (the original post is at http://damienkatz.net/2013/05/dynamo_sure_works_hard.html ).

<blockquote>while you are saving on read traffic (online reads only go to the master), you are now decreasing availability (contrary to your stated goal), and increasing system complexity.
You also do hurt performance by requiring all writes and reads to be serialized through a single node: unless you plan to have a leader election whenever the node fails to meet a read SLA (which is going to result a disaster -- I am speaking from personal experience), you will have to accept that you're bottlenecked by a single node. With a Dynamo-style quorum (for either reads or writes), a single straggler will not reduce whole-cluster latency.
The core point of Dynamo is low latency, availability and handling of all kinds of partitions: whether clean partitions (long term single node failures), transient failures (garbage collection pauses, slow disks, network blips, etc...), or even more complex dependent failures.
The reality, of course, is that availability is neither the sole, nor the principal concern of every system. It's perfect fine to trade off availability for other goals -- you just need to be aware of that trade off.</blockquote>]]></description>
<dc:subject>cap distributed-databases databases quorum availability scalability damien-katz alex-feinberg partitions network dynamo riak voldemort couchbase</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:87fd2f70fea6/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:cap"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:distributed-databases"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:databases"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:quorum"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:availability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:damien-katz"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:alex-feinberg"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:partitions"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:network"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:dynamo"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:riak"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:voldemort"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:couchbase"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://highscalability.com/blog/2011/11/29/datasift-architecture-realtime-datamining-at-120000-tweets-p.html">
    <title>DataSift Architecture: Realtime Datamining at 120,000 Tweets Per Second</title>
    <dc:date>2013-04-23T13:03:14+00:00</dc:date>
    <link>http://highscalability.com/blog/2011/11/29/datasift-architecture-realtime-datamining-at-120000-tweets-p.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[250 million tweets per day, 30-node HBase cluster, 400TB of storage, Kafka and 0mq.

This is from 2011, hence this dated line: 'for a distributed application they thought AWS was too limited, especially in the network. AWS doesn’t do well when nodes are connected together and they need to talk to each other. Not low enough latency network. Their customers care about latency.'  (Nowadays, it would be damn hard to build a lower-latency network than that attached to a cc2.8xlarge instance.)]]></description>
<dc:subject>datasift architecture scalability data twitter firehose hbase kafka zeromq</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:5c07ab4273cd/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:datasift"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:twitter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:firehose"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:hbase"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:kafka"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:zeromq"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://speakerdeck.com/mza/latencys-worst-nightmare-performance-tuning-tips-and-tricks">
    <title>Latency's Worst Nightmare: Performance Tuning Tips and Tricks [slides]</title>
    <dc:date>2013-04-19T20:27:52+00:00</dc:date>
    <link>https://speakerdeck.com/mza/latencys-worst-nightmare-performance-tuning-tips-and-tricks</link>
    <dc:creator>jm</dc:creator><description><![CDATA[the basics of running a service stack (web, app servers, data stores) on AWS.  some good benchmark figures in the final slides]]></description>
<dc:subject>benchmarks aws ec2 ebs piops services scaling scalability presentations</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:0dad472cef4b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:benchmarks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aws"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ec2"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:ebs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:piops"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:services"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:presentations"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://highscalability.com/blog/2013/4/15/scaling-pinterest-from-0-to-10s-of-billions-of-page-views-a.html">
    <title>High Scalability - Scaling Pinterest - From 0 to 10s of Billions of Page Views a Month in Two Years</title>
    <dc:date>2013-04-15T21:17:02+00:00</dc:date>
    <link>http://highscalability.com/blog/2013/4/15/scaling-pinterest-from-0-to-10s-of-billions-of-page-views-a.html</link>
    <dc:creator>jm</dc:creator><description><![CDATA[wow, Pinterest have a pretty hardcore architecture.  Sharding to the max.  This is scary stuff for me:

<blockquote>a [Cassandra-style] Cluster Management Algorithm is a SPOF. If there’s a bug it impacts every node. This took them down 4 times.</blockquote>

yeah, so, eek ;)]]></description>
<dc:subject>clustering sharding architecture aws scalability scaling pinterest via:matt-sergeant redis mysql memcached</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:jm/b:94eb7274d2de/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:sharding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:architecture"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:aws"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scalability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:pinterest"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:via:matt-sergeant"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:redis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:mysql"/>
	<rdf:li rdf:resource="https://pinboard.in/u:jm/t:memcached"/>
</rdf:Bag></taxo:topics>
</item>
</rdf:RDF>