<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (rahuldave)</title>
    <link>https://pinboard.in/u:rahuldave/public/</link>
    <description>recent bookmarks from rahuldave</description>
    <items>
      <rdf:Seq>	<rdf:li rdf:resource="http://docs.astropy.org/en/v0.2.1/changelog.html#id1"/>
	<rdf:li rdf:resource="https://pypi.python.org/pypi/astropy/0.2.1"/>
	<rdf:li rdf:resource="http://python3porting.com/improving.html"/>
	<rdf:li rdf:resource="http://www.johndcook.com/blog/2012/05/03/python-as-a-lisp-dialect/"/>
	<rdf:li rdf:resource="http://groups.google.com/group/julia-dev/t/61fb4e3847dcc2b9"/>
	<rdf:li rdf:resource="https://github.com/cschin/IPython-Notebook---d3.js-mashup/blob/master/images/example3_d3.jpg"/>
	<rdf:li rdf:resource="http://www.johndcook.com/blog/2012/02/09/python-org-mode/"/>
	<rdf:li rdf:resource="http://code.google.com/edu/languages/google-python-class/introduction.html"/>
	<rdf:li rdf:resource="http://feedproxy.google.com/~r/TheEndeavour/~3/-ocmh6wYnrg/"/>
	<rdf:li rdf:resource="http://www.dabeaz.com/generators/Generators.pdf?"/>
	<rdf:li rdf:resource="http://feedproxy.google.com/~r/TheEndeavour/~3/VqPL0m8y7Ks/"/>
	<rdf:li rdf:resource="http://www.eflorenzano.com/blog/post/technology-behind-convore/"/>
	<rdf:li rdf:resource="http://feeds.arstechnica.com/~r/arstechnica/index/~3/tGM5tqWsxfY/tutorial-use-twitters-new-real-time-stream-api-in-python.ars"/>
	<rdf:li rdf:resource="http://simonwillison.net/2010/Apr/11/surlex/"/>
	<rdf:li rdf:resource="http://simonwillison.net/2010/Mar/25/onion/"/>
	<rdf:li rdf:resource="http://feedproxy.google.com/~r/catonmat/~3/GJRqxzmBW9c/"/>
	<rdf:li rdf:resource="http://blog.doughellmann.com/2010/03/pymotw-parsing-xml-documents-with.html"/>
	<rdf:li rdf:resource="http://simonwillison.net/2010/Mar/11/cachemachine/"/>
      </rdf:Seq>
    </items>
  </channel><item rdf:about="http://docs.astropy.org/en/v0.2.1/changelog.html#id1">
    <title>Full Changelog — Astropy v0.2.1</title>
    <dc:date>2013-04-04T10:40:37+00:00</dc:date>
    <link>http://docs.astropy.org/en/v0.2.1/changelog.html#id1</link>
    <dc:creator>rahuldave</dc:creator><description><![CDATA[RT @astropy: Astropy 0.2.1 has been released! PyPI:  - Changes/fixes:  #astropy #python]]></description>
<dc:subject>python astropy</dc:subject>
<dc:source>https://twitter.com/</dc:source>
<dc:identifier>https://pinboard.in/u:rahuldave/b:34e280c10d22/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:astropy"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://pypi.python.org/pypi/astropy/0.2.1">
    <title>astropy 0.2.1 : Python Package Index</title>
    <dc:date>2013-04-04T10:40:37+00:00</dc:date>
    <link>https://pypi.python.org/pypi/astropy/0.2.1</link>
    <dc:creator>rahuldave</dc:creator><description><![CDATA[RT @astropy: Astropy 0.2.1 has been released! PyPI:  - Changes/fixes:  #astropy #python]]></description>
<dc:subject>python astropy</dc:subject>
<dc:source>https://twitter.com/</dc:source>
<dc:identifier>https://pinboard.in/u:rahuldave/b:4ee9aafa5477/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:astropy"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://python3porting.com/improving.html">
    <title>Improving your code with modern idioms — Porting to Python 3 - The Book Site</title>
    <dc:date>2012-05-22T17:04:50+00:00</dc:date>
    <link>http://python3porting.com/improving.html</link>
    <dc:creator>rahuldave</dc:creator><dc:subject>python programming</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rahuldave/b:76a958c1a8a8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:programming"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.johndcook.com/blog/2012/05/03/python-as-a-lisp-dialect/">
    <title>Python as a Lisp dialect</title>
    <dc:date>2012-05-03T11:55:11+00:00</dc:date>
    <link>http://www.johndcook.com/blog/2012/05/03/python-as-a-lisp-dialect/</link>
    <dc:creator>rahuldave</dc:creator><description><![CDATA[From Peter Norvig:

Basically, Python can be seen as a dialect of Lisp with “traditional” syntax … Python supports all of Lisp’s essential features except macros, and you don’t miss macros all that much because it does have eval, and operator overloading, and regular expression parsing, so some — but not all — of the use cases for macros are covered.

Source: Python for Lisp Programmers

]]></description>
<dc:subject>Python</dc:subject>
<dc:identifier>https://pinboard.in/u:rahuldave/b:89bdfc335c7c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Python"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://groups.google.com/group/julia-dev/t/61fb4e3847dcc2b9">
    <title>Julia, Python and Cython -
  julia-dev |
  Google Groups</title>
    <dc:date>2012-04-22T14:03:37+00:00</dc:date>
    <link>http://groups.google.com/group/julia-dev/t/61fb4e3847dcc2b9</link>
    <dc:creator>rahuldave</dc:creator><description><![CDATA[RT @statalgo: Discussions about #julialang and #python integration.  Amazing how the scientific programming communit ...]]></description>
<dc:subject>python julialang</dc:subject>
<dc:source>https://twitter.com/</dc:source>
<dc:identifier>https://pinboard.in/u:rahuldave/b:8d82c7211972/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:julialang"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/cschin/IPython-Notebook---d3.js-mashup/blob/master/images/example3_d3.jpg">
    <title>images/example3_d3.jpg at master from cschin/IPython-Notebook---d3.js-mashup - GitHub</title>
    <dc:date>2012-02-16T02:28:20+00:00</dc:date>
    <link>https://github.com/cschin/IPython-Notebook---d3.js-mashup/blob/master/images/example3_d3.jpg</link>
    <dc:creator>rahuldave</dc:creator><description><![CDATA[RT @augustmuench: RT @infoecho: more #python + #d3 fun... lots of cool potential
]]></description>
<dc:subject>d3 python</dc:subject>
<dc:source>https://twitter.com/</dc:source>
<dc:identifier>https://pinboard.in/u:rahuldave/b:4f493dbee388/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:d3"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:python"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.johndcook.com/blog/2012/02/09/python-org-mode/">
    <title>Running Python and R inside Emacs</title>
    <dc:date>2012-02-09T13:00:58+00:00</dc:date>
    <link>http://www.johndcook.com/blog/2012/02/09/python-org-mode/</link>
    <dc:creator>rahuldave</dc:creator><description><![CDATA[Emacs org-mode lets you manage blocks of source code inside a text file. You can execute these blocks and have the output display in your text file. Or you could export the file, say to HTML or PDF, and show the code and/or the results of executing the code.

Here I’ll show some of the most basic possibilities. For much more information, see  orgmode.org. And for the use of org-mode in research, see A Multi-Language Computing Environment for Literate Programming and Reproducible Research.

Source code blocks go between lines of the form

#+begin_src
#+end_src
On the #+begin_src line, specify the programming language. Here I’ll demonstrate Python and R, but org-mode currently supports C++, Java, Perl, etc. for a total of 35 languages.

Suppose we want to compute √42 using R.

#+begin_src R
sqrt(42)
#+end_src
If we put the cursor somewhere in the code block and type C-c C-c, org-mode will add these lines:

#+results:
: 6.48074069840786
Now suppose we do the same with Python:

#+begin_src python
from math import sqrt
sqrt(42)
#+end_src
This time we get disappointing results:

#+results:
: None
What happened? The org-mode manual explains:

… code should be written as if it were the body of such a function.  In particular, note that Python does not automatically return a value from a function unless a return statement is present, and so a ‘return’ statement will usually be required in Python.

If we change sqrt(42) to return sqrt(42) then we get the same result that we got when using R.

By default, evaluating a block of code returns a single result. If you want to see the output as if you were interactively using Python from the REPL, you can add :results output :session following the language name.

#+begin_src python :results output :session
print "There are %d hours in a week." % (7*24)
2**10
#+end_src
This produces the lines

#+results:
: There are 168 hours in a week.
: 1024
Without the :session tag, the second line would not appear because there was no print statement.

I had to do a couple things before I could get the examples above to work. First, I had to upgrade org-mode. The version of org-mode that shipped with Emacs 23.3 was quite out of date. Second, the only language you can run by default is Emacs Lisp. You have to turn on support for other languages in your .emacs file. Here’s the code to turn on support for Python and R.

(org-babel-do-load-languages
    'org-babel-load-languages '((python . t) (R . t)))
Update: My next post shows how to call code written in one language from code written in another language.

Related posts:

Personal organization software
Preventing an unpleasant Sweave surprise

]]></description>
<dc:subject>Python Emacs Literate_programming Reproducibility Rstats</dc:subject>
<dc:identifier>https://pinboard.in/u:rahuldave/b:387221004fa1/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Emacs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Literate_programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Reproducibility"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Rstats"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://code.google.com/edu/languages/google-python-class/introduction.html">
    <title>Python Introduction - Google's Python Class - Google Code</title>
    <dc:date>2012-02-02T18:07:10+00:00</dc:date>
    <link>http://code.google.com/edu/languages/google-python-class/introduction.html</link>
    <dc:creator>rahuldave</dc:creator><dc:subject>google python programming tutorial</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rahuldave/b:073050eb4a0a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:google"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:tutorial"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://feedproxy.google.com/~r/TheEndeavour/~3/-ocmh6wYnrg/">
    <title>How to compute jinc(x)</title>
    <dc:date>2012-02-02T16:01:04+00:00</dc:date>
    <link>http://feedproxy.google.com/~r/TheEndeavour/~3/-ocmh6wYnrg/</link>
    <dc:creator>rahuldave</dc:creator><description><![CDATA[The function jinc(x) that I wrote about yesterday is almost trivial to implement, but not quite. I’ll explain why it’s not quite as easy as it looks and how one might implement it in C and Python.

The function jinc(x) is defined as J1(x) / x, so if you have code to compute J1 then it ought to be a no-brainer. For example, why not use the following C code?


#include <math.h>
double jinc(double x) {
    return j1(x) / x;
}

The problem is that if you pass in 0, the code will divide by 0 and return a NaN. The function jinc(x) is defined to be 1/2 at x = 0 because that’s the limit of J1(x) / x as x goes to 0. So we try again:


#include <math.h>
double jinc(double x) {
    return (x == 0.0) ? 0.5 : j1(x) / x;
}

Does that work? Technically, it could still fail — we’ll come back to that at the end — but we’ll assume for now that it’s OK.

We could write the analogous Python code, and it would be adequate as long as we’re only calling the function with scalars and not NumPy arrays.


from scipy.special import j1
def jinc(x):
    if x == 0.0:
        return 0.5
    return j1(x) / x

Now suppose you want to plot this function. You create an array of points, say

x = np.linspace(-1, 1, 25)
and plot jinc(x). You’ll get a warning: “ValueError: The truth value of an array with one element is ambiguous. Use a.any() or a.all().” Incidentally, if we called linspace with an even integer in the last argument, our array of points would avoid zero and the naive implementation of jinc would work.

When Python tries to apply jinc to an array, it doesn’t know how to interpret the test x == 0. The warning suggests “Do you mean if any component of x is 0? Or if all components of x are 0?” Neither option is what we want. We want to apply jinc as written to each element of x. We could do this by calling the vectorize function.

jinc = np.vectorize(jinc)
This replaces our original jinc function with one that handles NumPy arrays correctly.

There is an extremely unlikely scenario in which the code above could fail. The value of J1(x) is approximately x/2 for small values of x. If the floating point value x is so small that 0.5*x returns 0, our function will return 0, even though it should return 0.5. The C code above works for values of x as small as DBL_MIN and even values much smaller. (DBL_MIN is not the smallest value of a double, only the smallest normalized double.) But if you set

x = DBL_MIN / pow(2.0, 52);
then jinc(x) will return 0. If you want to be absolutely safe, you could change the implementation to


#include <math.h>
double jinc(double x) {
    return (fabs(x) < 1e-8) ? 0.5 : j1(x) / x;
}

Why test for whether the absolute value is less than 10⁻⁸ rather than a much smaller number? For small x, the error in approximating jinc(x) with 1/2 is on the order of x²/16. So for x as large as 10⁻⁸, the approximation error is below the resolution of a double. As a bonus, the function jinc(x) will be more efficient for |x| < 10⁻⁸ since it avoids a call to j1.

Related posts:

Jinc function
Sine approximation for small angles
Functions in math.h that seem unnecessary

]]></description>
<dc:subject>Python SciPy</dc:subject>
<dc:identifier>https://pinboard.in/u:rahuldave/b:5871a164011b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:SciPy"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.dabeaz.com/generators/Generators.pdf?">
    <title>[untitled]</title>
    <dc:date>2012-01-19T16:51:37+00:00</dc:date>
    <link>http://www.dabeaz.com/generators/Generators.pdf?</link>
    <dc:creator>rahuldave</dc:creator><description><![CDATA[Comments]]></description>
<dc:subject>python</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rahuldave/b:96ec7ca4409b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:python"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://feedproxy.google.com/~r/TheEndeavour/~3/VqPL0m8y7Ks/">
    <title>Benford’s law and SciPy</title>
    <dc:date>2011-10-19T11:54:00+00:00</dc:date>
    <link>http://feedproxy.google.com/~r/TheEndeavour/~3/VqPL0m8y7Ks/</link>
    <dc:creator>rahuldave</dc:creator><description><![CDATA[Imagine you picked up a dictionary and found that the pages with A’s were dirty and the Z’s were clean. In between there was a gradual transition with the pages becoming cleaner as you progressed through the alphabet. You might conclude that people have been looking up a lot of words that begin with letters near the beginning of the alphabet and not many near the end.

That’s what Simon Newcomb did in 1881, only he was looking at tables of logarithms. He concluded that people were most interested in looking up the logarithms of numbers that began with 1 and progressively less interested in logarithms of numbers beginning with larger digits. This sounds absolutely bizarre, but he was right. The pattern he described has been repeatedly observed and is called Benford’s law. (Benford re-discovered the same principle in 1938, and per Stigler’s law, Newcomb’s observation was named after Benford.)

Benford’s law predicts that for data sets such as collections of physical constants, about 30% of the numbers will begin with 1 down to about 5% starting with 8 or 9. To be precise, it says the leading digit will be d with probability log10(1 + 1/d). For a good explanation of Benford’s law, see TAOCP volume 2.

A couple days ago I blogged about using SciPy’s collection of physical constants to look for values that were approximately factorials. Let’s look at that set of constants again and see whether the most significant digits of these constants follows Benford’s law.

Here’s a bar chart comparing the actual number of constants starting with each digit to the results we would expect from Benford’s law.



Here’s the code that was used to create the data for the chart.


from math import log10, floor
from scipy.constants import codata

def most_significant_digit(x):
    e = floor(log10(x))
    return int(x*10**-e)

# count how many constants have each leading digit
count = [0]*10
d = codata.physical_constants
for c in d:
    (value, unit, uncertainty) = d[c]
    x = abs(value)
    count[ most_significant_digit(x) ] += 1
total = sum(count)

# expected number of each leading digit per Benford's law
benford = [total*log10(1 + 1./i) for i in range(1, 10)]

The chart itself was produced using matplotlib, starting with this sample code.

The actual counts we see in scipy.constants line up fairly well with the predictions from Benford’s law. The results are much closer to Benford’s prediction than to the uniform distribution that you might have expected before hearing of Benford’s law.

Update: See the next post for an explanation of why factorials also follow Benford’s law.

Related posts:

Physical constants and factorials
Slide rules

]]></description>
<dc:subject>Python SciPy</dc:subject>
<dc:identifier>https://pinboard.in/u:rahuldave/b:70769a6a8719/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:SciPy"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.eflorenzano.com/blog/post/technology-behind-convore/">
    <title>The Technology Behind Convore</title>
    <dc:date>2011-02-16T12:29:35+00:00</dc:date>
    <link>http://www.eflorenzano.com/blog/post/technology-behind-convore/</link>
    <dc:creator>rahuldave</dc:creator><description><![CDATA[We launched Convore last week, and the first question developers tend to ask
when they find Convore is "what technology powers this site?"  It is asked so
often, in fact, that we have started to copy and paste the same short response
again and again.  That response was good enough to satisfy people who simply
wanted to know if we were Rails or Django, or whether we were using node.js for
the real-time stuff, but this article will expand upon that—not only giving
more details for the curious, but also giving us a link to point people at when
they ask the question in the future.  I always wish other people were totally
open about their architectures, so that I can learn from their good choices and
their bad, so I'd like to be as open as possible about ours.  Let's dive in!


The basics
All of our application code is powered by Python.  Our front-end html page
generation is done by Django, which we use in a surprisingly traditional way
given the real-time nature of Convore as a product.  Everything is assembled
at once: all messages, the sidebar, and the header are all rendered on the
server instead of being pulled in after-the-fact with JavaScript.  All of the
important data is canonically stored in PostgreSQL, including messages, topics,
groups, unread counts, and user profiles.  Search functionality is provided by
Solr, which is interfaced into our application by way of the handy Haystack
Django application.



The message lifecycle
When a new message comes into the system, first it's parsed by a series of
regular expressions designed to pull out interesting bits of information from
the message.  Right now all we're looking for is username references and
links (and further, whether those links point at images which should be
rendered in-line.)  At the end of this parsing stage, we have a structured
message parse list, which is converted into JSON.

So, for example if someone posted the message:

@ericflo @simonw Here's how we connect/disconnect from Redis in production: http://dpaste.com/406797/

The resulting JSON parse list would look like this:

[
    {
        "type": "username",
        "user_id": 1,
        "username": "ericflo",
        "markup": "<a href=\"/users/ericflo/\">@ericflo</a>"
    },
    {
        "type": "username",
        "user_id": 56,
        "username": "simonw",
        "markup": " <a href=\"/users/simonw/\">@simonw</a>"
    },
    {
        "type": "text",
        "markup": " Here&#39;s how we connect/disconnect from Redis in production: "
    },
    {
        "type": "url",
        "url": "http://dpaste.com/406797/",
        "markup": "<a href=\"http://dpaste.com/406797/\" target=\"_blank\">http://dpaste.com/406797/</a>"
    }
]

After this is constructed, we log all our available information about this
message, and then save to the database—both the raw message as it was received,
and the JSON-encoded parsed node list.

Now a task is sent to Celery (by way of Redis) notifying it that this new
message has been received.  This Celery task now increments the unread count
for everyone who has access to the topic that the message was posted in, and
then it publishes to a Redis pub/sub for the group that the message was posted
to.  Finally, the task scans through the message, looking for any users that
were mentioned in the message, and writes entries to the database for every
mention.

On the other end of that pub/sub are the many open http requests that our users
have initiated, which are waiting for any new messages or information.  Those
all simultaneously return the new message information, at which point they
reconnect again, waiting for the next message to arrive.



The real-time endpoint
Our live updates endpoint is actually a very simple and lightweight pure-WSGI
Python application, hosted using Eventlet.  It spawns off a coroutine for each
request, and in that coroutine, it looks up all the groups that a user is a
member of, and then opens a connection to Redis subscribing to all of those
channels.  Each of these Eventlet-hosted Python applications has the ability to
host hundreds-to-thousands of open connections, and we run several instances
on each of our front-end machines.  It has a few more responsibilities, like
marking a topic as read before it returns a response, but the most important
thing is to be a bridge between the user and Redis pub/sub.



Future improvements
There are so many places where our architecture can be improved.  This is our
first version, and now that real users are using the system, already some of
our initial assumptions are being challenged.  For instance, we thought that
pub/sub to a channel per group would be enough, but what that means is that
everyone in a group sees the exact same events as everyone else in that group.

This means we don't have the ability to customize each user's experience based
on their preferences--no way to put a user on ignore, filter certain messages,
etc.  It also means that we aren't able to sync up a user's experience across
tabs or browsers, since we don't really want to broadcast to everyone in the
group that one user has visited a topic, thereby removing any unread messages
in that topic.  So going forward we're going to have to break up that per-group
pub/sub into per-user pub/sub.

Another area that could be improved is our unread counts.  Right now they're
stored as rows in our PostgreSQL database, which makes it extremely easy to
batch update them and do aggregate queries on them, but the number of these
rows is increasing rapidly, and without some kind of sharding scheme, it will
at some point become more difficult to work with such a large amount of rows.
My feeling is that this will eventually need to be moved into a non-relational
data store, and we'll need to write a service layer in front of it to deal with
pre-aggregating and distributing updates, but nothing is set in stone just yet.

Finally, Python may not be the best language for this real-time endpoint.
Eventlet is a fantastic Python library and it allowed us to build something
extremely fast that has scaled to several thousand concurrent connections
without breaking a sweat on launch day, but it has its limits.  There is a
large body of work out there on handling a large number of open connections,
using Java's NIO framework, Erlang's mochiweb, or node.js.



That's all folks
We're pretty proud of what we've built in a very short time, and we're glad
it has held up as well as it has on our launch day and afterwards.  We're
excited about the problems we're now being faced with, both scaling the
technology, and scaling the product.  I hope this article has quenched any
curiosity out there about how Convore works.  If there are any questions,
feel free to join Convore and ask away!

(Or discuss it on Hacker News)

]]></description>
<dc:subject>Convore Django Eventlet Haystack PostgreSQL Python Realtime Redis Solr</dc:subject>
<dc:identifier>https://pinboard.in/u:rahuldave/b:a519ac9b35a0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Convore"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Django"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Eventlet"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Haystack"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:PostgreSQL"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Realtime"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Redis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Solr"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://feeds.arstechnica.com/~r/arstechnica/index/~3/tGM5tqWsxfY/tutorial-use-twitters-new-real-time-stream-api-in-python.ars">
    <title>feature: Tutorial: consuming Twitter's real-time stream API in Python</title>
    <dc:date>2010-04-21T17:45:00+00:00</dc:date>
    <link>http://feeds.arstechnica.com/~r/arstechnica/index/~3/tGM5tqWsxfY/tutorial-use-twitters-new-real-time-stream-api-in-python.ars</link>
    <dc:creator>rahuldave</dc:creator><description><![CDATA[
  
  
        
    
Twitter is preparing to launch several impressive new features, including a new streaming API that will give desktop client applications real-time access to the user's message timeline. The new streaming API was announced last week at Twitter's Chirp conference, where it was made available to conference attendees on-site for some preliminary experimentation. Twitter opened it up to the broader third-party developer community on Monday so that programmers can begin testing it to offer informed feedback.


This tutorial will show you how to  consume and process data from Twitter's new streaming API.  The code examples, which are written in the Python programming language, demonstrate how to establish a long-lived HTTP connection with PyCurl, buffer the incoming data, and process it to perform the basic message display functions of a Twitter client application. We will also take a close look at how the new streaming API differs from the existing polling-based REST API.
    
          
      
        
    
      Read the comments on this post


   
]]></description>
<dc:subject>Features Guides Open-source Web programming python tutorial twitter</dc:subject>
<dc:identifier>https://pinboard.in/u:rahuldave/b:58ebd66b4b7c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Features"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Guides"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Open-source"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Web"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:tutorial"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:twitter"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://simonwillison.net/2010/Apr/11/surlex/">
    <title>Introduction to Surlex</title>
    <dc:date>2010-04-11T19:23:35+00:00</dc:date>
    <link>http://simonwillison.net/2010/Apr/11/surlex/</link>
    <dc:creator>rahuldave</dc:creator><description><![CDATA[Introduction to Surlex. A neat drop-in alternative for Django’s regular expression based URL parsing, providing simpler syntax for common path patterns.

]]></description>
<dc:subject>codysoyland django python regex surlex urls</dc:subject>
<dc:identifier>https://pinboard.in/u:rahuldave/b:eca0516a82c5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:codysoyland"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:django"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:regex"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:surlex"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:urls"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://simonwillison.net/2010/Mar/25/onion/">
    <title>The Onion Uses Django, And Why It Matters To Us</title>
    <dc:date>2010-03-25T18:43:24+00:00</dc:date>
    <link>http://simonwillison.net/2010/Mar/25/onion/</link>
    <dc:creator>rahuldave</dc:creator><description><![CDATA[The Onion Uses Django, And Why It Matters To Us. The Onion ported their main site from PHP and Drupal to Django in three months with a team of four developers, including a full migration of their archived content. Their developers answer questions about the switch in this thread on the Django sub-reddit.

]]></description>
<dc:subject>django drupal php python reddit theonion</dc:subject>
<dc:identifier>https://pinboard.in/u:rahuldave/b:a631a49f5750/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:django"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:drupal"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:php"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:reddit"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:theonion"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://feedproxy.google.com/~r/catonmat/~3/GJRqxzmBW9c/">
    <title>Top Ten One-Liners from CommandLineFu Explained</title>
    <dc:date>2010-03-18T03:00:21+00:00</dc:date>
    <link>http://feedproxy.google.com/~r/catonmat/~3/GJRqxzmBW9c/</link>
    <dc:creator>rahuldave</dc:creator><description><![CDATA[
I love working in the shell. Mastery of shell lets you get things done in seconds, rather than minutes or hours, if you chose to write a program instead. 

In this article I’d like to explain the top one-liners from the commandlinefu.com. It’s a user-driven website where people get to choose the best and most useful shell one-liners.

But before I do that, I want to take the opportunity and link to a few of my articles that I wrote some time ago on working efficiently in the command line:


Working Efficiently in Bash (Part I).
Working Efficiently in Bash (Part II).
The Definitive Guide to Bash Command Line History.
A fun article on Set Operations in the Shell.
Another fun article on Solving Google Treasure Hunt in the Shell.

And now the explanation of top one-liners from commandlinefu.

Update: Russian translation available.

#1. Run the last command as root
$ sudo !!
We all know what the sudo command does - it runs the command as another user, in this case, it runs the command as superuser because no other user was specified. But what’s really interesting is the bang-bang !! part of the command. It’s called the event designator. An event designator references a command in shell’s history. In this case the event designator references the previous command. Writing !! is the same as writing !-1. The -1 refers to the last command. You can generalize it, and write !-n to refer to the n-th previous command. To view all your previous commands, type history.

This one-liner is actually really bash-specific, as event designators are a feature of bash.

I wrote about event designators in much more detail in my article “The Definitive Guide to Bash Command Line History.” The article also comes with a printable cheat sheet for working with the history.

#2. Serve the current directory at http://localhost:8000/
$ python -m SimpleHTTPServer
This one-liner starts a web server on port 8000 with the contents of current directory on all the interfaces (address 0.0.0.0), not just localhost. If you have “index.html” or “index.htm” files, it will serve those, otherwise it will list the contents of the currently working directory.

It works because python comes with a standard module called SimpleHTTPServer. The -m argument makes python to search for a module named SimpleHTTPServer.py in all the possible system locations (listed in sys.path and $PYTHONPATH shell variable). Once found, it executes it as a script. If you look at the source code of this module, you’ll find that this module tests if it’s run as a script if __name__ == '__main__', and if it is, it runs the test() method that makes it run a web server in the current directory.

To use a different port, specify it as the next argument:

$ python -m SimpleHTTPServer 8080
This command runs a HTTP server on all local interfaces on port 8080.

#3. Save a file you edited in vim without the needed permissions
:w !sudo tee %
This happens to me way too often. I open a system config file in vim and edit it just to find out that I don’t have permissions to save it. This one-liner saves the day. Instead of writing the file to a temporary file :w /tmp/foobar and then moving the temporary file to the right destination mv /tmp/foobar /etc/service.conf, you now just type the one-liner above in vim and it will save the file.

Here is how it works: if you look at the vim documentation (by typing :he :w in vim), you’ll find the reference to the command :w !{cmd} that says that vim runs {cmd} and passes it the contents of the file as standard input. In this one-liner the {cmd} part is the sudo tee % command. It runs tee % as superuser. But wait, what is %? Well, it’s a read-only register in vim that contains the filename of the current file! Therefore the command that vim executes becomes tee current_filename, with the current directory being whatever the current_file is in. Now what does tee do? The tee command takes standard input and writes it to a file! Rephrasing, it takes the contents of the file edited in vim, and writes it to the file (while being root)! All done!

#4. Change to the previous working directory
$ cd -
Everyone knows this, right? The dash “-” is short for “previous working directory.” The previous working directory is defined by $OLDPWD shell variable. After you use the cd command, it sets the $OLDPWD environment variable, and then, if you type the short version cd -, it effectively becomes cd $OLDPWD and changes to the previous directory.

To change to a directory named “-”, you have to either cd to the parent directory and then do cd ./- or do cd /full/path/to/-.

#5. Run the previous shell command but replace string “foo” with “bar”
$ ^foo^bar^
This is another event designator. This one is for quick substitution. It replaces foo with bar and repeats the last command. It’s actually a shortcut for !!:s/foo/bar/. This one-liner applies the s modifier to the !! event designator. As we learned from one-liner #1, the !! event designator stands for the previous command. Now the s modifier stands for substitute (greetings to sed) and it substitutes the first word with the second word.

Note that this one-liner replaces just the first occurrence of foo in the previous command. To replace all occurrences, add the g modifier (g for global):

$ !!:gs/foo/bar
This one-liner is also bash-specific, as event designators are a feature of bash.

Again, see my article “The Definitive Guide to Bash Command Line History.” I explain all this stuff in great detail.

#6. Quickly backup or copy a file
$ cp filename{,.bak}
This one-liner copies the file named filename to a file named filename.bak. Here is how it works. It uses brace expansion to construct a list of arguments for the cp command. Brace expansion is a mechanism by which arbitrary strings may be generated. In this one-liner filename{,.bak} gets brace expanded to filename filename.bak, which is put in place of the brace expression. The command becomes cp filename filename.bak and the file gets copied.

Talking more about brace expansion, you can do all kinds of combinatorics with it. Here is a fun application:

$ echo {a,b,c}{a,b,c}{a,b,c}
It generates all the possible 3-letter strings from the set {a, b, c}:


aaa aab aac aba abb abc aca acb acc
baa bab bac bba bbb bbc bca bcb bcc
caa cab cac cba cbb cbc cca ccb ccc

And here is how to generate all the possible 2-letter strings from the set of {a, b, c}:


$ echo {a,b,c}{a,b,c}

It produces:


aa ab ac ba bb bc ca cb cc

If you liked this, you may also like my article where I defined a bunch of set operations (such as intersection, union, symmetry, powerset, etc) by using just shell commands. The article is called “Set Operations in the Unix Shell.” (And since I have sets in the shell, I will soon write articles on “Combinatorics in the Shell” and “Algebra in the Shell”. Fun topics to explore. Perhaps even “Topology in the Shell” :))

#7. mtr - traceroute and ping combined
$ mtr google.com
MTR, better known as “Matt’s Traceroute”, combines both the traceroute and ping commands. After each successful hop, it sends a ping request to the found machine; this way it produces the output of both traceroute and ping to better understand the quality of the link. If it finds out a packet took an alternative route, it displays it, and by default it keeps updating the statistics so you know what is going on in real time.

#8. Find the last command that begins with “whatever,” but avoid running it
$ !whatever:p
Another use of event designators. The !whatever designator searches the shell history for the most recently executed command that starts with whatever. But instead of executing it, it prints it. The :p modifier makes it print instead of executing.

This one-liner is bash-specific, as event designators are a feature of bash.

Once again, see my article “The Definitive Guide to Bash Command Line History.” I explain all this stuff in great detail.

#9. Copy your public-key to remote-machine for public-key authentication
$ ssh-copy-id remote-machine
This one-liner copies your public-key, that you generated with ssh-keygen (either SSHv1 file identity.pub or SSHv2 file id_rsa.pub) to the remote-machine and places it in ~/.ssh/authorized_keys file. This ensures that the next time you try to log into that machine, public-key authentication (commonly referred to as “passwordless authentication.”) will be used instead of the regular password authentication.

If you wished to do it yourself, you’d have to take the following steps:


your-machine$ scp ~/.ssh/identity.pub remote-machine:
your-machine$ ssh remote-machine
remote-machine$ cat identity.pub >> ~/.ssh/authorized_keys

This one-liner saves a great deal of typing. Actually I just found out that there was a shorter way to do it:


your-machine$ ssh remote-machine 'cat >> .ssh/authorized_keys' < .ssh/identity.pub

#10. Capture video of a linux desktop
$ ffmpeg -f x11grab -s wxga -r 25 -i :0.0 -sameq /tmp/out.mpg
A pure coincidence, I have done so much video processing with ffmpeg that I know what most of this command does without looking much in the manual.

The ffmpeg command can generally be described as a command that takes a bunch of options and the last option is the output file. In this case the options are -f x11grab -s wxga -r 25 -i :0.0 -sameq and the output file is /tmp/out.mpg.

Here is what the options mean:


-f x11grab makes ffmpeg to set the input video format as x11grab. The X11 framebuffer has a specific format it presents data in and it makes ffmpeg to decode it correctly.
-s wxga makes ffmpeg set the size of the video to wxga, which is a shortcut for 1366×768. This is a strange resolution to use, I’d just write -s 800x600.
-r 25 sets the framerate of the video to 25fps.
-i :0.0 sets the video input file to X11 display 0.0 at localhost.
-sameq preserves the quality of input stream. It’s best to preserve the quality and post-process it later.

You can also specify ffmpeg to grab display from another x-server by changing the -i :0.0 to -i host:0.0.

If you’re interested in ffmpeg, here are my other articles on ffmpeg that I wrote a while ago:


How to Extract Audio Tracks from YouTube Videos
Converting YouTube Flash Videos to a Better Format with ffmpeg

PS. This article was so fun to write, that I decided to write several more parts. Tune in the next time for “The Next Top Ten One-Liners from CommandLineFu Explained” :)

Have fun. See ya!

PPS. Follow me on twitter for updates.


   
]]></description>
<dc:subject>Programming authorized_keys bash cd combinatorics commandlinefu cp desktop display event_designators ffmpeg history identity.pub id_rsa.pub linux mtr oldpwd one_liners passwordless_authentication ping public_key_authentication python pythonpath root sets shell simplehttpserver ssh ssh_copy_id ssh_keygen sshv1 sshv2 sudo tee traceroute vim x11</dc:subject>
<dc:identifier>https://pinboard.in/u:rahuldave/b:eb42c63da138/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:Programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:authorized_keys"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:bash"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:cd"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:combinatorics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:commandlinefu"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:cp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:desktop"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:display"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:event_designators"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:ffmpeg"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:history"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:identity.pub"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:id_rsa.pub"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:linux"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:mtr"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:oldpwd"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:one_liners"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:passwordless_authentication"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:ping"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:public_key_authentication"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:pythonpath"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:root"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:sets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:shell"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:simplehttpserver"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:ssh"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:ssh_copy_id"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:ssh_keygen"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:sshv1"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:sshv2"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:sudo"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:tee"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:traceroute"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:vim"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:x11"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://blog.doughellmann.com/2010/03/pymotw-parsing-xml-documents-with.html">
    <title>PyMOTW: Parsing XML Documents with ElementTree</title>
    <dc:date>2010-03-14T14:58:00+00:00</dc:date>
    <link>http://blog.doughellmann.com/2010/03/pymotw-parsing-xml-documents-with.html</link>
    <dc:creator>rahuldave</dc:creator><description><![CDATA[Parsing XML Documents with ElementTree
Parsed XML documents are represented in memory by ElementTree and Element objects connected into a tree structure based on the way the nodes in the XML document are nested.
Parsing an Entire Document
When you parse an entire document with parse(), an ElementTree instance is returned.  The tree knows about all of the data in the input document, and the nodes of the tree can be searched or manipulated in place.  While this flexibility can make working with the parsed document a little easier, it typically takes more memory than an event-based parsing approach since the entire document must be loaded at one time.
The memory footprint of small, simple documents such as this list of podcasts represented as an OPML outline is not significant:
<?xml version="1.0" encoding="UTF-8"?><opml version="1.0"><head><title>My Podcasts</title><dateCreated>Sun, 07 Mar 2010 15:53:26 GMT</dateCreated><dateModified>Sun, 07 Mar 2010 15:53:26 GMT</dateModified></head><body>  <outline text="Science and Tech">    <outline text="APM: Future Tense" type="rss"              xmlUrl="http://www.publicradio.org/columns/futuretense/podcast.xml"              htmlUrl="http://www.publicradio.org/columns/futuretense/" /><outline text="Engines Of Our Ingenuity Podcast" type="rss"              xmlUrl="http://www.npr.org/rss/podcast.php?id=510030"              htmlUrl="http://www.uh.edu/engines/engines.htm" /><outline text="Science &#38; the City" type="rss"              xmlUrl="http://www.nyas.org/Podcasts/Atom.axd"              htmlUrl="http://www.nyas.org/WhatWeDo/SciencetheCity.aspx" />  </outline>  <outline text="Books and Fiction"><outline text="Podiobooker" type="rss"              xmlUrl="http://feeds.feedburner.com/podiobooks"              htmlUrl="http://www.podiobooks.com/blog" /><outline text="The Drabblecast" type="rss"              xmlUrl="http://web.me.com/normsherman/Site/Podcast/rss.xml"              htmlUrl="http://web.me.com/normsherman/Site/Podcast/Podcast.html" /><outline text="tor.com / category / tordotstories" type="rss"              xmlUrl="http://www.tor.com/rss/category/TorDotStories"              htmlUrl="http://www.tor.com/" />  </outline>  <outline text="Computers and Programming"><outline text="MacBreak Weekly" type="rss"              xmlUrl="http://leo.am/podcasts/mbw"              htmlUrl="http://twit.tv/mbw" /><outline text="FLOSS Weekly" type="rss"              xmlUrl="http://leo.am/podcasts/floss"              htmlUrl="http://twit.tv" /><outline text="Core Intuition" type="rss"              xmlUrl="http://www.coreint.org/podcast.xml"              htmlUrl="http://www.coreint.org/" />  </outline>  <outline text="Python">    <outline text="PyCon Podcast" type="rss"              
xmlUrl="http://advocacy.python.org/podcasts/pycon.rss"              htmlUrl="http://advocacy.python.org/podcasts/" /><outline text="A Little Bit of Python" type="rss"              xmlUrl="http://advocacy.python.org/podcasts/littlebit.rss"              htmlUrl="http://advocacy.python.org/podcasts/" /><outline text="Django Dose Everything Feed" type="rss"              xmlUrl="http://djangodose.com/everything/feed/" />  </outline>  <outline text="Miscelaneous"><outline text="dhellmann's CastSampler Feed" type="rss"              xmlUrl="http://www.castsampler.com/cast/feed/rss/dhellmann/"              htmlUrl="http://www.castsampler.com/users/dhellmann/" />  </outline></body></opml>To parse the file, pass an open file handle to parse().  It willread the data, parse the XML, and return an ElementTree object.
from xml.etree import ElementTreewith open('podcasts.opml', 'rt') as f:    tree = ElementTree.parse(f)print tree$ python ElementTree_parse_opml.py<xml.etree.ElementTree.ElementTree instance at 0x82f58>Traversing the Parsed TreeNow that we have a parsed XML tree, we can iterate over it, visitingall of the children in order and examining their attributes andcontents.
from xml.etree import ElementTreewith open('podcasts.opml', 'rt') as f:    tree = ElementTree.parse(f)for node in tree.getiterator():    print node.tag, node.attribHere we print the entire tree, one tag at a time.
$ python ElementTree_dump_opml.pyopml {'version': '1.0'}head {}title {}dateCreated {}dateModified {}body {}outline {'text': 'Science and Tech'}outline {'xmlUrl': 'http://www.publicradio.org/columns/futuretense/podcast.xml', 'text': 'APM: Future Tense', 'type': 'rss', 'htmlUrl': 'http://www.publicradio.org/columns/futuretense/'}outline {'xmlUrl': 'http://www.npr.org/rss/podcast.php?id=510030', 'text': 'Engines Of Our Ingenuity Podcast', 'type': 'rss', 'htmlUrl': 'http://www.uh.edu/engines/engines.htm'}outline {'xmlUrl': 'http://www.nyas.org/Podcasts/Atom.axd', 'text': 'Science & the City', 'type': 'rss', 'htmlUrl': 'http://www.nyas.org/WhatWeDo/SciencetheCity.aspx'}outline {'text': 'Books and Fiction'}outline {'xmlUrl': 'http://feeds.feedburner.com/podiobooks', 'text': 'Podiobooker', 'type': 'rss', 'htmlUrl': 'http://www.podiobooks.com/blog'}outline {'xmlUrl': 'http://web.me.com/normsherman/Site/Podcast/rss.xml', 'text': 'The Drabblecast', 'type': 'rss', 'htmlUrl': 'http://web.me.com/normsherman/Site/Podcast/Podcast.html'}outline {'xmlUrl': 'http://www.tor.com/rss/category/TorDotStories', 'text': 'tor.com / category / tordotstories', 'type': 'rss', 'htmlUrl': 'http://www.tor.com/'}outline {'text': 'Computers and Programming'}outline {'xmlUrl': 'http://leo.am/podcasts/mbw', 'text': 'MacBreak Weekly', 'type': 'rss', 'htmlUrl': 'http://twit.tv/mbw'}outline {'xmlUrl': 'http://leo.am/podcasts/floss', 'text': 'FLOSS Weekly', 'type': 'rss', 'htmlUrl': 'http://twit.tv'}outline {'xmlUrl': 'http://www.coreint.org/podcast.xml', 'text': 'Core Intuition', 'type': 'rss', 'htmlUrl': 'http://www.coreint.org/'}outline {'text': 'Python'}outline {'xmlUrl': 'http://advocacy.python.org/podcasts/pycon.rss', 'text': 'PyCon Podcast', 'type': 'rss', 'htmlUrl': 'http://advocacy.python.org/podcasts/'}outline {'xmlUrl': 'http://advocacy.python.org/podcasts/littlebit.rss', 'text': 'A Little Bit of Python', 'type': 'rss', 'htmlUrl': 'http://advocacy.python.org/podcasts/'}outline {'xmlUrl': 
'http://djangodose.com/everything/feed/', 'text': 'Django Dose Everything Feed', 'type': 'rss'}outline {'text': 'Miscelaneous'}outline {'xmlUrl': 'http://www.castsampler.com/cast/feed/rss/dhellmann/', 'text': "dhellmann's CastSampler Feed", 'type': 'rss', 'htmlUrl': 'http://www.castsampler.com/users/dhellmann/'}If we wanted to print only the groups of names and feed URLs for thepodcasts, leaving out of all of the data in the header section, wecould iterate over only just the outline nodes and print thetext and xmlUrl attributes.
from xml.etree import ElementTreewith open('podcasts.opml', 'rt') as f:    tree = ElementTree.parse(f)for node in tree.getiterator('outline'):    name = node.attrib.get('text')    url = node.attrib.get('xmlUrl')    if name and url:        print '  %s :: %s' % (name, url)    else:        print nameBecause we passed 'outline' to tree.getiterator() processing islimited to only nodes with the tag 'outline'.
$ python ElementTree_show_feed_urls.pyScience and Tech  APM: Future Tense :: http://www.publicradio.org/columns/futuretense/podcast.xml  Engines Of Our Ingenuity Podcast :: http://www.npr.org/rss/podcast.php?id=510030  Science & the City :: http://www.nyas.org/Podcasts/Atom.axdBooks and Fiction  Podiobooker :: http://feeds.feedburner.com/podiobooks  The Drabblecast :: http://web.me.com/normsherman/Site/Podcast/rss.xml  tor.com / category / tordotstories :: http://www.tor.com/rss/category/TorDotStoriesComputers and Programming  MacBreak Weekly :: http://leo.am/podcasts/mbw  FLOSS Weekly :: http://leo.am/podcasts/floss  Core Intuition :: http://www.coreint.org/podcast.xmlPython  PyCon Podcast :: http://advocacy.python.org/podcasts/pycon.rss  A Little Bit of Python :: http://advocacy.python.org/podcasts/littlebit.rss  Django Dose Everything Feed :: http://djangodose.com/everything/feed/Miscelaneous  dhellmann's CastSampler Feed :: http://www.castsampler.com/cast/feed/rss/dhellmann/Finding Nodes in a DocumentWalking the entire tree yourself like this searching for relevantnodes can be error prone.  In the example above, we had to look ateach outline node to determine if it was a group (nodes with only a“text” attribute) or podcast (with both “text” and “xmlUrl”).  If wewere writing a podcast downloader and needed to produce a simple listof the podcast feed URLs, without names or groups, we might simplifythe logic using findall() to look for nodes with more descriptivesearch characteristics.
A first pass at converting the above example might construct an XPath argument to look for all outline nodes.
from xml.etree import ElementTreewith open('podcasts.opml', 'rt') as f:    tree = ElementTree.parse(f)for node in tree.findall('.//outline'):    url = node.attrib.get('xmlUrl')    if url:        print urlThe logic in this version is not substantially different than theversion using getiterator().  We still have to check for thepresence of the URL, except that we don’t print the group name whenthe URL is not found.
$ python ElementTree_find_feeds_by_tag.pyhttp://www.publicradio.org/columns/futuretense/podcast.xmlhttp://www.npr.org/rss/podcast.php?id=510030http://www.nyas.org/Podcasts/Atom.axdhttp://feeds.feedburner.com/podiobookshttp://web.me.com/normsherman/Site/Podcast/rss.xmlhttp://www.tor.com/rss/category/TorDotStorieshttp://leo.am/podcasts/mbwhttp://leo.am/podcasts/flosshttp://www.coreint.org/podcast.xmlhttp://advocacy.python.org/podcasts/pycon.rsshttp://advocacy.python.org/podcasts/littlebit.rsshttp://djangodose.com/everything/feed/http://www.castsampler.com/cast/feed/rss/dhellmann/Another version can take advantage of the fact that we know theoutline nodes are only nested two levels deep.  If we change thesearch path to .//outline/outline we will process only the secondlevel of outline nodes.
from xml.etree import ElementTreewith open('podcasts.opml', 'rt') as f:    tree = ElementTree.parse(f)for node in tree.findall('.//outline/outline'):    url = node.attrib.get('xmlUrl')    print urlWe expect all of those outline nodes nested 2 levels deep in the inputwill have the xmlURL attribute refering to the podcast feed, so if wewere brave we could skip checking for for the attribute before usingit.
$ python ElementTree_find_feeds_by_structure.pyhttp://www.publicradio.org/columns/futuretense/podcast.xmlhttp://www.npr.org/rss/podcast.php?id=510030http://www.nyas.org/Podcasts/Atom.axdhttp://feeds.feedburner.com/podiobookshttp://web.me.com/normsherman/Site/Podcast/rss.xmlhttp://www.tor.com/rss/category/TorDotStorieshttp://leo.am/podcasts/mbwhttp://leo.am/podcasts/flosshttp://www.coreint.org/podcast.xmlhttp://advocacy.python.org/podcasts/pycon.rsshttp://advocacy.python.org/podcasts/littlebit.rsshttp://djangodose.com/everything/feed/http://www.castsampler.com/cast/feed/rss/dhellmann/This version is limited to our existing structure, though, so if theoutline nodes are ever rearranged into a deeper tree it will stopworking.
Parsed Node AttributesThe items returned by findall() and getiterator() are Elementobjects, each representing a node in the XML parse tree.  Each Elementhas attributes for accessing data pulled out of the XML.  This can beillustrated with a somewhat more contrived example input file,data.xml:
1234567<?xml version="1.0" encoding="UTF-8"?><top>  <child>This child contains text.</child>  <child_with_tail>This child has regular text.</child_with_tail>And "tail" text.  <with_attributes name="value" foo="bar" />  <entity_expansion attribute="This &#38; That">That &#38; This</entity_expansion></top>The “attributes” of a node are available in the attrib property,which acts like a dictionary.
from xml.etree import ElementTreewith open('data.xml', 'rt') as f:    tree = ElementTree.parse(f)node = tree.find('./with_attributes')print node.tagfor name, value in sorted(node.attrib.items()):    print '  %-4s = "%s"' % (name, value)    The node on line 5 of the input file has 2 attributes, name and foo.
$ python ElementTree_node_attributes.pywith_attributes  foo  = "bar"  name = "value"The text content of the nodes is available, along with the “tail” textthat comes after the end of a close tag.
from xml.etree import ElementTreewith open('data.xml', 'rt') as f:    tree = ElementTree.parse(f)for path in [ './child', './child_with_tail' ]:    node = tree.find(path)    print node.tag    print '  child node text:', node.text    print '  and tail text  :', node.tailThe child node on line 3 contains embedded text, and the node online 4 has text with a tail (including any whitespace).
$ python ElementTree_node_text.pychild  child node text: This child contains text.  and tail text  :child_with_tail  child node text: This child has regular text.  and tail text  : And "tail" text.Conveniently, XML entity references embedded in the document areconverted to the appropriate characters before values are returned.
from xml.etree import ElementTreewith open('data.xml', 'rt') as f:    tree = ElementTree.parse(f)node = tree.find('entity_expansion')print node.tagprint '  in attribute:', node.attrib['attribute']print '  in text     :', node.textThe conversion saves you from having to worry about an implementationdetail of representing certain characters in an XML document.
$ python ElementTree_entity_references.pyentity_expansion  in attribute: This & That  in text     : That & ThisWatching Events While ParsingThe other API useful for processing XML documents is event-based.  Theparser generates start events for opening tags and end eventsfor closing tags.  Iterating over the event stream lets you extractdata from the document while parsing it, which is convenient if youdon’t need to manipulate the entire document afterwards and if youwant to avoid holding the entire parsed document in memory.
iterparse() returns an iterable that produces tuples containing the name of the event and the node triggering the event.  Events can be one of:
startA new tag has been encountered.  The closing angle bracket of thetag was processed, but not the contents.endThe closing angle bracket of a closing tag has been processed.  Allof the children were already processed.start-nsStart a namespace declaration.end-nsEnd a namespace declaration.from xml.etree.ElementTree import iterparsedepth = 0prefix_width = 8prefix_dots = '.' * prefix_widthline_template = '{prefix:<0.{prefix_len}}{event:<8}{suffix:<{suffix_len}} {node.tag:<12} {node_id}'for (event, node) in iterparse('podcasts.opml', ['start', 'end', 'start-ns', 'end-ns']):    if event == 'end':        depth -= 1    prefix_len = depth * 2        print line_template.format(prefix=prefix_dots,                               prefix_len=prefix_len,                               suffix='',                               suffix_len=(prefix_width - prefix_len),                               node=node,                               node_id=id(node),                               event=event,                               )        if event == 'start':        depth += 1By default, only end events are generated.  To see other events,pass the list of event names you want to receive to iterparse(),as in this example:
$ python ElementTree_show_all_events.pystart            opml         876256..start          head         876336....start        title        888920....end          title        888920....start        dateCreated  889280....end          dateCreated  889280....start        dateModified 889320....end          dateModified 889320..end            head         876336..start          body         889400....start        outline      889560......start      outline      889600......end        outline      889600......start      outline      889480......end        outline      889480......start      outline      889680......end        outline      889680....end          outline      889560....start        outline      889720......start      outline      889760......end        outline      889760......start      outline      889840......end        outline      889840......start      outline      889920......end        outline      889920....end          outline      889720....start        outline      889880......start      outline      890040......end        outline      890040......start      outline      890120......end        outline      890120......start      outline      890200......end        outline      890200....end          outline      889880....start        outline      890240......start      outline      890360......end        outline      890360......start      outline      890440......end        outline      890440......start      outline      890520......end        outline      890520....end          outline      890240....start        outline      890640......start      outline      890720......end        outline      890720....end          outline      890640..end            body         889400end              opml         876256The event-style of processing may be more natural for some operations,such as converting XML input to some other format.  
For example, suppose we want to convert the list of podcasts we have been working with from an XML file to a data file we can load into a spreadsheet or database application.  We don’t need to hold the entire data set in memory at a time, since we’re simply changing the format.
import csvfrom xml.etree.ElementTree import iterparseimport syswriter = csv.writer(sys.stdout, quoting=csv.QUOTE_NONNUMERIC)group_name = ''for (event, node) in iterparse('podcasts.opml', events=['start']):    if node.tag != 'outline':        # Ignore anything not part of the outline        continue    if not node.attrib.get('xmlUrl'):        # Remember the current group        group_name = node.attrib['text']    else:        # Output a podcast entry        writer.writerow( (group_name, node.attrib['text'],                          node.attrib['xmlUrl'],                          node.attrib.get('htmlUrl', ''),                          )                         )This example program converts our podcast list to a CSV file, ready tobe imported into another application.
$ python ElementTree_write_podcast_csv.py"Science and Tech","APM: Future Tense","http://www.publicradio.org/columns/futuretense/podcast.xml","http://www.publicradio.org/columns/futuretense/""Science and Tech","Engines Of Our Ingenuity Podcast","http://www.npr.org/rss/podcast.php?id=510030","http://www.uh.edu/engines/engines.htm""Science and Tech","Science & the City","http://www.nyas.org/Podcasts/Atom.axd","http://www.nyas.org/WhatWeDo/SciencetheCity.aspx""Books and Fiction","Podiobooker","http://feeds.feedburner.com/podiobooks","http://www.podiobooks.com/blog""Books and Fiction","The Drabblecast","http://web.me.com/normsherman/Site/Podcast/rss.xml","http://web.me.com/normsherman/Site/Podcast/Podcast.html""Books and Fiction","tor.com / category / tordotstories","http://www.tor.com/rss/category/TorDotStories","http://www.tor.com/""Computers and Programming","MacBreak Weekly","http://leo.am/podcasts/mbw","http://twit.tv/mbw""Computers and Programming","FLOSS Weekly","http://leo.am/podcasts/floss","http://twit.tv""Computers and Programming","Core Intuition","http://www.coreint.org/podcast.xml","http://www.coreint.org/""Python","PyCon Podcast","http://advocacy.python.org/podcasts/pycon.rss","http://advocacy.python.org/podcasts/""Python","A Little Bit of Python","http://advocacy.python.org/podcasts/littlebit.rss","http://advocacy.python.org/podcasts/""Python","Django Dose Everything Feed","http://djangodose.com/everything/feed/","""Miscelaneous","dhellmann's CastSampler Feed","http://www.castsampler.com/cast/feed/rss/dhellmann/","http://www.castsampler.com/users/dhellmann/"Creating Your Own Tree BuilderA potentially more efficient means of handling parse events is toreplace the standard tree builder behavior with your own.  TheElementTree parser uses an XMLTreeBuilder to process the XML and callmethods on a target class to save the results.  The usual output is anElementTree instance created by the default TreeBuilder class.  
By replacing TreeBuilder with your own class, you can receive the events before the Element nodes are instantiated, saving that portion of the overhead.
The XML-to-CSV app from the previous section can be translated to a tree builder.
import csvfrom xml.etree.ElementTree import XMLTreeBuilderimport sysclass PodcastListToCSV(object):    def __init__(self, outputFile):        self.writer = csv.writer(outputFile, quoting=csv.QUOTE_NONNUMERIC)        self.group_name = ''        return    def start(self, tag, attrib):        if tag != 'outline':            # Ignore anything not part of the outline            return        if not attrib.get('xmlUrl'):            # Remember the current group            self.group_name = attrib['text']        else:            # Output a podcast entry            self.writer.writerow( (self.group_name, attrib['text'],                                   attrib['xmlUrl'],                                   attrib.get('htmlUrl', ''),                                   )                                  )    def end(self, tag):        # Ignore closing tags        pass    def data(self, data):        # Ignore data inside nodes        pass    def close(self):        # Nothing special to do here        returntarget = PodcastListToCSV(sys.stdout)parser = XMLTreeBuilder(target=target)with open('podcasts.opml', 'rt') as f:    for line in f:        parser.feed(line)parser.close()PodcastListToCSV implements the TreeBuilder protocol.  Each time anew XML tag is encountered, start() is called with the tag nameand attributes.  When a closing tag is seen end() is called withthe name.  In between, data() is called when a node has content(the tree builder is expected to keep up with the “current” node).When all of the input is processed, close() is called.  It canreturn a value, which will be returned to the user of theXMLTreeBuilder.
$ python ElementTree_podcast_csv_treebuilder.py"Science and Tech","APM: Future Tense","http://www.publicradio.org/columns/futuretense/podcast.xml","http://www.publicradio.org/columns/futuretense/""Science and Tech","Engines Of Our Ingenuity Podcast","http://www.npr.org/rss/podcast.php?id=510030","http://www.uh.edu/engines/engines.htm""Science and Tech","Science & the City","http://www.nyas.org/Podcasts/Atom.axd","http://www.nyas.org/WhatWeDo/SciencetheCity.aspx""Books and Fiction","Podiobooker","http://feeds.feedburner.com/podiobooks","http://www.podiobooks.com/blog""Books and Fiction","The Drabblecast","http://web.me.com/normsherman/Site/Podcast/rss.xml","http://web.me.com/normsherman/Site/Podcast/Podcast.html""Books and Fiction","tor.com / category / tordotstories","http://www.tor.com/rss/category/TorDotStories","http://www.tor.com/""Computers and Programming","MacBreak Weekly","http://leo.am/podcasts/mbw","http://twit.tv/mbw""Computers and Programming","FLOSS Weekly","http://leo.am/podcasts/floss","http://twit.tv""Computers and Programming","Core Intuition","http://www.coreint.org/podcast.xml","http://www.coreint.org/""Python","PyCon Podcast","http://advocacy.python.org/podcasts/pycon.rss","http://advocacy.python.org/podcasts/""Python","A Little Bit of Python","http://advocacy.python.org/podcasts/littlebit.rss","http://advocacy.python.org/podcasts/""Python","Django Dose Everything Feed","http://djangodose.com/everything/feed/","""Miscelaneous","dhellmann's CastSampler Feed","http://www.castsampler.com/cast/feed/rss/dhellmann/","http://www.castsampler.com/users/dhellmann/"Parsing StringsTo work with smaller bits of XML text, especially string literals asmight be embedded in the source of a program, usexml.etree.ElementTree.XML and pass a single argument, the stringcontaining the XML to be parsed.
from xml.etree.ElementTree import XMLparsed = XML('''<root>  <group>    <child id="a">This is child "a".</child>    <child id="b">This is child "b".</child>  </group>  <group>    <child id="c">This is child "c".</child>  </group></root>''')print 'parsed =', parsedfor elem in parsed.getiterator():    print elem.tag    if elem.text is not None and elem.text.strip():        print '  text: "%s"' % elem.text    if elem.tail is not None and elem.tail.strip():        print '  tail: "%s"' % elem.tail    for name, value in sorted(elem.attrib.items()):        print '  %-4s = "%s"' % (name, value)    printNotice that unlike with parse(), the return value is an Elementinstance instead of an ElementTree.
$ python ElementTree_XML.pyparsed = <Element root at d4e40>rootgroupchild  text: "This is child "a"."  id   = "a"child  text: "This is child "b"."  id   = "b"groupchild  text: "This is child "c"."  id   = "c"For structured XML that uses the “id” attribute to identify uniquenodes of interest, XMLID() is a convenient way to access the parseresults.
from xml.etree.ElementTree import XMLIDtree, id_map = XMLID('''<root>  <group>    <child id="a">This is child "a".</child>    <child id="b">This is child "b".</child>  </group>  <group>    <child id="c">This is child "c".</child>  </group></root>''')for key, value in sorted(id_map.items()):    print '%s = %s' % (key, value)    XMLID() returns the parsed tree as an Element object,along with a dictionary mapping the id attribute strings to theindividual nodes in the tree.
$ python ElementTree_XMLID.pya = <Element child at d3eb8>b = <Element child at d3d78>c = <Element child at d9030>See also
Outline Processor Markup Language, OPML: Dave Winer’s OPML specification and documentation. XPath Support in ElementTree: Part of Fredrik Lundh’s original documentation for ElementTree. csv: Read and write comma-separated-value files. PyMOTW Home
The canonical version of this article

  
]]></description>
<dc:subject>python PyMOTW</dc:subject>
<dc:identifier>https://pinboard.in/u:rahuldave/b:e457a23f8452/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:PyMOTW"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://simonwillison.net/2010/Mar/11/cachemachine/">
    <!-- Bookmark entry for the Cache Machine post. The tags listed as
         space-separated text in dc:subject are repeated one per rdf:li
         inside taxo:topics. The CDATA terminator of the description is
         kept at column 0 so no extra whitespace leaks into the text. -->
    <title>Cache Machine: Automatic caching for your Django models</title>
    <dc:date>2010-03-11T19:35:32+00:00</dc:date>
    <link>http://simonwillison.net/2010/Mar/11/cachemachine/</link>
    <dc:creator>rahuldave</dc:creator>
    <description><![CDATA[Cache Machine: Automatic caching for your Django models. This is the third new ORM caching layer for Django I’ve seen in the past month! Cache Machine was developed for zamboni, the port of addons.mozilla.org to Django. Caching is enabled using a model mixin class (to hook up some post_delete hooks) and a custom caching manager. Invalidation works by maintaining a “flush list” of dependent cache entries for each object—this is currently stored in memcached and hence has potential race conditions, but a comment in the source code suggests that this could be solved by moving to redis.

]]></description>
    <dc:subject>cachemachine caching django memcached mozilla orm ormcaching python redis</dc:subject>
    <dc:identifier>https://pinboard.in/u:rahuldave/b:f6a05c5e2b20/</dc:identifier>
    <taxo:topics>
        <rdf:Bag>
            <rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:cachemachine"/>
            <rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:caching"/>
            <rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:django"/>
            <rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:memcached"/>
            <rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:mozilla"/>
            <rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:orm"/>
            <rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:ormcaching"/>
            <rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:python"/>
            <rdf:li rdf:resource="https://pinboard.in/u:rahuldave/t:redis"/>
        </rdf:Bag>
    </taxo:topics>
</item>
</rdf:RDF>