<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (rybesh)</title>
    <link>https://pinboard.in/u:rybesh/public/</link>
    <description>recent bookmarks from rybesh</description>
    <items>
      <rdf:Seq>	<rdf:li rdf:resource="https://github.com/Living-with-machines/DiachronicEmb-BigHistData"/>
	<rdf:li rdf:resource="https://langchain.readthedocs.io/en/latest/"/>
	<rdf:li rdf:resource="https://kazemnejad.com/blog/transformer_architecture_positional_encoding/"/>
	<rdf:li rdf:resource="http://jalammar.github.io/illustrated-transformer/"/>
	<rdf:li rdf:resource="https://towardsdatascience.com/illustrated-guide-to-transformers-step-by-step-explanation-f74876522bc0"/>
	<rdf:li rdf:resource="https://textual-inversion.github.io/"/>
	<rdf:li rdf:resource="https://github.com/lueck/standoff-mode"/>
	<rdf:li rdf:resource="https://markusstrasser.org/p/bcd8bded-7136-4bb4-8f97-e8a3a7b6d926/"/>
	<rdf:li rdf:resource="https://github.com/ryanjgallagher/shifterator"/>
	<rdf:li rdf:resource="https://github.com/dbamman/book-nlp"/>
	<rdf:li rdf:resource="https://github.com/priyaradhakrishnan0/ELDEN"/>
	<rdf:li rdf:resource="https://prodi.gy/"/>
	<rdf:li rdf:resource="https://github.com/allenai/allennlp"/>
	<rdf:li rdf:resource="https://github.com/google/sling"/>
	<rdf:li rdf:resource="https://github.com/JasonKessler/scattertext/"/>
	<rdf:li rdf:resource="http://textract.readthedocs.io/en/stable/"/>
	<rdf:li rdf:resource="https://meta.wikimedia.org/wiki/Grants:IEG/StrepHit:_Wikidata_Statements_Validation_via_References"/>
	<rdf:li rdf:resource="http://www.umiacs.umd.edu/~hal/sayit.py"/>
	<rdf:li rdf:resource="http://honnibal.github.io/spaCy/"/>
	<rdf:li rdf:resource="http://cs224d.stanford.edu/"/>
	<rdf:li rdf:resource="http://whoo.ps/2015/02/23/futures-of-text"/>
	<rdf:li rdf:resource="https://github.com/StanfordHCI/termite/blob/master/pipeline/compute_similarity.py"/>
	<rdf:li rdf:resource="http://www2007.org/papers/paper342.pdf"/>
	<rdf:li rdf:resource="http://brenocon.com/te/"/>
	<rdf:li rdf:resource="http://conll.cemantix.org/2012/data.html"/>
	<rdf:li rdf:resource="https://github.com/dlwh/puck"/>
	<rdf:li rdf:resource="http://nlp.stanford.edu/software/dcoref.shtml"/>
	<rdf:li rdf:resource="http://nlp.cs.berkeley.edu/berkeleycoref.shtml"/>
	<rdf:li rdf:resource="https://code.google.com/p/berkeley-coreference-analyser/"/>
	<rdf:li rdf:resource="http://www.ims.uni-stuttgart.de/forschung/ressourcen/werkzeuge/HOTCoref.en.html"/>
	<rdf:li rdf:resource="http://www.cs.utah.edu/nlp/reconcile/"/>
	<rdf:li rdf:resource="http://cogcomp.cs.illinois.edu/page/software_view/18"/>
	<rdf:li rdf:resource="http://www.hlt.utdallas.edu/~altaf/cherrypicker.html"/>
	<rdf:li rdf:resource="https://sandbox.htrc.illinois.edu/HTRC-UI-Portal2/Features"/>
	<rdf:li rdf:resource="http://sourceforge.net/projects/topictiling/"/>
	<rdf:li rdf:resource="http://www.site.uottawa.ca/~ankazant/Annas_page/Downloads.html"/>
	<rdf:li rdf:resource="https://code.google.com/p/uima-text-segmenter/"/>
	<rdf:li rdf:resource="https://github.com/jacobeisenstein/bayes-seg"/>
	<rdf:li rdf:resource="https://code.google.com/p/whatswrong/"/>
	<rdf:li rdf:resource="http://ir.inf.ed.ac.uk/wiki/doku.php?id=yari:mtx"/>
	<rdf:li rdf:resource="http://nlp.stanford.edu/~manning/papers/CICLing2011-manning-tagging.pdf"/>
	<rdf:li rdf:resource="http://persistence.uni-leipzig.org/nlp2rdf/"/>
	<rdf:li rdf:resource="https://code.google.com/p/word2vec/"/>
	<rdf:li rdf:resource="http://ftp.cs.toronto.edu/pub/gh/Hirst-TSD-2008.pdf"/>
	<rdf:li rdf:resource="http://www.isi.edu/natural-language/amr/a.pdf"/>
	<rdf:li rdf:resource="http://www.cs.columbia.edu/~scohen/naacl13tutorial/"/>
	<rdf:li rdf:resource="http://www1.cs.columbia.edu/nlp/tools.cgi#LCseg"/>
	<rdf:li rdf:resource="http://ceur-ws.org/Vol-779/derive2011_submission_1.pdf"/>
	<rdf:li rdf:resource="http://nlp.stanford.edu/courses/lsa354/"/>
	<rdf:li rdf:resource="http://braque.cc/ShowItem?handle=2RS2ML03"/>
	<rdf:li rdf:resource="http://www.atala.org/IMG/pdf/0-Introduction-TAL53-2.pdf"/>
	<rdf:li rdf:resource="http://research.microsoft.com/en-us/um/people/hoifung/papers/pfi13.pdf"/>
	<rdf:li rdf:resource="http://www.cs.cmu.edu/~dbamman/pubs/pdf/bamman+oconnor+smith.acl13.pdf"/>
	<rdf:li rdf:resource="http://www.cis.temple.edu/~yates/papers/2013-comp-ling-rep-learning-preprint.pdf"/>
	<rdf:li rdf:resource="http://www.cis.temple.edu/~yates/papers/open-sem-parsing.pdf"/>
	<rdf:li rdf:resource="https://code.google.com/p/relation-extraction-corpus/downloads/list"/>
	<rdf:li rdf:resource="http://people.csail.mit.edu/yklee/papers/temp06.pdf"/>
	<rdf:li rdf:resource="http://nlp.stanford.edu/pubs/discourse-referent-lifespans.pdf"/>
	<rdf:li rdf:resource="http://library.ahima.org/xpedio/groups/public/documents/ahima/bok1_040449.html"/>
	<rdf:li rdf:resource="http://arxiv.org/pdf/1301.7738v1.pdf"/>
	<rdf:li rdf:resource="http://pypln.org/"/>
	<rdf:li rdf:resource="http://snowball.tartarus.org/"/>
	<rdf:li rdf:resource="http://www.socher.org/index.php/DeepLearningTutorial/DeepLearningTutorial"/>
	<rdf:li rdf:resource="http://brat.nlplab.org/"/>
	<rdf:li rdf:resource="http://summly.com/"/>
	<rdf:li rdf:resource="http://www.let.vu.nl/en/events/news/2012/vu-to-develop-history-recorder.asp"/>
	<rdf:li rdf:resource="http://www.umiacs.umd.edu/~resnik/pubs/gibbs.pdf"/>
	<rdf:li rdf:resource="http://code.google.com/p/dkpro-core-asl/"/>
	<rdf:li rdf:resource="http://aclweb.org/anthology-new/J/J11/J11-4004.pdf"/>
	<rdf:li rdf:resource="http://aclweb.org/anthology-new/P/P12/P12-1091.pdf"/>
      </rdf:Seq>
    </items>
  </channel><item rdf:about="https://github.com/Living-with-machines/DiachronicEmb-BigHistData">
    <title>Living-with-machines/DiachronicEmb-BigHistData: Tools to train and explore diachronic word embeddings from Big Historical Data</title>
    <dc:date>2023-03-20T16:21:27+00:00</dc:date>
    <link>https://github.com/Living-with-machines/DiachronicEmb-BigHistData</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[The scripts presented in this repository were created to train and explore diachronic word embeddings (Word2Vec) from very large historical data for which metadata on the year of publication of each text file is available. While the mapping between texts and year of publication is essential (to get diachronic embeddings), the methods presented can in principle be applied to any other diachronic collection.

]]></description>
<dc:subject>history nlp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:6ad68d10802c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:history"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://langchain.readthedocs.io/en/latest/">
    <title>Welcome to LangChain — 🦜🔗 LangChain 0.0.116</title>
    <dc:date>2023-03-20T00:57:54+00:00</dc:date>
    <link>https://langchain.readthedocs.io/en/latest/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Large language models (LLMs) are emerging as a transformative technology, enabling developers to build applications that they previously could not. But using these LLMs in isolation is often not enough to create a truly powerful app - the real power comes when you are able to combine them with other sources of computation or knowledge.

This library is aimed at assisting in the development of those types of applications.]]></description>
<dc:subject>llm api nlp query tools ai</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:53165156a945/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:llm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:api"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:query"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:tools"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:ai"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://kazemnejad.com/blog/transformer_architecture_positional_encoding/">
    <title>Transformer Architecture: The Positional Encoding - Amirhossein Kazemnejad's Blog</title>
    <dc:date>2022-11-23T22:29:16+00:00</dc:date>
    <link>https://kazemnejad.com/blog/transformer_architecture_positional_encoding/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Transformer architecture was introduced as a novel pure attention-only sequence-to-sequence architecture by Vaswani et al. Its ability for parallelizable training and its general performance improvement made it a popular option among NLP (and recently CV) researchers.

Thanks to the several implementations in common deep learning frameworks, it became an easy option to experiment with for many students (including myself). Even though making it more accessible is a great thing, but on the downside it may cause the details of the model to be ignored.

In this article, I don’t plan to explain its architecture in depth as there are currently several great tutorials on this topic (here, here, and here), but alternatively, I want to discuss one specific part of the transformer’s architecture - the positional encoding.]]></description>
<dc:subject>attention transformer nlp machinelearning</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:680713f3fa5b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:attention"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:transformer"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:machinelearning"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://jalammar.github.io/illustrated-transformer/">
    <title>The Illustrated Transformer – Jay Alammar – Visualizing machine learning one concept at a time.</title>
    <dc:date>2022-11-23T22:24:44+00:00</dc:date>
    <link>http://jalammar.github.io/illustrated-transformer/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[In the previous post, we looked at Attention – a ubiquitous method in modern deep learning models. Attention is a concept that helped improve the performance of neural machine translation applications. In this post, we will look at The Transformer – a model that uses attention to boost the speed with which these models can be trained. The Transformer outperforms the Google Neural Machine Translation model in specific tasks. The biggest benefit, however, comes from how The Transformer lends itself to parallelization. It is in fact Google Cloud’s recommendation to use The Transformer as a reference model to use their Cloud TPU offering. So let’s try to break the model apart and look at how it functions.

]]></description>
<dc:subject>nlp machinelearning transformer</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:71c2dba415ba/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:machinelearning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:transformer"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://towardsdatascience.com/illustrated-guide-to-transformers-step-by-step-explanation-f74876522bc0">
    <title>Illustrated Guide to Transformers- Step by Step Explanation | by Michael Phi | Towards Data Science</title>
    <dc:date>2022-11-23T22:24:30+00:00</dc:date>
    <link>https://towardsdatascience.com/illustrated-guide-to-transformers-step-by-step-explanation-f74876522bc0</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Transformers are taking the natural language processing world by storm. These incredible models are breaking multiple NLP records and pushing the state of the art. They are used in many applications like machine language translation, conversational chatbots, and even to power better search engines. Transformers are the rage in deep learning nowadays, but how do they work? Why have they outperformed the previous king of sequence problems, like recurrent neural networks, GRU’s, and LSTM’s? You’ve probably heard of different famous transformers models like BERT, GPT, and GPT2. In this post, we’ll focus on the one paper that started it all, “Attention is all you need”.
]]></description>
<dc:subject>nlp machinelearning transformer</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:810f7a205027/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:machinelearning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:transformer"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://textual-inversion.github.io/">
    <title>An Image is Worth One Word: Personalizing Text-to-Image Generation using Textual Inversion</title>
    <dc:date>2022-11-01T21:39:18+00:00</dc:date>
    <link>https://textual-inversion.github.io/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Text-to-image models offer unprecedented freedom to guide creation through natural language. Yet, it is unclear how such freedom can be exercised to generate images of specific unique concepts, modify their appearance, or compose them in new roles and novel scenes. In other words, we ask: how can we use language-guided models to turn our cat into a painting, or imagine a new product based on our favorite toy? Here we present a simple approach that allows such creative freedom.

Using only 3-5 images of a user-provided concept, like an object or a style, we learn to represent it through new "words" in the embedding space of a frozen text-to-image model. These "words" can be composed into natural language sentences, guiding personalized creation in an intuitive way. Notably, we find evidence that a single word embedding is sufficient for capturing unique and varied concepts.]]></description>
<dc:subject>ai images nlp concepts</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:8eb67d04ce25/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:ai"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:images"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:concepts"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/lueck/standoff-mode">
    <title>lueck/standoff-mode: a major mode for GNU Emacs for annotations in a stand-off manner</title>
    <dc:date>2022-02-02T02:30:16+00:00</dc:date>
    <link>https://github.com/lueck/standoff-mode</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[standoff-mode turns GNU Emacs into a tagger and lets you create (semantic) annotations on texts in a stand-off manner. It is written for use in the field of digital humanities and the manual annotation of training data for e.g. named-entity recognition.]]></description>
<dc:subject>annotation emacs nlp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:2c4d93b3f2ea/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:annotation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:emacs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://markusstrasser.org/p/bcd8bded-7136-4bb4-8f97-e8a3a7b6d926/">
    <title>The Business of Extracting Knowledge from Academic Publications</title>
    <dc:date>2021-12-05T13:42:39+00:00</dc:date>
    <link>https://markusstrasser.org/p/bcd8bded-7136-4bb4-8f97-e8a3a7b6d926/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[TL;DR: I worked on biomedical literature search, discovery and recommender web applications for many months and concluded that extracting, structuring or synthesizing "insights" from academic publications (papers) or building knowledge bases from a domain corpus of literature has negligible value in industry.]]></description>
<dc:subject>search recommendation selection nlp scholarlycommunication</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:6b242f135bf3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:search"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:recommendation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:scholarlycommunication"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/ryanjgallagher/shifterator">
    <title>ryanjgallagher/shifterator: Interpretable data visualizations for understanding how texts differ at the word level</title>
    <dc:date>2020-08-22T22:29:37+00:00</dc:date>
    <link>https://github.com/ryanjgallagher/shifterator</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[The Shifterator package provides functionality for constructing word shift graphs, vertical bar charts that quantify which words contribute to a pairwise difference between two texts and how they contribute. By allowing you to look at changes in how words are used, word shifts help you to conduct analyses of sentiment, entropy, and divergence that are fundamentally more interpretable.]]></description>
<dc:subject>nlp python text visualization</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:7f52d198099a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:visualization"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/dbamman/book-nlp">
    <title>dbamman/book-nlp: Natural language processing pipeline for book-length documents</title>
    <dc:date>2020-08-22T22:29:07+00:00</dc:date>
    <link>https://github.com/dbamman/book-nlp</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[BookNLP is a natural language processing pipeline that scales to books and other long documents (in English), including:

Part-of-speech tagging (Stanford)
Dependency parsing (MaltParser)
Named entity recognition (Stanford)
Character name clustering (e.g., "Tom", "Tom Sawyer", "Mr. Sawyer", "Thomas Sawyer" -> TOM_SAWYER)
Quotation speaker identification
Pronominal coreference resolution
Supersense tagging (e.g., "animal", "artifact", "body", "cognition", etc.)
This pipeline is described in the following paper; please cite if you write a research paper using this software:

David Bamman, Ted Underwood and Noah Smith, "A Bayesian Mixed Effects Model of Literary Character," ACL 2014.

]]></description>
<dc:subject>nlp java digitalhumanities</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:d003234e44f7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:java"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:digitalhumanities"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/priyaradhakrishnan0/ELDEN">
    <title>priyaradhakrishnan0/ELDEN</title>
    <dc:date>2019-04-01T12:02:36+00:00</dc:date>
    <link>https://github.com/priyaradhakrishnan0/ELDEN</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Entity Linking (EL) systems aim to automatically map mentions of an entity in text to the corresponding entity in a Knowledge Graph (KG). Degree of connectivity of an entity in the KG directly affects an EL system’s ability to correctly link mentions in text to the entity in KG. This causes many EL systems to perform well for entities well connected to other entities in KG, bringing into focus the role of KG density in EL. In this paper, we propose Entity Linking using Densified Knowledge Graphs (ELDEN). ELDEN is an EL system which first densifies the KG with co-occurrence statistics from a large text corpus, and then uses the densified KG to train entity embeddings. Entity similarity measured using these trained entity embeddings result in improved EL. ELDEN outperforms state-of-the-art EL system on benchmark datasets. Due to such densification, ELDEN performs well for sparsely connected entities in the KG too. ELDEN’s approach is simple, yet effective. We have made ELDEN’s code and data publicly available.]]></description>
<dc:subject>nlp entitydetection linkeddata extraction</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:2472d80dd871/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:entitydetection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:linkeddata"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:extraction"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://prodi.gy/">
    <title>Prodigy · An annotation tool for radically efficient machine teaching</title>
    <dc:date>2018-10-22T15:01:53+00:00</dc:date>
    <link>https://prodi.gy/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Prodigy is an annotation tool so efficient that data scientists can do the annotation themselves, enabling a new level of rapid iteration. Whether you're working on entity recognition, intent detection or image classification, Prodigy can help you train and evaluate your models faster. Stream in your own examples or real-world data from live APIs, update your model in real-time and chain models together to build more complex systems.]]></description>
<dc:subject>nlp tools</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:57ad9c482b47/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:tools"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/allenai/allennlp">
    <title>allenai/allennlp: An open-source NLP research library, built on PyTorch.</title>
    <dc:date>2018-02-21T18:57:14+00:00</dc:date>
    <link>https://github.com/allenai/allennlp</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[An Apache 2.0 NLP research library, built on PyTorch, for developing state-of-the-art deep learning models on a wide variety of linguistic tasks.]]></description>
<dc:subject>nlp python</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:43ea379357f9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:python"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/google/sling">
    <title>google/sling: SLING - A natural language frame semantics parser</title>
    <dc:date>2017-11-21T15:02:55+00:00</dc:date>
    <link>https://github.com/google/sling</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[SLING is a parser for annotating text with frame semantic annotations. It is trained on an annotated corpus using Tensorflow and Dragnn.]]></description>
<dc:subject>nlp frames deeplearning</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:90b919dd7c5e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:frames"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:deeplearning"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/JasonKessler/scattertext/">
    <title>JasonKessler/scattertext: Beautiful visualizations of how language differs among document types</title>
    <dc:date>2017-02-04T16:51:08+00:00</dc:date>
    <link>https://github.com/JasonKessler/scattertext/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[A tool for finding distinguishing terms in small-to-medium-sized corpora, and presenting them in a sexy, interactive scatter plot with non-overlapping term labels.]]></description>
<dc:subject>infoviz nlp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:847e68e5efa8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:infoviz"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://textract.readthedocs.io/en/stable/">
    <title>textract — textract 1.5.0 documentation</title>
    <dc:date>2017-02-04T16:05:43+00:00</dc:date>
    <link>http://textract.readthedocs.io/en/stable/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[As undesirable as it might be, more often than not there is extremely useful information embedded in Word documents, PowerPoint presentations, PDFs, etc—so-called “dark data”—that would be valuable for further textual analysis and visualization. While several packages exist for extracting content from each of these formats on their own, this package provides a single interface for extracting content from any type of file, without any irrelevant markup.]]></description>
<dc:subject>text process tools nlp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:c161c2e79cfd/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:process"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:tools"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://meta.wikimedia.org/wiki/Grants:IEG/StrepHit:_Wikidata_Statements_Validation_via_References">
    <title>Grants:IEG/StrepHit: Wikidata Statements Validation via References - Meta</title>
    <dc:date>2016-07-02T18:46:55+00:00</dc:date>
    <link>https://meta.wikimedia.org/wiki/Grants:IEG/StrepHit:_Wikidata_Statements_Validation_via_References</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[StrepHit (pronounced "strep hit", means "Statement? repherence it!")[1] is a Natural Language Processing pipeline that harvests structured data from raw text and produces Wikidata statements with reference URLs. Its datasets will feed the primary sources tool.[2]
In this way, we believe StrepHit will dramatically improve the data quality of Wikidata through a reference suggestion mechanism for statement validation, and will help Wikidata become the gold-standard hub of the Open Data landscape.]]></description>
<dc:subject>opendata wikipedia nlp extraction reference bibliography</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:a199179873d2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:opendata"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:wikipedia"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:extraction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:reference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:bibliography"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.umiacs.umd.edu/~hal/sayit.py">
    <title>sayit.py</title>
    <dc:date>2015-09-12T01:14:11+00:00</dc:date>
    <link>http://www.umiacs.umd.edu/~hal/sayit.py</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[“Given a string of text in some language you might want to know how long it would take to speak it.”]]></description>
<dc:subject>nlp speech tools python</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:cd06d9a368a9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:speech"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:tools"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:python"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://honnibal.github.io/spaCy/">
    <title>spaCy: Industrial-strength NLP — spaCy 0.85 documentation</title>
    <dc:date>2015-06-20T23:53:42+00:00</dc:date>
    <link>http://honnibal.github.io/spaCy/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[spaCy’s parser is faster than most taggers, and its tokenizer is fast enough for any workload. And the tokenizer doesn’t just give you a list of strings. A spaCy token is a pointer to a Lexeme struct, from which you can access a wide range of pre-computed features, including embedded word representations.]]></description>
<dc:subject>python nlp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:7118b1578358/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://cs224d.stanford.edu/">
    <title>Stanford University CS224d: Deep Learning for Natural Language Processing</title>
    <dc:date>2015-03-16T13:18:45+00:00</dc:date>
    <link>http://cs224d.stanford.edu/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Natural language processing (NLP) is one of the most important technologies of the information age. Understanding complex language utterances is also a crucial part of artificial intelligence. Applications of NLP are everywhere because people communicate most everything in language: web search, advertisement, emails, customer service, language translation, radiology reports, etc. There are a large variety of underlying tasks and machine learning models powering NLP applications. Recently, deep learning approaches have obtained very high performance across many different NLP tasks. These models can often be trained with a single end-to-end model and do not require traditional, task-specific feature engineering. In this spring quarter course students will learn to implement, train, debug, visualize and invent their own neural network models. The course provides a deep excursion into cutting-edge research in deep learning applied to NLP. The final project will involve training a complex recurrent neural network and applying it to a large scale NLP problem. On the model side we will cover word vector representations, window-based neural networks, recurrent neural networks, long-short-term-memory models, recursive neural networks, convolutional neural networks as well as some very novel models involving a memory component. Through lectures and programming assignments students will learn the necessary engineering tricks for making neural networks work on practical problems.]]></description>
<dc:subject>deeplearning nlp class stanford</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:d6a74d359d44/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:deeplearning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:class"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:stanford"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://whoo.ps/2015/02/23/futures-of-text">
    <title>Futures of text | Whoops by Jonathan Libov</title>
    <dc:date>2015-03-01T17:05:44+00:00</dc:date>
    <link>http://whoo.ps/2015/02/23/futures-of-text</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Text is an incredibly comfortable medium. Text-based interaction is fast, fun, funny, flexible, intimate, descriptive and even consistent in ways that voice and user interface often are not. Always bet on text:

Text is the most socially useful communication technology. It works well in 1:1, 1:N, and M:N modes. It can be indexed and searched efficiently, even by hand. It can be translated. It can be produced and consumed at variable speeds. It is asynchronous. It can be compared, diffed, clustered, corrected, summarized and filtered algorithmically. It permits multiparty editing. It permits branching conversations, lurking, annotation, quoting, reviewing, summarizing, structured responses, exegesis, even fan fic. The breadth, scale and depth of ways people use text is unmatched by anything.]]></description>
<dc:subject>text ui messaging nlp analysis design mobile AI</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:c46daf81681d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:ui"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:messaging"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:design"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:mobile"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:AI"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/StanfordHCI/termite/blob/master/pipeline/compute_similarity.py">
    <title>termite/compute_similarity.py at master · StanfordHCI/termite</title>
    <dc:date>2015-02-14T17:46:20+00:00</dc:date>
    <link>https://github.com/StanfordHCI/termite/blob/master/pipeline/compute_similarity.py</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Compute term similarity based on co-occurrence and collocation likelihoods.]]></description>
<dc:subject>python nlp similarity</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:3438bb9d7470/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:similarity"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www2007.org/papers/paper342.pdf">
    <title>Scaling Up All Pairs Similarity Search</title>
    <dc:date>2014-09-16T15:34:38+00:00</dc:date>
    <link>http://www2007.org/papers/paper342.pdf</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Given a large collection of sparse vector data in a high dimensional space, we investigate the problem of finding all pairs of vectors whose similarity score (as determined by a function such as cosine distance) is above a given threshold. We propose a simple algorithm based on novel indexing and optimization strategies that solves this problem without relying on approximation methods or extensive parameter tuning. We show the approach efficiently handles a variety of datasets across a wide setting of similarity thresholds, with large speedups over previous state-of-the-art approaches.]]></description>
<dc:subject>nlp similarity</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:274063aa744c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:similarity"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://brenocon.com/te/">
    <title>MiTextExplorer: Mutual information text analysis</title>
    <dc:date>2014-06-27T22:23:39+00:00</dc:date>
    <link>http://brenocon.com/te/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[The Mutual information Text Explorer is a tool that allows interactive exploration of text data and document covariates.]]></description>
<dc:subject>nlp text infoviz visualization tools</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:b698c23116eb/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:text"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:infoviz"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:visualization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:tools"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://conll.cemantix.org/2012/data.html">
    <title>CoNLL-2012 Shared Task: Data</title>
    <dc:date>2014-06-11T17:20:58+00:00</dc:date>
    <link>http://conll.cemantix.org/2012/data.html</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[The contents of each of these files comprises of a set of columns. Each column either representing a linear annotation on a sentence, for example, a part of speech annotation which is one part of speech per word, and so one column per layer (in this case part of speech), or there are multiple columns — taken in sync with another column and representing the part that all other words in the sentence play with respect to that word. This is the classic case of predicate argument structure as introduced in the CoNLL-2005 shared task. In this case the number of columns that represent that layer of annotation is variable — one per each predicate. For convenience, we have kept the coreference layer information in the very last column and the predicate argument structure information in a variable number of columns preceeding that.]]></description>
<dc:subject>nlp data coreference</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:1a117c13b19f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:coreference"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/dlwh/puck">
    <title>dlwh/puck</title>
    <dc:date>2014-06-09T22:07:51+00:00</dc:date>
    <link>https://github.com/dlwh/puck</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Puck is a high-speed, high-accuracy parser for natural languages. It's (currently) designed for use with grammars trained with the Berkeley Parser and on NVIDIA cards. On recent-ish NVIDIA cards (e.g. a GTX 680), around 400 sentences a second with a full Berkeley grammar for length <= 40 sentences.]]></description>
<dc:subject>nlp parsing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:60fb76c95d7a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:parsing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://nlp.stanford.edu/software/dcoref.shtml">
    <title>Stanford Deterministic Coreference Resolution System</title>
    <dc:date>2014-06-09T22:06:53+00:00</dc:date>
    <link>http://nlp.stanford.edu/software/dcoref.shtml</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[The papers to cite for this system are as follows:

Marta Recasens, Marie-Catherine de Marneffe, and Christopher Potts. The Life and Death of Discourse Entities: Identifying Singleton Mentions. In Proceedings of NAACL 2013. 

Heeyoung Lee, Angel Chang, Yves Peirsman, Nathanael Chambers, Mihai Surdeanu and Dan Jurafsky. Deterministic coreference resolution based on entity-centric, precision-ranked rules. 
Computational Linguistics 39(4), 2013.

Heeyoung Lee, Yves Peirsman, Angel Chang, Nathanael Chambers, Mihai Surdeanu, Dan Jurafsky. Stanford's Multi-Pass Sieve Coreference Resolution System at the CoNLL-2011 Shared Task. In Proceedings of the CoNLL-2011 Shared Task, 2011.

Karthik Raghunathan, Heeyoung Lee, Sudarshan Rangarajan, Nathanael Chambers, Mihai Surdeanu, Dan Jurafsky, Christopher Manning A Multi-Pass Sieve for Coreference Resolution 
EMNLP-2010, Boston, USA. 2010.]]></description>
<dc:subject>nlp coreference</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:a08a61a051d6/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:coreference"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://nlp.cs.berkeley.edu/berkeleycoref.shtml">
    <title>Berkeley NLP Group - Berkeley Coreference Resolution System</title>
    <dc:date>2014-06-09T22:06:21+00:00</dc:date>
    <link>http://nlp.cs.berkeley.edu/berkeleycoref.shtml</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[The Berkeley Coreference Resolution System is a state-of-the-art English coreference system described in the following papers:

Easy Victories and Uphill Battles in Coreference Resolution [PDF], [BibTeX]
Greg Durrett and Dan Klein. 
EMNLP 2013.

Decentralized Entity-Level Modeling for Coreference Resolution [PDF], [BibTeX]
Greg Durrett, David Hall, and Dan Klein. 
ACL 2013.]]></description>
<dc:subject>nlp coreference</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:1aa51de903a9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:coreference"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://code.google.com/p/berkeley-coreference-analyser/">
    <title>berkeley-coreference-analyser - A tool for classifying errors in coreference resolution - Google Project Hosting</title>
    <dc:date>2014-06-09T22:06:07+00:00</dc:date>
    <link>https://code.google.com/p/berkeley-coreference-analyser/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Error-Driven Analysis of Challenges in Coreference Resolution 
Jonathan K. Kummerfeld and Dan Klein 
EMNLP 2013]]></description>
<dc:subject>nlp coreference</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:3da6d515fee3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:coreference"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.ims.uni-stuttgart.de/forschung/ressourcen/werkzeuge/HOTCoref.en.html">
    <title>HOTCoref | Institute for Natural Language Processing | University of Stuttgart</title>
    <dc:date>2014-06-09T22:05:53+00:00</dc:date>
    <link>http://www.ims.uni-stuttgart.de/forschung/ressourcen/werkzeuge/HOTCoref.en.html</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Anders Björkelund and Jonas Kuhn. Learning Structured Perceptrons for Coreference Resolution with Latent Antecedents and Non-local Features. To appear in ACL 2014.]]></description>
<dc:subject>nlp coreference</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:4d8ca6b3c4da/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:coreference"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.cs.utah.edu/nlp/reconcile/">
    <title>Reconcile - Coreference Resolution Engine</title>
    <dc:date>2014-06-09T22:05:35+00:00</dc:date>
    <link>http://www.cs.utah.edu/nlp/reconcile/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Stoyanov, V., Cardie, C., Gilbert, N., Riloff, E., Buttler, D. and Hysom, D. (2010) "Coreference Resolution with Reconcile", Proceedings of the Conference of the 48th Annual Meeting of the Association for Computational Linguistics (ACL 2010), Short Paper. [PDF]]]></description>
<dc:subject>nlp coreference</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:d0e6105cd28c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:coreference"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://cogcomp.cs.illinois.edu/page/software_view/18">
    <title>CCG: Software - Illinois Coreference Package</title>
    <dc:date>2014-06-09T22:04:57+00:00</dc:date>
    <link>http://cogcomp.cs.illinois.edu/page/software_view/18</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[E. Bengtson and D. Roth, Understanding the Value of Features for Coreference Resolution. EMNLP  (2008)]]></description>
<dc:subject>nlp coreference</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:9f7be3da8a64/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:coreference"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.hlt.utdallas.edu/~altaf/cherrypicker.html">
    <title>CherryPicker : A Coreference Resolution Tool</title>
    <dc:date>2014-06-09T22:04:34+00:00</dc:date>
    <link>http://www.hlt.utdallas.edu/~altaf/cherrypicker.html</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Altaf Rahman and Vincent Ng.
      Supervised Models for Coreference Resolution.
      Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing, pp. 968-977, 2009.]]></description>
<dc:subject>nlp coreference</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:de95bd7097fe/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:coreference"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://sandbox.htrc.illinois.edu/HTRC-UI-Portal2/Features">
    <title>HTRC Portal - About</title>
    <dc:date>2014-06-04T17:06:53+00:00</dc:date>
    <link>https://sandbox.htrc.illinois.edu/HTRC-UI-Portal2/Features</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[A great deal of fruitful research can be performed using non-consumptive pre-extracted features. For this reason, HTRC has put together a select set of page-level features extracted from the HathiTrust's non-Google-digitized public domain volumes. The source texts for this set of feature files are primarily in English.

Features are notable or informative characteristics of the text. We have processed a number of useful features, including part-of-speech tagged token counts, header and footer identification, and various line-level information. This is all provided per-page. Providing token information at the page level makes it possible to separate text from paratext. (An example of the latter may be: thirty pages of publishers’ ads at the back of a book). We have also decided to break each page into a collection of three parts: header, body, and footer.]]></description>
<dc:subject>nlp books data HathiTrust</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:faed2dfde5b3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:books"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:HathiTrust"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://sourceforge.net/projects/topictiling/">
    <title>TopicTiling | Free software downloads at SourceForge.net</title>
    <dc:date>2014-05-05T21:17:25+00:00</dc:date>
    <link>http://sourceforge.net/projects/topictiling/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[TopicTiling is a text segmentation algorithm with is based on TextTiling but uses LDA instead of the words itself.]]></description>
<dc:subject>segmentation nlp code</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:27a8de4076f8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:segmentation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:code"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.site.uottawa.ca/~ankazant/Annas_page/Downloads.html">
    <title>Software and Data</title>
    <dc:date>2014-05-05T21:16:08+00:00</dc:date>
    <link>http://www.site.uottawa.ca/~ankazant/Annas_page/Downloads.html</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Affinity Propagation for Segmentation (APS). The archive contains the java code and the data.]]></description>
<dc:subject>segmentation nlp code</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:47c36a24dcbe/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:segmentation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:code"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://code.google.com/p/uima-text-segmenter/">
    <title>uima-text-segmenter - text segmentation at the discourse level essentially based on lexical cohesion measures - Google Project Hosting</title>
    <dc:date>2014-05-05T21:03:54+00:00</dc:date>
    <link>https://code.google.com/p/uima-text-segmenter/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[UIMA Text Segmenter is a UIMA wrapper for the java implementations of the segmentation algorithms C99 and TextTiling, written by Freddy Choi.]]></description>
<dc:subject>segmentation code java nlp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:9c0d4127cb2a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:segmentation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:code"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:java"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/jacobeisenstein/bayes-seg">
    <title>jacobeisenstein/bayes-seg</title>
    <dc:date>2014-05-05T20:59:01+00:00</dc:date>
    <link>https://github.com/jacobeisenstein/bayes-seg</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Java code from the 2008 EMNLP paper "Bayesian Unsupervised Topic Segmentation" by Eisenstein and Barzilay. Includes implementations of BayesSeg and MinCutSeg, and Utiyama and Isahara's TextSeg.]]></description>
<dc:subject>segmentation code nlp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:d4aa7a0d937b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:segmentation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:code"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://code.google.com/p/whatswrong/">
    <title>whatswrong - What's Wrong With My NLP? - Google Project Hosting</title>
    <dc:date>2014-05-01T13:06:05+00:00</dc:date>
    <link>https://code.google.com/p/whatswrong/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[What's Wrong With My NLP?: A visualizer for Natural Language Processing problems.]]></description>
<dc:subject>nlp debugging tools</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:0d8bc4d508d0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:debugging"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:tools"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://ir.inf.ed.ac.uk/wiki/doku.php?id=yari:mtx">
    <title>yari:mtx [Information Retrieval Lab]</title>
    <dc:date>2014-04-08T19:49:31+00:00</dc:date>
    <link>http://ir.inf.ed.ac.uk/wiki/doku.php?id=yari:mtx</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[mtx is a command-line tool for rapidly trying new ideas in Information Retrieval and Machine Learning.]]></description>
<dc:subject>IR cli tools clustering nlp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:380a739a72bc/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:IR"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:cli"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:tools"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://nlp.stanford.edu/~manning/papers/CICLing2011-manning-tagging.pdf">
    <title>Part-of-Speech Tagging from 97% to 100%: Is It Time for Some Linguistics?</title>
    <dc:date>2014-03-29T11:27:30+00:00</dc:date>
    <link>http://nlp.stanford.edu/~manning/papers/CICLing2011-manning-tagging.pdf</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[I examine what would be necessary to move part-of-speech tagging performance from its current level of about 97.3% token accuracy (56% sentence accuracy) to close to 100% accuracy. I suggest that it must still be possible to greatly increase tagging performance and examine some useful improvements that have recently been made to the Stanford Part-of-Speech Tagger. However, an error analysis of some of the remaining errors suggests that there is limited further mileage to be had either from better machine learning or better features in a discriminative sequence classifier. The prospects for further gains from semi- supervised learning also seem quite limited. Rather, I suggest and begin to demonstrate that the largest opportunity for further progress comes from improving the taxonomic basis of the linguistic resources from which taggers are trained. That is, from improved descriptive linguistics. How- ever, I conclude by suggesting that there are also limits to this process. The status of some words may not be able to be adequately captured by assigning them to one of a small number of categories. While conventions can be used in such cases to improve tagging consistency, they lack a strong linguistic basis.]]></description>
<dc:subject>nlp linguistics modeling representation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:4467b9981603/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:linguistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:modeling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:representation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://persistence.uni-leipzig.org/nlp2rdf/">
    <title>NLP Interchange Format (NIF) - Overview</title>
    <dc:date>2013-09-04T14:46:28+00:00</dc:date>
    <link>http://persistence.uni-leipzig.org/nlp2rdf/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[This document contains pointers to all the important resources relevant for the NLP Interchange Format (NIF). NIF is an RDF/OWL-based format that aims to achieve interoperability between Natural Language Processing (NLP) tools, language resources and annotations.]]></description>
<dc:subject>nlp annotation rdf standard</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:b269f99bbf67/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:annotation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:rdf"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:standard"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://code.google.com/p/word2vec/">
    <title>word2vec - Tool for computing continuous distributed representations of words. - Google Project Hosting</title>
    <dc:date>2013-08-16T04:10:02+00:00</dc:date>
    <link>https://code.google.com/p/word2vec/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[The word2vec tool takes a text corpus as input and produces the word vectors as output. It first constructs a vocabulary from the training text data and then learns vector representation of words. The resulting word vector file can be used as features in many natural language processing and machine learning applications.

A simple way to investigate the learned representations is to find the closest words for a user-specified word. The distance tool serves that purpose. For example, if you enter 'france', distance will display the most similar words and their distances to 'france'.]]></description>
<dc:subject>nlp similarity textanalysis semantics inls520</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:766699a08a09/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:similarity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:textanalysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:semantics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:inls520"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://ftp.cs.toronto.edu/pub/gh/Hirst-TSD-2008.pdf">
    <title>The Future of Text-Meaning in Computational Linguistics</title>
    <dc:date>2013-08-09T23:58:37+00:00</dc:date>
    <link>http://ftp.cs.toronto.edu/pub/gh/Hirst-TSD-2008.pdf</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Writer-based and reader-based views of text-meaning are reflected by the respective questions "What is the author trying to tell me?" and "What does this text mean to me personally?" Contemporary computational linguistics, however, generally takes neither view. But this is not adequate for the development of sophisticated applications such as intelligence gathering and question answering. I discuss different views of text-meaning from the perspective of the needs of computational text analysis and the collaborative repair of misunderstanding.]]></description>
<dc:subject>nlp language meaning interpretation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:0fe65be2fb7d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:language"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:meaning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:interpretation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.isi.edu/natural-language/amr/a.pdf">
    <title>Abstract Meaning Representation for Sembanking</title>
    <dc:date>2013-06-23T01:22:21+00:00</dc:date>
    <link>http://www.isi.edu/natural-language/amr/a.pdf</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[We describe Abstract Meaning Representation (AMR), a semantic representation language in which we are writing down the meanings of thousands of English sentences. We hope that a sembank of simple, whole-sentence semantic structures will spur new work in statistical natural language understanding and generation, like the Penn Treebank encouraged work on statistical parsing. This paper gives an overview of AMR and tools associated with it.]]></description>
<dc:subject>nlp semantics representation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:72a6311c6426/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:semantics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:representation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.cs.columbia.edu/~scohen/naacl13tutorial/">
    <title>Spectral Learning Algorithms for Natural Language Processing</title>
    <dc:date>2013-06-11T02:02:02+00:00</dc:date>
    <link>http://www.cs.columbia.edu/~scohen/naacl13tutorial/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Recent work in machine learning and NLP has developed spectral algorithms for many learning tasks involving latent variables. Spectral algorithms rely on singular value decomposition as a basic operation, usually followed by some simple estimation method based on the method of moments. From a theoretical point of view, these methods are appealing in that they offer consistent estimators (and PAC-style guarantees of sample complexity) for several important latent-variable models. This is in contrast to the EM algorithm, which is an extremely successful approach, but which only has guarantees of reaching a local maximum of the likelihood function.

From a practical point of view, the methods (unlike EM) have no need for careful initialization, and have recently been shown to be highly efficient (as one example, in work under submission by the authors on learning of latent-variable PCFGs, a spectral algorithm performs at identical accuracy to EM, but is around 20 times faster).

In this tutorial we will aim to give a broad overview of spectral methods, describing theoretical guarantees, as well as practical issues.]]></description>
<dc:subject>nlp spectral</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:c281a38f7f69/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:spectral"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www1.cs.columbia.edu/nlp/tools.cgi#LCseg">
    <title>NLP at Columbia University: LCseg</title>
    <dc:date>2013-06-09T13:42:13+00:00</dc:date>
    <link>http://www1.cs.columbia.edu/nlp/tools.cgi#LCseg</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[A domain-independent discourse segmenter based on lexical cohesion.]]></description>
<dc:subject>nlp tools segmentation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:0d540d552f4f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:tools"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:segmentation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://ceur-ws.org/Vol-779/derive2011_submission_1.pdf">
    <title>An Overview of Event Extraction from Text</title>
    <dc:date>2013-06-03T19:22:27+00:00</dc:date>
    <link>http://ceur-ws.org/Vol-779/derive2011_submission_1.pdf</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[One common application of text mining is event extraction, which encompasses deducing specific knowledge concerning incidents referred to in texts. Event extraction can be applied to various types of written text, e.g., (online) news messages, blogs, and manuscripts. This literature survey reviews text mining techniques that are employed for various event extraction purposes. It provides general guidelines on how to choose a particular event extraction technique depending on the user, the available content, and the scenario of use.]]></description>
<dc:subject>events extraction nlp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:e398da10a9b2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:events"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:extraction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://nlp.stanford.edu/courses/lsa354/">
    <title>LSA 354: Statistical Parsing</title>
    <dc:date>2013-06-03T19:02:22+00:00</dc:date>
    <link>http://nlp.stanford.edu/courses/lsa354/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Over the last decade, statistical parsing has transformed our ability to produce automatic, high-accuracy parses of arbitrary human language text. This course aims to teach from the basics up to the state-of-the-art in this domain. It will begin by reviewing the phenomena that motivated statistical approaches to parsing, context-free grammars (CFGs), and probabilistic CFGs. Next it will present basic parsing algorithms, concentrating on generalized CKY and A* parsing algorithms, and discuss treebanks, their design and nature, and the methods of building and evaluating parsers based on them. The course will then turn to the well-known and successful Collins and Charniak generative parsing models of the late 1990s, and discuss issues such as smoothing, head lexicalization, engineering for efficiency, and what kinds of information parsers use and need. Finally, we will turn to discriminative methods of parsing, and discuss both parse re-ranking techniques and the direct construction of discriminative parsers.]]></description>
<dc:subject>nlp parsing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:8cefefcd4959/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:parsing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://braque.cc/ShowItem?handle=2RS2ML03">
    <title>Large-scale author coreference via hierarchical entity representations</title>
    <dc:date>2013-06-01T22:03:41+00:00</dc:date>
    <link>http://braque.cc/ShowItem?handle=2RS2ML03</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Large-scale author coreference, the problem of ascribing research papers to real-world authors in bibliographic databases, is critical for mining the scientific community. However, traditional pairwise approaches, which measure coreference similarity between pairs of author mentions, scale poorly to large databases; and streaming approaches, which lack the ability to retroactively correct errors, can suffer from chronically low accuracy. In this paper we present a hierarchical model for solving author coreference that overcomes these issues. First, our model enables scalability over rich entity representations by compactly organizing the mentions of each author into trees. Second, we employ Markov chain Monte Carlo (MCMC) inference which is able to retroactively correct existing coreference errors when processing new mentions. We validate these two properties empirically, and demonstrate further scalability through asynchronous parallel MCMC (allowing us to scale to all 150,000,000 author mentions in Web of Science).]]></description>
<dc:subject>nlp coreference</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:9697f733d4f8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:coreference"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.atala.org/IMG/pdf/0-Introduction-TAL53-2.pdf">
    <title>Introduction to the special issue on Processing of Temporal and Spatial Information in Language</title>
    <dc:date>2013-05-19T17:04:53+00:00</dc:date>
    <link>http://www.atala.org/IMG/pdf/0-Introduction-TAL53-2.pdf</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[The objective of this special issue is to present new developments in the processing of temporal and spatial information in language, from theoretical, practical and methodological points of view.]]></description>
<dc:subject>temporal spatial nlp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:c366378b4a4a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:temporal"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:spatial"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://research.microsoft.com/en-us/um/people/hoifung/papers/pfi13.pdf">
    <title>Probabilistic Frame Induction</title>
    <dc:date>2013-05-16T13:18:35+00:00</dc:date>
    <link>http://research.microsoft.com/en-us/um/people/hoifung/papers/pfi13.pdf</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[In natural-language discourse, related events tend to appear near each other to describe a larger scenario. Such structures can be formalized by the notion of a frame (a.k.a. template), which comprises a set of related events and prototypical participants and event transitions. Identifying frames is a prerequisite for information extraction and natural language generation, and is usually done manually. Methods for inducing frames have been proposed recently, but they typically use ad hoc procedures and are difficult to diagnose or extend. In this paper, we propose the first probabilistic approach to frame induction, which incorporates frames, events, and participants as latent topics and learns those frame and event transitions that best explain the text. The number of frame components is inferred by a novel application of a split-merge method from syntactic parsing. In end-to-end evaluations from text to induced frames and extracted facts, our method produces state-of-the-art results while substantially reducing engineering effort.]]></description>
<dc:subject>frames semantics parsing discourse analysis nlp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:b733df6495c6/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:frames"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:semantics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:parsing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:discourse"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.cs.cmu.edu/~dbamman/pubs/pdf/bamman+oconnor+smith.acl13.pdf">
    <title>Learning Latent Personas of Film Characters</title>
    <dc:date>2013-05-08T14:22:10+00:00</dc:date>
    <link>http://www.cs.cmu.edu/~dbamman/pubs/pdf/bamman+oconnor+smith.acl13.pdf</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[We present two latent variable models for learning character types, or personas, in film, in which a persona is defined as a set of mixtures over latent lexical classes. These lexical classes capture the stereotypical actions of which a character is the agent and patient, as well as attributes by which they are described. As the first attempt to solve this problem explicitly, we also present a new dataset for the text-driven analysis of film, along with a benchmark testbed to help drive future work in this area.]]></description>
<dc:subject>narrative nlp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:54898b589ff7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:narrative"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.cis.temple.edu/~yates/papers/2013-comp-ling-rep-learning-preprint.pdf">
    <title>Learning Representations for Weakly Supervised Natural Language Processing Tasks</title>
    <dc:date>2013-04-24T18:51:07+00:00</dc:date>
    <link>http://www.cis.temple.edu/~yates/papers/2013-comp-ling-rep-learning-preprint.pdf</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Finding the right representations for words is critical for building accurate NLP systems when domain-specific labeled data for the task is scarce. This paper investigates novel techniques for extracting features from n-gram models, Hidden Markov Models, and other statistical language models, including a novel Partial Lattice Markov Random Field model. Experiments on part-of-speech tagging and information extraction, among other tasks, indicate that features taken from statistical language models, in combination with more traditional features, outperform traditional representations alone, and that graphical model representations outperform n-gram models, especially on sparse and polysemous words.]]></description>
<dc:subject>nlp representation machinelearning</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:290ca6962b9f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:machinelearning"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.cis.temple.edu/~yates/papers/open-sem-parsing.pdf">
    <title>Semantic Parsing Freebase: Towards Open-domain Semantic Parsing</title>
    <dc:date>2013-04-24T18:49:32+00:00</dc:date>
    <link>http://www.cis.temple.edu/~yates/papers/open-sem-parsing.pdf</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Existing semantic parsing research has steadily improved accuracy on a few domains and their corresponding databases. This paper introduces FreeParser, a system that trains on one domain and one set of predicate and constant symbols, and then can parse sentences for any new domain, including sentences that refer to symbols never seen during training. FreeParser uses a domain-independent architecture to automatically identify sentences relevant to each new database symbol, which it uses to supplement its manually-annotated training data from the training domain. In cross-domain experiments involving 23 domains, FreeParser can parse sentences for which it has seen comparable unannotated sentences with an F1 of 0.71.]]></description>
<dc:subject>semantics parsing extraction nlp freebase structure linkeddata semweb</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:267a99b7112a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:semantics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:parsing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:extraction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:freebase"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:structure"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:linkeddata"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:semweb"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://code.google.com/p/relation-extraction-corpus/downloads/list">
    <title>Downloads - relation-extraction-corpus - Relation Extraction Corpus - Google Project Hosting</title>
    <dc:date>2013-04-12T11:00:16+00:00</dc:date>
    <link>https://code.google.com/p/relation-extraction-corpus/downloads/list</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[To help researchers investigate relation extraction, we’re releasing a human-judged dataset of two relations about public figures on Wikipedia: nearly 10,000 examples of “place of birth”, and over 40,000 examples of “attended or graduated from an institution”. Each of these was judged by at least 5 raters, and can be used to train or evaluate relation extraction systems. We also plan to release more relations of new types in the coming months.

Each relation is in the form of a triple: the relation in question, called a predicate; the subject of the relation; and the object of the relation. In the relation “Stephen Hawking graduated from Oxford,” Stephen Hawking is the subject, graduated from is the relation, and Oxford University is the object. Subjects and objects are represented by their Freebase MID’s, and the relation is defined as a Freebase property. So in this case, the triple would be represented as:

"pred":"/education/education/institution"
"sub":"/m/01tdnyh"
"obj":"/m/07tgn"

Just having the triples is interesting enough if you want a database of entities and relations, but doesn’t make much progress towards training or evaluating a relation extraction system. So we’ve also included the evidence for the relation, in the form of a URL and an excerpt from the web page that our raters judged. We’re also including examples where the evidence does not support the relation, so you have negative examples for use in training better extraction systems. Finally, we included ID’s and actual judgments of individual raters, so that you can filter triples by agreement.]]></description>
<dc:subject>relationships corpus data nlp extraction</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:149ead5056b5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:relationships"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:corpus"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:extraction"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://people.csail.mit.edu/yklee/papers/temp06.pdf">
    <title>Inducing Temporal Graphs</title>
    <dc:date>2013-04-09T03:14:56+00:00</dc:date>
    <link>http://people.csail.mit.edu/yklee/papers/temp06.pdf</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[We consider the problem of constructing a directed acyclic graph that encodes temporal relations found in a text. The unit of our analysis is a temporal segment, a fragment of text that maintains temporal coherence. The strength of our approach lies in its ability to simultaneously optimize pairwise ordering preferences and global constraints on the graph topology. Our learning method achieves 83% F-measure in temporal segmentation and 84% accuracy in inferring temporal relations between two segments.]]></description>
<dc:subject>temporal graph nlp narrative</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:02b2782e27f5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:temporal"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:graph"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:narrative"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://nlp.stanford.edu/pubs/discourse-referent-lifespans.pdf">
    <title>The Life and Death of Discourse Entities: Identifying Singleton Mentions</title>
    <dc:date>2013-03-28T13:05:31+00:00</dc:date>
    <link>http://nlp.stanford.edu/pubs/discourse-referent-lifespans.pdf</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[A discourse typically involves numerous entities, but few are mentioned more than once. Distinguishing discourse entities that die out after just one mention (singletons) from those that lead longer lives (coreferent) would benefit NLP applications such as coreference resolution, protagonist identification, topic modeling, and discourse coherence. We build a logistic regression model for predicting the singleton/coreferent distinction, drawing on linguistic insights about how discourse entity lifespans are affected by syntactic and semantic features. The model is effective in its own right (78% accuracy), and incorporating it into a state-of-the-art coreference resolution system yields a significant improvement.]]></description>
<dc:subject>discourse analysis coreference nlp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:44386d4fc741/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:discourse"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:coreference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://library.ahima.org/xpedio/groups/public/documents/ahima/bok1_040449.html">
    <title>AHIMA - Perspectives in Health Information Management</title>
    <dc:date>2013-03-25T18:12:51+00:00</dc:date>
    <link>http://library.ahima.org/xpedio/groups/public/documents/ahima/bok1_040449.html</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[We report on a study comparing unrestricted physician dictations with structured input in electronic health records. The results suggest a need for attention to the contrast between physicians’ naturally occurring language and the information permitted by structured data entry. We suggest that technology that automatically populates structured EHR fields directly from physician dictations, rather than controlling physician input, may answer the needs of the EHR without sacrificing physicians’ ability to fully communicate clinically relevant information.]]></description>
<dc:subject>metadata nlp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:17d75463b310/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:metadata"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/pdf/1301.7738v1.pdf">
    <title>PyPLN: a Distributed Platform for Natural Language Processing</title>
    <dc:date>2013-02-10T16:32:29+00:00</dc:date>
    <link>http://arxiv.org/pdf/1301.7738v1.pdf</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[This paper presents a distributed platform for Natural Language Processing called PyPLN. PyPLN leverages a vast array of NLP and text processing open source tools, managing the distribution of the workload on a variety of configurations: from a single server to a cluster of linux servers. PyPLN is developed using Python 2.7.3 but makes it very easy to incorporate other softwares for specific tasks as long as a linux version is available. PyPLN facilitates analyses both at document and corpus level, simplifying management and publication of corpora and analytical results through an easy to use web interface. In the current (beta) release, it supports English and Portuguese languages with support to other languages planned for future releases. To support the Portuguese language PyPLN uses the PALAVRAS parser (Bick, 2000). Currently PyPLN offers the following features: Text extraction with encoding normalization (to UTF-8), part-of-speech tagging, token frequency, semantic annotation, n-gram extraction, word and sentence repertoire, and full-text search across corpora. The platform is licensed as GPL-v3.]]></description>
<dc:subject>python nlp tools</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:8c89fed33489/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:tools"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://pypln.org/">
    <title>PyPLN - Distributed Natural Language Processing, with Python</title>
    <dc:date>2013-02-10T16:30:25+00:00</dc:date>
    <link>http://pypln.org/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[PyPLN is a platform for processing and extracting useful information from text. It was conceived to run in the cloud, scale quickly and be easy to use. It integrates many text mining and natural language processing tools, which can be accessed via an easy-to-use Web interface, where you can manage documents, corpora and interact with its analysis/visualizations.

As its main feature, you can visualize analysis like part-of-speech tags, word frequency statistics and other useful information. It also offers a full-text search on your corpora so you can easily find information and then visualize its analysis.]]></description>
<dc:subject>python nlp tools</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:a10dd554835d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:tools"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://snowball.tartarus.org/">
    <title>Snowball</title>
    <dc:date>2013-01-08T14:02:32+00:00</dc:date>
    <link>http://snowball.tartarus.org/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Snowball is a small string processing language designed for creating stemming algorithms for use in Information Retrieval.]]></description>
<dc:subject>nlp search algorithms</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:a9bbef8cb92a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:search"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:algorithms"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.socher.org/index.php/DeepLearningTutorial/DeepLearningTutorial">
    <title>Deep Learning Tutorial - www.socher.org</title>
    <dc:date>2012-12-22T02:37:43+00:00</dc:date>
    <link>http://www.socher.org/index.php/DeepLearningTutorial/DeepLearningTutorial</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Machine learning is everywhere in today's NLP, but by and large machine learning amounts to numerical optimization of weights for human designed representations and features. The goal of deep learning is to explore how computers can take advantage of data to develop features and representations appropriate for complex interpretation tasks. This tutorial aims to cover the basic motivation, ideas, models and learning algorithms in deep learning for natural language processing. Recently, these methods have been shown to perform very well on various NLP tasks such as language modeling, POS tagging, named entity recognition, sentiment analysis and paraphrase detection, among others. The most attractive quality of these techniques is that they can perform well without any external hand-designed resources or time-intensive feature engineering. Despite these advantages, many researchers in NLP are not familiar with these methods. Our focus is on insight and understanding, using graphical illustrations and simple, intuitive derivations. The goal of the tutorial is to make the inner workings of these techniques transparent, intuitive and their results interpretable, rather than black boxes labeled "magic here". The first part of the tutorial presents the basics of neural networks, neural word vectors, several simple models based on local windows and the math and algorithms of training via backpropagation. In this section applications include language modeling and POS tagging. In the second section we present recursive neural networks which can learn structured tree outputs as well as vector representations for phrases and sentences. We cover both equations as well as applications. We show how training can be achieved by a modified version of the backpropagation algorithm introduced before. These modifications allow the algorithm to work on tree structures. 
Applications include sentiment analysis and paraphrase detection. We also draw connections to recent work in semantic compositionality in vector spaces. The principal goal, again, is to make these methods appear intuitive and interpretable rather than mathematically confusing. By this point in the tutorial, the audience members should have a clear understanding of how to build a deep learning system for word-, sentence- and document-level tasks. The last part of the tutorial gives a general overview of the different applications of deep learning in NLP, including bag of words models. We will provide a discussion of NLP-oriented issues in modeling, interpretation, representational power, and optimization.]]></description>
<dc:subject>machinelearning nlp tutorial deeplearning</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:6c28a672bea5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:machinelearning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:tutorial"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:deeplearning"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://brat.nlplab.org/">
    <title>brat rapid annotation tool</title>
    <dc:date>2012-12-20T19:37:07+00:00</dc:date>
    <link>http://brat.nlplab.org/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[brat is a web-based tool for text annotation; that is, for adding notes to existing text documents.

brat is designed in particular for structured annotation, where the notes are not freeform text but have a fixed form that can be automatically processed and interpreted by a computer.]]></description>
<dc:subject>nlp annotation tools</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:254dfc4ee500/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:annotation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:tools"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://summly.com/">
    <title>Summly | Pocket sized news for iPhone</title>
    <dc:date>2012-12-11T17:42:36+00:00</dc:date>
    <link>http://summly.com/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Simple, intuitive and elegant. Summly redefines news for the mobile world with algorithmically generated summaries from hundreds of sources. Innovative gestures, animations and great summaries make reading the news fun: easy to use, easy to scan, easy to read, clear and concise. Watch an animated demonstration to the left.
]]></description>
<dc:subject>news summarization nlp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:d162cab58793/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:news"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:summarization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.let.vu.nl/en/events/news/2012/vu-to-develop-history-recorder.asp">
    <title>VU to develop history recorder - 2012 - Faculty of Arts, VU University Amsterdam</title>
    <dc:date>2012-11-30T16:30:01+00:00</dc:date>
    <link>http://www.let.vu.nl/en/events/news/2012/vu-to-develop-history-recorder.asp</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Trying to build Danto's "Ideal Chronicle":

The Faculty of Arts at VU University Amsterdam has received a European grant of 2.8M euro to develop a ‘history recorder’.

A history recorder is a computer program that “reads” daily streams of news and stores exactly what happened, where and when in the world, and who was involved. The program uses the same strategy as humans by building up a story and merging it with previously stored information.

Rather than storing separate events, it stores a chain of events according to a story-line. Like humans, the program thus removes duplicate information and complements incomplete information in the news while reading. The result is a single story-line for all the events. Unlike humans, the recorder will not forget any detail, will be able to recall the complete and true story as it was told, know who told what part of the story, and identify what sources contradict each other.]]></description>
<dc:subject>events extraction narrative research news nlp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:34591a4b2b95/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:events"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:extraction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:narrative"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:research"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:news"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.umiacs.umd.edu/~resnik/pubs/gibbs.pdf">
    <title>GIBBS SAMPLING FOR THE UNINITIATED</title>
    <dc:date>2012-11-09T17:06:34+00:00</dc:date>
    <link>http://www.umiacs.umd.edu/~resnik/pubs/gibbs.pdf</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[This document is intended for computer scientists who would like to try out a Markov Chain Monte Carlo (MCMC) technique, particularly in order to do inference with Bayesian models on problems related to text processing. We try to keep theory to the absolute minimum needed, though we work through the details much more explicitly than you usually see even in "introductory" explanations. That means we've attempted to be ridiculously explicit in our exposition and notation. After providing the reasons and reasoning behind Gibbs sampling (and at least nodding our heads in the direction of theory), we work through an example application in detail—the derivation of a Gibbs sampler for a Naïve Bayes model. Along with the example, we discuss some practical implementation issues, including the integrating out of continuous parameters when possible. We conclude with some pointers to literature that we've found to be somewhat more friendly to uninitiated readers. Note: as of June 3, 2010 we have corrected some small errors in the original April 2010 report.]]></description>
<dc:subject>nlp bayes topicmodels statistics howto</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:5a4ae23d670e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:bayes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:topicmodels"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:howto"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://code.google.com/p/dkpro-core-asl/">
    <title>dkpro-core-asl - DKPro Core ASL - Google Project Hosting</title>
    <dc:date>2012-11-09T17:01:04+00:00</dc:date>
    <link>http://code.google.com/p/dkpro-core-asl/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Many powerful and state-of-the-art NLP components are already freely available in the NLP research community. New and improved components are being developed and released continuously. The components cover the whole range of NLP-related processing tasks. DKPro Core provides wrappers for such third-party tools as well as original NLP components. DKPro Core builds heavily on uimaFIT which allows for rapid and easy development of NLP processing pipelines.]]></description>
<dc:subject>nlp code tools</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:fdd29df5003d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:code"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:tools"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://aclweb.org/anthology-new/J/J11/J11-4004.pdf">
    <title>What Determines Inter-Coder Agreement in Manual Annotations? A Meta-Analytic Investigation</title>
    <dc:date>2012-10-09T01:04:06+00:00</dc:date>
    <link>http://aclweb.org/anthology-new/J/J11/J11-4004.pdf</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Recent discussions of annotator agreement have mostly centered around its calculation and interpretation, and the correct choice of indices. Although these discussions are important, they only consider the “back-end” of the story, namely, what to do once the data are collected. Just as important in our opinion is to know how agreement is reached in the first place and what factors influence coder agreement as part of the annotation process or setting, as this knowledge can provide concrete guidelines for the planning and set-up of annotation projects. To investigate whether there are factors that consistently impact annotator agreement we conducted a meta-analytic investigation of annotation studies reporting agreement percentages. Our meta-analysis synthesized factors reported in 96 annotation studies from three domains (word-sense disambiguation, prosodic transcriptions, and phonetic transcriptions) and was based on a total of 346 agreement indices. Our analysis identified seven factors that influence reported agreement values: annotation domain, number of categories in a coding scheme, number of annotators in a project, whether annotators received training, the intensity of annotator training, the annotation purpose, and the method used for the calculation of percentage agreements. Based on our results we develop practical recommendations for the assessment, interpretation, calculation, and reporting of coder agreement. We also briefly discuss theoretical implications for the concept of annotation quality.]]></description>
<dc:subject>annotation agreement metrics evaluation nlp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:38f8292d3059/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:annotation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:agreement"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:metrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:evaluation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://aclweb.org/anthology-new/P/P12/P12-1091.pdf">
    <title>ACL 2012/Modeling Sentences in the Latent Space</title>
    <dc:date>2012-10-06T23:48:23+00:00</dc:date>
    <link>http://aclweb.org/anthology-new/P/P12/P12-1091.pdf</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Sentence Similarity is the process of computing a similarity score between two sentences. Previous sentence similarity work finds that latent semantics approaches to the problem do not perform well due to insufficient information in single sentences. In this paper, we show that by carefully handling words that are not in the sentences (missing words), we can train a reliable latent variable model on sentences. In the process, we propose a new evaluation framework for sentence similarity: Concept Definition Retrieval. The new framework allows for large scale tuning and testing of Sentence Similarity models. Experiments on the new task and previous data sets show significant improvement of our model over baselines and other traditional latent variable models. Our results indicate comparable and even better performance than current state of the art systems addressing the problem of sentence similarity.]]></description>
<dc:subject>similarity nlp textanalysis datamining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:eb476f9cb3b3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:similarity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:textanalysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:datamining"/>
</rdf:Bag></taxo:topics>
</item>
</rdf:RDF>