<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (rybesh)</title>
    <link>https://pinboard.in/u:rybesh/public/</link>
    <description>recent bookmarks from rybesh</description>
    <items>
      <rdf:Seq>	<rdf:li rdf:resource="https://github.com/unum-cloud/usearch"/>
	<rdf:li rdf:resource="http://freediscovery.io/doc/stable/"/>
	<rdf:li rdf:resource="https://github.com/ChicagoHarris/blobs"/>
	<rdf:li rdf:resource="http://ir.inf.ed.ac.uk/wiki/doku.php?id=yari:mtx"/>
	<rdf:li rdf:resource="http://shapeofdata.wordpress.com/2014/02/25/duality-and-coclustering/"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1203.6402"/>
	<rdf:li rdf:resource="http://jmlr.csail.mit.edu/proceedings/papers/v22/hoai12/hoai12.pdf"/>
	<rdf:li rdf:resource="http://www.cs.princeton.edu/~blei/papers/Blei2011.pdf"/>
	<rdf:li rdf:resource="http://glaros.dtc.umn.edu/gkhome/views/cluto"/>
	<rdf:li rdf:resource="http://sappingattention.blogspot.com/2011/02/fresh-set-of-eyes.html#more"/>
	<rdf:li rdf:resource="http://cran.r-project.org/web/packages/lda/index.html"/>
	<rdf:li rdf:resource="http://lucene.apache.org/mahout/"/>
      </rdf:Seq>
    </items>
  </channel><item rdf:about="https://github.com/unum-cloud/usearch">
    <title>unum-cloud/USearch: Fast Open-Source Search &amp; Clustering engine × for Vectors &amp; Arbitrary Objects × in C++, C, Python, JavaScript, Rust, Java, Objective-C, Swift, C#, GoLang, and Wolfram 🔍</title>
    <dc:date>2025-11-13T19:53:54+00:00</dc:date>
    <link>https://github.com/unum-cloud/usearch</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Smaller & Faster Single-File Similarity Search & Clustering Engine for Vectors & 🔜 Texts]]></description>
<dc:subject>clustering search database vectors</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:3b2a6b016f20/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:search"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:database"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:vectors"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://freediscovery.io/doc/stable/">
    <title>FreeDiscovery — FreeDiscovery 1.1.2 documentation</title>
    <dc:date>2017-06-14T19:49:21+00:00</dc:date>
    <link>http://freediscovery.io/doc/stable/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[FreeDiscovery is built on top of existing machine learning libraries (scikit-learn) and provides a REST API for information retrieval applications. It aims to benefit existing e-Discovery and information retrieval platforms with a focus on text categorization, semantic search, document clustering, duplicates detection and e-mail threading.]]></description>
<dc:subject>IR categorization clustering python</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:1b9454699775/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:IR"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:categorization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:python"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://github.com/ChicagoHarris/blobs">
    <title>ChicagoHarris/blobs</title>
    <dc:date>2015-10-13T23:56:18+00:00</dc:date>
    <link>https://github.com/ChicagoHarris/blobs</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Spatial analytics is often hampered by the arbitrary choice of units, allowing local heterogeneity to obscure true patterns. Blobs is a new “smart clustering” technique that lets us use large quantities of open municipal data from Plenario to redraw city maps to reflect facts on the ground, not administrative boundaries.

The algorithm, built on the max-p regions implementation by researchers at Arizona State University, creates spatial clusters using only one input parameter, the minimum size of each cluster (defined in any of several ways). This nonparametric approach creates clusters that fit the data as closely as possible, fully isolating regions based only on the variables of interest.]]></description>
<dc:subject>clustering gis spatial</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:a6d00a9f7445/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:gis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:spatial"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://ir.inf.ed.ac.uk/wiki/doku.php?id=yari:mtx">
    <title>yari:mtx [Information Retrieval Lab]</title>
    <dc:date>2014-04-08T19:49:31+00:00</dc:date>
    <link>http://ir.inf.ed.ac.uk/wiki/doku.php?id=yari:mtx</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[mtx is a command-line tool for rapidly trying new ideas in Information Retrieval and Machine Learning.]]></description>
<dc:subject>IR cli tools clustering nlp</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:380a739a72bc/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:IR"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:cli"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:tools"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:nlp"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://shapeofdata.wordpress.com/2014/02/25/duality-and-coclustering/">
    <title>Duality and Coclustering | The Shape of Data</title>
    <dc:date>2014-02-25T18:38:54+00:00</dc:date>
    <link>http://shapeofdata.wordpress.com/2014/02/25/duality-and-coclustering/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[In general, an algorithm that picks out clusters of both data points and features is called a co-clustering or biclustering algorithm, for hopefully obvious reasons. Forming a bipartite graph and running a standard clustering algorithm, like we did here, is a common approach to co-clustering, though by no means the only approach. While we could get similar information by running a standard clustering algorithm and then carefully analyzing the feature values in each of the resulting clusters, co-clustering can in many cases find better clusters. Moreover, co-clustering gives you direct evidence of which features were most important in determining the clusters, rather than having to infer this after the fact. So, while co-clustering won’t necessarily make sense in all situations, it can be a powerful tool, particularly for things like market-basket data, where there is a strong sense of data point/feature duality.]]></description>
<dc:subject>data analysis graph clustering</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:e5232294a2a0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:graph"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:clustering"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1203.6402">
    <title>[1203.6402] Scalable K-Means++</title>
    <dc:date>2012-03-30T12:51:53+00:00</dc:date>
    <link>http://arxiv.org/abs/1203.6402</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Over half a century old and showing no signs of aging, k-means remains one of the most popular data processing algorithms. As is well-known, a proper initialization of k-means is crucial for obtaining a good final solution. The recently proposed k-means++ initialization algorithm achieves this, obtaining an initial set of centers that is provably close to the optimum solution. A major downside of the k-means++ is its inherent sequential nature, which limits its applicability to massive data: one must make k passes over the data to find a good initial set of centers. In this work we show how to drastically reduce the number of passes needed to obtain, in parallel, a good initialization. This is unlike prevailing efforts on parallelizing k-means that have mostly focused on the post-initialization phases of k-means. We prove that our proposed initialization algorithm k-means|| obtains a nearly optimal solution after a logarithmic number of passes, and then show that in practice a constant number of passes suffices. Experimental evaluation on real-world large-scale data demonstrates that k-means|| outperforms k-means++ in both sequential and parallel settings.]]></description>
<dc:subject>clustering machinelearning</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:7f7c9a8f68dd/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:machinelearning"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://jmlr.csail.mit.edu/proceedings/papers/v22/hoai12/hoai12.pdf">
    <title>Maximum Margin Temporal Clustering</title>
    <dc:date>2012-03-26T22:25:06+00:00</dc:date>
    <link>http://jmlr.csail.mit.edu/proceedings/papers/v22/hoai12/hoai12.pdf</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Temporal Clustering (TC) refers to the factorization of multiple time series into a set of non-overlapping segments that belong to k temporal clusters. Existing methods based on extensions of generative models such as k -means or Switching Linear Dynamical Systems (SLDS) often lead to intractable inference and lack a mechanism for feature selection, critical when dealing with high dimensional data. To overcome these limitations, this paper proposes Maximum Margin Temporal Clustering (MMTC). MMTC simultaneously determines the start and the end of each segment, while learning a multi-class Support Vector Machine (SVM) to discriminate among temporal clusters. MMTC extends Maximum Margin Clustering in two ways: first, it incorporates the notion of TC, and second, it introduces additional constraints to achieve better balance between clusters. Experiments on clustering human actions and bee dancing motions illustrate the benefits of our approach compared to state-of-the-art methods.]]></description>
<dc:subject>temporality actions events clustering supervised machinelearning</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:ce95da4ebd75/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:temporality"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:actions"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:events"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:supervised"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:machinelearning"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.cs.princeton.edu/~blei/papers/Blei2011.pdf">
    <title>Blei - Introduction to Probabilistic Topic Models</title>
    <dc:date>2012-03-19T19:09:41+00:00</dc:date>
    <link>http://www.cs.princeton.edu/~blei/papers/Blei2011.pdf</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Probabilistic topic models are a suite of algorithms whose aim is to discover the hidden thematic structure in large archives of documents. In this article, we review the main ideas of this field, survey the current state-of-the-art, and describe some promising future directions. We first describe latent Dirichlet allocation (LDA) [8], which is the simplest kind of topic model. We discuss its connections to probabilistic modeling, and describe two kinds of algorithms for topic discovery. We then survey the growing body of research that extends and applies topic models in interesting ways. These extensions have been developed by relaxing some of the statistical assumptions of LDA, incorporating meta-data into the analysis of the documents, and using similar kinds of models on a diversity of data types such as social networks, images and genetics.  Finally, we give our thoughts as to some of the important unexplored directions for topic modeling. These include rigorous methods for checking models built for data exploration, new approaches to visualizing text and other high dimensional data, and moving beyond traditional information engineering applications towards using topic models for more scientific ends.]]></description>
<dc:subject>topicmodels unsupervised machinelearning clustering</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:10fbdf70aca0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:topicmodels"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:unsupervised"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:machinelearning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:clustering"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://glaros.dtc.umn.edu/gkhome/views/cluto">
    <title>Data Clustering Software | Karypis Lab</title>
    <dc:date>2012-01-18T20:55:36+00:00</dc:date>
    <link>http://glaros.dtc.umn.edu/gkhome/views/cluto</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[CLUTO is a software package for clustering low- and high-dimensional datasets and for analyzing the characteristics of the various clusters. CLUTO is well-suited for clustering data sets arising in many diverse application areas including information retrieval, customer purchasing transactions, web, GIS, science, and biology.]]></description>
<dc:subject>clustering datamining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:7cfce741ff68/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:datamining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://sappingattention.blogspot.com/2011/02/fresh-set-of-eyes.html#more">
    <title>Sapping Attention: Fresh set of eyes</title>
    <dc:date>2011-02-18T13:59:23+00:00</dc:date>
    <link>http://sappingattention.blogspot.com/2011/02/fresh-set-of-eyes.html#more</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[If we treat each lettered heading in the Library of Congress Catalog as a single, long text, we can ask the computer to find similar genres based on word usage.]]></description>
<dc:subject>classification clustering inls520</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:rybesh/b:f2329c5e8c14/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:classification"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:inls520"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://cran.r-project.org/web/packages/lda/index.html">
    <title>lda: Collapsed Gibbs sampling methods for topic models</title>
    <dc:date>2009-11-24T04:32:02+00:00</dc:date>
    <link>http://cran.r-project.org/web/packages/lda/index.html</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[This package implements latent Dirichlet allocation (LDA) and related models. This includes (but is not limited to) sLDA, corrLDA, and the mixed-membership stochastic blockmodel.
]]></description>
<dc:subject>clustering textanalysis datamining R topicmodels</dc:subject>
<dc:identifier>https://pinboard.in/u:rybesh/b:90b9dd8919f3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:textanalysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:datamining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:R"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:topicmodels"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://lucene.apache.org/mahout/">
    <title>Apache Mahout</title>
    <dc:date>2009-11-24T02:32:26+00:00</dc:date>
    <link>http://lucene.apache.org/mahout/</link>
    <dc:creator>rybesh</dc:creator><description><![CDATA[Mahout's goal is to build scalable machine learning libraries.
]]></description>
<dc:subject>machinelearning opensource hadoop apache recommendation clustering classification datamining</dc:subject>
<dc:identifier>https://pinboard.in/u:rybesh/b:4c3d3e9932a9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:machinelearning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:opensource"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:hadoop"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:apache"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:recommendation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:classification"/>
	<rdf:li rdf:resource="https://pinboard.in/u:rybesh/t:datamining"/>
</rdf:Bag></taxo:topics>
</item>
</rdf:RDF>