<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (cshalizi)</title>
    <link>https://pinboard.in/u:cshalizi/public/</link>
    <description>recent bookmarks from cshalizi</description>
    <items>
      <rdf:Seq>	<rdf:li rdf:resource="https://arxiv.org/abs/2501.15896"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2012.05269"/>
	<rdf:li rdf:resource="https://projecteuclid.org/euclid.aos/1607677231"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1710.08511"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1907.03783"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1906.09410"/>
	<rdf:li rdf:resource="https://www.tandfonline.com/doi/full/10.1080/10618600.2019.1609976"/>
	<rdf:li rdf:resource="http://www.tandfonline.com/doi/abs/10.1080/03610926.2013.790450#.VTRKX85_SF4"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1409.4813"/>
	<rdf:li rdf:resource="http://press.princeton.edu/titles/10286.html"/>
	<rdf:li rdf:resource="http://projecteuclid.org/euclid.ba/1340370402"/>
	<rdf:li rdf:resource="http://jmlr.org/proceedings/papers/v33/liu14.html"/>
	<rdf:li rdf:resource="http://jmlr.org/proceedings/papers/v33/park14.html"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1401.2490"/>
	<rdf:li rdf:resource="http://www.annualreviews.org/doi/abs/10.1146/annurev-statistics-022513-115657"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1310.5034"/>
	<rdf:li rdf:resource="http://www.tandfonline.com/doi/abs/10.1080/01621459.2013.772897#.Ukmy_RbPUlM"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1307.5599"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1307.0253"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1306.3185"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1305.0626"/>
	<rdf:li rdf:resource="http://projecteuclid.org/DPubS?service=UI&amp;version=1.0&amp;verb=Display&amp;handle=euclid.ejs/1364220670"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1207.5938"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1207.4162"/>
	<rdf:li rdf:resource="http://www.tandfonline.com/doi/abs/10.1080/10618600.2012.672115"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1206.4768"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1203.5181"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/0712.4273"/>
	<rdf:li rdf:resource="http://www.mitpressjournals.org/doi/abs/10.1162/neco.2008.10-06-351"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1201.5913"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1111.4954"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1104.3590"/>
	<rdf:li rdf:resource="http://www.springerlink.com/content/y70g818n051643g8/"/>
	<rdf:li rdf:resource="http://projecteuclid.org/DPubS?service=UI&amp;version=1.0&amp;verb=Display&amp;handle=euclid.ssu/1272547280"/>
	<rdf:li rdf:resource="http://www.jstatsoft.org/v32/i06/"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/0910.2034"/>
	<rdf:li rdf:resource="http://www.mitpressjournals.org/doi/abs/10.1162/neco.2008.02-07-478"/>
      </rdf:Seq>
    </items>
  </channel><item rdf:about="https://arxiv.org/abs/2501.15896">
    <title>[2501.15896] A mirror descent approach to maximum likelihood estimation in latent variable models</title>
    <dc:date>2025-02-03T00:31:16+00:00</dc:date>
    <link>https://arxiv.org/abs/2501.15896</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We introduce an approach based on mirror descent and sequential Monte Carlo (SMC) to perform joint parameter inference and posterior estimation in latent variable models. This approach is based on minimisation of a functional over the parameter space and the space of probability distributions and, contrary to other popular approaches, can be implemented when the latent variable takes values in discrete spaces. We provide a detailed theoretical analysis of both the mirror descent algorithm and its approximation via SMC. We experimentally show that the proposed algorithm outperforms standard expectation maximisation algorithms and is competitive with other popular methods for real-valued latent variables."]]></description>
<dc:subject>to:NB computational_statistics optimization latent_variables em_algorithm</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:4996f85a32ed/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:computational_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:optimization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:latent_variables"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2012.05269">
    <title>[2012.05269] Hard and Soft EM in Bayesian Network Learning from Incomplete Data</title>
    <dc:date>2020-12-12T18:13:27+00:00</dc:date>
    <link>https://arxiv.org/abs/2012.05269</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Incomplete data are a common feature in many domains, from clinical trials to industrial applications. Bayesian networks (BNs) are often used in these domains because of their graphical and causal interpretations. BN parameter learning from incomplete data is usually implemented with the Expectation-Maximisation algorithm (EM), which computes the relevant sufficient statistics ("soft EM") using belief propagation. Similarly, the Structural Expectation-Maximisation algorithm (Structural EM) learns the network structure of the BN from those sufficient statistics using algorithms designed for complete data. However, practical implementations of parameter and structure learning often impute missing data ("hard EM") to compute sufficient statistics instead of using belief propagation, for both ease of implementation and computational speed. In this paper, we investigate the question: what is the impact of using imputation instead of belief propagation on the quality of the resulting BNs? From a simulation study using synthetic data and reference BNs, we find that it is possible to recommend one approach over the other in several scenarios based on the characteristics of the data. We then use this information to build a simple decision tree to guide practitioners in choosing the EM algorithm best suited to their problem."]]></description>
<dc:subject>to:NB missing_data causal_discovery graphical_models em_algorithm statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:4910988c3fe1/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:missing_data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:causal_discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:graphical_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://projecteuclid.org/euclid.aos/1607677231">
    <title>Dwivedi , Ho , Khamaru , Wainwright , Jordan , Yu : Singularity, misspecification and the convergence rate of EM</title>
    <dc:date>2020-12-11T17:29:46+00:00</dc:date>
    <link>https://projecteuclid.org/euclid.aos/1607677231</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["A line of recent work has analyzed the behavior of the Expectation-Maximization (EM) algorithm in the well-specified setting, in which the population likelihood is locally strongly concave around its maximizing argument. Examples include suitably separated Gaussian mixture models and mixtures of linear regressions. We consider over-specified settings in which the number of fitted components is larger than the number of components in the true distribution. Such mis-specified settings can lead to singularity in the Fisher information matrix, and moreover, the maximum likelihood estimator based on $n$ i.i.d. samples in $d$ dimensions can have a nonstandard $O((d/n)^{1/4})$ rate of convergence. Focusing on the simple setting of two-component mixtures fit to a $d$-dimensional Gaussian distribution, we study the behavior of the EM algorithm both when the mixture weights are different (unbalanced case), and are equal (balanced case). Our analysis reveals a sharp distinction between these two cases: in the former, the EM algorithm converges geometrically to a point at Euclidean distance of $O((d/n)^{1/2})$ from the true parameter, whereas in the latter case, the convergence rate is exponentially slower, and the fixed point has a much lower $O((d/n)^{1/4})$ accuracy. Analysis of this singular case requires the introduction of some novel techniques: in particular, we make use of a careful form of localization in the associated empirical process, and develop a recursive argument to progressively sharpen the statistical rate."]]></description>
<dc:subject>to:NB em_algorithm mixture_models statistics misspecification jordan.michael_i. yu.bin wainwright.martin_j.</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:20670a01bf4c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:mixture_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:misspecification"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:jordan.michael_i."/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:yu.bin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:wainwright.martin_j."/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1710.08511">
    <title>[1710.08511] An Expectation Maximization Framework for Yule-Simon Preferential Attachment Models</title>
    <dc:date>2020-11-18T17:18:09+00:00</dc:date>
    <link>https://arxiv.org/abs/1710.08511</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In this paper we develop an Expectation Maximization(EM) algorithm to estimate the parameter of a Yule-Simon distribution. The Yule-Simon distribution exhibits the "rich get richer" effect whereby an 80-20 type of rule tends to dominate. These distributions are ubiquitous in industrial settings. The EM algorithm presented provides both frequentist and Bayesian estimates of the λ parameter. By placing the estimation method within the EM framework we are able to derive Standard errors of the resulting estimate. Additionally, we prove convergence of the Yule-Simon EM algorithm and study the rate of convergence. An explicit, closed form solution for the rate of convergence of the algorithm is given. Applications including graph node degree distribution estimation are listed."]]></description>
<dc:subject>to:NB estimation em_algorithm heavy_tails statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:b20d7aa0a1ee/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:heavy_tails"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1907.03783">
    <title>[1907.03783] Comparing EM with GD in Mixture Models of Two Components</title>
    <dc:date>2019-10-30T13:41:37+00:00</dc:date>
    <link>https://arxiv.org/abs/1907.03783</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The expectation-maximization (EM) algorithm has been widely used in minimizing the negative log likelihood (also known as cross entropy) of mixture models. However, little is understood about the goodness of the fixed points it converges to. In this paper, we study the regions where one component is missing in two-component mixture models, which we call one-cluster regions. We analyze the propensity of such regions to trap EM and gradient descent (GD) for mixtures of two Gaussians and mixtures of two Bernoullis. In the case of Gaussian mixtures, EM escapes one-cluster regions exponentially fast, while GD escapes them linearly fast. In the case of mixtures of Bernoullis, we find that there exist one-cluster regions that are stable for GD and therefore trap GD, but those regions are unstable for EM, allowing EM to escape. Those regions are local minima that appear universally in experiments and can be arbitrarily bad. This work implies that EM is less likely than GD to converge to certain bad local optima in mixture models."]]></description>
<dc:subject>to:NB mixture_models em_algorithm optimization computational_statistics statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:aadd0dd5820a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:mixture_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:optimization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:computational_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1906.09410">
    <title>[1906.09410] A reaction network scheme which implements inference and learning for Hidden Markov Models</title>
    <dc:date>2019-08-20T15:27:24+00:00</dc:date>
    <link>https://arxiv.org/abs/1906.09410</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["With a view towards molecular communication systems and molecular multi-agent systems, we propose the Chemical Baum-Welch Algorithm, a novel reaction network scheme that learns parameters for Hidden Markov Models (HMMs). Each reaction in our scheme changes only one molecule of one species to one molecule of another. The reverse change is also accessible but via a different set of enzymes, in a design reminiscent of futile cycles in biochemical pathways. We show that every fixed point of the Baum-Welch algorithm for HMMs is a fixed point of our reaction network scheme, and every positive fixed point of our scheme is a fixed point of the Baum-Welch algorithm. We prove that the "Expectation" step and the "Maximization" step of our reaction network separately converge exponentially fast. We simulate mass-action kinetics for our network on an example sequence, and show that it learns the same parameters for the HMM as the Baum-Welch algorithm."]]></description>
<dc:subject>to:NB em_algorithm markov_models state-space_models biochemical_networks wiuf.carsten pointless_but_nonetheless_awesome biological_computers</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:df0502950c8a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:markov_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:state-space_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:biochemical_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:wiuf.carsten"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:pointless_but_nonetheless_awesome"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:biological_computers"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.tandfonline.com/doi/full/10.1080/10618600.2019.1609976">
    <title>An Expectation Conditional Maximization Approach for Gaussian Graphical Models: Journal of Computational and Graphical Statistics: Vol 0, No 0</title>
    <dc:date>2019-06-23T17:27:57+00:00</dc:date>
    <link>https://www.tandfonline.com/doi/full/10.1080/10618600.2019.1609976</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Bayesian graphical models are a useful tool for understanding dependence relationships among many variables, particularly in situations with external prior information. In high-dimensional settings, the space of possible graphs becomes enormous, rendering even state-of-the-art Bayesian stochastic search computationally infeasible. We propose a deterministic alternative to estimate Gaussian and Gaussian copula graphical models using an expectation conditional maximization (ECM) algorithm, extending the EM approach from Bayesian variable selection to graphical model estimation. We show that the ECM approach enables fast posterior exploration under a sequence of mixture priors, and can incorporate multiple sources of information."]]></description>
<dc:subject>to:NB graphical_models causal_discovery em_algorithm computational_statistics statistics mccormick.tyler</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:9e6660cb9d63/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:graphical_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:causal_discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:computational_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:mccormick.tyler"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.tandfonline.com/doi/abs/10.1080/03610926.2013.790450#.VTRKX85_SF4">
    <title>Taylor &amp; Francis Online :: Generalized Linear Factor Models: A New Local EM Estimation Algorithm - Communications in Statistics - Theory and Methods - Volume 42, Issue 16</title>
    <dc:date>2015-04-20T00:48:20+00:00</dc:date>
    <link>http://www.tandfonline.com/doi/abs/10.1080/03610926.2013.790450#.VTRKX85_SF4</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In this article, a general approach to latent variable models based on an underlying generalized linear model (GLM) with factor analysis observation process is introduced. We call these models Generalized Linear Factor Models (GLFM). The observations are produced from a general model framework that involves observed and latent variables that are assumed to be distributed in the exponential family. More specifically, we concentrate on situations where the observed variables are both discretely measured (e.g., binomial, Poisson) and continuously distributed (e.g., gamma). The common latent factors are assumed to be independent with a standard multivariate normal distribution. Practical details of training such models with a new local expectation-maximization (EM) algorithm, which can be considered as a generalized EM-type algorithm, are also discussed. In conjunction with an approximated version of the Fisher score algorithm (FSA), we show how to calculate maximum likelihood estimates of the model parameters, and to yield inferences about the unobservable path of the common factors. The methodology is illustrated by an extensive Monte Carlo simulation study and the results show promising performance."

--- Minus: someone went and did the idea I just had.
--- Plus: I no longer have to write that.  (The latent Gaussian distribution seems unnecessarily limiting, though; one could probably do something where the only fixed parametric assumptions were generalized-linearity and the link function itself.)]]></description>
<dc:subject>statistics factor_analysis em_algorithm in_NB have_read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:ca4cd201ee4f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:factor_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1409.4813">
    <title>[1409.4813] Identification of core-periphery structure in networks</title>
    <dc:date>2014-11-22T16:58:46+00:00</dc:date>
    <link>http://arxiv.org/abs/1409.4813</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Many networks can be usefully decomposed into a dense core plus an outlying, loosely-connected periphery. Here we propose an algorithm for performing such a decomposition on empirical network data using methods of statistical inference. Our method fits a generative model of core-periphery structure to observed data using a combination of an expectation--maximization algorithm for calculating the parameters of the model and a belief propagation algorithm for calculating the decomposition itself. We find the method to be efficient, scaling easily to networks with a million or more nodes and we test it on a range of networks, including real-world examples as well as computer-generated benchmarks, for which it successfully identifies known core-periphery structure with low error rate. We also demonstrate that the method is immune from the detectability transition observed in the related community detection problem, which prevents the detection of community structure when that structure is too weak. There is no such transition for core-periphery structure, which is detectable, albeit with some statistical error, no matter how weak it is."]]></description>
<dc:subject>to:NB network_data_analysis statistics em_algorithm kith_and_kin newman.mark to_teach:baby-nets</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:724b4e8923a5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:newman.mark"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:baby-nets"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://press.princeton.edu/titles/10286.html">
    <title>Vidyasagar, M.: Hidden Markov Processes: Theory and Applications to Biology</title>
    <dc:date>2014-10-02T20:16:09+00:00</dc:date>
    <link>http://press.princeton.edu/titles/10286.html</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This book explores important aspects of Markov and hidden Markov processes and the applications of these ideas to various problems in computational biology. The book starts from first principles, so that no previous knowledge of probability is necessary. However, the work is rigorous and mathematical, making it useful to engineers and mathematicians, even those not interested in biological applications. A range of exercises is provided, including drills to familiarize the reader with concepts and more advanced problems that require deep thinking about the theory. Biological applications are taken from post-genomic biology, especially genomics and proteomics.
"The topics examined include standard material such as the Perron-Frobenius theorem, transient and recurrent states, hitting probabilities and hitting times, maximum likelihood estimation, the Viterbi algorithm, and the Baum-Welch algorithm. The book contains discussions of extremely useful topics not usually seen at the basic level, such as ergodicity of Markov processes, Markov Chain Monte Carlo (MCMC), information theory, and large deviation theory for both i.i.d and Markov processes. The book also presents state-of-the-art realization theory for hidden Markov models. Among biological applications, it offers an in-depth look at the BLAST (Basic Local Alignment Search Technique) algorithm, including a comprehensive explanation of the underlying theory. Other applications such as profile hidden Markov models are also explored."]]></description>
<dc:subject>books:noted markov_models state-space_models em_algorithm large_deviations stochastic_processes statistical_inference_for_stochastic_processes statistics genomics bioinformatics books:owned in_NB vidyasagar.mathukumalli</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:47a239958696/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:books:noted"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:markov_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:state-space_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:large_deviations"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:stochastic_processes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistical_inference_for_stochastic_processes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:genomics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bioinformatics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:books:owned"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:vidyasagar.mathukumalli"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://projecteuclid.org/euclid.ba/1340370402">
    <title>Rydén : EM versus Markov chain Monte Carlo for estimation of hidden Markov models: a computational perspective</title>
    <dc:date>2014-07-31T13:21:49+00:00</dc:date>
    <link>http://projecteuclid.org/euclid.ba/1340370402</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Hidden Markov models (HMMs) and related models have become standard in statistics during the last 15--20 years, with applications in diverse areas like speech and other statistical signal processing, hydrology, financial statistics and econometrics, bioinformatics etc. Inference in HMMs is traditionally often carried out using the EM algorithm, but examples of Bayesian estimation, in general implemented through Markov chain Monte Carlo (MCMC) sampling are also frequent in the HMM literature. The purpose of this paper is to compare the EM and MCMC approaches in three cases of different complexity; the examples include model order selection, continuous-time HMMs and variants of HMMs in which the observed data depends on many hidden variables in an overlapping fashion. All these examples in some way or another originate from real-data applications. Neither EM nor MCMC analysis of HMMs is a black-box methodology without need for user-interaction, and we will illustrate some of the problems, like poor mixing and long computation times, one may expect to encounter."]]></description>
<dc:subject>to:NB em_algorithm monte_carlo markov_models state-space_models estimation statistics computational_statistics ryden.tobias</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:a2dacf87ada3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:monte_carlo"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:markov_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:state-space_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:computational_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:ryden.tobias"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://jmlr.org/proceedings/papers/v33/liu14.html">
    <title>Learning Heterogeneous Hidden Markov Random Fields | AISTATS 2014 | JMLR W&amp;CP</title>
    <dc:date>2014-04-20T17:46:32+00:00</dc:date>
    <link>http://jmlr.org/proceedings/papers/v33/liu14.html</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Hidden Markov random fields (HMRFs) are conventionally assumed to be homogeneous in the sense that the potential functions are invariant across different sites. However in some biological applications, it is desirable to make HMRFs heterogeneous, especially when there exists some background knowledge about how the potential functions vary. We formally define heterogeneous HMRFs and propose an EM algorithm whose M-step combines a contrastive divergence learner with a kernel smoothing step to incorporate the background knowledge. Simulations show that our algorithm is effective for learning heterogeneous HMRFs and outperforms alternative binning methods. We learn a heterogeneous HMRF in a real-world study."

- It seems to me that heterogeneity (in this sense) is always a second-best modeling strategy to actually accounting for the variation using improved covariates, but...]]></description>
<dc:subject>to:NB markov_models random_fields statistics computational_statistics em_algorithm</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:082c1582939f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:markov_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:random_fields"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:computational_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://jmlr.org/proceedings/papers/v33/park14.html">
    <title>LAMORE: A Stable, Scalable Approach to Latent Vector Autoregressive Modeling of Categorical Time Series | AISTATS 2014 | JMLR W&amp;CP</title>
    <dc:date>2014-04-20T17:42:06+00:00</dc:date>
    <link>http://jmlr.org/proceedings/papers/v33/park14.html</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Latent vector autoregressive models for categorical time series have a wide range of potential applications from marketing research to healthcare analytics. However, a brute-force particle filter implementation of the Expectation-Maximization (EM) algorithm often fails to estimate the maximum likelihood parameters due to the Monte Carlo approximation of the E-step and multiple local optima of the log-likelihood function. This paper proposes two auxiliary techniques that help stabilize and calibrate the estimated parameters. These two techniques, namely asymptotic mean regularization and low-resolution augmentation, do not require any additional parameter tuning, and can be implemented by modifying the brute-force EM algorithm. Experiments with simulated data show that the proposed techniques effectively stabilize the parameter estimation process. Also, experimental results using Medicare and MIMIC-II datasets illustrate various potential applications of the proposed model and methods."]]></description>
<dc:subject>to:NB time_series em_algorithm particle_filters statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:222eebdefe1f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:particle_filters"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1401.2490">
    <title>[1401.2490] An Online Expectation-Maximisation Algorithm for Nonnegative Matrix Factorisation Models</title>
    <dc:date>2014-03-10T01:28:57+00:00</dc:date>
    <link>http://arxiv.org/abs/1401.2490</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In this paper we formulate the nonnegative matrix factorisation (NMF) problem as a maximum likelihood estimation problem for hidden Markov models and propose online expectation-maximisation (EM) algorithms to estimate the NMF and the other unknown static parameters. We also propose a sequential Monte Carlo approximation of our online EM algorithm. We show the performance of the proposed method with two numerical examples."]]></description>
<dc:subject>to:NB low-rank_approximation em_algorithm markov_models time_series statistics singh.sumeetpal</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:3d558000643c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:low-rank_approximation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:markov_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:singh.sumeetpal"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.annualreviews.org/doi/abs/10.1146/annurev-statistics-022513-115657">
    <title>Build, Compute, Critique, Repeat: Data Analysis with Latent Variable Models - Annual Review of Statistics and Its Application, 1(1):203</title>
    <dc:date>2014-01-16T00:03:34+00:00</dc:date>
    <link>http://www.annualreviews.org/doi/abs/10.1146/annurev-statistics-022513-115657</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We survey latent variable models for solving data-analysis problems. A latent variable model is a probabilistic model that encodes hidden patterns in the data. We uncover these patterns from their conditional distribution and use them to summarize data and form predictions. Latent variable models are important in many fields, including computational biology, natural language processing, and social network analysis. Our perspective is that models are developed iteratively: We build a model, use it to analyze data, assess how it succeeds and fails, revise it, and repeat. We describe how new research has transformed these essential activities. First, we describe probabilistic graphical models, a language for formulating latent variable models. Second, we describe mean field variational inference, a generic algorithm for approximating conditional distributions. Third, we describe how to use our analyses to solve problems: exploring the data, forming predictions, and pointing us in the direction of improved models."]]></description>
<dc:subject>to:NB inference_to_latent_objects statistics variational_inference em_algorithm blei.david model_checking to_read to_teach:undergrad-ADA entableted</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:631d0be465c3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:inference_to_latent_objects"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variational_inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:blei.david"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:model_checking"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:undergrad-ADA"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:entableted"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1310.5034">
    <title>[1310.5034] A Theoretical and Experimental Comparison of the EM and SEM Algorithm</title>
    <dc:date>2013-10-23T14:26:47+00:00</dc:date>
    <link>http://arxiv.org/abs/1310.5034</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In this paper we provide a new analysis of the SEM algorithm. Unlike the original work, we focus on the analysis of a single run of the algorithm. First, we discuss the algorithm for general mixture distributions. Second, we consider Gaussian mixture models and show that with high probability the update equations of the EM algorithm and its stochastic variant are almost the same, given that the input set is sufficiently large. Our experiments confirm that this still holds for a large number of successive update steps. In particular, for Gaussian mixture models, we show that the stochastic variant runs nearly twice as fast."]]></description>
<dc:subject>em_algorithm mixture_models statistics computational_statistics in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:49f2f3cbb147/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:mixture_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:computational_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.tandfonline.com/doi/abs/10.1080/01621459.2013.772897#.Ukmy_RbPUlM">
    <title>Taylor &amp; Francis Online :: Nonparametric Mixture of Regression Models - Journal of the American Statistical Association - Volume 108, Issue 503</title>
    <dc:date>2013-09-30T17:45:58+00:00</dc:date>
    <link>http://www.tandfonline.com/doi/abs/10.1080/01621459.2013.772897#.Ukmy_RbPUlM</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Motivated by an analysis of U.S. house price index (HPI) data, we propose nonparametric finite mixture of regression models. We study the identifiability issue of the proposed models, and develop an estimation procedure by employing kernel regression. We further systematically study the sampling properties of the proposed estimators, and establish their asymptotic normality. A modified EM algorithm is proposed to carry out the estimation procedure. We show that our algorithm preserves the ascent property of the EM algorithm in an asymptotic sense. Monte Carlo simulations are conducted to examine the finite sample performance of the proposed estimation procedure. An empirical analysis of the U.S. HPI data is illustrated for the proposed methodology."]]></description>
<dc:subject>to:NB mixture_models ensemble_methods regression nonparametrics statistics em_algorithm to_teach:undergrad-ADA</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c403f3f21748/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:mixture_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:ensemble_methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:nonparametrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:undergrad-ADA"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1307.5599">
    <title>[1307.5599] Performance comparison of State-of-the-art Missing Value Imputation Algorithms on Some Bench mark Datasets</title>
    <dc:date>2013-07-26T14:21:16+00:00</dc:date>
    <link>http://arxiv.org/abs/1307.5599</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Decision making from data involves identifying a set of attributes that contribute to effective decision making through computational intelligence. The presence of missing values greatly influences the selection of right set of attributes and this renders degradation in classification accuracies of the classifiers. As missing values are quite common in data collection phase during field experiments or clinical trails appropriate handling would improve the classifier performance. In this paper we present a review of recently developed missing value imputation algorithms and compare their performance on some bench mark datasets."]]></description>
<dc:subject>to:NB inference_to_latent_objects em_algorithm missing_data imputation statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:2e1e771e472a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:inference_to_latent_objects"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:missing_data"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:imputation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1307.0253">
    <title>[1307.0253] Exploratory Learning</title>
    <dc:date>2013-07-02T03:19:48+00:00</dc:date>
    <link>http://arxiv.org/abs/1307.0253</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In multiclass semi-supervised learning (SSL), it is sometimes the case that the number of classes present in the data is not known, and hence no labeled examples are provided for some classes. In this paper we present variants of well-known semi-supervised multiclass learning methods that are robust when the data contains an unknown number of classes. In particular, we present an "exploratory" extension of expectation-maximization (EM) that explores different numbers of classes while learning. "Exploratory" SSL greatly improves performance on three datasets in terms of F1 on the classes with seed examples i.e., the classes which are expected to be in the data. Our Exploratory EM algorithm also outperforms a SSL method based non-parametric Bayesian clustering."]]></description>
<dc:subject>to:NB semi-supervised_learning em_algorithm machine_learning statistics inference_to_latent_objects kith_and_kin cohen.william_w.</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:a8edd30a7cbd/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:semi-supervised_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:machine_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:inference_to_latent_objects"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:cohen.william_w."/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1306.3185">
    <title>[1306.3185] Robust regression modeling and predictive recursion maximum likelihood</title>
    <dc:date>2013-06-27T15:00:42+00:00</dc:date>
    <link>http://arxiv.org/abs/1306.3185</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In a regression context, robustness to specification of the error distribution is important. In this paper, we propose a general nonparametric scale mixture model for the error distribution. For such mixtures, the predictive recursion method has been shown to be a simple and computationally efficient alternative to existing methods. Here, a predictive recursion-based likelihood function is constructed, and estimation of the regression parameters proceeds by maximizing this function. A hybrid predictive recursion--EM algorithm is proposed for this purpose, and simulations and real data analyses compare its performance to a variety of existing methods. A useful by-product of the hybrid algorithm is a sequence of scores which can be used to identify outliers."]]></description>
<dc:subject>to:NB statistics regression nonparametrics density_estimation prediction em_algorithm</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:141f049599ea/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:nonparametrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:density_estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1305.0626">
    <title>[1305.0626] An Improved EM algorithm</title>
    <dc:date>2013-05-07T22:16:36+00:00</dc:date>
    <link>http://arxiv.org/abs/1305.0626</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In this paper, we firstly give a brief introduction of expectation maximization (EM) algorithm, and then discuss the initial value sensitivity of expectation maximization algorithm. Subsequently, we give a short proof of EM's convergence. Then, we implement experiments with the expectation maximization algorithm (We implement all the experiments on Gaussian mixture model (GMM)). Our experiment with expectation maximization is performed in the following three cases: initialize randomly; initialize with result of K-means; initialize with result of K-medoids. The experiment result shows that expectation maximization algorithm depend on its initial state or parameters. And we found that EM initialized with K-medoids performed better than both the one initialized with K-means and the one initialized randomly."]]></description>
<dc:subject>to:NB em_algorithm computational_statistics statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c3b2ad02cb35/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:computational_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://projecteuclid.org/DPubS?service=UI&amp;version=1.0&amp;verb=Display&amp;handle=euclid.ejs/1364220670">
    <title>Le Corff , Fort : Online Expectation Maximization based algorithms for inference in Hidden Markov Models</title>
    <dc:date>2013-03-25T16:52:51+00:00</dc:date>
    <link>http://projecteuclid.org/DPubS?service=UI&amp;version=1.0&amp;verb=Display&amp;handle=euclid.ejs/1364220670</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The Expectation Maximization (EM) algorithm is a versatile tool for model parameter estimation in latent data models. When processing large data sets or data stream however, EM becomes intractable since it requires the whole data set to be available at each iteration of the algorithm. In this contribution, a new generic online EM algorithm for model parameter inference in general Hidden Markov Model is proposed. This new algorithm updates the parameter estimate after a block of observations is processed (online). The convergence of this new algorithm is established, and the rate of convergence is studied showing the impact of the block-size sequence. An averaging procedure is also proposed to improve the rate of convergence. Finally, practical illustrations are presented to highlight the performance of these algorithms in comparison to other online maximum likelihood procedures."]]></description>
<dc:subject>to:NB time_series markov_models em_algorithm estimation filtering state_estimation state-space_models statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:4c3bb13a47a2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:markov_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:filtering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:state_estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:state-space_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1207.5938">
    <title>[1207.5938] Convergent Stochastic Expectation Maximization algorithm with efficient sampling in high dimension. Application to deformable template model estimation</title>
    <dc:date>2012-08-24T13:41:31+00:00</dc:date>
    <link>http://arxiv.org/abs/1207.5938</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Estimation in the deformable template model is a big challenge in image analysis. The issue is to estimate an atlas of a population. This atlas contains a template and the corresponding geometrical variability of the observed shapes. The goal is to propose an accurate algorithm with low computational cost and with theoretical guaranties of relevance. This becomes very demanding when dealing with high dimensional data which is particularly the case of medical images. We propose to use an optimized Monte Carlo Markov Chain (MCMC) method into a stochastic Expectation Maximization (EM) algorithm in order to estimate the model parameters by maximizing the likelihood. We present a new Anisotropic Metropolis Adjusted Langevin Algorithm (AMALA) which is used as transition in the MCMC method. We first prove that this new sampler leads to a geometrically uniformly ergodic Markov chain. We prove also that under mild conditions, the estimated parameters converge almost surely and are asymptotically Gaussian distributed. The methodology developed is then tested on handwritten digits and some 2D and 3D medical images for the deformable model estimation. More widely, the proposed algorithm can be used for a large range of models in many field of applications such as pharmacology or genetic."]]></description>
<dc:subject>em_algorithm statistics machine_learning spatial_statistics in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c534c5a5cf05/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:machine_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:spatial_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1207.4162">
    <title>[1207.4162] ARMA Time-Series Modeling with Graphical Models</title>
    <dc:date>2012-08-08T23:40:23+00:00</dc:date>
    <link>http://arxiv.org/abs/1207.4162</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We express the classic ARMA time-series model as a directed graphical model. In doing so, we find that the deterministic relationships in the model make it effectively impossible to use the EM algorithm for learning model parameters. To remedy this problem, we replace the deterministic relationships with Gaussian distributions having a small variance, yielding the stochastic ARMA (ARMA) model. This modification allows us to use the EM algorithm to learn parameters and to forecast, even in situations where some data is missing. This modification, in conjunction with the graphical model approach, also allows us to include cross predictors in situations where there are multiple times series and/or additional nontemporal covariates. More surprising, experiments suggest that the move to stochastic ARMA yields improved accuracy through better smoothing. We demonstrate improvements afforded by cross prediction and better smoothing on real data."]]></description>
<dc:subject>heard_the_talk time_series em_algorithm graphical_models to_teach:undergrad-ADA statistics in_NB have_read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:1f5cda2e1f65/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:heard_the_talk"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:graphical_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:undergrad-ADA"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.tandfonline.com/doi/abs/10.1080/10618600.2012.672115">
    <title>Monotonically Overrelaxed EM Algorithms - Journal of Computational and Graphical Statistics - Volume 21, Issue 2</title>
    <dc:date>2012-06-23T14:59:11+00:00</dc:date>
    <link>http://www.tandfonline.com/doi/abs/10.1080/10618600.2012.672115</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We explore the idea of overrelaxation for accelerating the expectation-maximization (EM) algorithm, focusing on preserving its simplicity and monotonic convergence properties. It is shown that in many cases, a trivial modification in the M-step results in an algorithm that maintains monotonic increase in the log-likelihood, but can have an appreciably faster convergence rate, especially when EM is very slow. The method is applicable to more general fixed point algorithms. Its simplicity and effectiveness are illustrated with several statistical problems, including probit regression, least absolute deviations regression, Poisson inverse problems, and finite mixtures. This article has supplemental materials available online."]]></description>
<dc:subject>to:NB em_algorithm computational_statistics statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:ca1b21dcda6b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:computational_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1206.4768">
    <title>[1206.4768] On Convergence Properties of the Monte Carlo EM Algorithm</title>
    <dc:date>2012-06-23T14:26:42+00:00</dc:date>
    <link>http://arxiv.org/abs/1206.4768</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The Expectation-Maximization (EM) algorithm (Dempster, Laird and Rubin, 1977) is a popular method for computing maximum likelihood estimates (MLEs) in problems with missing data. Each iteration of the algorithm formally consists of an E-step: evaluate the expected complete-data log-likelihood given the observed data, with expectation taken at current parameter estimate; and an M-step: maximize the resulting expression to find the updated estimate. Conditions that guarantee convergence of the EM sequence to a unique MLE were found by Boyles (1983) and Wu (1983). In complicated models for high-dimensional data, it is common to encounter an intractable integral in the E-step. The Monte Carlo EM algorithm of Wei and Tanner (1990) works around this difficulty by maximizing instead a Monte Carlo approximation to the appropriate conditional expectation. Convergence properties of Monte Carlo EM have been studied, most notably, by Chan and Ledolter (1995) and Fort and Moulines (2003). The goal of this review paper is to provide an accessible but rigorous introduction to the convergence properties of EM and Monte Carlo EM. No previous knowledge of the EM algorithm is assumed. We demonstrate the implementation of EM and Monte Carlo EM in two simple but realistic examples. We show that if the EM algorithm converges it converges to a stationary point of the likelihood, and that the rate of convergence is linear at best. For Monte Carlo EM we present a readable proof of the main result of Chan and Ledolter (1995), and state without proof the conclusions of Fort and Moulines (2003). An important practical implication of Fort and Moulines's (2003) result relates to the determination of Monte Carlo sample sizes in MCEM; we provide a brief review of the literature (Booth and Hobert, 1999; Caffo, Jank and Jones, 2005) on that problem."]]></description>
<dc:subject>em_algorithm monte_carlo statistics computational_statistics in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:0bbb1ae3f89c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:monte_carlo"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:computational_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1203.5181">
    <title>[1203.5181] $k$-MLE: A fast algorithm for learning statistical mixture models</title>
    <dc:date>2012-03-26T00:46:07+00:00</dc:date>
    <link>http://arxiv.org/abs/1203.5181</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We describe $k$-MLE, a fast and efficient local search algorithm for learning finite statistical mixtures of exponential families such as Gaussian mixture models. Mixture models are traditionally learned using the expectation-maximization (EM) soft clustering technique that monotonically increases the incomplete (expected complete) likelihood. Given prescribed mixture weights, the hard clustering $k$-MLE algorithm iteratively assigns data to the most likely weighted component and update the component models using Maximum Likelihood Estimators (MLEs). Using the duality between exponential families and Bregman divergences, we prove that the local convergence of the complete likelihood of $k$-MLE follows directly from the convergence of a dual additively weighted Bregman hard clustering. The inner loop of $k$-MLE can be implemented using any $k$-means heuristic like the celebrated Lloyd's batched or Hartigan's greedy swap updates. We then show how to update the mixture weights by minimizing a cross-entropy criterion that implies to update weights by taking the relative proportion of cluster points, and reiterate the mixture parameter update and mixture weight update processes until convergence. Hard EM is interpreted as a special case of $k$-MLE when both the component update and the weight update are performed successively in the inner loop. To initialize $k$-MLE, we propose $k$-MLE++, a careful initialization of $k$-MLE guaranteeing probabilistically a global bound on the best possible complete likelihood."]]></description>
<dc:subject>em_algorithm mixture_models statistics machine_learning clustering estimation in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:d66ebc2993f6/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:mixture_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:machine_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/0712.4273">
    <title>[0712.4273] Online EM Algorithm for Latent Data Models</title>
    <dc:date>2012-02-29T16:03:58+00:00</dc:date>
    <link>http://arxiv.org/abs/0712.4273</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In this contribution, we propose a generic online (also sometimes called adaptive or recursive) version of the Expectation-Maximisation (EM) algorithm applicable to latent variable models of independent observations. Compared to the algorithm of Titterington (1984), this approach is more directly connected to the usual EM algorithm and does not rely on integration with respect to the complete data distribution. The resulting algorithm is usually simpler and is shown to achieve convergence to the stationary points of the Kullback-Leibler divergence between the marginal distribution of the observation and the model distribution at the optimal rate, i.e., that of the maximum likelihood estimator. In addition, the proposed approach is also suitable for conditional (or regression) models, as illustrated in the case of the mixture of linear regressions model."]]></description>
<dc:subject>to:NB statistics em_algorithm</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:581d145ba3e6/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.mitpressjournals.org/doi/abs/10.1162/neco.2008.10-06-351">
    <title>Online Learning with Hidden Markov Models</title>
    <dc:date>2012-02-21T04:20:08+00:00</dc:date>
    <link>http://www.mitpressjournals.org/doi/abs/10.1162/neco.2008.10-06-351</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We present an online version of the expectation-maximization (EM) algorithm for hidden Markov models (HMMs). The sufficient statistics required for parameters estimation is computed recursively with time, that is, in an online way instead of using the batch forward-backward procedure. This computational scheme is generalized to the case where the model parameters can change with time by introducing a discount factor into the recurrence relations. The resulting algorithm is equivalent to the batch EM algorithm, for appropriate discount factor and scheduling of parameters update. On the other hand, the online algorithm is able to deal with dynamic environments, i.e., when the statistics of the observed data is changing with time. The implications of the online algorithm for probabilistic modeling in neuroscience are briefly discussed."]]></description>
<dc:subject>to:NB markov_models filtering state_estimation statistics em_algorithm</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:a8088c3cdd66/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:markov_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:filtering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:state_estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1201.5913">
    <title>[1201.5913] A Component-wise EM Algorithm for Mixtures</title>
    <dc:date>2012-02-01T14:21:36+00:00</dc:date>
    <link>http://arxiv.org/abs/1201.5913</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In some situations, EM algorithm shows slow convergence problems. One possible reason is that standard procedures update the parameters simultaneously. In this paper we focus on finite mixture estimation. In this framework, we propose a component-wise EM, which updates the parameters sequentially. We give an interpretation of this procedure as a proximal point algorithm and use it to prove the convergence. Illustrative numerical experiments show how our algorithm compares to EM and a version of the SAGE algorithm."

Huh, is this related to the way that updating partial response functions simultaneously in a GAM can be trouble, and it's better, as in standard backfitting, to update sequentially?]]></description>
<dc:subject>statistics em_algorithm mixture_models in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:ef597d6eeebe/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:mixture_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1111.4954">
    <title>[1111.4954] Estimation for general birth-death processes</title>
    <dc:date>2011-11-23T12:54:56+00:00</dc:date>
    <link>http://arxiv.org/abs/1111.4954</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Birth-death processes (BDPs) are continuous-time Markov chains that track the number of "particles" in a system over time. While widely used in population biology, genetics and ecology, statistical inference of the instantaneous particle birth and death rates remains largely limited to restrictive linear BDPs in which per-particle birth and death rates are constant. Researchers often observe the number of particles at discrete times, necessitating data augmentation procedures such as expectation-maximization (EM) to find maximum likelihood estimates. The E-step in the EM algorithm is available in closed-form for some linear BDPs, but otherwise previous work has resorted to approximation or simulation. Remarkably, the E-step conditional expectations can also be expressed as convolutions of computable transition probabilities for any general BDP with arbitrary rates. This important observation, along with a convenient continued fraction representation of the Laplace transforms of the transition probabilities, allows novel and efficient computation of the conditional expectations for all BDPs, eliminating the need for approximation or costly simulation. We use this insight to derive EM algorithms that yield maximum likelihood estimation for general BDPs characterized by various rate models, including generalized linear models. We show that our Laplace convolution technique outperforms competing methods when available and demonstrate a technique to accelerate EM algorithm convergence. Finally, we validate our approach using synthetic data and then apply our methods to estimation of mutation parameters in microsatellite evolution."]]></description>
<dc:subject>to:NB statistics statistical_inference_for_stochastic_processes em_algorithm</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:29ce451c04f4/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistical_inference_for_stochastic_processes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1104.3590">
    <title>[1104.3590] An efficient and principled method for detecting communities in networks</title>
    <dc:date>2011-04-22T23:47:47+00:00</dc:date>
    <link>http://arxiv.org/abs/1104.3590</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>community_discovery em_algorithm kith_and_kin newman.mark to_read</dc:subject>
<dc:identifier>https://pinboard.in/u:cshalizi/b:45a152930cd0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:community_discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:newman.mark"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.springerlink.com/content/y70g818n051643g8/">
    <title>A stable estimator of the information matrix under EM for dependent data</title>
    <dc:date>2010-12-19T15:12:18+00:00</dc:date>
    <link>http://www.springerlink.com/content/y70g818n051643g8/</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This article develops a new and stable estimator for information matrix when the EM algorithm is used in maximum likelihood estimation. This estimator is constructed using the smoothed individual complete-data scores that are readily available from running the EM algorithm. The method works for dependent data sets and when the expectation step is an irregular function of the conditioning parameters."  (When I teach EM, I should say something about how to get uncertainty estimates...)
]]></description>
<dc:subject>fisher_information em_algorithm estimation statistics to_teach:data-mining to_teach:undergrad-ADA</dc:subject>
<dc:identifier>https://pinboard.in/u:cshalizi/b:796f192dea9d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:fisher_information"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:undergrad-ADA"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://projecteuclid.org/DPubS?service=UI&amp;version=1.0&amp;verb=Display&amp;handle=euclid.ssu/1272547280">
    <title>Melnykov, Maitra: Finite mixture models and model-based clustering</title>
    <dc:date>2010-04-30T02:10:23+00:00</dc:date>
    <link>http://projecteuclid.org/DPubS?service=UI&amp;version=1.0&amp;verb=Display&amp;handle=euclid.ssu/1272547280</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>clustering mixture_models statistics em_algorithm to_teach:data-mining to_teach:undergrad-ADA in_NB</dc:subject>
<dc:identifier>https://pinboard.in/u:cshalizi/b:39a5a28e16d8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:mixture_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:undergrad-ADA"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.jstatsoft.org/v32/i06/">
    <title>mixtools: An R Package for Analyzing Mixture Models</title>
    <dc:date>2009-10-24T23:25:29+00:00</dc:date>
    <link>http://www.jstatsoft.org/v32/i06/</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>R programming mixture_models statistics to_teach:data-mining have_read regression clustering em_algorithm to_teach:undergrad-ADA</dc:subject>
<dc:identifier>https://pinboard.in/u:cshalizi/b:ac17f59676f3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:R"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:mixture_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:undergrad-ADA"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/0910.2034">
    <title>[0910.2034] Strategies for Online Inference of Model-Based Clustering in large Networks</title>
    <dc:date>2009-10-21T01:56:02+00:00</dc:date>
    <link>http://arxiv.org/abs/0910.2034</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[Can't tell what they're actually doing (other than tweaking estimation procedures).  Read carefully.
]]></description>
<dc:subject>community_discovery network_data_analysis em_algorithm to_read re:stacs</dc:subject>
<dc:identifier>https://pinboard.in/u:cshalizi/b:0618d2a4da35/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:community_discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:stacs"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.mitpressjournals.org/doi/abs/10.1162/neco.2008.02-07-478">
    <title>Spike Train Decoding without Spike Sorting (Ventura)</title>
    <dc:date>2008-02-24T04:17:40+00:00</dc:date>
    <link>http://www.mitpressjournals.org/doi/abs/10.1162/neco.2008.02-07-478</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[Valerie's paper on neural decoding and tuning-curve estimation without spike-sorting. "[A] novel paradigm for spike train decoding, which avoids entirely spike sorting based on waveform measurements. This paradigm directly uses the spike train collected a
]]></description>
<dc:subject>em_algorithm neuroscience ventura.valerie have_read kith_and_kin neural_coding_and_decoding</dc:subject>
<dc:identifier>https://pinboard.in/u:cshalizi/b:b431bb056750/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:em_algorithm"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neuroscience"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:ventura.valerie"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neural_coding_and_decoding"/>
</rdf:Bag></taxo:topics>
</item>
</rdf:RDF>