<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (cshalizi)</title>
    <link>https://pinboard.in/u:cshalizi/public/</link>
    <description>recent bookmarks from cshalizi</description>
    <items>
      <rdf:Seq>	<rdf:li rdf:resource="https://arxiv.org/abs/2401.15800"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2309.10140"/>
	<rdf:li rdf:resource="https://projecteuclid.org/journals/annals-of-statistics/volume-49/issue-3/LASSO-driven-inference-in-time-and-space/10.1214/20-AOS2019.short"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1911.01850"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2101.01603"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2012.04171"/>
	<rdf:li rdf:resource="https://projecteuclid.org/euclid.aoas/1608346892"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2011.12154"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2002.06060"/>
	<rdf:li rdf:resource="https://link.springer.com/article/10.1007/s11222-019-09914-9"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1910.14212"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1910.12327"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1910.00174"/>
	<rdf:li rdf:resource="https://www.tandfonline.com/doi/full/10.1080/01621459.2019.1654878"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1612.08468"/>
	<rdf:li rdf:resource="https://ieeexplore.ieee.org/document/8700269"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1507.03133"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1907.07384"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1906.01990"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1811.00645"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1905.10573"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1801.03896"/>
	<rdf:li rdf:resource="http://onlinelibrary.wiley.com/doi/10.1111/jtsa.12221/abstract"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/0906.4391"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1507.05315"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1406.0052"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1404.2007"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1403.7063"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1403.7023"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1403.4296"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1403.4544"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1401.8097"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1312.1706"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1312.1473"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1312.5556"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1310.4887"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1309.2068"/>
	<rdf:li rdf:resource="http://normaldeviate.wordpress.com/2013/07/27/the-steep-price-of-sparsity/"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1306.6557"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1306.5505"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1304.5678"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1304.5245"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/0906.4391"/>
	<rdf:li rdf:resource="http://people.ee.duke.edu/~lcarin/OSCAR.pdf"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1208.2572"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1205.6843"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1206.2696"/>
	<rdf:li rdf:resource="http://www.tandfonline.com/doi/abs/10.1080/10618600.2012.679890"/>
	<rdf:li rdf:resource="http://onlinelibrary.wiley.com/doi/10.1111/j.1467-9868.2011.01034.x/abstract"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1206.4682"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1206.4680"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/0801.1158"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1205.6761"/>
	<rdf:li rdf:resource="http://jmlr.csail.mit.edu/papers/v13/song12a.html"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1206.1024"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/0805.1179"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1102.3616"/>
	<rdf:li rdf:resource="http://jmlr.csail.mit.edu/papers/v13/brown12a.html"/>
	<rdf:li rdf:resource="http://www.springerlink.com/content/582v131176130h06/"/>
	<rdf:li rdf:resource="http://onlinelibrary.wiley.com/doi/10.1111/j.1467-9868.2011.01005.x/abstract"/>
	<rdf:li rdf:resource="http://pubs.amstat.org/doi/abs/10.1198/jcgs.2011.09220"/>
	<rdf:li rdf:resource="http://www.springer.com/statistics/statistical+theory+and+methods/book/978-3-642-20191-2?cm_mmc=NBA-_-Jul-11_WEST_8259992-_-product-_-978-3-642-20191-2"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1106.5242"/>
	<rdf:li rdf:resource="http://projecteuclid.org/DPubS?service=UI&amp;version=1.0&amp;verb=Display&amp;handle=euclid.aoas/1300715197"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1009.2302"/>
	<rdf:li rdf:resource="http://pubs.amstat.org/doi/abs/10.1198/jcgs.2010.07139"/>
	<rdf:li rdf:resource="http://jmlr.csail.mit.edu/papers/v11/aliferis10b.html"/>
	<rdf:li rdf:resource="http://jmlr.csail.mit.edu/papers/v11/aliferis10a.html"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/0909.0844"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/0909.1308"/>
      </rdf:Seq>
    </items>
  </channel><item rdf:about="https://arxiv.org/abs/2401.15800">
    <title>[2401.15800] Statistical Significance of Feature Importance Rankings</title>
    <dc:date>2025-03-31T23:37:55+00:00</dc:date>
    <link>https://arxiv.org/abs/2401.15800</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Feature importance scores are ubiquitous tools for understanding the predictions of machine learning models. However, many popular attribution methods suffer from high instability due to random sampling. Leveraging novel ideas from hypothesis testing, we devise techniques that ensure the most important features are correct with high-probability guarantees. These assess the set of K top-ranked features, as well as the order of its elements. Given a set of local or global importance scores, we demonstrate how to retrospectively verify the stability of the highest ranks. We then introduce two efficient sampling algorithms that identify the K most important features, perhaps in order, with probability exceeding 1−α. The theoretical justification for these procedures is validated empirically on SHAP and LIME."]]></description>
<dc:subject>to:NB statistics variable_selection hooker.giles</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:a0e052646ae3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hooker.giles"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2309.10140">
    <title>[2309.10140] A Geometric Framework for Neural Feature Learning</title>
    <dc:date>2023-12-08T14:18:57+00:00</dc:date>
    <link>https://arxiv.org/abs/2309.10140</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We present a novel framework for learning system design based on neural feature extractors by exploiting geometric structures in feature spaces. First, we introduce the feature geometry, which unifies statistical dependence and features in the same functional space with geometric structures. By applying the feature geometry, we formulate each learning problem as solving the optimal feature approximation of the dependence component specified by the learning setting. We propose a nesting technique for designing learning algorithms to learn the optimal features from data samples, which can be applied to off-the-shelf network architectures and optimizers. To demonstrate the application of the nesting technique, we further discuss multivariate learning problems, including conditioned inference and multimodal learning, where we present the optimal features and reveal their connections to classical approaches."]]></description>
<dc:subject>to:NB information_geometry variable_selection neural_networks statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:47874c9b2e08/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:information_geometry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neural_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://projecteuclid.org/journals/annals-of-statistics/volume-49/issue-3/LASSO-driven-inference-in-time-and-space/10.1214/20-AOS2019.short">
    <title>LASSO-driven inference in time and space</title>
    <dc:date>2021-08-10T14:07:16+00:00</dc:date>
    <link>https://projecteuclid.org/journals/annals-of-statistics/volume-49/issue-3/LASSO-driven-inference-in-time-and-space/10.1214/20-AOS2019.short</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We consider the estimation and inference in a system of high-dimensional regression equations allowing for temporal and cross-sectional dependency in covariates and error processes, covering rather general forms of weak temporal dependence. A sequence of regressions with many regressors using LASSO (Least Absolute Shrinkage and Selection Operator) is applied for variable selection purpose, and an overall penalty level is carefully chosen by a block multiplier bootstrap procedure to account for multiplicity of the equations and dependencies in the data. Correspondingly, oracle properties with a jointly selected tuning parameter are derived. We further provide high-quality de-biased simultaneous inference on the many target parameters of the system. We provide bootstrap consistency results of the test procedure, which are based on a general Bahadur representation for the Z-estimators with dependent data. Simulations demonstrate good performance of the proposed inference procedure. Finally, we apply the method to quantify spillover effects of textual sentiment indices in a financial market and to test the connectedness among sectors."]]></description>
<dc:subject>to:NB lasso sparsity regression time_series spatial_statistics variable_selection statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:86f9e897007b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lasso"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sparsity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:spatial_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1911.01850">
    <title>[1911.01850] Stabilizing Variable Selection and Regression</title>
    <dc:date>2021-05-24T22:54:41+00:00</dc:date>
    <link>https://arxiv.org/abs/1911.01850</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We consider regression in which one predicts a response Y with a set of predictors X across different experiments or environments. This is a common setup in many data-driven scientific fields and we argue that statistical inference can benefit from an analysis that takes into account the distributional changes across environments. In particular, it is useful to distinguish between stable and unstable predictors, i.e., predictors which have a fixed or a changing functional dependence on the response, respectively. We introduce stabilized regression which explicitly enforces stability and thus improves generalization performance to previously unseen environments. Our work is motivated by an application in systems biology. Using multiomic data, we demonstrate how hypothesis generation about gene function can benefit from stabilized regression. We believe that a similar line of arguments for exploiting heterogeneity in data can be powerful for many other applications as well. We draw a theoretical connection between multi-environment regression and causal models, which allows to graphically characterize stable versus unstable functional dependence on the response. Formally, we introduce the notion of a stable blanket which is a subset of the predictors that lies between the direct causal predictors and the Markov blanket. We prove that this set is optimal in the sense that a regression based on these predictors minimizes the mean squared prediction error given that the resulting regression generalizes to unseen new environments."]]></description>
<dc:subject>to:NB variable_selection regression prediction statistics generalizability buhlmann.peter</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:92e2cf839bf8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:generalizability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:buhlmann.peter"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2101.01603">
    <title>[2101.01603] Comparing methods addressing multi-collinearity when developing prediction models</title>
    <dc:date>2021-01-06T16:51:35+00:00</dc:date>
    <link>https://arxiv.org/abs/2101.01603</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Clinical prediction models are developed widely across medical disciplines. When predictors in such models are highly collinear, unexpected or spurious predictor-outcome associations may occur, thereby potentially reducing face-validity and explainability of the prediction model. Collinearity can be dealt with by exclusion of collinear predictors, but when there is no a priori motivation (besides collinearity) to include or exclude specific predictors, such an approach is arbitrary and possibly inappropriate. We compare different methods to address collinearity, including shrinkage, dimensionality reduction, and constrained optimization. The effectiveness of these methods is illustrated via simulations. In the conducted simulations, no effect of collinearity was observed on predictive outcomes. However, a negative effect of collinearity on the stability of predictor selection was found, affecting all compared methods, but in particular methods that perform strong predictor selection (e.g., Lasso)."

--- _Of course_ collinearity doesn't matter for predictions!!!]]></description>
<dc:subject>to:NB linear_regression variable_selection re:TALR</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:91c2d6d885ca/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:linear_regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:TALR"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2012.04171">
    <title>[2012.04171] Sparse encoding for more-interpretable feature-selecting representations in probabilistic matrix factorization</title>
    <dc:date>2021-01-03T19:48:45+00:00</dc:date>
    <link>https://arxiv.org/abs/2012.04171</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Dimensionality reduction methods for count data are critical to a wide range of applications in medical informatics and other fields where model interpretability is paramount. For such data, hierarchical Poisson matrix factorization (HPF) and other sparse probabilistic non-negative matrix factorization (NMF) methods are considered to be interpretable generative models. They consist of sparse transformations for decoding their learned representations into predictions. However, sparsity in representation decoding does not necessarily imply sparsity in the encoding of representations from the original data features. HPF is often incorrectly interpreted in the literature as if it possesses encoder sparsity. The distinction between decoder sparsity and encoder sparsity is subtle but important. Due to the lack of encoder sparsity, HPF does not possess the column-clustering property of classical NMF -- the factor loading matrix does not sufficiently define how each factor is formed from the original features. We address this deficiency by self-consistently enforcing encoder sparsity, using a generalized additive model (GAM), thereby allowing one to relate each representation coordinate to a subset of the original data features. In doing so, the method also gains the ability to perform feature selection. We demonstrate our method on simulated data and give an example of how encoder sparsity is of practical use in a concrete application of representing inpatient comorbidities in Medicare patients."]]></description>
<dc:subject>to:NB variable_selection sparsity factor_analysis additive_models statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c88603e68cdb/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sparsity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:factor_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:additive_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://projecteuclid.org/euclid.aoas/1608346892">
    <title>Baker, Tang, Allen: Feature selection for data integration with mixed multiview data</title>
    <dc:date>2020-12-19T16:42:14+00:00</dc:date>
    <link>https://projecteuclid.org/euclid.aoas/1608346892</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Data integration methods that analyze multiple sources of data simultaneously can often provide more holistic insights than can separate inquiries of each data source. Motivated by the advantages of data integration in the era of “big data,” we investigate feature selection for high-dimensional multiview data with mixed data types (e.g., continuous, binary, count-valued). This heterogeneity of multiview data poses numerous challenges for existing feature selection methods. However, after critically examining these issues through empirical and theoretically-guided lenses, we develop a practical solution, the Block Randomized Adaptive Iterative Lasso (B-RAIL) which combines the strengths of the randomized Lasso, adaptive weighting schemes and stability selection. B-RAIL serves as a versatile data integration method for sparse regression and graph selection, and we demonstrate the effectiveness of B-RAIL through extensive simulations and a case study to infer the ovarian cancer gene regulatory network. In this case study, B-RAIL successfully identifies well-known biomarkers associated with ovarian cancer and hints at novel candidates for future ovarian cancer research."]]></description>
<dc:subject>to:NB variable_selection lasso sparsity statistics allen.genevera_i.</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c18ec82da65c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lasso"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sparsity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:allen.genevera_i."/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2011.12154">
    <title>[2011.12154] Identifying important predictors in large data bases -- multiple testing and model selection</title>
    <dc:date>2020-11-25T14:26:20+00:00</dc:date>
    <link>https://arxiv.org/abs/2011.12154</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This is a chapter of the forthcoming Handbook of Multiple Testing. We consider a variety of model selection strategies in a high-dimensional setting, where the number of potential predictors p is large compared to the number of available observations n. In particular modifications of information criteria which are suitable in case of p > n are introduced and compared with a variety of penalized likelihood methods, in particular SLOPE and SLOBE. The focus is on methods which control the FDR in terms of model identification. Theoretical results are provided both with respect to model identification and prediction and various simulation results are presented which illustrate the performance of the different methods in different situations."]]></description>
<dc:subject>to:NB multiple_testing variable_selection high-dimensional_statistics statistics regression</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:04b56518e578/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:multiple_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2002.06060">
    <title>[2002.06060] Causality in cognitive neuroscience: concepts, challenges, and distributional robustness</title>
    <dc:date>2020-08-20T15:50:02+00:00</dc:date>
    <link>https://arxiv.org/abs/2002.06060</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["While probabilistic models describe the dependence structure between observed variables, causal models go one step further: they predict, for example, how cognitive functions are affected by external interventions that perturb neuronal activity. In this review and perspective article, we introduce the concept of causality in the context of cognitive neuroscience and review existing methods for inferring causal relationships from data. Causal inference is an ambitious task that is particularly challenging in cognitive neuroscience. We discuss two difficulties in more detail: the scarcity of interventional data and the challenge of finding the right variables. We argue for distributional robustness as a guiding principle to tackle these problems. Robustness (or invariance) is a fundamental principle underlying causal methodology. A causal model of a target variable generalises across environments or subjects as long as these environments leave the causal mechanisms intact. Consequently, if a candidate model does not generalise, then either it does not consist of the target variable's causes or the underlying variables do not represent the correct granularity of the problem. In this sense, assessing generalisability may be useful when defining relevant variables and can be used to partially compensate for the lack of interventional data."]]></description>
<dc:subject>to:NB to_read causal_inference statistics variable_selection neuroscience peters.jonas methodological_advice</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:4b63620b847f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:causal_inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neuroscience"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:peters.jonas"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:methodological_advice"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://link.springer.com/article/10.1007/s11222-019-09914-9">
    <title>High-dimensional regression in practice: an empirical study of finite-sample prediction, variable selection and ranking | SpringerLink</title>
    <dc:date>2020-02-23T15:45:43+00:00</dc:date>
    <link>https://link.springer.com/article/10.1007/s11222-019-09914-9</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Penalized likelihood approaches are widely used for high-dimensional regression. Although many methods have been proposed and the associated theory is now well developed, the relative efficacy of different approaches in finite-sample settings, as encountered in practice, remains incompletely understood. There is therefore a need for empirical investigations in this area that can offer practical insight and guidance to users. In this paper, we present a large-scale comparison of penalized regression methods. We distinguish between three related goals: prediction, variable selection and variable ranking. Our results span more than 2300 data-generating scenarios, including both synthetic and semisynthetic data (real covariates and simulated responses), allowing us to systematically consider the influence of various factors (sample size, dimensionality, sparsity, signal strength and multicollinearity). We consider several widely used approaches (Lasso, Adaptive Lasso, Elastic Net, Ridge Regression, SCAD, the Dantzig Selector and Stability Selection). We find considerable variation in performance between methods. Our results support a “no panacea” view, with no unambiguous winner across all scenarios or goals, even in this restricted setting where all data align well with the assumptions underlying the methods. The study allows us to make some recommendations as to which approaches may be most (or least) suitable given the goal and some data characteristics. Our empirical results complement existing theory and provide a resource to compare methods across a range of scenarios and metrics."]]></description>
<dc:subject>to:NB regression prediction statistics high-dimensional_statistics lasso to_teach:linear_models re:TALR variable_selection</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:0959893a9ed3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lasso"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:linear_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:TALR"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1910.14212">
    <title>[1910.14212] Sobolev Independence Criterion</title>
    <dc:date>2019-11-11T20:03:54+00:00</dc:date>
    <link>https://arxiv.org/abs/1910.14212</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We propose the Sobolev Independence Criterion (SIC), an interpretable dependency measure between a high dimensional random variable X and a response variable Y. SIC decomposes to the sum of feature importance scores and hence can be used for nonlinear feature selection. SIC can be seen as a gradient regularized Integral Probability Metric (IPM) between the joint distribution of the two random variables and the product of their marginals. We use sparsity inducing gradient penalties to promote input sparsity of the critic of the IPM. In the kernel version we show that SIC can be cast as a convex optimization problem by introducing auxiliary variables that play an important role in feature selection as they are normalized feature importance scores. We then present a neural version of SIC where the critic is parameterized as a homogeneous neural network, improving its representation power as well as its interpretability. We conduct experiments validating SIC for feature selection in synthetic and real-world experiments. We show that SIC enables reliable and interpretable discoveries, when used in conjunction with the holdout randomization test and knockoffs to control the False Discovery Rate. Code is available at this http URL."]]></description>
<dc:subject>to:NB dependence_measures statistics variable_selection</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:62d8f23fed2e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dependence_measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1910.12327">
    <title>[1910.12327] A simple measure of conditional dependence</title>
    <dc:date>2019-10-29T02:24:47+00:00</dc:date>
    <link>https://arxiv.org/abs/1910.12327</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We propose a coefficient of conditional dependence between two random variables Y and Z given a set of other variables X1,…,Xp, based on an i.i.d. sample. The coefficient has a long list of desirable properties, the most important of which is that under absolutely no distributional assumptions, it converges to a limit in [0,1], where the limit is 0 if and only if Y and Z are conditionally independent given X1,…,Xp, and is 1 if and only if Y is equal to a measurable function of Z given X1,…,Xp. Using this statistic, we devise a new variable selection algorithm, called Feature Ordering by Conditional Independence (FOCI), which is model-free, has no tuning parameters, and is provably consistent under sparsity assumptions. A number of applications to synthetic and real datasets are worked out."]]></description>
<dc:subject>to:NB dependence_measures statistics variable_selection</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:aa7ecdf18065/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dependence_measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1910.00174">
    <title>[1910.00174] Randomized Ablation Feature Importance</title>
    <dc:date>2019-10-02T15:21:03+00:00</dc:date>
    <link>https://arxiv.org/abs/1910.00174</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Given a model f that predicts a target y from a vector of input features xx=x1,x2,…,xM, we seek to measure the importance of each feature with respect to the model's ability to make a good prediction. To this end, we consider how (on average) some measure of goodness or badness of prediction (which we term ``loss'' ℓ), changes when we hide or \emph{ablate} each feature from the model. To ablate a feature, we replace its value with another possible value randomly. By averaging over many points and many possible replacements, we measure the importance of a feature on the model's ability to make good predictions. Furthermore, we present statistical measures of uncertainty that quantify how confident we are that the feature importance we measure from our finite dataset and finite number of ablations is close to the theoretical true importance value."]]></description>
<dc:subject>to:NB prediction variable_selection statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:2e63850756a8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.tandfonline.com/doi/full/10.1080/01621459.2019.1654878">
    <title>IPAD: Stable Interpretable Forecasting with Knockoffs Inference: Journal of the American Statistical Association: Vol 0, No 0</title>
    <dc:date>2019-09-18T12:36:47+00:00</dc:date>
    <link>https://www.tandfonline.com/doi/full/10.1080/01621459.2019.1654878</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Interpretability and stability are two important features that are desired in many contemporary big data applications arising in statistics, economics, and finance. While the former is enjoyed to some extent by many existing forecasting approaches, the latter in the sense of controlling the fraction of wrongly discovered features which can enhance greatly the interpretability is still largely underdeveloped. To this end, in this article, we exploit the general framework of model-X knockoffs introduced recently in Candès, Fan, Janson and Lv [(2018Candès, E. J., Fan, Y., Janson, L., and Lv, J. (2018), “Panning for Gold: ‘Model X’ Knockoffs for High Dimensional Controlled Variable Selection,” Journal of the Royal Statistical Society, Series B, 80, 551–577. DOI:10.1111/rssb.12265. [Crossref] , [Google Scholar]), “Panning for Gold: ‘model X’ Knockoffs for High Dimensional Controlled Variable Selection,” Journal of the Royal Statistical Society, Series B, 80, 551–577], which is nonconventional for reproducible large-scale inference in that the framework is completely free of the use of p-values for significance testing, and suggest a new method of intertwined probabilistic factors decoupling (IPAD) for stable interpretable forecasting with knockoffs inference in high-dimensional models. The recipe of the method is constructing the knockoff variables by assuming a latent factor model that is exploited widely in economics and finance for the association structure of covariates. Our method and work are distinct from the existing literature in which we estimate the covariate distribution from data instead of assuming that it is known when constructing the knockoff variables, our procedure does not require any sample splitting, we provide theoretical justifications on the asymptotic false discovery rate control, and the theory for the power analysis is also established. 
Several simulation examples and the real data analysis further demonstrate that the newly suggested method has appealing finite-sample performance with desired interpretability and stability compared to some popularly used forecasting methods."]]></description>
<dc:subject>to:NB statistics regression high-dimensional_statistics factor_analysis variable_selection</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c602c00b2afc/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:factor_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1612.08468">
    <title>[1612.08468] Visualizing the Effects of Predictor Variables in Black Box Supervised Learning Models</title>
    <dc:date>2019-08-21T13:12:57+00:00</dc:date>
    <link>https://arxiv.org/abs/1612.08468</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["When fitting black box supervised learning models (e.g., complex trees, neural networks, boosted trees, random forests, nearest neighbors, local kernel-weighted methods, etc.), visualizing the main effects of the individual predictor variables and their low-order interaction effects is often important, and partial dependence (PD) plots are the most popular approach for accomplishing this. However, PD plots involve a serious pitfall if the predictor variables are far from independent, which is quite common with large observational data sets. Namely, PD plots require extrapolation of the response at predictor values that are far outside the multivariate envelope of the training data, which can render the PD plots unreliable. Although marginal plots (M plots) do not require such extrapolation, they produce substantially biased and misleading results when the predictors are dependent, analogous to the omitted variable bias in regression. We present a new visualization approach that we term accumulated local effects (ALE) plots, which inherits the desirable characteristics of PD and M plots, without inheriting their preceding shortcomings. Like M plots, ALE plots do not require extrapolation; and like PD plots, they are not biased by the omitted variable phenomenon. Moreover, ALE plots are far less computationally expensive than PD plots."]]></description>
<dc:subject>to:NB variable_selection visual_display_of_quantitative_information statistics regression</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:9eb5cd7d3c48/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:visual_display_of_quantitative_information"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://ieeexplore.ieee.org/document/8700269">
    <title>High-Dimensional Adaptive Minimax Sparse Estimation With Interactions - IEEE Journals &amp; Magazine</title>
    <dc:date>2019-08-20T15:51:54+00:00</dc:date>
    <link>https://ieeexplore.ieee.org/document/8700269</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["High-dimensional linear regression with interaction effects is broadly applied in research fields such as bioinformatics and social science. In this paper, first, we investigate the minimax rate of convergence for regression estimation in high-dimensional sparse linear models with two-way interactions. Here, we derive matching upper and lower bounds under three types of heredity conditions: strong heredity, weak heredity, and no heredity. From the results: 1) A stronger heredity condition may or may not drastically improve the minimax rate of convergence. In fact, in some situations, the minimax rates of convergence are the same under all three heredity conditions; 2) The minimax rate of convergence is determined by the maximum of the total price of estimating the main effects and that of estimating the interaction effects, which goes beyond purely comparing the order of the number of non-zero main effects r1 and non-zero interaction effects r2 ; and 3) Under any of the three heredity conditions, the estimation of the interaction terms may be the dominant part in determining the rate of convergence. This is due to either the dominant number of interaction effects over main effects or the higher interaction estimation price induced by a large ambient dimension. Second, we construct an adaptive estimator that achieves the minimax rate of convergence regardless of the true heredity condition and the sparsity indices r1,r2 ."]]></description>
<dc:subject>to:NB statistics high-dimensional_statistics regression sparsity variable_selection linear_regression</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:e3bf750aafd7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sparsity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:linear_regression"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1507.03133">
    <title>[1507.03133] Best Subset Selection via a Modern Optimization Lens</title>
    <dc:date>2019-08-20T14:48:29+00:00</dc:date>
    <link>https://arxiv.org/abs/1507.03133</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In the last twenty-five years (1990-2014), algorithmic advances in integer optimization combined with hardware improvements have resulted in an astonishing 200 billion factor speedup in solving Mixed Integer Optimization (MIO) problems. We present a MIO approach for solving the classical best subset selection problem of choosing k out of p features in linear regression given n observations. We develop a discrete extension of modern first order continuous optimization methods to find high quality feasible solutions that we use as warm starts to a MIO solver that finds provably optimal solutions. The resulting algorithm (a) provides a solution with a guarantee on its suboptimality even if we terminate the algorithm early, (b) can accommodate side constraints on the coefficients of the linear regression and (c) extends to finding best subset solutions for the least absolute deviation loss function. Using a wide variety of synthetic and real datasets, we demonstrate that our approach solves problems with n in the 1000s and p in the 100s in minutes to provable optimality, and finds near optimal solutions for n in the 100s and p in the 1000s in minutes. We also establish via numerical experiments that the MIO approach performs better than {\texttt {Lasso}} and other popularly used sparse learning procedures, in terms of achieving sparse solutions with good predictive power."]]></description>
<dc:subject>to:NB optimization variable_selection statistics via:tslumley</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:cd5299ec8dca/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:optimization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:tslumley"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1907.07384">
    <title>[1907.07384] Feature Selection via Mutual Information: New Theoretical Insights</title>
    <dc:date>2019-07-18T23:27:29+00:00</dc:date>
    <link>https://arxiv.org/abs/1907.07384</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Mutual information has been successfully adopted in filter feature-selection methods to assess both the relevancy of a subset of features in predicting the target variable and the redundancy with respect to other variables. However, existing algorithms are mostly heuristic and do not offer any guarantee on the proposed solution. In this paper, we provide novel theoretical results showing that conditional mutual information naturally arises when bounding the ideal regression/classification errors achieved by different subsets of features. Leveraging on these insights, we propose a novel stopping condition for backward and forward greedy methods which ensures that the ideal prediction error using the selected feature subset remains bounded by a user-specified threshold. We provide numerical simulations to support our theoretical claims and compare to common heuristic methods."]]></description>
<dc:subject>variable_selection information_theory statistics to_teach:data-mining in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:1e47f8cec091/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:information_theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1906.01990">
    <title>[1906.01990] A Model-free Approach to Linear Least Squares Regression with Exact Probabilities and Applications to Covariate Selection</title>
    <dc:date>2019-06-06T13:45:37+00:00</dc:date>
    <link>https://arxiv.org/abs/1906.01990</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The classical model for linear regression is ${\mathbold Y}={\mathbold x}{\mathbold \beta} +\sigma{\mathbold \varepsilon}$ with i.i.d. standard Gaussian errors. Much of the resulting statistical inference is based on Fisher's F-distribution. In this paper we give two approaches to least squares regression which are model free. The results hold forall data $({\mathbold y},{\mathbold x})$. The derived probabilities are not only exact, they agree with those using the F-distribution based on the classical model. This is achieved by replacing questions about the size of βj, for example βj=0, by questions about the degree to which the covariate ${\mathbold x}_j$ is better than Gaussian white noise or, alternatively, a random orthogonal rotation of ${\mathbold x}_j$. The idea can be extended to choice of covariates, post selection inference PoSI, step-wise choice of covariates, the determination of dependency graphs and to robust regression and non-linear regression. In the latter two cases the probabilities are no longer exact but are based on the chi-squared distribution. The step-wise choice of covariates is of particular interest: it is a very simple, very fast, very powerful, it controls the number of false positives and does not over fit even in the case where the number of covariates far exceeds the sample size"]]></description>
<dc:subject>linear_regression regression statistics variable_selection in_NB color_me_skeptical</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:bb93af1c2633/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:linear_regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:color_me_skeptical"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1811.00645">
    <title>[1811.00645] The Holdout Randomization Test: Principled and Easy Black Box Feature Selection</title>
    <dc:date>2019-05-30T16:04:56+00:00</dc:date>
    <link>https://arxiv.org/abs/1811.00645</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We consider the problem of feature selection using black box predictive models. For example, high-throughput devices in science are routinely used to gather thousands of features for each sample in an experiment. The scientist must then sift through the many candidate features to find explanatory signals in the data, such as which genes are associated with sensitivity to a prospective therapy. Often, predictive models are used for this task: the model is fit, error on held out data is measured, and strong performing models are assumed to have discovered some fundamental properties of the system. A model-specific heuristic is then used to inspect the model parameters and rank important features, with top features reported as "discoveries." However, such heuristics provide no statistical guarantees and can produce unreliable results. We propose the holdout randomization test (HRT) as a principled approach to feature selection using black box predictive models. The HRT is model agnostic and produces a valid p-value for each feature, enabling control over the false discovery rate (or Type I error) for any predictive model. Further, the HRT is computationally efficient and, in simulations, has greater power than a competing knockoffs-based approach."]]></description>
<dc:subject>cross-validation variable_selection statistics blei.david have_read in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:e4ed13c6dd3c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:cross-validation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:blei.david"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1905.10573">
    <title>[1905.10573] Selective inference after variable selection via multiscale bootstrap</title>
    <dc:date>2019-05-28T16:46:26+00:00</dc:date>
    <link>https://arxiv.org/abs/1905.10573</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["A general resampling approach is considered for selective inference problem after variable selection in regression analysis. Even after variable selection, it is important to know whether the selected variables are actually useful by showing p-values and confidence intervals of regression coefficients. In the classical approach, significance levels for the selected variables are usually computed by t-test but they are subject to selection bias. In order to adjust the bias in this post-selection inference, most existing studies of selective inference consider the specific variable selection algorithm such as Lasso for which the selection event can be explicitly represented as a simple region in the space of the response variable. Thus, the existing approach cannot handle more complicated algorithm such as MCP (minimax concave penalty). Moreover, most existing approaches set an event, that a specific model is selected, as the selection event. This selection event is too restrictive and may reduce the statistical power, because the hypothesis selection with a specific variable only depends on whether the variable is selected or not. In this study, we consider more appropriate selection event such that the variable is selected, and propose a new bootstrap method to compute an approximately unbiased selective p-value for the selected variable. Our method is applicable to a wide class of variable selection algorithms. In addition, the computational cost of our method is the same order as the classical bootstrap method. Through the numerical experiments, we show the usefulness of our selective inference approach."

--- As always, why not just use data-splitting?  (They may have an answer.)]]></description>
<dc:subject>variable_selection post-selection_inference statistics in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:801500782dac/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:post-selection_inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1801.03896">
    <title>[1801.03896] Robust inference with knockoffs</title>
    <dc:date>2018-09-13T16:38:52+00:00</dc:date>
    <link>https://arxiv.org/abs/1801.03896</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We consider the variable selection problem, which seeks to identify important variables influencing a response Y out of many candidate features X1,…,Xp. We wish to do so while offering finite-sample guarantees about the fraction of false positives - selected variables Xj that in fact have no effect on Y after the other features are known. When the number of features p is large (perhaps even larger than the sample size n), and we have no prior knowledge regarding the type of dependence between Y and X, the model-X knockoffs framework nonetheless allows us to select a model with a guaranteed bound on the false discovery rate, as long as the distribution of the feature vector X=(X1,…,Xp) is exactly known. This model selection procedure operates by constructing "knockoff copies'" of each of the p features, which are then used as a control group to ensure that the model selection algorithm is not choosing too many irrelevant features. In this work, we study the practical setting where the distribution of X could only be estimated, rather than known exactly, and the knockoff copies of the Xj's are therefore constructed somewhat incorrectly. Our results, which are free of any modeling assumption whatsoever, show that the resulting model selection procedure incurs an inflation of the false discovery rate that is proportional to our errors in estimating the distribution of each feature Xj conditional on the remaining features {Xk:k≠j}. The model-X knockoff framework is therefore robust to errors in the underlying assumptions on the distribution of X, making it an effective method for many practical applications, such as genome-wide association studies, where the underlying distribution on the features X1,…,Xp is estimated accurately but not known exactly."]]></description>
<dc:subject>regression variable_selection statistics samworth.richard_j. knockoffs to_teach:linear_models in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:888d5db086e8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:samworth.richard_j."/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:knockoffs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:linear_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://onlinelibrary.wiley.com/doi/10.1111/jtsa.12221/abstract">
    <title>Oracle M-Estimation for Time Series Models - Giurcanu - 2016 - Journal of Time Series Analysis - Wiley Online Library</title>
    <dc:date>2017-04-04T13:16:54+00:00</dc:date>
    <link>http://onlinelibrary.wiley.com/doi/10.1111/jtsa.12221/abstract</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We propose a thresholding M-estimator for multivariate time series. Our proposed estimator has the oracle property that its large-sample properties are the same as of the classical M-estimator obtained under the a priori information that the zero parameters were known. We study the consistency of the standard block bootstrap, the centred block bootstrap and the empirical likelihood block bootstrap distributions of the proposed M-estimator. We develop automatic selection procedures for the thresholding parameter and for the block length of the bootstrap methods. We present the results of a simulation study of the proposed methods for a sparse vector autoregressive VAR(2) time series model. The analysis of two real-world data sets illustrate applications of the methods in practice."]]></description>
<dc:subject>bootstrap time_series statistics estimation in_NB sparsity variable_selection high-dimensional_statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:7728d02c1d9a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bootstrap"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sparsity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/0906.4391">
    <title>[0906.4391] KNIFE: Kernel Iterative Feature Extraction</title>
    <dc:date>2016-11-30T02:04:49+00:00</dc:date>
    <link>https://arxiv.org/abs/0906.4391</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Selecting important features in non-linear or kernel spaces is a difficult challenge in both classification and regression problems. When many of the features are irrelevant, kernel methods such as the support vector machine and kernel ridge regression can sometimes perform poorly. We propose weighting the features within a kernel with a sparse set of weights that are estimated in conjunction with the original classification or regression problem. The iterative algorithm, KNIFE, alternates between finding the coefficients of the original problem and finding the feature weights through kernel linearization. In addition, a slight modification of KNIFE yields an efficient algorithm for finding feature regularization paths, or the paths of each feature's weight. Simulation results demonstrate the utility of KNIFE for both kernel regression and support vector machines with a variety of kernels. Feature path realizations also reveal important non-linear correlations among features that prove useful in determining a subset of significant variables. Results on vowel recognition data, Parkinson's disease data, and microarray data are also given."]]></description>
<dc:subject>statistics regression variable_selection data_mining to_teach:data-mining kernel_methods in_NB heard_the_talk</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:061ce2697602/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:data_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kernel_methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:heard_the_talk"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1507.05315">
    <title>[1507.05315] Confidence Sets Based on the Lasso Estimator</title>
    <dc:date>2015-08-05T15:13:50+00:00</dc:date>
    <link>http://arxiv.org/abs/1507.05315</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In a linear regression model with fixed dimension, we construct confidence sets for the unknown parameter vector based on the Lasso estimator in finite samples as well as in an asymptotic setup, thereby quantifying estimation uncertainty of this estimator. In finite samples with Gaussian errors and asymptotically in the case where the Lasso estimator is tuned to perform conservative model-selection, we derive formulas for computing the minimal coverage probability over the entire parameter space for a large class of shapes for the confidence sets, thus enabling the construction of valid confidence sets based on the Lasso estimator in these settings. The choice of shape for the confidence sets and comparison with the confidence ellipse based on the least-squares estimator is also discussed. Moreover, in the case where the Lasso estimator is tuned to enable consistent model-selection, we give a simple confidence set with minimal coverage probability converging to one."]]></description>
<dc:subject>lasso regression confidence_sets model_selection variable_selection statistics in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:800bb38e54a8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lasso"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:confidence_sets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:model_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1406.0052">
    <title>[1406.0052] Variable selection in high-dimensional additive models based on norms of projections</title>
    <dc:date>2014-07-12T00:22:46+00:00</dc:date>
    <link>http://arxiv.org/abs/1406.0052</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We consider the problem of variable selection in high-dimensional sparse additive models. The proposed method is motivated by geometric considerations in Hilbert spaces, and consists in comparing the norms of the projections of the data on various additive subspaces. Our main results are concentration inequalities which lead to conditions making variable selection possible. In special cases these conditions are known to be optimal. As an application we consider the problem of estimating single components. We show that, up to first order, one can estimate a single component as well as if the other components were known."]]></description>
<dc:subject>additive_models variable_selection hilbert_space statistics in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:5e6c4de3a03e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:additive_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hilbert_space"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1404.2007">
    <title>[1404.2007] A Permutation Approach for Selecting the Penalty Parameter in Penalized Model Selection</title>
    <dc:date>2014-04-20T18:12:43+00:00</dc:date>
    <link>http://arxiv.org/abs/1404.2007</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We describe a simple, efficient, permutation based procedure for selecting the penalty parameter in the LASSO. The procedure, which is intended for applications where variable selection is the primary focus, can be applied in a variety of structural settings, including generalized linear models. We briefly discuss connections between permutation selection and existing theory for the LASSO. In addition, we present a simulation study and an analysis of three real data sets in which permutation selection is compared with cross-validation (CV), the Bayesian information criterion (BIC), and a selection method based on recently developed testing procedures for the LASSO."]]></description>
<dc:subject>variable_selection model_selection lasso high-dimensional_statistics nobel.andrew statistics in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:f3a8dfd23a3c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:model_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lasso"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:nobel.andrew"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1403.7063">
    <title>[1403.7063] A Significance Test for Covariates in Nonparametric Regression</title>
    <dc:date>2014-04-03T18:29:39+00:00</dc:date>
    <link>http://arxiv.org/abs/1403.7063</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We consider testing the significance of a subset of covariates in a nonparametric regression. These covariates can be continuous and/or discrete. We propose a new kernel-based test that smoothes only over the covariates appearing under the null hypothesis, so that the curse of dimensionality is mitigated. The test statistic is asymptotically pivotal and the rate of which the test detects local alternatives depends only on the dimension of the covariates under the null hypothesis. We show the validity of wild bootstrap for the test. In small samples, our test is competitive compared to existing procedures."]]></description>
<dc:subject>variable_selection hypothesis_testing statistics nonparametrics regression to_teach:undergrad-ADA in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:d2e07646c1a5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:nonparametrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:undergrad-ADA"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1403.7023">
    <title>[1403.7023] Worst possible sub-directions in high-dimensional models</title>
    <dc:date>2014-04-01T21:19:01+00:00</dc:date>
    <link>http://arxiv.org/abs/1403.7023</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We examine the rate of convergence of the Lasso estimator of lower dimensional components of the high-dimensional parameter. Under bounds on the ℓ1-norm on the worst possible sub-direction these rates are of order $\sqrt{|J| \log p / n}$ where p is the total number of parameters, J⊂{1,…,p} represents a subset of the parameters and n is the number of observations. We also derive rates in sup-norm in terms of the rate of convergence in ℓ1-norm. The irrepresentable condition on a set J requires that the ℓ1-norm of the worst possible sub-direction is sufficiently smaller than one. In that case sharp oracle results can be obtained. Moreover, if the coefficients in J are small enough the Lasso will put these coefficients to zero. This extends known results which say that the irrepresentable condition on the inactive set (the set where coefficients are exactly zero) implies no false positives. We further show that by de-sparsifying one obtains fast rates in supremum norm without conditions on the worst possible sub-direction. The main assumption here is that approximate sparsity is of order $o(\sqrt{n} / \log p)$. The results are extended to M-estimation with ℓ1-penalty for generalized linear models and exponential families for example. For the graphical Lasso this leads to an extension of known results to the case where the precision matrix is only approximately sparse. The bounds we provide are non-asymptotic but we also present asymptotic formulations for ease of interpretation."]]></description>
<dc:subject>to:NB high-dimensional_statistics lasso sparsity variable_selection statistics van_de_geer.sara</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:d88ae59edb90/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lasso"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sparsity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:van_de_geer.sara"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1403.4296">
    <title>[1403.4296] Inference for feature selection using the Lasso with high-dimensional data</title>
    <dc:date>2014-03-22T19:27:24+00:00</dc:date>
    <link>http://arxiv.org/abs/1403.4296</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Penalized regression models such as the Lasso have proved useful for variable selection in many fields - especially for situations with high-dimensional data where the numbers of predictors far exceeds the number of observations. These methods identify and rank variables of importance but do not generally provide any inference of the selected variables. Thus, the variables selected might be the "most important" but need not be significant. We propose a significance test for the selection found by the Lasso. We introduce a procedure that computes inference and p-values for features chosen by the Lasso. This method rephrases the null hypothesis and uses a randomization approach which ensures that the error rate is controlled even for small samples. We demonstrate the ability of the algorithm to compute p-values of the expected magnitude with simulated data using a multitude of scenarios that involve various effects strengths and correlation between predictors. The algorithm is also applied to a prostate cancer dataset that has been analyzed in recent papers on the subject. The proposed method is found to provide a powerful way to make inference for feature selection even for small samples and when the number of predictors are several orders of magnitude larger than the number of observations. The algorithm is implemented in the MESS package in R and is freely available."]]></description>
<dc:subject>lasso regression variable_selection re:what_is_the_right_null_model_for_linear_regression high-dimensional_statistics in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:5559a8fb3f09/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lasso"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:what_is_the_right_null_model_for_linear_regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1403.4544">
    <title>[1403.4544] On the Sensitivity of the Lasso to the Number of Predictor Variables</title>
    <dc:date>2014-03-21T15:52:20+00:00</dc:date>
    <link>http://arxiv.org/abs/1403.4544</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The Lasso is a computationally efficient procedure that can produce sparse estimators when the number of predictors (p) is large. Oracle inequalities provide probability loss bounds for the Lasso estimator at a deterministic choice of the regularization parameter. These bounds tend to zero if p is appropriately controlled, and are thus commonly cited as theoretical justification for the Lasso and its ability to handle high-dimensional settings. Unfortunately, in practice the regularization parameter is not selected to be a deterministic quantity, but is instead chosen using a random, data-dependent procedure. To address this shortcoming of previous theoretical work, we study the loss of the Lasso estimator when tuned optimally for prediction. Assuming orthonormal predictors and a sparse true model, we prove that the probability that the best possible predictive performance of the Lasso deteriorates as p increases can be arbitrarily close to one given a sufficiently high signal to noise ratio and sufficiently large p. We further demonstrate empirically that the deterioration in performance can be far worse than is commonly suggested in the literature and provide a real data example where deterioration is observed."]]></description>
<dc:subject>lasso regression variable_selection high-dimensional_statistics cross-validation statistics in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c0b013c52c01/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lasso"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:cross-validation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1401.8097">
    <title>[1401.8097] An Algorithm for Nonlinear, Nonparametric Model Choice and Prediction</title>
    <dc:date>2014-02-03T20:29:28+00:00</dc:date>
    <link>http://arxiv.org/abs/1401.8097</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We introduce an algorithm which, in the context of nonlinear regression on vector-valued explanatory variables, chooses those combinations of vector components that provide best prediction. The algorithm devotes particular attention to components that might be of relatively little predictive value by themselves, and so might be ignored by more conventional methodology for model choice, but which, in combination with other difficult-to-find components, can be particularly beneficial for prediction. Additionally the algorithm avoids choosing vector components that become redundant once appropriate combinations of other, more relevant components are selected. It is suitable for very high dimensional problems, where it keeps computational labour in check by using a novel sequential argument, and also for more conventional prediction problems, where dimension is relatively low. We explore properties of the algorithm using both theoretical and numerical arguments."]]></description>
<dc:subject>model_selection regression nonparametrics variable_selection statistics hall.peter in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:cd1695d5b681/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:model_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:nonparametrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hall.peter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1312.1706">
    <title>[1312.1706] Swapping Variables for High-Dimensional Sparse Regression from Correlated Measurements</title>
    <dc:date>2014-01-02T18:27:15+00:00</dc:date>
    <link>http://arxiv.org/abs/1312.1706</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We consider the high-dimensional sparse linear regression problem of accurately estimating a sparse vector using a small number of linear measurements that are contaminated by noise. It is well known that standard computationally tractable sparse regression algorithms, such as the Lasso, OMP, and their various extensions, perform poorly when the measurement matrix contains highly correlated columns. We develop a simple greedy algorithm, called SWAP, that iteratively swaps variables until a desired loss function cannot be decreased any further. SWAP is surprisingly effective in handling measurement matrices with high correlations. In particular, we prove that (i) SWAP outputs the true support, the location of the non-zero entries in the sparse vector, when initialized with the true support, and (ii) SWAP outputs the true support under a relatively mild condition on the measurement matrix when initialized with a support other than the true support. These theoretical results motivate the use of SWAP as a wrapper around various sparse regression algorithms for improved performance. We empirically show the advantages of using SWAP in sparse regression problems by comparing SWAP to several state-of-the-art sparse regression algorithms."]]></description>
<dc:subject>to:NB high-dimensional_statistics lasso sparsity variable_selection statistics vats.divyanshu</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c1912ea2ab6c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lasso"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sparsity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:vats.divyanshu"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1312.1473">
    <title>[1312.1473] Oracle Properties and Finite Sample Inference of the Adaptive Lasso for Time Series Regression Models</title>
    <dc:date>2013-12-26T00:33:14+00:00</dc:date>
    <link>http://arxiv.org/abs/1312.1473</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We derive new theoretical results on the properties of the adaptive least absolute shrinkage and selection operator (adaptive lasso) for time series regression models. In particular, we investigate the question of how to conduct finite sample inference on the parameters given an adaptive lasso model for some fixed value of the shrinkage parameter. Central in this study is the test of the hypothesis that a given adaptive lasso parameter equals zero, which therefore tests for a false positive. To this end we construct a simple testing procedure and show, theoretically and empirically through extensive Monte Carlo simulations, that the adaptive lasso combines efficient parameter estimation, variable selection, and valid finite sample inference in one step. Moreover, we analytically derive a bias correction factor that is able to significantly improve the empirical coverage of the test on the active variables. Finally, we apply the introduced testing procedure to investigate the relation between the short rate dynamics and the economy, thereby providing a statistical foundation (from a model choice perspective) to the classic Taylor rule monetary policy model."]]></description>
<dc:subject>lasso time_series variable_selection statistics re:your_favorite_dsge_sucks in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:7fcc3eb7d15b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lasso"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:your_favorite_dsge_sucks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1312.5556">
    <title>[1312.5556] Hierarchical Testing in the High-Dimensional Setting with Correlated Variables</title>
    <dc:date>2013-12-23T16:31:50+00:00</dc:date>
    <link>http://arxiv.org/abs/1312.5556</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We propose a method for testing whether hierarchically ordered groups of potentially correlated variables are significant for explaining a response in a high-dimensional linear model. In presence of highly correlated variables, as is very common in high-dimensional data, it seems indispensable to go beyond an approach of inferring individual regression coefficients. Thanks to the hierarchy among the groups of variables, powerful multiple testing adjustment is possible which leads to a data-driven choice of the resolution level for the groups. Our procedure, based on repeated sample splitting, is shown to asymptotically control the familywise error rate and we provide empirical results for simulated and real data which complement the theoretical analysis."]]></description>
<dc:subject>to:NB to_read high-dimensional_statistics variable_selection buhlmann.peter hierarchical_statistical_models hierarchical_structure</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:332c500f5bc7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:buhlmann.peter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hierarchical_statistical_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hierarchical_structure"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1310.4887">
    <title>[1310.4887] Variable Selection Inference for Bayesian Additive Regression Trees</title>
    <dc:date>2013-10-23T14:26:06+00:00</dc:date>
    <link>http://arxiv.org/abs/1310.4887</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The variable selection problem is especially challenging in high dimensional data, where it is difficult to detect subtle individual effects and interactions between factors. Bayesian additive regression trees (BART, Chipman et al., 2010) provides a novel nonparametric exploratory alternative to parametric regression approaches, such as the lasso or stepwise regression, especially when the number of relevant predictors is sparse relative to the total number of available predictors and the fundamental relationships are nonlinear. To move from the exploratory to the confirmatory, we here provide a principled permutation-based inferential approach for determining when the effect of a selected predictor is likely to be real. Going further, we adapt the BART procedure to incorporate informed prior information about variable importance. We present simulations demonstrating that our method compares favorably with lasso regression and random forests adapted for variable selection in a variety of data settings. To demonstrate the potential of our approach, we apply it to the task of inferring the gene regulatory network in yeast (Saccharomyces cerevisiae). In this application, our BART-based procedure is best able to recover the subset of covariates with the largest signal compared to other variable selection methods."]]></description>
<dc:subject>statistics high-dimensional_statistics variable_selection regression kith_and_kin jensen.shane george.ed in_NB gene_expression_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:cb14a0bcfae3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:jensen.shane"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:george.ed"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:gene_expression_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1309.2068">
    <title>[1309.2068] Modified Cross-Validation for Penalized High-Dimensional Linear Regression Models</title>
    <dc:date>2013-09-10T18:24:51+00:00</dc:date>
    <link>http://arxiv.org/abs/1309.2068</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In this paper, for Lasso penalized linear regression models in high-dimensional settings, we propose a modified cross-validation method for selecting the penalty parameter. The methodology is extended to other penalties, such as Elastic Net. We conduct extensive simulation studies and real data analysis to compare the performance of the modified cross-validation method with other methods. It is shown that the popular $K$-fold cross-validation method includes many noise variables in the selected model, while the modified cross-validation works well in a wide range of coefficient and correlation settings. Supplemental materials containing the computer code are available online."]]></description>
<dc:subject>cross-validation lasso regression statistics high-dimensional_statistics variable_selection in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:809b6dd19675/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:cross-validation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lasso"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://normaldeviate.wordpress.com/2013/07/27/the-steep-price-of-sparsity/">
    <title>The Steep Price of Sparsity « Normal Deviate</title>
    <dc:date>2013-07-29T19:52:06+00:00</dc:date>
    <link>http://normaldeviate.wordpress.com/2013/07/27/the-steep-price-of-sparsity/</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>model_selection track_down_references statistics sparsity variable_selection</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:1185172426ca/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:model_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:track_down_references"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sparsity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1306.6557">
    <title>[1306.6557] Optimal Feature Selection in High-Dimensional Discriminant Analysis</title>
    <dc:date>2013-06-30T03:40:07+00:00</dc:date>
    <link>http://arxiv.org/abs/1306.6557</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We consider the high-dimensional discriminant analysis problem. For this problem, different methods have been proposed and justified by establishing exact convergence rates for the classification risk, as well as the l2 convergence results to the discriminative rule. However, sharp theoretical analysis for the variable selection performance of these procedures have not been established, even though model interpretation is of fundamental importance in scientific data analysis. This paper bridges the gap by providing sharp sufficient conditions for consistent variable selection using the sparse discriminant analysis (Mai et al., 2012). Through careful analysis, we establish rates of convergence that are significantly faster than the best known results and admit an optimal scaling of the sample size n, dimensionality p, and sparsity level s in the high-dimensional setting. Sufficient conditions are complemented by the necessary information theoretic limits on the variable selection problem in the context of high-dimensional discriminant analysis. Exploiting a numerical equivalence result, our method also establish the optimal results for the ROAD estimator (Fan et al., 2012) and the sparse optimal scaling estimator (Clemmensen et al., 2011). Furthermore, we analyze an exhaustive search procedure, whose performance serves as a benchmark, and show that it is variable selection consistent under weaker conditions. Extensive simulations demonstrating the sharpness of the bounds are also provided."]]></description>
<dc:subject>classifiers high-dimensional_statistics sparsity variable_selection statistics liu.han kolar.mladen in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:b9f4ee25efd5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sparsity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:liu.han"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kolar.mladen"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1306.5505">
    <title>[1306.5505] Asymptotic Properties of Lasso+mLS and Lasso+Ridge in Sparse High-dimensional Linear Regression</title>
    <dc:date>2013-06-27T15:19:31+00:00</dc:date>
    <link>http://arxiv.org/abs/1306.5505</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We study the asymptotic properties of Lasso+mLS and Lasso+Ridge under the sparse high-dimensional linear regression model: Lasso selecting predictors and then modified Least Squares (mLS) or Ridge estimating their coefficients. First, we propose a valid inference procedure for parameter estimation based on parametric residual bootstrap after Lasso+mLS and Lasso+Ridge. Second, we derive the asymptotic unbiasedness of Lasso+mLS and Lasso+Ridge. More specifically, we show that their biases decay at an exponential rate and they can achieve the oracle convergence rate of $s/n$ (where $s$ is the number of nonzero regression coefficients and $n$ is the sample size) for mean squared error (MSE). Third, we show that Lasso+mLS and Lasso+Ridge are asymptotically normal. They have an oracle property in the sense that they can select the true predictors with probability converging to 1 and the estimates of nonzero parameters have the same asymptotic normal distribution that they would have if the zero parameters were known in advance. In fact, our analysis is not limited to adopting Lasso in the selection stage, but is applicable to any other model selection criteria with exponentially decay rates of the probability of selecting wrong models."]]></description>
<dc:subject>to:NB lasso regression variable_selection high-dimensional_statistics statistics estimation yu.bin</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c95bb73aab6b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lasso"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:yu.bin"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1304.5678">
    <title>[1304.5678] Analytic Feature Selection for Support Vector Machines</title>
    <dc:date>2013-04-23T22:31:26+00:00</dc:date>
    <link>http://arxiv.org/abs/1304.5678</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Support vector machines (SVMs) rely on the inherent geometry of a data set to classify training data. Because of this, we believe SVMs are an excellent candidate to guide the development of an analytic feature selection algorithm, as opposed to the more commonly used heuristic methods. We propose a filter-based feature selection algorithm based on the inherent geometry of a feature set. Through observation, we identified six geometric properties that differ between optimal and suboptimal feature sets, and have statistically significant correlations to classifier performance. Our algorithm is based on logistic and linear regression models using these six geometric properties as predictor variables. The proposed algorithm achieves excellent results on high dimensional text data sets, with features that can be organized into a handful of feature types; for example, unigrams, bigrams or semantic structural features. We believe this algorithm is a novel and effective approach to solving the feature selection problem for linear SVMs."]]></description>
<dc:subject>to:NB variable_selection data_mining to_teach:data-mining text_mining classifiers</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c6d333cf0207/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:data_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:text_mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1304.5245">
    <title>[1304.5245] Feature Elimination in empirical risk minimization and support vector machines</title>
    <dc:date>2013-04-22T17:21:47+00:00</dc:date>
    <link>http://arxiv.org/abs/1304.5245</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We develop an approach for feature elimination in empirical risk minimization and support vector machines, based on recursive elimination of features. We present theoretical properties of this method and show that this is uniformly consistent in finding the correct feature space under certain generalized assumptions. We present case studies to show that the assumptions are met in most practical situations and also present simulation studies to demonstrate performance of the proposed approach."]]></description>
<dc:subject>to:NB variable_selection classifiers learning_theory</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:8abc27f2cd48/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:learning_theory"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/0906.4391">
    <title>[0906.4391] KNIFE: Kernel Iterative Feature Extraction</title>
    <dc:date>2012-11-03T15:56:41+00:00</dc:date>
    <link>http://arxiv.org/abs/0906.4391</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Selecting important features in non-linear or kernel spaces is a difficult challenge in both classification and regression problems. When many of the features are irrelevant, kernel methods such as the support vector machine and kernel ridge regression can sometimes perform poorly. We propose weighting the features within a kernel with a sparse set of weights that are estimated in conjunction with the original classification or regression problem. The iterative algorithm, KNIFE, alternates between finding the coefficients of the original problem and finding the feature weights through kernel linearization. In addition, a slight modification of KNIFE yields an efficient algorithm for finding feature regularization paths, or the paths of each feature's weight. Simulation results demonstrate the utility of KNIFE for both kernel regression and support vector machines with a variety of kernels. Feature path realizations also reveal important non-linear correlations among features that prove useful in determining a subset of significant variables. Results on vowel recognition data, Parkinson's disease data, and microarray data are also given."

to_teach tags are tentative]]></description>
<dc:subject>to:NB statistics machine_learning allen.genevera_i. regression classifiers kernel_methods to_teach:data-mining to_teach:undergrad-ADA have_read variable_selection</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:fce2678790d0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:machine_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:allen.genevera_i."/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kernel_methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:undergrad-ADA"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://people.ee.duke.edu/~lcarin/OSCAR.pdf">
    <title>Simultaneous Regression Shrinkage, Variable Selection, and Supervised Clustering of Predictors with OSCAR</title>
    <dc:date>2012-09-30T13:44:42+00:00</dc:date>
    <link>http://people.ee.duke.edu/~lcarin/OSCAR.pdf</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Variable selection can be challenging, particularly in situations with a large number of predictors with possibly high correlations, such as gene expression data. In this article, a new method called the OSCAR (octagonal shrinkage and clustering algorithm for regression) is proposed to simultaneously select variables while grouping them into predictive clusters. In addition to improving prediction accuracy and interpretation, these resulting groups can then be investigated further to discover what contributes to the group having a similar behavior. The technique is based on penalized least squares with a geometrically intuitive penalty function that shrinks some coefficients to exactly zero. Additionally, this penalty yields exact equality of some coefficients, encouraging correlated predictors that have a similar effect on the response to form predictive clusters represented by a single coefficient. The proposed procedure is shown to compare favorably to the existing shrinkage and variable selection techniques in terms of both prediction error and model complexity, while yielding the additional grouping information."]]></description>
<dc:subject>to:NB regression variable_selection statistics via:ryantibs</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:3d45c29a3a2b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:ryantibs"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1208.2572">
    <title>[1208.2572] Nonparametric sparsity and regularization</title>
    <dc:date>2012-09-04T02:09:59+00:00</dc:date>
    <link>http://arxiv.org/abs/1208.2572</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In this work we are interested in the problems of supervised learning and variable selection when the input-output dependence is described by a nonlinear function depending on a few variables. Our goal is to consider a sparse nonparametric model, hence avoiding linear or additive models. The key idea is to measure the importance of each variable in the model by making use of partial derivatives. Based on this intuition we propose a new notion of nonparametric sparsity and a corresponding least squares regularization scheme. Using concepts and results from the theory of reproducing kernel Hilbert spaces and proximal methods, we show that the proposed learning algorithm corresponds to a minimization problem which can be provably solved by an iterative procedure. The consistency properties of the obtained estimator are studied both in terms of prediction and selection performance. An extensive empirical analysis shows that the proposed method performs favorably with respect to the state-of-the-art methods."]]></description>
<dc:subject>to:NB to_read nonparametrics regression variable_selection sparsity statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:5404e1979240/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:nonparametrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sparsity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1205.6843">
    <title>[1205.6843] Significance Testing and Group Variable Selection</title>
    <dc:date>2012-06-23T15:17:19+00:00</dc:date>
    <link>http://arxiv.org/abs/1205.6843</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Let X; Z be r and s-dimensional covariates, respectively, used to model the response variable Y as Y = m(X;Z) + sigma(X;Z)epsilon. We develop an ANOVA-type test for the null hypothesis that Z has no influence on the regression function, based on residuals obtained from local polynomial fitting of the null model. Using p-values from this test, a group variable selection method based on multiple testing ideas is proposed. Simulations studies suggest that the proposed test procedure outperforms the generalized likelihood ratio test when the alternative is non-additive or there is heteroscedasticity. Additional simulation studies, with data generated from linear, non-linear and logistic regression, reveal that the proposed group variable selection procedure performs competitively against Group Lasso, and outperforms it in selecting groups having nonlinear effects. The proposed group variable selection procedure is illustrated on a real data set."]]></description>
<dc:subject>variable_selection model_selection regression nonparametrics to_teach:undergrad-ADA in_NB kernel_smoothing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:1e1c49d015a0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:model_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:nonparametrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:undergrad-ADA"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kernel_smoothing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1206.2696">
    <title>[1206.2696] Flexible Variable Selection for Recovering Sparsity in Nonadditive Nonparametric Models</title>
    <dc:date>2012-06-23T15:09:41+00:00</dc:date>
    <link>http://arxiv.org/abs/1206.2696</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Variable selection for recovering sparsity in nonadditive nonparametric models has been challenging. This problem becomes even more difficult due to complications in modeling unknown interaction terms among high dimensional variables. There is currently no variable selection method to overcome these limitations. Hence, in this paper we propose a variable selection approach that is developed by connecting a kernel machine with the nonparametric multiple regression model. The advantages of our approach are that it can: (1) recover the sparsity, (2) automatically model unknown and complicated interactions, (3) connect with several existing approaches including linear nonnegative garrote, kernel learning and automatic relevant determinants (ARD), and (4) provide flexibility for both additive and nonadditive nonparametric models. Our approach may be viewed as a nonlinear version of a nonnegative garrote method. We model the smoothing function by a least squares kernel machine and construct the nonnegative garrote objective function as the function of the similarity matrix. Since the multiple regression similarity matrix can be written as an additive form of univariate similarity matrices corresponding to input variables, applying a sparse scale parameter on each univariate similarity matrix can reveal its relevance to the response variable. We also derive the asymptotic properties of our approach, and show that it provides a square root consistent estimator of the scale parameters. Furthermore, we prove that sparsistency is satisfied with consistent initial kernel function coefficients under certain conditions and give the necessary and sufficient conditions for sparsistency. An efficient coordinate descent/backfitting algorithm is developed. A resampling procedure for our variable selection methodology is also proposed to improve power."

to_teach tag is tentative, I do a lot with additive models and this might be worth mentioning if it's good.]]></description>
<dc:subject>to:NB regression nonparametrics additive_models variable_selection sparsity statistics to_read to_teach:undergrad-ADA</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:7b43e31d14f3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:nonparametrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:additive_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sparsity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:undergrad-ADA"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.tandfonline.com/doi/abs/10.1080/10618600.2012.679890">
    <title>A Confidence Region Approach to Tuning for Variable Selection - Journal of Computational and Graphical Statistics - Volume 21, Issue 2</title>
    <dc:date>2012-06-23T14:56:55+00:00</dc:date>
    <link>http://www.tandfonline.com/doi/abs/10.1080/10618600.2012.679890</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We develop an approach to tuning of penalized regression variable selection methods by calculating the sparsest estimator contained in a confidence region of a specified level. Because confidence intervals/regions are generally understood, tuning penalized regression methods in this way is intuitive and more easily understood by scientists and practitioners. More importantly, our work shows that tuning to a fixed confidence level often performs better than tuning via the common methods based on Akaike information criterion (AIC), Bayesian information criterion (BIC), or cross-validation (CV) over a wide range of sample sizes and levels of sparsity. Additionally, we prove that by tuning with a sequence of confidence levels converging to one, asymptotic selection consistency is obtained, and with a simple two-stage procedure, an oracle property is achieved. The confidence-region-based tuning parameter is easily calculated using output from existing penalized regression computer packages. Our work also shows how to map any penalty parameter to a corresponding confidence coefficient. This mapping facilitates comparisons of tuning parameter selection methods such as AIC, BIC, and CV, and reveals that the resulting tuning parameters correspond to confidence levels that are extremely low, and can vary greatly across datasets. Supplemental materials for the article are available online."]]></description>
<dc:subject>to:NB variable_selection regression statistics confidence_sets lasso</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:69f5fef13f87/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:confidence_sets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lasso"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://onlinelibrary.wiley.com/doi/10.1111/j.1467-9868.2011.01034.x/abstract">
    <title>Variable selection with error control: another look at stability selection - Shah - 2012 - Journal of the Royal Statistical Society: Series B (Statistical Methodology) - Wiley Online Library</title>
    <dc:date>2012-06-23T14:30:40+00:00</dc:date>
    <link>http://onlinelibrary.wiley.com/doi/10.1111/j.1467-9868.2011.01034.x/abstract</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Stability selection was recently introduced by Meinshausen and Bühlmann as a very general technique designed to improve the performance of a variable selection algorithm. It is based on aggregating the results of applying a selection procedure to subsamples of the data. We introduce a variant, called complementary pairs stability selection, and derive bounds both on the expected number of variables included by complementary pairs stability selection that have low selection probability under the original procedure, and on the expected number of high selection probability variables that are excluded. These results require no (e.g. exchangeability) assumptions on the underlying model or on the quality of the original selection procedure. Under reasonable shape restrictions, the bounds can be further tightened, yielding improved error control, and therefore increasing the applicability of the methodology."]]></description>
<dc:subject>to:NB variable_selection statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:6b054752ae8b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1206.4682">
    <title>[1206.4682] Copula-based Kernel Dependency Measures</title>
    <dc:date>2012-06-23T14:19:27+00:00</dc:date>
    <link>http://arxiv.org/abs/1206.4682</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The paper presents a new copula based method for measuring dependence between random variables. Our approach extends the Maximum Mean Discrepancy to the copula of the joint distribution. We prove that this approach has several advantageous properties. Similarly to Shannon mutual information, the proposed dependence measure is invariant to any strictly increasing transformation of the marginal variables. This is important in many applications, for example in feature selection. The estimator is consistent, robust to outliers, and uses rank statistics only. We derive upper bounds on the convergence rate and propose independence tests too. We illustrate the theoretical contributions through a series of experiments in feature selection and low-dimensional embedding of distributions."]]></description>
<dc:subject>information_theory entropy_estimation poczos.barnabas variable_selection machine_learning copulas kernel_methods in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:617bbe4e33dd/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:information_theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:entropy_estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:poczos.barnabas"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:machine_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:copulas"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kernel_methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1206.4680">
    <title>[1206.4680] Fast Prediction of New Feature Utility</title>
    <dc:date>2012-06-23T14:18:26+00:00</dc:date>
    <link>http://arxiv.org/abs/1206.4680</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We study the new feature utility prediction problem: statistically testing whether adding a new feature to the data representation can improve predictive accuracy on a supervised learning task. In many applications, identifying new informative features is the primary pathway for improving performance. However, evaluating every potential feature by re-training the predictor with it can be costly. The paper describes an efficient, learner-independent technique for estimating new feature utility without re-training based on the current predictor's outputs. The method is obtained by deriving a connection between loss reduction potential and the new feature's correlation with the loss gradient of the current predictor. This leads to a simple yet powerful hypothesis testing procedure, for which we prove consistency. Our theoretical analysis is accompanied by empirical evaluation on standard benchmarks and a large-scale industrial dataset."]]></description>
<dc:subject>machine_learning prediction regression classifiers variable_selection have_read in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:3461e24caec3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:machine_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/0801.1158">
    <title>[0801.1158] Hierarchical selection of variables in sparse high-dimensional regression</title>
    <dc:date>2012-06-17T21:26:12+00:00</dc:date>
    <link>http://arxiv.org/abs/0801.1158</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We study a regression model with a huge number of interacting variables. We consider a specific approximation of the regression function under two assumptions: (i) there exists a sparse representation of the regression function in a suggested basis, (ii) there are no interactions outside of the set of the corresponding main effects. We suggest an hierarchical randomized search procedure for selection of variables and of their interactions. We show that given an initial estimator, an estimator with a similar prediction loss but with a smaller number of non-zero coordinates can be found."]]></description>
<dc:subject>to:NB variable_selection high-dimensional_statistics regression statistics re:what_is_the_right_null_model_for_linear_regression bickel.peter_j.</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:045bc9afe3b3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:what_is_the_right_null_model_for_linear_regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bickel.peter_j."/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1205.6761">
    <title>[1205.6761] Nonparametric Model Checking and Variable Selection</title>
    <dc:date>2012-06-07T16:02:38+00:00</dc:date>
    <link>http://arxiv.org/abs/1205.6761</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Let X be a d dimensional vector of covariates and Y be the response variable. Under the nonparametric model Y = m(X) + sigma(X)epsilon, we develop an ANOVA-type test for the null hypothesis that a particular coordinate of X has no influence on the regression function. The asymptotic distribution of the test statistic, using residuals based on Nadaraya-Watson type kernel estimator and d <= 4, is established under the null hypothesis and local alternatives. Simulations suggest that under a sparse model, the applicability of the test extends to arbitrary d through sufficient dimension reduction. Using p-values from this test, a variable selection method based on multiple testing ideas is proposed. The proposed test outperforms existing procedures, while additional simulations reveal that the proposed variable selection method performs competitively against well established procedures. A real data set is analyzed."]]></description>
<dc:subject>variable_selection regression nonparametrics statistics in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:5b928ac92a1f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:nonparametrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://jmlr.csail.mit.edu/papers/v13/song12a.html">
    <title>Feature Selection via Dependence Maximization</title>
    <dc:date>2012-06-07T15:59:35+00:00</dc:date>
    <link>http://jmlr.csail.mit.edu/papers/v13/song12a.html</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We introduce a framework for feature selection based on dependence maximization between the selected features and the labels of an estimation problem, using the Hilbert-Schmidt Independence Criterion. The key idea is that good features should be highly dependent on the labels. Our approach leads to a greedy procedure for feature selection. We show that a number of existing feature selectors are special cases of this framework. Experiments on both artificial and real-world data show that our feature selector works well in practice."]]></description>
<dc:subject>to:NB variable_selection hilbert_space machine_learning information_theory</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:93feda6cd8e0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hilbert_space"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:machine_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:information_theory"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1206.1024">
    <title>[1206.1024] Conditional Sure Independence Screening</title>
    <dc:date>2012-06-07T15:41:24+00:00</dc:date>
    <link>http://arxiv.org/abs/1206.1024</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Independence screening is a powerful method for variable selection for `Big Data' when the number of variables is massive. Commonly used independence screening methods are based on marginal correlations or variations of it. In many applications, researchers often have some prior knowledge that a certain set of variables is related to the response. In such a situation, a natural assessment on the relative importance of the other predictors is the conditional contributions of the individual predictors in presence of the known set of variables. This results in conditional sure independence screening (CSIS). Conditioning helps for reducing the false positive and the false negative rates in the variable selection process. In this paper, we propose and study CSIS in the context of generalized linear models. For ultrahigh-dimensional statistical problems, we give conditions under which sure screening is possible and derive an upper bound on the number of selected variables. We also spell out the situation under which CSIS yields model selection consistency. Moreover, we provide two data-driven methods to select the thresholding parameter of conditional screening. The utility of the procedure is illustrated by simulation studies and analysis of two real data sets."]]></description>
<dc:subject>to:NB variable_selection high-dimensional_statistics statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:4b61d8167ea5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/0805.1179">
    <title>[0805.1179] Autoregressive Process Modeling via the Lasso Procedure</title>
    <dc:date>2012-03-04T17:12:03+00:00</dc:date>
    <link>http://arxiv.org/abs/0805.1179</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The Lasso is a popular model selection and estimation procedure for linear models that enjoys nice theoretical properties. In this paper, we study the Lasso estimator for fitting autoregressive time series models. We adopt a double asymptotic framework where the maximal lag may increase with the sample size. We derive theoretical results establishing various types of consistency. In particular, we derive conditions under which the Lasso estimator for the autoregressive coefficients is model selection consistent, estimation consistent and prediction consistent. Simulation study results are reported."]]></description>
<dc:subject>time_series statistics lasso sparsity variable_selection kith_and_kin heard_the_talk rinaldo.alessandro nardi.yuval in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:519c50513386/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lasso"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sparsity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:heard_the_talk"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:rinaldo.alessandro"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:nardi.yuval"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1102.3616">
    <title>[1102.3616] Tight conditions for consistent variable selection in high dimensional nonparametric regression</title>
    <dc:date>2012-02-27T00:02:40+00:00</dc:date>
    <link>http://arxiv.org/abs/1102.3616</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We address the issue of variable selection in the regression model with very high ambient dimension, i.e., when the number of covariates is very large. The main focus is on the situation where the number of relevant covariates, called intrinsic dimension, is much smaller than the ambient dimension. Without assuming any parametric form of the underlying regression function, we get tight conditions making it possible to consistently estimate the set of relevant variables. These conditions relate the intrinsic dimension to the ambient dimension and to the sample size. The procedure that is provably consistent under these tight conditions is simple and is based on comparing the empirical Fourier coefficients with an appropriately chosen threshold value."]]></description>
<dc:subject>regression variable_selection nonparametrics statistics in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:8b693691cb40/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:nonparametrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://jmlr.csail.mit.edu/papers/v13/brown12a.html">
    <title>Conditional Likelihood Maximisation: A Unifying Framework for Information Theoretic Feature Selection</title>
    <dc:date>2012-02-10T18:10:39+00:00</dc:date>
    <link>http://jmlr.csail.mit.edu/papers/v13/brown12a.html</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We present a unifying framework for information theoretic feature selection, bringing almost two decades of research on heuristic filter criteria under a single theoretical interpretation. This is in response to the question: "what are the implicit statistical assumptions of feature selection criteria based on mutual information?". To answer this, we adopt a different strategy than is usual in the feature selection literature—instead of trying to define a criterion, we derive one, directly from a clearly specified objective function: the conditional likelihood of the training labels. While many hand-designed heuristic criteria try to optimize a definition of feature 'relevancy' and 'redundancy', our approach leads to a probabilistic framework which naturally incorporates these concepts. As a result we can unify the numerous criteria published over the last two decades, and show them to be low-order approximations to the exact (but intractable) optimisation problem. The primary contribution is to show that common heuristics for information based feature selection (including Markov Blanket algorithms as a special case) are approximate iterative maximisers of the conditional likelihood. A large empirical study provides strong evidence to favour certain classes of criteria, in particular those that balance the relative size of the relevancy/redundancy terms. Overall we conclude that the JMI criterion (Yang and Moody, 1999; Meyer et al., 2008) provides the best tradeoff in terms of accuracy, stability, and flexibility with small data samples."]]></description>
<dc:subject>information_theory statistics variable_selection model_selection to_teach:data-mining to:blog machine_learning classifiers have_read in_NB graphical_models</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:3584eb0c3974/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:information_theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:model_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:blog"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:machine_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:graphical_models"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.springerlink.com/content/582v131176130h06/">
    <title>Nonparametric estimation of the link function including variable selection - Gerhard Tutz and Sebastian Petry - Statistics and Computing, Volume 22, Number 2</title>
    <dc:date>2011-12-01T12:54:28+00:00</dc:date>
    <link>http://www.springerlink.com/content/582v131176130h06/</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Nonparametric methods for the estimation of the link function in generalized linear models are able to avoid bias in the regression parameters. But for the estimation of the link typically the full model, which includes all predictors, has been used. When the number of predictors is large these methods fail since the full model cannot be estimated. In the present article a boosting type method is proposed that simultaneously selects predictors and estimates the link function. The method performs quite well in simulations and real data examples."  (The "to teach" tag is conjectural.)]]></description>
<dc:subject>regression variable_selection statistics nonparametrics to_read to_teach:undergrad-ADA in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:6413527c3ed3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:nonparametrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:undergrad-ADA"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://onlinelibrary.wiley.com/doi/10.1111/j.1467-9868.2011.01005.x/abstract">
    <title>Variance estimation using refitted cross-validation in ultrahigh dimensional regression - Fan - 2011 - Journal of the Royal Statistical Society: Series B (Statistical Methodology) - Wiley Online Library</title>
    <dc:date>2011-10-10T12:16:53+00:00</dc:date>
    <link>http://onlinelibrary.wiley.com/doi/10.1111/j.1467-9868.2011.01005.x/abstract</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Variance estimation is a fundamental problem in statistical modelling. In ultrahigh dimensional linear regression where the dimensionality is much larger than the sample size, traditional variance estimation techniques are not applicable. Recent advances in variable selection in ultrahigh dimensional linear regression make this problem accessible. One of the major problems in ultrahigh dimensional regression is the high spurious correlation between the unobserved realized noise and some of the predictors. As a result, the realized noises are actually predicted when extra irrelevant variables are selected, leading to a serious underestimate of the level of noise. We propose a two-stage refitted procedure via a data splitting technique, called refitted cross-validation, to attenuate the influence of irrelevant variables with high spurious correlations. Our asymptotic results show that the resulting procedure performs as well as the oracle estimator, which knows in advance the mean regression function. The simulation studies lend further support to our theoretical claims. The naive two-stage estimator and the plug-in one-stage estimators using the lasso and smoothly clipped absolute deviation are also studied and compared. Their performances can be improved by the refitted cross-validation method proposed."]]></description>
<dc:subject>statistics regression variable_selection cross-validation estimation fan.jianqing variance_estimation in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:f86a2929f2a4/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:cross-validation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:fan.jianqing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variance_estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://pubs.amstat.org/doi/abs/10.1198/jcgs.2011.09220">
    <title>A Framework for Unbiased Model Selection Based on Boosting</title>
    <dc:date>2011-07-12T17:40:02+00:00</dc:date>
    <link>http://pubs.amstat.org/doi/abs/10.1198/jcgs.2011.09220</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>model_selection variable_selection boosting ensemble_methods statistics</dc:subject>
<dc:identifier>https://pinboard.in/u:cshalizi/b:6492001fa93c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:model_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:boosting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:ensemble_methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.springer.com/statistics/statistical+theory+and+methods/book/978-3-642-20191-2?cm_mmc=NBA-_-Jul-11_WEST_8259992-_-product-_-978-3-642-20191-2">
    <title>Statistics for High-Dimensional Data</title>
    <dc:date>2011-07-05T13:18:31+00:00</dc:date>
    <link>http://www.springer.com/statistics/statistical+theory+and+methods/book/978-3-642-20191-2?cm_mmc=NBA-_-Jul-11_WEST_8259992-_-product-_-978-3-642-20191-2</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>statistics machine_learning variable_selection lasso sparsity buhlmann.peter van_de_geer.sara books:recommended books:owned</dc:subject>
<dc:identifier>https://pinboard.in/u:cshalizi/b:71f041b9a3fb/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:machine_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lasso"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sparsity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:buhlmann.peter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:van_de_geer.sara"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:books:recommended"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:books:owned"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1106.5242">
    <title>[1106.5242] High Dimensional Sparse Econometric Models: An Introduction</title>
    <dc:date>2011-06-28T14:57:54+00:00</dc:date>
    <link>http://arxiv.org/abs/1106.5242</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[I love how they just flat-out identify "econometrics" with "linear regression with Gaussian noise"; but it looks like a clean exposition with proofs.
]]></description>
<dc:subject>regression lasso variable_selection econometrics</dc:subject>
<dc:identifier>https://pinboard.in/u:cshalizi/b:a5ec335e85a0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lasso"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:econometrics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://projecteuclid.org/DPubS?service=UI&amp;version=1.0&amp;verb=Display&amp;handle=euclid.aoas/1300715197">
    <title>Radchenko, James: Improved variable selection with Forward-Lasso adaptive shrinkage</title>
    <dc:date>2011-03-23T13:40:05+00:00</dc:date>
    <link>http://projecteuclid.org/DPubS?service=UI&amp;version=1.0&amp;verb=Display&amp;handle=euclid.aoas/1300715197</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>lasso sparsity regression variable_selection model_selection to:NB</dc:subject>
<dc:identifier>https://pinboard.in/u:cshalizi/b:7aef4dc9dfd2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lasso"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sparsity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:model_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1009.2302">
    <title>[1009.2302] The Predictive Lasso</title>
    <dc:date>2010-09-16T01:54:03+00:00</dc:date>
    <link>http://arxiv.org/abs/1009.2302</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We propose a shrinkage procedure for simultaneous variable selection and estimation in generalized linear models (GLMs) with an explicit predictive motivation. The procedure estimates the coefficients by minimizing the Kullback-Leibler divergence of a set of predictive distributions to the corresponding predictive distributions for the full model, subject to an $l_1$ constraint on the coefficient vector. This results in selection of a parsimonious model with similar predictive performance to the full model. Thanks to its similar form to the original lasso problem for GLMs, our procedure can benefit from available $l_1$-regularization path algorithms. Simulation studies and real-data examples confirm the efficiency of our method in terms of predictive performance on future observations."
]]></description>
<dc:subject>regression lasso variable_selection sparsity information_theory statistics</dc:subject>
<dc:identifier>https://pinboard.in/u:cshalizi/b:9bcc63589a6c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lasso"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sparsity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:information_theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://pubs.amstat.org/doi/abs/10.1198/jcgs.2010.07139">
    <title>&quot;Partial Generalized Additive Models: An Information-Theoretic Approach for Dealing With Concurvity and Selecting Variables&quot; (Gu, Kenny, Zhu, 2010)</title>
    <dc:date>2010-09-16T01:52:49+00:00</dc:date>
    <link>http://pubs.amstat.org/doi/abs/10.1198/jcgs.2010.07139</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Scientists [want to know] which covariates are important, and how [they] affect the response variable, rather than just making predictions. ... Generalized additive models (GAMs) are a class of interpretable, multivariate nonparametric regression models which are very useful ... for these purposes, but concurvity among covariates (the nonlinear analogue of collinearity for linear regression) can ... produce unstable or even wrong estimates of the covariates’ functional effects. We develop a new procedure called partial generalized additive models (pGAM), based on mutual information ... Our procedure is similar in spirit to the Gram–Schmidt method for linear least squares. By building a GAM on a selected set of transformed variables, pGAM produces more stable models, selects variables parsimoniously, and provides insight into the nature of concurvity between the covariates by calculating functional dependencies among them. ... R code for fitting pGAMs is available online"
]]></description>
<dc:subject>regression additive_models information_theory variable_selection statistics</dc:subject>
<dc:identifier>https://pinboard.in/u:cshalizi/b:92d9d94d50d9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:additive_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:information_theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://jmlr.csail.mit.edu/papers/v11/aliferis10b.html">
    <title>Local Causal and Markov Blanket Induction for Causal Discovery and Feature Selection for Classification Part II: Analysis and Extensions</title>
    <dc:date>2010-02-05T05:28:31+00:00</dc:date>
    <link>http://jmlr.csail.mit.edu/papers/v11/aliferis10b.html</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>graphical_models causal_inference variable_selection classifiers machine_learning statistics</dc:subject>
<dc:identifier>https://pinboard.in/u:cshalizi/b:9ad623c602b8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:graphical_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:causal_inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:machine_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://jmlr.csail.mit.edu/papers/v11/aliferis10a.html">
    <title>Local Causal and Markov Blanket Induction for Causal Discovery and Feature Selection for Classification Part I: Algorithms and Empirical Evaluation</title>
    <dc:date>2010-02-05T05:28:13+00:00</dc:date>
    <link>http://jmlr.csail.mit.edu/papers/v11/aliferis10a.html</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>graphical_models causal_inference variable_selection classifiers machine_learning statistics</dc:subject>
<dc:identifier>https://pinboard.in/u:cshalizi/b:6bef18f55d93/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:graphical_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:causal_inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:machine_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/0909.0844">
    <title>[0909.0844] High-Dimensional Non-Linear Variable Selection through Hierarchical Kernel Learning</title>
    <dc:date>2009-09-15T17:59:15+00:00</dc:date>
    <link>http://arxiv.org/abs/0909.0844</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>variable_selection regression kernel_methods statistics machine_learning in_NB</dc:subject>
<dc:identifier>https://pinboard.in/u:cshalizi/b:59c474a0ed79/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kernel_methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:machine_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/0909.1308">
    <title>[0909.1308] Efficient Learning of Sparse Conditional Random Fields for Supervised Sequence Labelling</title>
    <dc:date>2009-09-08T11:22:28+00:00</dc:date>
    <link>http://arxiv.org/abs/0909.1308</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>machine_learning sparsity random_fields conditional_random_fields to:NB variable_selection graphical_models</dc:subject>
<dc:identifier>https://pinboard.in/u:cshalizi/b:7c173069e09d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:machine_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sparsity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:random_fields"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:conditional_random_fields"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:variable_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:graphical_models"/>
</rdf:Bag></taxo:topics>
</item>
</rdf:RDF>