<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (cshalizi)</title>
    <link>https://pinboard.in/u:cshalizi/public/</link>
    <description>recent bookmarks from cshalizi</description>
    <items>
      <rdf:Seq>	<rdf:li rdf:resource="https://theoryandpractice.org/2024/10/Yes,%20we%20did%20discover%20the%20Higgs!/"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2510.16174"/>
	<rdf:li rdf:resource="https://royalsocietypublishing.org/doi/10.1098/rspa.2021.0549"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2402.13196"/>
	<rdf:li rdf:resource="https://dspace.mit.edu/handle/1721.1/155358"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2402.15213"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2211.01126"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2310.16626"/>
	<rdf:li rdf:resource="https://osf.io/7vy2f/"/>
	<rdf:li rdf:resource="https://academic.oup.com/ej/article-abstract/127/605/F236/5069452?login=false"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2304.04183"/>
	<rdf:li rdf:resource="https://journals.aps.org/pre/abstract/10.1103/PhysRevE.62.1912"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2206.08052"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2203.08089"/>
	<rdf:li rdf:resource="https://amstat.tandfonline.com/doi/full/10.1080/01621459.2021.1969239#.YqNL_uzMIZE"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2206.02765"/>
	<rdf:li rdf:resource="https://www.nber.org/papers/w29702"/>
	<rdf:li rdf:resource="https://doi.org/10.1257/jep.35.3.157"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2104.14676"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2104.08279"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2009.00503"/>
	<rdf:li rdf:resource="https://www.annualreviews.org/doi/abs/10.1146/annurev-statistics-040720-024710"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2104.03167"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2103.08402"/>
	<rdf:li rdf:resource="https://ieeexplore.ieee.org/document/9301324"/>
	<rdf:li rdf:resource="https://jmlr.org/papers/v22/17-570.html"/>
	<rdf:li rdf:resource="https://www.pnas.org/content/117/29/16880.short"/>
	<rdf:li rdf:resource="https://academic.oup.com/biomet/article-abstract/107/4/771/5875781"/>
	<rdf:li rdf:resource="https://projecteuclid.org/euclid.aos/1611889233"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1912.03662"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2012.14530"/>
	<rdf:li rdf:resource="https://projecteuclid.org/euclid.ejs/1609384079"/>
	<rdf:li rdf:resource="https://projecteuclid.org/euclid.ss/1608541216"/>
	<rdf:li rdf:resource="https://academic.oup.com/biomet/article-abstract/107/4/791/5856302?redirectedFrom=fulltext"/>
	<rdf:li rdf:resource="https://www.nature.com/articles/s41562-020-0844-7"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2002.10399"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2012.05784"/>
	<rdf:li rdf:resource="https://projecteuclid.org/euclid.aos/1607677236"/>
	<rdf:li rdf:resource="https://doi.org/10.1111/jtsa.12554"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2007.04727"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2009.09440"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1809.04587"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1903.11117"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2008.03971"/>
	<rdf:li rdf:resource="https://www.annualreviews.org/doi/abs/10.1146/annurev-statistics-031219-041051"/>
	<rdf:li rdf:resource="https://projecteuclid.org/euclid.aos/1600480925"/>
	<rdf:li rdf:resource="https://projecteuclid.org/euclid.aos/1594972828"/>
	<rdf:li rdf:resource="https://ieeexplore.ieee.org/document/8782628"/>
	<rdf:li rdf:resource="https://ieeexplore.ieee.org/document/8804234"/>
	<rdf:li rdf:resource="https://doi.org/10.1111/sjos.12450"/>
	<rdf:li rdf:resource="https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0235318"/>
	<rdf:li rdf:resource="https://papers.ssrn.com/sol3/papers.cfm?abstract_id=917901"/>
	<rdf:li rdf:resource="https://doi.org/10.1111/1368-423X.t01-1-00071"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2001.03039"/>
	<rdf:li rdf:resource="https://ieeexplore.ieee.org/document/8736279"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1910.08883"/>
	<rdf:li rdf:resource="https://rss.onlinelibrary.wiley.com/doi/full/10.1111/rssb.12340"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1910.01692"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1909.13031"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1909.13464"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1902.00080"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1904.04052"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1909.03302"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1902.06441"/>
	<rdf:li rdf:resource="https://amstat.tandfonline.com/doi/full/10.1080/10618600.2019.1637749"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1908.06486"/>
	<rdf:li rdf:resource="https://academic.oup.com/biomet/article/106/3/547/5511208?rss=1"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1906.06615"/>
	<rdf:li rdf:resource="https://global.oup.com/academic/product/non-standard-parametric-statistical-inference-9780198505044?cc=us&amp;lang=en#"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1907.07582"/>
      </rdf:Seq>
    </items>
  </channel><item rdf:about="https://theoryandpractice.org/2024/10/Yes,%20we%20did%20discover%20the%20Higgs!/">
    <title>Yes, we did discover the Higgs! - Theory And Practice</title>
    <dc:date>2026-04-16T17:30:22+00:00</dc:date>
    <link>https://theoryandpractice.org/2024/10/Yes,%20we%20did%20discover%20the%20Higgs!/</link>
    <dc:creator>cshalizi</dc:creator><dc:subject>cranmer.kyle particle_physics hypothesis_testing statistics philosophy_of_science via:? sociology_of_science science_as_a_social_process have_read to:blog</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:86fa85118401/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:cranmer.kyle"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:particle_physics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:philosophy_of_science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:?"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sociology_of_science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:science_as_a_social_process"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:blog"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2510.16174">
    <title>[2510.16174] COWs and their Hybrids: A Statistical View of Custom Orthogonal Weights</title>
    <dc:date>2025-10-24T19:41:22+00:00</dc:date>
    <link>https://arxiv.org/abs/2510.16174</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["A recurring challenge in high energy physics is inference of the signal component from a distribution for which observations are assumed to be a mixture of signal and background events. A standard assumption is that there exists information encoded in a discriminant variable that is effective at separating signal and background. This can be used to assign a signal weight to each event, with these weights used in subsequent analyses of one or more control variables of interest. The custom orthogonal weights (COWs) approach of Dembinski, et al.(2022), a generalization of the sPlot approach of Barlow (1987) and Pivk and Le Diberder (2005), is tailored to address this objective. The problem, and this method, present interesting and novel statistical issues. Here we formalize the assumptions needed and the statistical properties, while also considering extensions and alternative approaches."]]></description>
<dc:subject>to:NB classifiers hypothesis_testing statistics particle_physics kith_and_kin wasserman.larry</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:d79263d603f5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:particle_physics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:wasserman.larry"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://royalsocietypublishing.org/doi/10.1098/rspa.2021.0549">
    <title>USP: an independence test that improves on Pearson’s chi-squared and the G-test | Proceedings of the Royal Society A: Mathematical, Physical and Engineering Sciences</title>
    <dc:date>2025-01-10T15:06:42+00:00</dc:date>
    <link>https://royalsocietypublishing.org/doi/10.1098/rspa.2021.0549</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We present the $U$-statistic permutation (USP) test of independence in the context of discrete data displayed in a contingency table. Either Pearson’s $\chi^2$-test of independence, or the $G$-test, are typically used for this task, but we argue that these tests have serious deficiencies, both in terms of their inability to control the size of the test, and their power properties. By contrast, the USP test is guaranteed to control the size of the test at the nominal level for all sample sizes, has no issues with small (or zero) cell counts, and is able to detect distributions that violate independence in only a minimal way. The test statistic is derived from a $U$-statistic estimator of a natural population measure of dependence, and we prove that this is the unique minimum variance unbiased estimator of this population quantity. The practical utility of the USP test is demonstrated on both simulated data, where its power can be dramatically greater than those of Pearson’s test, the $G$-test and Fisher’s exact test, and on real data. The USP test is implemented in the R package USP."]]></description>
<dc:subject>to:NB dependence_measures hypothesis_testing independence_testing statistics samworth.richard_j.</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:0861a549ae01/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dependence_measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:independence_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:samworth.richard_j."/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2402.13196">
    <title>[2402.13196] Practical Kernel Tests of Conditional Independence</title>
    <dc:date>2024-12-11T15:42:24+00:00</dc:date>
    <link>https://arxiv.org/abs/2402.13196</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We describe a data-efficient, kernel-based approach to statistical testing of conditional independence. A major challenge of conditional independence testing, absent in tests of unconditional independence, is to obtain the correct test level (the specified upper bound on the rate of false positives), while still attaining competitive test power. Excess false positives arise due to bias in the test statistic, which is obtained using nonparametric kernel ridge regression. We propose three methods for bias control to correct the test level, based on data splitting, auxiliary data, and (where possible) simpler function classes. We show these combined strategies are effective both for synthetic and real-world data."]]></description>
<dc:subject>to_read kernel_methods dependence_measures hypothesis_testing gretton.arthur in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:5170f8e5ed81/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kernel_methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dependence_measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:gretton.arthur"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://dspace.mit.edu/handle/1721.1/155358">
    <title>Likelihood-Free Hypothesis Testing and Applications of the Energy Distance</title>
    <dc:date>2024-12-06T14:04:25+00:00</dc:date>
    <link>https://dspace.mit.edu/handle/1721.1/155358</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This thesis studies questions in nonparametric testing and estimation that are inspired by machine learning. One of the main problems of our interest is likelihood-free hypothesis testing: given three samples X, Y and Z with sample sizes n, n and m respectively, one must decide whether the distribution of Z is closer to that of X or that of Y. We fully characterize the problem’s sample complexity for multiple distribution classes and with high probability. We uncover connections to two-sample, goodness-of-fit and robust testing, and show the existence of a trade-off of the form mn ≍ k/ε^4, where k is an appropriate notion of complexity and ε is the total variation separation between the distributions of X and Y. We generalize our problem to allow Z to come from a mixture of the distributions of X and Y, and propose a kernel-based test for its solution, and also verify the existence of a trade-off between m and n on experimental data from particle physics. In addition, we demonstrate that the family of “classifier accuracy” tests are not only popular in practice but also provably near-optimal, recovering and simplifying a multitude of classical and recent results. Finally, we study affine classifiers as a tool for estimation and testing, with the key technical tool being a connection to the energy distance. In particular, we propose a density estimation routine based on minimizing the generalized energy distance, targeting smooth densities and Gaussian mixtures. We interpret our results in terms of half-space separability over these classes, and derive analogous results for discrete distributions. As a consequence we deduce that any two discrete distributions are well-separated by a half-space, provided their support is embedded as a packing of a high-dimensional unit ball. We also scrutinize two recent applications of the energy distance in the two-sample testing literature."
]]></description>
<dc:subject>to:NB to_read hypothesis_testing two-sample_tests statistics via:_onionesque kernel_methods goodness-of-fit</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:d56e7c266c6a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:two-sample_tests"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:_onionesque"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kernel_methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:goodness-of-fit"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2402.15213">
    <title>[2402.15213] Statistical Agnostic Regression: a machine learning method to validate regression models</title>
    <dc:date>2024-03-05T16:37:22+00:00</dc:date>
    <link>https://arxiv.org/abs/2402.15213</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Regression analysis is a central topic in statistical modeling, aiming to estimate the relationships between a dependent variable, commonly referred to as the response variable, and one or more independent variables, i.e., explanatory variables. Linear regression is by far the most popular method for performing this task in several fields of research, such as prediction, forecasting, or causal inference. Beyond various classical methods to solve linear regression problems, such as Ordinary Least Squares, Ridge, or Lasso regressions - which are often the foundation for more advanced machine learning (ML) techniques - the latter have been successfully applied in this scenario without a formal definition of statistical significance. At most, permutation or classical analyses based on empirical measures (e.g., residuals or accuracy) have been conducted to reflect the greater ability of ML estimations for detection. In this paper, we introduce a method, named Statistical Agnostic Regression (SAR), for evaluating the statistical significance of an ML-based linear regression based on concentration inequalities of the actual risk using the analysis of the worst case. To achieve this goal, similar to the classification problem, we define a threshold to establish that there is sufficient evidence with a probability of at least 1-eta to conclude that there is a linear relationship in the population between the explanatory (feature) and the response (label) variables. Simulations in only two dimensions demonstrate the ability of the proposed agnostic test to provide a similar analysis of variance given by the classical F test for the slope parameter."

--- I should read this, but the last tag applies with force.  The "classical F test for the slope parameter" in no way tests/validates the existence of a _linear_ relationship, even if all the classical assumptions hold.]]></description>
<dc:subject>statistics linear_regression hypothesis_testing color_me_skeptical in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:3c164bcb0d95/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:linear_regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:color_me_skeptical"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2211.01126">
    <title>[2211.01126] Likelihood-free hypothesis testing</title>
    <dc:date>2023-11-16T03:08:24+00:00</dc:date>
    <link>https://arxiv.org/abs/2211.01126</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Consider the problem of binary hypothesis testing. Given $Z$ coming from either $\mathbb{P}^{\otimes m}$ or $\mathbb{Q}^{\otimes m}$, to decide between the two with small probability of error it is sufficient and in most cases necessary to have $m \asymp 1/\epsilon^2$, where $\epsilon$ measures the separation between $\mathbb{P}$ and $\mathbb{Q}$ in total variation ($\mathsf{TV}$). Achieving this, however, requires complete knowledge of the distributions and can be done, for example, using the Neyman-Pearson test. In this paper we consider a variation of the problem, which we call likelihood-free (or simulation-based) hypothesis testing, where access to $\mathbb{P}$ and $\mathbb{Q}$ is given through $n$ iid observations from each. In the case when $\mathbb{P}, \mathbb{Q}$ are assumed to belong to a non-parametric family $\mathcal{P}$, we demonstrate the existence of a fundamental trade-off between $n$ and $m$ given by $nm \asymp n_{\mathsf{GoF}}^2(\epsilon, \mathcal{P})$, where $n_{\mathsf{GoF}}$ is the minimax sample complexity of testing between the hypotheses $H_0: \mathbb{P} = \mathbb{Q}$ vs $H_1: \mathsf{TV}(\mathbb{P}, \mathbb{Q}) \geq \epsilon$. We show this for three families of distributions: $\beta$-smooth densities supported on $[0,1]^d$, the Gaussian sequence model over a Sobolev ellipsoid, and the collection of distributions on alphabet $[k] = \{1, 2, \dots, k\}$ with pmfs bounded by $c/k$ for fixed $c$. For the larger family of all distributions on $[k]$ we obtain a more complicated trade-off that exhibits a phase-transition. The test that we propose, based on the $L^2$-distance statistic of Ingster, simultaneously achieves all points on the trade-off curve for the regular classes. This demonstrates the possibility of testing without fully estimating the distributions, provided $m \gg 1/\epsilon^2$."]]></description>
<dc:subject>to_read hypothesis_testing simulation-based_inference in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:bc904e157cbf/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:simulation-based_inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2310.16626">
    <title>[2310.16626] Scalable Causal Structure Learning via Amortized Conditional Independence Testing</title>
    <dc:date>2023-10-28T18:09:49+00:00</dc:date>
    <link>https://arxiv.org/abs/2310.16626</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Controlling false positives (Type I errors) through statistical hypothesis testing is a foundation of modern scientific data analysis. Existing causal structure discovery algorithms either do not provide Type I error control or cannot scale to the size of modern scientific datasets. We consider a variant of the causal discovery problem with two sets of nodes, where the only edges of interest form a bipartite causal subgraph between the sets. We develop Scalable Causal Structure Learning (SCSL), a method for causal structure discovery on bipartite subgraphs that provides Type I error control. SCSL recasts the discovery problem as a simultaneous hypothesis testing problem and uses discrete optimization over the set of possible confounders to obtain an upper bound on the test statistic for each edge. Semi-synthetic simulations demonstrate that SCSL scales to handle graphs with hundreds of nodes while maintaining error control and good power. We demonstrate the practical applicability of the method by applying it to a cancer dataset to reveal connections between somatic gene mutations and metastases to different tissues."]]></description>
<dc:subject>to:NB hypothesis_testing causal_inference kith_and_kin ramdas.aaditya</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:750d79d0c649/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:causal_inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:ramdas.aaditya"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://osf.io/7vy2f/">
    <title>OSF Preprints | Quantitative Political Science Research is Greatly Underpowered</title>
    <dc:date>2023-05-02T20:12:17+00:00</dc:date>
    <link>https://osf.io/7vy2f/</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We analyze the statistical power of political science research by collating over 16,000 hypothesis tests from about 2,000 articles. Even with generous assumptions, the median analysis has about 10% power, and only about 1 in 10 tests have at least 80% power to detect the consensus effects reported in the literature. There is also substantial heterogeneity in tests across research areas, with some being characterized by high-power but most having very low power. To contextualize our findings, we survey political methodologists to assess their expectations about power levels. Most methodologists greatly overestimate the statistical power of political science research."]]></description>
<dc:subject>to:NB to_read political_science social_science_methodology statistics hypothesis_testing estimation re:neutral_model_of_inquiry</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c31528a09f2b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:political_science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_science_methodology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:neutral_model_of_inquiry"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://academic.oup.com/ej/article-abstract/127/605/F236/5069452?login=false">
    <title>Power of Bias in Economics Research | The Economic Journal | Oxford Academic</title>
    <dc:date>2023-05-02T20:10:08+00:00</dc:date>
    <link>https://academic.oup.com/ej/article-abstract/127/605/F236/5069452?login=false</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We investigate two critical dimensions of the credibility of empirical economics research: statistical power and bias. We survey 159 empirical economics literatures that draw upon 64,076 estimates of economic parameters reported in more than 6,700 empirical studies. Half of the research areas have nearly 90% of their results under‐powered. The median statistical power is 18%, or less. A simple weighted average of those reported results that are adequately powered (power ≥ 80%) reveals that nearly 80% of the reported effects in these empirical economics literatures are exaggerated; typically, by a factor of two and with one‐third inflated by a factor of four or more."

--- Power's really a function, not a number, so where's "18%" come from?  Is that the power to detect an effect of the magnitude estimated (a little weirdly recursive...), or some standard-size magnitude?
--- ETA after reading: Yes, for each area of economics they do a supposedly-robust meta-estimate of the effect size, and try to work out the power to detect an effect that big.]]></description>
<dc:subject>to:NB economics econometrics statistics hypothesis_testing re:neutral_model_of_inquiry estimation have_read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:2c2f32247eac/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:economics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:econometrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:neutral_model_of_inquiry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2304.04183">
    <title>[2304.04183] Nearest-Neighbor Sampling Based Conditional Independence Testing</title>
    <dc:date>2023-04-27T14:48:56+00:00</dc:date>
    <link>https://arxiv.org/abs/2304.04183</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The conditional randomization test (CRT) was recently proposed to test whether two random variables X and Y are conditionally independent given random variables Z. The CRT assumes that the conditional distribution of X given Z is known under the null hypothesis and then it is compared to the distribution of the observed samples of the original data. The aim of this paper is to develop a novel alternative of CRT by using nearest-neighbor sampling without assuming the exact form of the distribution of X given Z. Specifically, we utilize the computationally efficient 1-nearest-neighbor to approximate the conditional distribution that encodes the null hypothesis. Then, theoretically, we show that the distribution of the generated samples is very close to the true conditional distribution in terms of total variation distance. Furthermore, we take the classifier-based conditional mutual information estimator as our test statistic. The test statistic as an empirical fundamental information theoretic quantity is able to well capture the conditional-dependence feature. We show that our proposed test is computationally very fast, while controlling type I and II errors quite well. Finally, we demonstrate the efficiency of our proposed test in both synthetic and real data analyses."]]></description>
<dc:subject>dependence_measures nearest_neighbors hypothesis_testing in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:35ae9f766e30/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dependence_measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:nearest_neighbors"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://journals.aps.org/pre/abstract/10.1103/PhysRevE.62.1912">
    <title>Phys. Rev. E 62, 1912 (2000) - Symbolic approach for measuring temporal “irreversibility”</title>
    <dc:date>2023-04-24T21:59:54+00:00</dc:date>
    <link>https://journals.aps.org/pre/abstract/10.1103/PhysRevE.62.1912</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We describe a symbolic approach for measuring temporal “irreversibility” in time-series measurements. Temporal irreversibility is important because it excludes Gaussian linear dynamics and static transformations of such dynamics from the set of possible generating processes. A symbolic method for measuring temporal irreversibility is attractive because it is computationally efficient, robust to noise, and simplifies statistical analysis of confidence limits. We propose a specific algorithm, called “false flipped symbols,” for establishing the presence of temporal irreversibility without the need for generating surrogate data. Besides characterizing experimental data, our results are relevant to the question of selecting alternative models. We illustrate our points with numerical model output and experimental measurements."]]></description>
<dc:subject>time_series statistical_inference_for_stochastic_processes symbolic_dynamics model_checking hypothesis_testing cleaning_out_the_filing_cabinet_for_the_first_time_since_2005 have_read to:NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:405b67266133/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistical_inference_for_stochastic_processes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:symbolic_dynamics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:model_checking"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:cleaning_out_the_filing_cabinet_for_the_first_time_since_2005"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2206.08052">
    <title>[2206.08052] Likelihood ratio test for structural changes in factor models</title>
    <dc:date>2022-06-19T17:04:32+00:00</dc:date>
    <link>https://arxiv.org/abs/2206.08052</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["A factor model with a break in its factor loadings is observationally equivalent to a model without changes in the loadings but a change in the variance of its factors. This effectively transforms a structural change problem of high dimension into a problem of low dimension. This paper considers the likelihood ratio (LR) test for a variance change in the estimated factors. The LR test implicitly explores a special feature of the estimated factors: the pre-break and post-break variances can be a singular matrix under the alternative hypothesis, making the LR test diverging faster and thus more powerful than Wald-type tests. The better power property of the LR test is also confirmed by simulations. We also consider mean changes and multiple breaks. We apply the procedure to the factor modelling and structural change of the US employment using monthly industry-level-data."

--- The first sentence, while obviously correct, is also obviously an excuse for me to get one of my hobby-horses out for a ride...]]></description>
<dc:subject>to:NB factor_analysis change-point_problem hypothesis_testing time_series re:g_paper</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:244e00db99fd/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:factor_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:change-point_problem"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:g_paper"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2203.08089">
    <title>[2203.08089] On Suspicious Coincidences and Pointwise Mutual Information</title>
    <dc:date>2022-06-15T18:56:22+00:00</dc:date>
    <link>https://arxiv.org/abs/2203.08089</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Barlow (1985) hypothesized that the co-occurrence of two events A and B is "suspicious" if P(A,B)≫P(A)P(B). We first review classical measures of association for 2×2 contingency tables, including Yule's Y (Yule, 1912), which depends only on the odds ratio λ, and is independent of the marginal probabilities of the table. We then discuss the mutual information (MI) and pointwise mutual information (PMI), which depend on the ratio P(A,B)/P(A)P(B), as measures of association. We show that, once the effect of the marginals is removed, MI and PMI behave similarly to Y as functions of λ. The pointwise mutual information is used extensively in some research communities for flagging suspicious coincidences, but it is important to bear in mind the sensitivity of the PMI to the marginals, with increased scores for sparser events."]]></description>
<dc:subject>to:NB likelihood hypothesis_testing information_theory</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:1a4b2498b6a5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:likelihood"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:information_theory"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://amstat.tandfonline.com/doi/full/10.1080/01621459.2021.1969239#.YqNL_uzMIZE">
    <title>Functional Estimation and Change Detection for Nonstationary Time Series: Journal of the American Statistical Association: Vol 0, No 0</title>
    <dc:date>2022-06-11T04:59:50+00:00</dc:date>
    <link>https://amstat.tandfonline.com/doi/full/10.1080/01621459.2021.1969239#.YqNL_uzMIZE</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Tests for structural breaks in time series should ideally be sensitive to breaks in the parameter of interest, while being robust to nuisance changes. Statistical analysis thus needs to allow for some form of nonstationarity under the null hypothesis of no change. In this article, estimators for integrated parameters of locally stationary time series are constructed and a corresponding functional central limit theorem is established, enabling change-point inference for a broad class of parameters under mild assumptions. The proposed framework covers all parameters which may be expressed as nonlinear functions of moments, for example kurtosis, autocorrelation, and coefficients in a linear regression model. To perform feasible inference based on the derived limit distribution, a bootstrap variant is proposed and its consistency is established. The methodology is illustrated by means of a simulation study and by an application to high-frequency asset prices."]]></description>
<dc:subject>to:NB non-stationarity change-point_problem hypothesis_testing re:codename:catherine_wheel</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:e517234d0ed3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:non-stationarity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:change-point_problem"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:codename:catherine_wheel"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2206.02765">
    <title>[2206.02765] Communication-constrained hypothesis testing: Optimality, robustness, and reverse data processing inequalities</title>
    <dc:date>2022-06-07T14:18:20+00:00</dc:date>
    <link>https://arxiv.org/abs/2206.02765</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We study hypothesis testing under communication constraints, where each sample is quantized before being revealed to a statistician. Without communication constraints, it is well known that the sample complexity of simple binary hypothesis testing is characterized by the Hellinger distance between the distributions. We show that the sample complexity of simple binary hypothesis testing under communication constraints is at most a logarithmic factor larger than in the unconstrained setting and this bound is tight. We develop a polynomial-time algorithm that achieves the aforementioned sample complexity. Our framework extends to robust hypothesis testing, where the distributions are corrupted in the total variation distance. Our proofs rely on a new reverse data processing inequality and a reverse Markov inequality, which may be of independent interest. For simple M-ary hypothesis testing, the sample complexity in the absence of communication constraints has a logarithmic dependence on M. We show that communication constraints can cause an exponential blow-up leading to Ω(M) sample complexity even for adaptive algorithms."]]></description>
<dc:subject>to:NB hypothesis_testing information_theory distributed_systems via:mraginsky</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:d72c10f5aae3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:information_theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:distributed_systems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:mraginsky"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.nber.org/papers/w29702">
    <title>Incentive-Compatible Critical Values | NBER</title>
    <dc:date>2022-02-02T23:02:02+00:00</dc:date>
    <link>https://www.nber.org/papers/w29702</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Statistically significant results are more rewarded than insignificant ones, so researchers have the incentive to pursue statistical significance. Such p-hacking reduces the informativeness of hypothesis tests by making significant results much more common than they are supposed to be in the absence of true significance. To address this problem, we construct critical values of test statistics such that, if these values are used to determine significance, and if researchers optimally respond to these new significance standards, then significant results occur with the desired frequency. Such incentive-compatible critical values allow for p-hacking so they are larger than classical critical values. Using evidence from the social and medical sciences, we find that the incentive-compatible critical value for any test and any significance level is the classical critical value for the same test with approximately one fifth of the significance level—a form of Bonferroni correction. For instance, for a z-test with a significance level of 5%, the incentive-compatible critical value is 2.31 instead of 1.65 if the test is one-sided and 2.57 instead of 1.96 if the test is two-sided."

--- I fail to see how this could possibly be determined without knowing about the search process.]]></description>
<dc:subject>to:NB hypothesis_testing science_as_a_social_process color_me_skeptical</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:e5bd92d30af7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:science_as_a_social_process"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:color_me_skeptical"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://doi.org/10.1257/jep.35.3.157">
    <title>Statistical Significance, p-Values, and the Reporting of Uncertainty - American Economic Association</title>
    <dc:date>2021-08-05T02:56:36+00:00</dc:date>
    <link>https://doi.org/10.1257/jep.35.3.157</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The use of statistical significance and p-values has become a matter of substantial controversy in various fields using statistical methods. This has gone as far as some journals banning the use of indicators for statistical significance, or even any reports of p-values, and, in one case, any mention of confidence intervals. I discuss three of the issues that have led to these often-heated debates. First, I argue that in many cases, p-values and indicators of statistical significance do not answer the questions of primary interest. Such questions typically involve making (recommendations on) decisions under uncertainty. In that case, point estimates and measures of uncertainty in the form of confidence intervals or even better, Bayesian intervals, are often more informative summary statistics. In fact, in that case, the presence or absence of statistical significance is essentially irrelevant, and including them in the discussion may confuse the matter at hand. Second, I argue that there are also cases where testing null hypotheses is a natural goal and where p-values are reasonable and appropriate summary statistics. I conclude that banning them in general is counterproductive. Third, I discuss that the overemphasis in empirical work on statistical significance has led to abuse of p-values in the form of p-hacking and publication bias. The use of pre-analysis plans and replication studies, in combination with lowering the emphasis on statistical significance may help address these problems."]]></description>
<dc:subject>to:NB to_read statistics hypothesis_testing confidence_sets decision_theory</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:e1aad5699696/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:confidence_sets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:decision_theory"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2104.14676">
    <title>[2104.14676] Gaussian Universal Likelihood Ratio Testing</title>
    <dc:date>2021-05-30T21:08:43+00:00</dc:date>
    <link>https://arxiv.org/abs/2104.14676</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The likelihood ratio test (LRT) based on the asymptotic chi-squared distribution of the log likelihood is one of the fundamental tools of statistical inference. A recent universal LRT approach based on sample splitting provides valid hypothesis tests and confidence sets in any setting for which we can compute the split likelihood ratio statistic (or, more generally, an upper bound on the null maximum likelihood). The universal LRT is valid in finite samples and without regularity conditions. This test empowers statisticians to construct tests in settings for which no valid hypothesis test previously existed. For the simple but fundamental case of testing the population mean of d-dimensional Gaussian data, the usual LRT itself applies and thus serves as a perfect test bed to compare against the universal LRT. This work presents the first in-depth exploration of the size, power, and relationships between several universal LRT variants. We show that a repeated subsampling approach is the best choice in terms of size and power. We observe reasonable performance even in a high-dimensional setting, where the expected squared radius of the best universal LRT confidence set is approximately 3/2 times the squared radius of the standard LRT-based set. We illustrate the benefits of the universal LRT through testing a non-convex doughnut-shaped null hypothesis, where a universal inference procedure can have higher power than a standard approach."]]></description>
<dc:subject>to:NB hypothesis_testing statistics kith_and_kin ramdas.aaditya balakrishnan.sivaraman wasserman.larry dunn.robin</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:8ecc6c9bd8b2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:ramdas.aaditya"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:balakrishnan.sivaraman"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:wasserman.larry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dunn.robin"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2104.08279">
    <title>[2104.08279] Testing for Outliers with Conformal p-values</title>
    <dc:date>2021-04-21T15:02:37+00:00</dc:date>
    <link>https://arxiv.org/abs/2104.08279</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This paper studies the construction of p-values for nonparametric outlier detection, taking a multiple-testing perspective. The goal is to test whether new independent samples belong to the same distribution as a reference data set or are outliers. We propose a solution based on conformal inference, a broadly applicable framework which yields p-values that are marginally valid but mutually dependent for different test points. We prove these p-values are positively dependent and enable exact false discovery rate control, although in a relatively weak marginal sense. We then introduce a new method to compute p-values that are both valid conditionally on the training data and independent of each other for different test points; this paves the way to stronger type-I error guarantees. Our results depart from classical conformal inference as we leverage concentration inequalities rather than combinatorial arguments to establish our finite-sample guarantees. Furthermore, our techniques also yield a uniform confidence bound for the false positive rate of any outlier detection algorithm, as a function of the threshold applied to its raw statistics. Finally, the relevance of our results is demonstrated by numerical experiments on real and simulated data."]]></description>
<dc:subject>outlier_detection conformal_prediction hypothesis_testing in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:a1a96f4211cf/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:outlier_detection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:conformal_prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2009.00503">
    <title>[2009.00503] Informative Goodness-of-Fit for Multivariate Distributions</title>
    <dc:date>2021-04-16T19:35:35+00:00</dc:date>
    <link>https://arxiv.org/abs/2009.00503</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This article introduces an informative goodness-of-fit (iGOF) approach to study multivariate distributions. When the null model is rejected, iGOF allows us to identify the underlying sources of mismodeling and naturally equips practitioners with additional insights on the nature of the deviations from the true distribution. The informative character of the procedure is achieved by exploiting smooth tests and random fields theory to facilitate the analysis of multivariate data. Simulation studies show that iGOF enjoys high power for different types of alternatives. The methods presented here directly address the problem of background mismodeling arising in physics and astronomy. It is in these areas that the motivation of this work is rooted."

--- From the abstract it sounds like this is using the fact that a Neyman smooth test involves characterizing the departure from the null, which is a basic observation about them but no doubt under-exploited.  (Cf. appendix on such tests in ADAfaEPoV.)]]></description>
<dc:subject>to:NB goodness-of-fit hypothesis_testing density_estimation neyman_smooth_tests model_checking re:ADAfaEPoV to_read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:7de4c3ae2a11/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:goodness-of-fit"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:density_estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neyman_smooth_tests"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:model_checking"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:ADAfaEPoV"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.annualreviews.org/doi/abs/10.1146/annurev-statistics-040720-024710">
    <title>A Review of Empirical Likelihood | Annual Review of Statistics and Its Application</title>
    <dc:date>2021-04-15T14:57:59+00:00</dc:date>
    <link>https://www.annualreviews.org/doi/abs/10.1146/annurev-statistics-040720-024710</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Empirical likelihood is a popular nonparametric analog of the usual parametric likelihood, inheriting many of the large-sample properties of the latter construct. This article presents a review of the empirical likelihood approach from its introduction 30 years ago, up to recent theoretical developments. Aspects of computation and connections between empirical likelihood and other likelihood-type quantities are also explored. The article ends with a discussion of some directions for future research."]]></description>
<dc:subject>empirical_likelihood likelihood hypothesis_testing estimation statistics lazar.nicole in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:9fb214584349/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:empirical_likelihood"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:likelihood"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lazar.nicole"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2104.03167">
    <title>[2104.03167] Random graphs with node and block effects: models, goodness-of-fit tests, and applications to biological networks</title>
    <dc:date>2021-04-12T17:06:07+00:00</dc:date>
    <link>https://arxiv.org/abs/2104.03167</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Many popular models from the networks literature can be viewed through a common lens. We describe it here and call the class of models log-linear ERGMs. It includes degree-based models, stochastic blockmodels, and combinations of these. Given the interest in combined node and block effects in network formation mechanisms, we introduce a general directed relative of the degree-corrected stochastic blockmodel: an exponential family model we call p1-SBM. It is a generalization of several well-known variants of the blockmodel.
"We study the problem of testing model fit for the log-linear ERGM class.
"The model fitting approach we take, through the use of quick estimation algorithms borrowed from the contingency table literature and effective sampling methods rooted in graph theory and algebraic statistics, results in an exact test whose p-value can be approximated efficiently in networks of moderate sizes.
"We showcase the performance of the method on two data sets from biology: the connectome of C. elegans and the interactome of Arabidopsis thaliana. These two networks, a neuronal network and a protein-protein interaction network, have been popular examples in the network science literature, but a model-based approach to studying them has been missing thus far."]]></description>
<dc:subject>to:NB stochastic_block_models network_data_analysis estimation exponential_family_random_graphs hypothesis_testing algebraic_statistics goodness-of-fit</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:3dc0c1b71041/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:stochastic_block_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:exponential_family_random_graphs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:algebraic_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:goodness-of-fit"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2103.08402">
    <title>[2103.08402] Valid sequential inference on probability forecast performance</title>
    <dc:date>2021-04-12T03:18:34+00:00</dc:date>
    <link>https://arxiv.org/abs/2103.08402</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Probability forecasts for binary events play a central role in many applications. Their quality is commonly assessed with proper scoring rules, which assign forecasts a numerical score such that a correct forecast achieves a minimal expected score. In this paper, we construct e-values for testing the statistical significance of score differences of competing forecasts in sequential settings. E-values have been proposed as an alternative to p-values for hypothesis testing, and they can easily be transformed into conservative p-values by taking the multiplicative inverse. The e-values proposed in this article are valid in finite samples without any assumptions on the data generating processes. They also allow optional stopping, so a forecast user may decide to interrupt evaluation taking into account the available data at any time and still draw statistically valid inference, which is generally not true for classical p-value based tests. In a case study on postprocessing of precipitation forecasts, state-of-the-art forecasts dominance tests and e-values lead to the same conclusions."]]></description>
<dc:subject>prediction hypothesis_testing scoring_rules in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:4b0ebfc8a677/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:scoring_rules"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://ieeexplore.ieee.org/document/9301324">
    <title>Distributed Chernoff Test: Optimal Decision Systems Over Networks | IEEE Journals &amp; Magazine | IEEE Xplore</title>
    <dc:date>2021-04-10T04:26:10+00:00</dc:date>
    <link>https://ieeexplore.ieee.org/document/9301324</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We study “active” decision making over sensor networks where the sensors’ sequential probing actions are actively chosen by continuously learning from past observations. We consider two network settings: with and without central coordination. In the first case, the network nodes interact with each other through a central entity, which plays the role of a fusion center. In the second case, the network nodes interact in a fully distributed fashion. In both of these scenarios, we propose sequential and adaptive hypothesis tests extending the classic Chernoff test. We compare the performance of the proposed tests to the optimal sequential test. In the presence of a fusion center, our test achieves the same asymptotic optimality of the Chernoff test, minimizing the risk, expressed by the expected cost required to reach a decision plus the expected cost of making a wrong decision, when the observation cost per unit time tends to zero. The test is also asymptotically optimal in the higher moments of the time required to reach a decision. Additionally, the test is parsimonious in terms of communications, and the expected number of channel uses per network node tends to a small constant. In the distributed setup, our test achieves the same asymptotic optimality of Chernoff’s test, up to a multiplicative constant in terms of both risk and the higher moments of the decision time. Additionally, the test is parsimonious in terms of communications in comparison to state-of-the-art schemes proposed in the literature. The analysis of these tests is also extended to account for message quantization and communication over channels with random erasures."]]></description>
<dc:subject>to:NB hypothesis_testing distributed_systems</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:3224565c072d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:distributed_systems"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://jmlr.org/papers/v22/17-570.html">
    <title>On the Optimality of Kernel-Embedding Based Goodness-of-Fit Tests</title>
    <dc:date>2021-04-10T04:21:42+00:00</dc:date>
    <link>https://jmlr.org/papers/v22/17-570.html</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The reproducing kernel Hilbert space (RKHS) embedding of distributions offers a general and flexible framework for testing problems in arbitrary domains and has attracted considerable amount of attention in recent years. To gain insights into their operating characteristics, we study here the statistical performance of such approaches within a minimax framework. Focusing on the case of goodness-of-fit tests, our analyses show that a vanilla version of the kernel embedding based test could be minimax suboptimal, when considering χ² distance as the separation metric. Hence we suggest a simple remedy by moderating the embedding. We prove that the moderated approach provides optimal tests for a wide range of deviations from the null and can also be made adaptive over a large collection of interpolation spaces. Numerical experiments are presented to further demonstrate the merits of our approach."

]]></description>
<dc:subject>to:NB statistics kernel_methods hilbert_space goodness-of-fit hypothesis_testing minimax</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:48eed823ffac/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kernel_methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hilbert_space"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:goodness-of-fit"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:minimax"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.pnas.org/content/117/29/16880.short">
    <title>Universal inference | PNAS</title>
    <dc:date>2021-04-09T19:07:06+00:00</dc:date>
    <link>https://www.pnas.org/content/117/29/16880.short</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We propose a general method for constructing confidence sets and hypothesis tests that have finite-sample guarantees without regularity conditions. We refer to such procedures as “universal.” The method is very simple and is based on a modified version of the usual likelihood-ratio statistic that we call “the split likelihood-ratio test” (split LRT) statistic. The (limiting) null distribution of the classical likelihood-ratio statistic is often intractable when used to test composite null hypotheses in irregular statistical models. Our method is especially appealing for statistical inference in these complex setups. The method we suggest works for any parametric model and also for some nonparametric models, as long as computing a maximum-likelihood estimator (MLE) is feasible under the null. Canonical examples arise in mixture modeling and shape-constrained inference, for which constructing tests and confidence sets has been notoriously difficult. We also develop various extensions of our basic methods. We show that in settings when computing the MLE is hard, for the purpose of constructing valid tests and intervals, it is sufficient to upper bound the maximum likelihood. We investigate some conditions under which our methods yield valid inferences under model misspecification. Further, the split LRT can be used with profile likelihoods to deal with nuisance parameters, and it can also be run sequentially to yield anytime-valid P values and confidence sequences. Finally, when combined with the method of sieves, it can be used to perform model selection with nested model classes."]]></description>
<dc:subject>to:NB have_read hypothesis_testing confidence_sets statistics kith_and_kin wasserman.larry ramdas.aaditya to_teach:undergrad-ADA re:HEAS balakrishnan.sivaraman</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:5825e245a7b6/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:confidence_sets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:wasserman.larry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:ramdas.aaditya"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:undergrad-ADA"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:HEAS"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:balakrishnan.sivaraman"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://academic.oup.com/biomet/article-abstract/107/4/771/5875781">
    <title>On testing marginal versus conditional independence | Biometrika | Oxford Academic</title>
    <dc:date>2021-03-16T17:24:01+00:00</dc:date>
    <link>https://academic.oup.com/biomet/article-abstract/107/4/771/5875781</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We consider testing marginal independence versus conditional independence in a trivariate Gaussian setting. The two models are nonnested, and their intersection is a union of two marginal independences. We consider two sequences of such models, one from each type of independence, that are closest to each other in the Kullback–Leibler sense as they approach the intersection. They become indistinguishable if the signal strength, as measured by the product of two correlation parameters, decreases faster than the standard parametric rate. Under local alternatives at such a rate, we show that the asymptotic distribution of the likelihood ratio depends on where and how the local alternatives approach the intersection. To deal with this nonuniformity, we study a class of envelope distributions by taking pointwise suprema over asymptotic cumulative distribution functions. We show that these envelope distributions are well behaved and lead to model selection procedures with rate-free uniform error guarantees and near-optimal power. To control the error even when the two models are indistinguishable, rather than insist on a dichotomous choice, the proposed procedure will choose either or both models."

--- The "uniform error guarantees" part is really exciting.]]></description>
<dc:subject>to:NB to_read hypothesis_testing dependence_measures richardson.thomas statistics via:richardson.thomas model_selection</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:d80a74e1eea1/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dependence_measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:richardson.thomas"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:richardson.thomas"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:model_selection"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://projecteuclid.org/euclid.aos/1611889233">
    <title>Kim , Ramdas , Singh , Wasserman : Classification accuracy as a proxy for two-sample testing</title>
    <dc:date>2021-02-04T15:31:39+00:00</dc:date>
    <link>https://projecteuclid.org/euclid.aos/1611889233</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["When data analysts train a classifier and check if its accuracy is significantly different from chance, they are implicitly performing a two-sample test. We investigate the statistical properties of this flexible approach in the high-dimensional setting. We prove two results that hold for all classifiers in any dimensions: if its true error remains ϵ-better than chance for some ϵ>0 as d,n→∞, then (a) the permutation-based test is consistent (has power approaching to one), (b) a computationally efficient test based on a Gaussian approximation of the null distribution is also consistent. To get a finer understanding of the rates of consistency, we study a specialized setting of distinguishing Gaussians with mean-difference δ and common (known or unknown) covariance Σ, when d/n→c∈(0,∞). We study variants of Fisher’s linear discriminant analysis (LDA) such as “naive Bayes” in a nontrivial regime when ϵ→0 (the Bayes classifier has true accuracy approaching 1/2), and contrast their power with corresponding variants of Hotelling’s test. Surprisingly, the expressions for their power match exactly in terms of n, d, δ, Σ, and the LDA approach is only worse by a constant factor, achieving an asymptotic relative efficiency (ARE) of 1/√π for balanced samples. We also extend our results to high-dimensional elliptical distributions with finite kurtosis. Other results of independent interest include minimax lower bounds, and the optimality of Hotelling’s test when d=o(n). Simulation results validate our theory, and we present practical takeaway messages along with natural open problems."]]></description>
<dc:subject>to:NB hypothesis_testing two-sample_tests classifiers high-dimensional_statistics heard_the_talk kith_and_kin singh.aarti wasserman.larry ramdas.aaditya</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:9a8de542290c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:two-sample_tests"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:high-dimensional_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:heard_the_talk"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:singh.aarti"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:wasserman.larry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:ramdas.aaditya"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1912.03662">
    <title>[1912.03662] The Binary Expansion Randomized Ensemble Test (BERET)</title>
    <dc:date>2021-01-11T16:30:45+00:00</dc:date>
    <link>https://arxiv.org/abs/1912.03662</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Recently, the binary expansion testing framework was introduced to test the independence of two continuous random variables by utilizing symmetry statistics that are complete sufficient statistics for dependence. We develop a new test based on an ensemble approach that uses the sum of squared symmetry statistics and distance correlation. Simulation studies suggest that this method improves the power while preserving the clear interpretation of the binary expansion testing. We extend this method to tests of independence of random vectors in arbitrary dimension. Through random projections, the proposed binary expansion randomized ensemble test transforms the multivariate independence testing problem into a univariate problem. Simulation studies and data example analyses show that the proposed method provides relatively robust performance compared with existing methods."]]></description>
<dc:subject>to:NB dependence_measures random_projections hypothesis_testing independence_and_condtional_independence_testing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:3eb4f7e10b8e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dependence_measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:random_projections"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:independence_and_condtional_independence_testing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2012.14530">
    <title>[2012.14530] On the T-test</title>
    <dc:date>2021-01-03T20:07:48+00:00</dc:date>
    <link>https://arxiv.org/abs/2012.14530</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The T-test is probably the most popular statistical test; it is routinely recommended by the textbooks. The applicability of the test relies upon the validity of normal or Student's approximation to the distribution of Student's statistic t_n. However, the latter assumption is not valid as often as assumed. We show that normal or Student's approximation to L(t_n) does not hold uniformly even in the class n of samples from zero-mean unit-variance bounded distributions. We present lower bounds to the corresponding error. The fact that a non-parametric test is not applicable uniformly to samples from the class n seems to be established for the first time. It means the T-test can be misleading, and should not be recommended in its present form. We suggest a generalisation of the test that allows for variability of possible limiting/approximating distributions to L(t_n)."

--- This is not a well-written article (in particular there's a lot of repetition), but the basic point about convergence to the limiting Gaussian or Student distribution being non-uniform and potentially very slow is sound.  The non-appearance of the word "bootstrap" in the paper makes me think the author is almost certainly a probabilist rather than a practicing statistician.]]></description>
<dc:subject>to:NB probability hypothesis_testing to_teach:linear_models central_limit_theorem have_skimmed</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:e8e6341bc967/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:probability"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:linear_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:central_limit_theorem"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_skimmed"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://projecteuclid.org/euclid.ejs/1609384079">
    <title>Duan , Ramdas , Balakrishnan , Wasserman : Interactive martingale tests for the global null</title>
    <dc:date>2021-01-03T19:46:24+00:00</dc:date>
    <link>https://projecteuclid.org/euclid.ejs/1609384079</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Global null testing is a classical problem going back about a century to Fisher’s and Stouffer’s combination tests. In this work, we present simple martingale analogs of these classical tests, which are applicable in two distinct settings: (a) the online setting in which there is a possibly infinite sequence of p-values, and (b) the batch setting, where one uses prior knowledge to preorder the hypotheses. Through theory and simulations, we demonstrate that our martingale variants have higher power than their classical counterparts even when the preordering is only weakly informative. Finally, using a recent idea of “masking” p-values, we develop a novel interactive test for the global null that can take advantage of covariates and repeated user guidance to create a data-adaptive ordering that achieves higher detection power against structured alternatives."]]></description>
<dc:subject>to:NB hypothesis_testing kith_and_kin wasserman.larry ramdas.aaditya balakrishnan.sivaraman martingales</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:eb736278718f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:wasserman.larry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:ramdas.aaditya"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:balakrishnan.sivaraman"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:martingales"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://projecteuclid.org/euclid.ss/1608541216">
    <title>Gao , Ma : Minimax Rates in Network Analysis: Graphon Estimation, Community Detection and Hypothesis Testing</title>
    <dc:date>2020-12-21T14:11:01+00:00</dc:date>
    <link>https://projecteuclid.org/euclid.ss/1608541216</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This paper surveys some recent developments in fundamental limits and optimal algorithms for network analysis. We focus on minimax optimal rates in three fundamental problems of network analysis: graphon estimation, community detection and hypothesis testing. For each problem, we review state-of-the-art results in the literature followed by general principles behind the optimal procedures that lead to minimax estimation and testing. This allows us to connect problems in network analysis to other statistical inference problems from a general perspective."]]></description>
<dc:subject>to:NB network_data_analysis graph_limits hypothesis_testing minimax nonparametrics re:smoothing_adjacency_matrices community_discovery to_read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:4146f1114577/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:graph_limits"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:minimax"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:nonparametrics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:smoothing_adjacency_matrices"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:community_discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://academic.oup.com/biomet/article-abstract/107/4/791/5856302?redirectedFrom=fulltext">
    <title>Combining p-values via averaging | Biometrika | Oxford Academic</title>
    <dc:date>2020-12-17T01:40:44+00:00</dc:date>
    <link>https://academic.oup.com/biomet/article-abstract/107/4/791/5856302?redirectedFrom=fulltext</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This paper proposes general methods for the problem of multiple testing of a single hypothesis, with a standard goal of combining a number of p-values without making any assumptions about their dependence structure. A result by Rüschendorf (1982) and, independently, Meng (1993) implies that the p-values can be combined by scaling up their arithmetic mean by a factor of 2, and no smaller factor is sufficient in general. A similar result by Mattner about the geometric mean replaces 2 by e. Based on more recent developments in mathematical finance, specifically, robust risk aggregation techniques, we extend these results to generalized means; in particular, we show that K p-values can be combined by scaling up their harmonic mean by a factor of log K asymptotically as K tends to infinity. This leads to a generalized version of the Bonferroni–Holm procedure. We also explore methods using weighted averages of p-values. Finally, we discuss the efficiency of various methods of combining p-values and how to choose a suitable method in light of data and prior information."]]></description>
<dc:subject>to:NB multiple_testing meta-analysis hypothesis_testing statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:29adc59c22f3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:multiple_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:meta-analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.nature.com/articles/s41562-020-0844-7">
    <title>Neutral syndrome | Nature Human Behaviour</title>
    <dc:date>2020-12-15T15:17:10+00:00</dc:date>
    <link>https://www.nature.com/articles/s41562-020-0844-7</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Neutral models of evolution assume the absence of natural selection. Formerly confined to ecology and evolutionary biology, neutral models are spreading. In recent years they’ve been applied to explaining the diversity of baby names, scientific citations, cryptocurrencies, pot decorations, literary lexica, tumour variants and much more besides. Here, we survey important neutral models and highlight their similarities. We investigate the most widely used tests of neutrality, show that they are weak and suggest more powerful methods. We conclude by discussing the role of neutral models in the explanation of diversity. We suggest that the ability of neutral models to fit low-information distributions should not be taken as evidence for the absence of selection. Nevertheless, many studies, in increasingly diverse fields, make just such claims. We call this tendency ‘neutral syndrome’."

--- Comments in advance of reading: surely the right approach here is to embed the neutral model in a larger model which does include selection/adaptation, and form a confidence interval for the strength of adaptation.  To put it in the language of my guru Deborah Mayo, if neutral models pass severe tests, that's not evidence _for_ adaptation.

]]></description>
<dc:subject>to:NB to_read neutral_models social_science_methodology hypothesis_testing via:rvenkat evolution</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:68bbd7f3fb0f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neutral_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_science_methodology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:rvenkat"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:evolution"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2002.10399">
    <title>[2002.10399] Confidence Sets and Hypothesis Testing in a Likelihood-Free Inference Setting</title>
    <dc:date>2020-12-13T23:29:48+00:00</dc:date>
    <link>https://arxiv.org/abs/2002.10399</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Parameter estimation, statistical tests and confidence sets are the cornerstones of classical statistics that allow scientists to make inferences about the underlying process that generated the observed data. A key question is whether one can still construct hypothesis tests and confidence sets with proper coverage and high power in a so-called likelihood-free inference (LFI) setting; that is, a setting where the likelihood is not explicitly known but one can forward-simulate observable data according to a stochastic model. In this paper, we present 𝙰𝙲𝙾𝚁𝙴 (Approximate Computation via Odds Ratio Estimation), a frequentist approach to LFI that first formulates the classical likelihood ratio test (LRT) as a parametrized classification problem, and then uses the equivalence of tests and confidence sets to build confidence regions for parameters of interest. We also present a goodness-of-fit procedure for checking whether the constructed tests and confidence regions are valid. 𝙰𝙲𝙾𝚁𝙴 is based on the key observation that the LRT statistic, the rejection probability of the test, and the coverage of the confidence set are conditional distribution functions which often vary smoothly as a function of the parameters of interest. Hence, instead of relying solely on samples simulated at fixed parameter settings (as is the convention in standard Monte Carlo solutions), one can leverage machine learning tools and data simulated in the neighborhood of a parameter to improve estimates of quantities of interest. We demonstrate the efficacy of 𝙰𝙲𝙾𝚁𝙴 with both theoretical and empirical results. Our implementation is available on Github."]]></description>
<dc:subject>have_read heard_the_talk approved_the_thesis_proposal simulation-based_inference lee.ann_b. izbicki.rafael dalmasso.niccolo statistics confidence_sets hypothesis_testing in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:f4683e889416/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:heard_the_talk"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:approved_the_thesis_proposal"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:simulation-based_inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lee.ann_b."/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:izbicki.rafael"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dalmasso.niccolo"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:confidence_sets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2012.05784">
    <title>[2012.05784] Detecting Structured Signals in Ising Models</title>
    <dc:date>2020-12-12T19:58:07+00:00</dc:date>
    <link>https://arxiv.org/abs/2012.05784</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In this paper, we study the effect of dependence on detecting a class of signals in Ising models, where the signals are present in a structured way. Examples include Ising Models on lattices, and Mean-Field type Ising Models (Erdős-Rényi, Random regular, and dense graphs). Our results rely on correlation decay and mixing type behavior for Ising Models, and demonstrate the beneficial behavior of criticality in the detection of strictly lower signals. As a by-product of our proof technique, we develop sharp control on mixing and spin-spin correlation for several Mean-Field type Ising Models in all regimes of temperature -- which might be of independent interest."]]></description>
<dc:subject>to:NB ising_model hypothesis_testing random_fields</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:aca767ba90f5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:ising_model"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:random_fields"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://projecteuclid.org/euclid.aos/1607677236">
    <title>Karmakar , Small : Assessment of the extent of corroboration of an elaborate theory of a causal hypothesis using partial conjunctions of evidence factors</title>
    <dc:date>2020-12-11T17:32:24+00:00</dc:date>
    <link>https://projecteuclid.org/euclid.aos/1607677236</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["An elaborate theory of predictions of a causal hypothesis consists of several falsifiable statements derived from the causal hypothesis. Statistical tests for the various pieces of the elaborate theory help to clarify how much the causal hypothesis is corroborated. In practice, the degree of corroboration of the causal hypothesis has been assessed by a verbal description of which of the several tests provides evidence for which of the several predictions. This verbal approach can miss quantitative patterns. In this paper, we develop a quantitative approach. We first decompose these various tests of the predictions into independent factors with different sources of potential biases. Support for the causal hypothesis is enhanced when many of these evidence factors support the predictions. A sensitivity analysis is used to assess the potential bias that could make the finding of the tests spurious. Along with this multiparameter sensitivity analysis, we consider the partial conjunctions of the tests. These partial conjunctions quantify the evidence supporting various fractions of the collection of predictions. A partial conjunction test involves combining tests of the components in the partial conjunction. We find the asymptotically optimal combination of tests in the context of a sensitivity analysis. Our analysis of an elaborate theory of a causal hypothesis controls for the familywise error rate."]]></description>
<dc:subject>to:NB causal_inference statistics hypothesis_testing model_checking small.dylan</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:1f0374e8b6e0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:causal_inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:model_checking"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:small.dylan"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://doi.org/10.1111/jtsa.12554">
    <title>Robust discrimination between long‐range dependence and a change in mean - Gerstenberger - 2021 - Journal of Time Series Analysis - Wiley Online Library</title>
    <dc:date>2020-12-10T12:23:25+00:00</dc:date>
    <link>https://doi.org/10.1111/jtsa.12554</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In this article we introduce a robust to outliers Wilcoxon change‐point testing procedure, for distinguishing between short‐range dependent time series with a change in mean at unknown time and stationary long‐range dependent time series. We establish the asymptotic distribution of the test statistic under the null hypothesis for L1 near epoch dependent processes and show its consistency under the alternative. The Wilcoxon‐type testing procedure similarly as the CUSUM‐type testing procedure (of Berkes I., Horváth L., Kokoszka P. and Shao Q. 2006. Ann.Statist. 34:1140–1165), requires estimation of the location of a possible change‐point, and then using pre‐ and post‐break subsamples to discriminate between short and long‐range dependence. A simulation study examines the empirical size and power of the Wilcoxon‐type testing procedure in standard cases and with disturbances by outliers. It shows that in standard cases the Wilcoxon‐type testing procedure behaves equally well as the CUSUM‐type testing procedure but outperforms it in presence of outliers. We also apply both testing procedure to hydrologic data."]]></description>
<dc:subject>to:NB time_series change-point_problem long-range_dependence statistics hypothesis_testing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:8d49b1722b99/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:change-point_problem"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:long-range_dependence"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2007.04727">
    <title>[2007.04727] Supplemental Studies for Simultaneous Goodness-of-Fit Testing</title>
    <dc:date>2020-12-07T15:28:30+00:00</dc:date>
    <link>https://arxiv.org/abs/2007.04727</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Testing to see whether a given data set comes from some specified distribution is among the oldest types of problems in Statistics. Many such tests have been developed and their performance studied. The general result has been that while a certain test might perform well, aka have good power, in one situation it will fail badly in others. This is not a surprise given the great many ways in which a distribution can differ from the one specified in the null hypothesis. It is therefore very difficult to decide a priori which test to use. The obvious solution is not to rely on any one test but to run several of them. This however leads to the problem of simultaneous inference, that is, if several tests are done even if the null hypothesis were true, one of them is likely to reject it anyway just by random chance. In this paper we present a method that yields a p value that is uniform under the null hypothesis no matter how many tests are run. This is achieved by adjusting the p value via simulation. While this adjustment method is not new, it has not previously been used in the context of goodness-of-fit testing. We present a number of simulation studies that show the uniformity of the p value and others that show that this test is superior to any one test if the power is averaged over a large number of cases."]]></description>
<dc:subject>to:NB hypothesis_testing multiple_testing goodness-of-fit statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:49a48e200b24/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:multiple_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:goodness-of-fit"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2009.09440">
    <title>[2009.09440] The Significance Filter, the Winner's Curse and the Need to Shrink</title>
    <dc:date>2020-12-07T04:45:41+00:00</dc:date>
    <link>https://arxiv.org/abs/2009.09440</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The "significance filter" refers to focusing exclusively on statistically significant results. Since frequentist properties such as unbiasedness and coverage are valid only before the data have been observed, there are no guarantees if we condition on significance. In fact, the significance filter leads to overestimation of the magnitude of the parameter, which has been called the "winner's curse". It can also lead to undercoverage of the confidence interval. Moreover, these problems become more severe if the power is low. While these issues clearly deserve our attention, they have been studied only informally and mathematical results are lacking. Here we study them from the frequentist and the Bayesian perspective. We prove that the relative bias of the magnitude is a decreasing function of the power and that the usual confidence interval undercovers when the power is less than 50%. We conclude that failure to apply the appropriate amount of shrinkage can lead to misleading inferences."]]></description>
<dc:subject>to:NB meta-analysis hypothesis_testing via:?</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:afaca6b1e98e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:meta-analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:?"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1809.04587">
    <title>[1809.04587] Distributed Chernoff Test: Optimal decision systems over networks</title>
    <dc:date>2020-11-30T03:52:01+00:00</dc:date>
    <link>https://arxiv.org/abs/1809.04587</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We study "active" decision making over sensor networks where the sensors' sequential probing actions are actively chosen by continuously learning from past observations. We consider two network settings: with and without central coordination. In the first case, the network nodes interact with each other through a central entity, which plays the role of a fusion center. In the second case, the network nodes interact in a fully distributed fashion. In both of these scenarios, we propose sequential and adaptive hypothesis tests extending the classic Chernoff test. We compare the performance of the proposed tests to the optimal sequential test. In the presence of a fusion center, our test achieves the same asymptotic optimality of the Chernoff test, minimizing the risk, expressed by the expected cost required to reach a decision plus the expected cost of making a wrong decision, when the observation cost per unit time tends to zero. The test is also asymptotically optimal in the higher moments of the time required to reach a decision. Additionally, the test is parsimonious in terms of communications, and the expected number of channel uses per network node tends to a small constant. In the distributed setup, our test achieves the same asymptotic optimality of Chernoff's test, up to a multiplicative constant in terms of both risk and the higher moments of the decision time. Additionally, the test is parsimonious in terms of communications in comparison to state-of-the-art schemes proposed in the literature. The analysis of these tests is also extended to account for message quantization and communication over channels with random erasures."]]></description>
<dc:subject>to:NB hypothesis_testing distributed_systems active_learning experimental_design statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c4ddfaccc30b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:distributed_systems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:active_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:experimental_design"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1903.11117">
    <title>[1903.11117] Testing for Differences in Stochastic Network Structure</title>
    <dc:date>2020-11-25T14:52:28+00:00</dc:date>
    <link>https://arxiv.org/abs/1903.11117</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["How can one determine whether a community-level treatment, such as the introduction of a social program or trade shock, alters agents' incentives to form links in a network? This paper proposes analogues of a two-sample Kolmogorov-Smirnov test, widely used in the literature to test the null hypothesis of "no treatment effects", for network data. It first specifies a testing problem in which the null hypothesis is that two networks are drawn from the same random graph model. It then describes two randomization tests based on the magnitude of the difference between the networks' adjacency matrices as measured by the 2→2 and ∞→1 operator norms. Power properties of the tests are examined analytically, in simulation, and through two real-world applications. A key finding is that the test based on the ∞→1 norm can be substantially more powerful than that based on the 2→2 norm for the kinds of sparse and degree-heterogeneous networks common in economics."]]></description>
<dc:subject>to:NB network_data_analysis re:network_differences two-sample_tests hypothesis_testing to_read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:ba62b7e5b0dc/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:network_differences"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:two-sample_tests"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2008.03971">
    <title>[2008.03971] A Note on Likelihood Ratio Tests for Models with Latent Variables</title>
    <dc:date>2020-11-25T14:35:28+00:00</dc:date>
    <link>https://arxiv.org/abs/2008.03971</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The likelihood ratio test (LRT) is widely used for comparing the relative fit of nested latent variable models. Following Wilks' theorem, the LRT is conducted by comparing the LRT statistic with its asymptotic distribution under the restricted model, a χ²-distribution with degrees of freedom equal to the difference in the number of free parameters between the two nested models under comparison. For models with latent variables such as factor analysis, structural equation models and random effects models, however, it is often found that the χ² approximation does not hold. In this note, we show how the regularity conditions of Wilks' theorem may be violated using three examples of models with latent variables. In addition, a more general theory for LRT is given that provides the correct asymptotic theory for these LRTs. This general theory was first established in Chernoff (1954) and discussed in both van der Vaart (2000) and Drton (2009), but it does not seem to have received enough attention. We illustrate this general theory with the three examples."]]></description>
<dc:subject>to:NB factor_analysis mixture_models inference_to_latent_objects hypothesis_testing likelihood statistics re:HEAS</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:4d387d3303d2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:factor_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:mixture_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:inference_to_latent_objects"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:likelihood"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:HEAS"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.annualreviews.org/doi/abs/10.1146/annurev-statistics-031219-041051">
    <title>Statistical Significance | Annual Review of Statistics and Its Application</title>
    <dc:date>2020-11-19T20:03:23+00:00</dc:date>
    <link>https://www.annualreviews.org/doi/abs/10.1146/annurev-statistics-031219-041051</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["A broad review is given of the role of statistical significance tests in the analysis of empirical data. Four main types of application are outlined. The first, conceptually quite different from the others, concerns decision making in such contexts as medical screening and industrial inspection. The others assess the security of conclusions. The article concludes with an outline discussion of some more specialized points."

]]></description>
<dc:subject>to:NB statistics hypothesis_testing goodness-of-fit cox.d.r. to_read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:cf10bd385748/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:goodness-of-fit"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:cox.d.r."/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://projecteuclid.org/euclid.aos/1600480925">
    <title>Fauß , Zoubir , Poor : Minimax optimal sequential hypothesis tests for Markov processes</title>
    <dc:date>2020-11-19T05:30:29+00:00</dc:date>
    <link>https://projecteuclid.org/euclid.aos/1600480925</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Under mild Markov assumptions, sufficient conditions for strict minimax optimality of sequential tests for multiple hypotheses under distributional uncertainty are derived. First, the design of optimal sequential tests for simple hypotheses is revisited, and it is shown that the partial derivatives of the corresponding cost function are closely related to the performance metrics of the underlying sequential test. Second, an implicit characterization of the least favorable distributions for a given testing policy is stated. By combining the results on optimal sequential tests and least favorable distributions, sufficient conditions for a sequential test to be minimax optimal under general distributional uncertainties are obtained. The cost function of the minimax optimal test is further identified as a generalized f-dissimilarity and the least favorable distributions as those that are most similar with respect to this dissimilarity. Numerical examples for minimax optimal sequential tests under different uncertainties illustrate the theoretical results."]]></description>
<dc:subject>to:NB hypothesis_testing markov_models stochastic_processes statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:db42efc7de75/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:markov_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:stochastic_processes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://projecteuclid.org/euclid.aos/1594972828">
    <title>Shah , Peters : The hardness of conditional independence testing and the generalised covariance measure</title>
    <dc:date>2020-11-18T21:50:00+00:00</dc:date>
    <link>https://projecteuclid.org/euclid.aos/1594972828</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["It is a common saying that testing for conditional independence, that is, testing whether two random vectors X and Y are independent, given Z, is a hard statistical problem if Z is a continuous random variable (or vector). In this paper, we prove that conditional independence is indeed a particularly difficult hypothesis to test for. Valid statistical tests are required to have a size that is smaller than a pre-defined significance level, and different tests usually have power against a different class of alternatives. We prove that a valid test for conditional independence does not have power against any alternative.
"Given the nonexistence of a uniformly valid conditional independence test, we argue that tests must be designed so their suitability for a particular problem may be judged easily. To address this need, we propose in the case where X and Y are univariate to nonlinearly regress X on Z, and Y on Z and then compute a test statistic based on the sample covariance between the residuals, which we call the generalised covariance measure (GCM). We prove that validity of this form of test relies almost entirely on the weak requirement that the regression procedures are able to estimate the conditional means X given Z, and Y given Z, at a slow rate. We extend the methodology to handle settings where X and Y may be multivariate or even high dimensional. While our general procedure can be tailored to the setting at hand by combining it with any regression technique, we develop the theoretical guarantees for kernel ridge regression. A simulation study shows that the test based on GCM is competitive with state of the art conditional independence tests. Code is available as the R package GeneralisedCovarianceMeasure on CRAN."]]></description>
<dc:subject>to:NB dependence_measures hypothesis_testing causal_discovery statistics peters.jonas independence_and_conditional_independence_testing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:9081e4cded17/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dependence_measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:causal_discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:peters.jonas"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:independence_and_conditional_independence_testing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://ieeexplore.ieee.org/document/8782628">
    <title>Testing Ising Models - IEEE Journals &amp; Magazine</title>
    <dc:date>2020-11-16T17:00:33+00:00</dc:date>
    <link>https://ieeexplore.ieee.org/document/8782628</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Given samples from an unknown multivariate distribution p, is it possible to distinguish whether p is the product of its marginals versus p being far from every product distribution? Similarly, is it possible to distinguish whether p equals a given distribution q versus p and q being far from each other? These problems of testing independence and goodness-of-fit have received enormous attention in statistics, information theory, and theoretical computer science, with sample-optimal algorithms known in several interesting regimes of parameters. Unfortunately, it has also been understood that these problems become intractable in large dimensions, necessitating exponential sample complexity. Motivated by the exponential lower bounds for general distributions as well as the ubiquity of Markov random fields (MRFs) in the modeling of high-dimensional distributions, we initiate the study of distribution testing on structured multivariate distributions, and in particular, the prototypical example of MRFs: the Ising Model. We demonstrate that, in this structured setting, we can avoid the curse of dimensionality, obtaining sample, and time efficient testers for independence and goodness-of-fit. One of the key technical challenges we face along the way is bounding the variance of functions of the Ising model."]]></description>
<dc:subject>to:NB random_fields hypothesis_testing statistics ising_model</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:f131bb3148ca/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:random_fields"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:ising_model"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://ieeexplore.ieee.org/document/8804234">
    <title>Semidefinite Tests for Latent Causal Structures - IEEE Journals &amp; Magazine</title>
    <dc:date>2020-11-16T16:04:42+00:00</dc:date>
    <link>https://ieeexplore.ieee.org/document/8804234</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Testing whether a probability distribution is compatible with a given Bayesian network is a fundamental task in the field of causal inference, where Bayesian networks model causal relations. Here we consider the class of causal structures where all correlations between observed quantities are solely due to the influence from latent variables. We show that each model of this type imposes a certain signature on the observable covariance matrix in terms of a particular decomposition into positive semidefinite components. This signature, and thus the underlying hypothetical latent structure, can be tested in a computationally efficient manner via semidefinite programming. This stands in stark contrast with the algebraic geometric tools required if the full observable probability distribution is taken into account. The semidefinite test is compared with tests based on entropic inequalities."]]></description>
<dc:subject>to:NB causal_discovery graphical_models statistics hypothesis_testing inference_to_latent_objects</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:9caefd16483a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:causal_discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:graphical_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:inference_to_latent_objects"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://doi.org/10.1111/sjos.12450">
    <title>Clustering with statistical error control - Vogt - - Scandinavian Journal of Statistics - Wiley Online Library</title>
    <dc:date>2020-11-15T20:51:15+00:00</dc:date>
    <link>https://doi.org/10.1111/sjos.12450</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This article presents a clustering approach that allows for rigorous statistical error control similar to a statistical test. We develop estimators for both the unknown number of clusters and the clusters themselves. The estimators depend on a tuning parameter α which is similar to the significance level of a statistical hypothesis test. By choosing α, one can control the probability of overestimating the true number of clusters, while the probability of underestimation is asymptotically negligible. In addition, the probability that the estimated clusters differ from the true ones is controlled. In the theoretical part of the article, formal versions of these statements on statistical error control are derived in a baseline model with convex clusters. A simulation study and two applications to temperature and gene expression microarray data complement the theoretical analysis."]]></description>
<dc:subject>to:NB clustering hypothesis_testing statistics to_read to_teach:data-mining</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c79397a7e54a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data-mining"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0235318">
    <title>Systematic review of the use of “magnitude-based inference” in sports science and medicine</title>
    <dc:date>2020-07-13T16:43:55+00:00</dc:date>
    <link>https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0235318</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Magnitude-based inference (MBI) is a controversial statistical method that has been used in hundreds of papers in sports science despite criticism from statisticians. To better understand how this method has been applied in practice, we systematically reviewed 232 papers that used MBI. We extracted data on study design, sample size, and choice of MBI settings and parameters. Median sample size was 10 per group (interquartile range, IQR: 8–15) for multi-group studies and 14 (IQR: 10–24) for single-group studies; few studies reported a priori sample size calculations (15%). Authors predominantly applied MBI’s default settings and chose “mechanistic/non-clinical” rather than “clinical” MBI even when testing clinical interventions (only 16 studies out of 232 used clinical MBI). Using these data, we can estimate the Type I error rates for the typical MBI study. Authors frequently made dichotomous claims about effects based on the MBI criterion of a “likely” effect and sometimes based on the MBI criterion of a “possible” effect. When the sample size is n = 8 to 15 per group, these inferences have Type I error rates of 12%-22% and 22%-45%, respectively. High Type I error rates were compounded by multiple testing: Authors reported results from a median of 30 tests related to outcomes; and few studies specified a primary outcome (14%). We conclude that MBI has promoted small studies, promulgated a “black box” approach to statistics, and led to numerous papers where the conclusions are not supported by the data. Amidst debates over the role of p-values and significance testing in science, MBI also provides an important natural experiment: we find no evidence that moving researchers away from p-values or null hypothesis significance testing makes them less prone to dichotomization or over-interpretation of findings."

--- I hadn't heard of this particular little cult, but sheesh.  (The last sentence of the abstract is the key.)]]></description>
<dc:subject>to:NB have_read bad_data_analysis statistics why_oh_why_cant_we_have_a_better_academic_publishing_system hypothesis_testing estimation trapped_in_plutos_republic</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:47bc87dbe9e7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bad_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:why_oh_why_cant_we_have_a_better_academic_publishing_system"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:trapped_in_plutos_republic"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://papers.ssrn.com/sol3/papers.cfm?abstract_id=917901">
    <title>Empirical Likelihood Methods in Econometrics: Theory and Practice by Yuichi Kitamura :: SSRN</title>
    <dc:date>2020-05-16T18:01:29+00:00</dc:date>
    <link>https://papers.ssrn.com/sol3/papers.cfm?abstract_id=917901</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Recent developments in empirical likelihood (EL) methods are reviewed. First, to put the method in perspective, two interpretations of empirical likelihood are presented, one as a nonparametric maximum likelihood estimation method (NPMLE) and the other as a generalized minimum contrast estimator (GMC). The latter interpretation provides a clear connection between EL, GMM, GEL and other related estimators. Second, EL is shown to have various advantages over other methods. The theory of large deviations demonstrates that EL emerges naturally in achieving asymptotic optimality both for estimation and testing. Interestingly, higher order asymptotic analysis also suggests that EL is generally a preferred method. Third, extensions of EL are discussed in various settings, including estimation of conditional moment restriction models, nonparametric specification testing and time series models. Finally, practical issues in applying EL to real data, such as computational algorithms for EL, are discussed. Numerical examples to illustrate the efficacy of the method are presented."]]></description>
<dc:subject>to:NB statistics estimation hypothesis_testing likelihood empirical_likelihood large_deviations re:HEAS</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:d0c1ac40de03/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:likelihood"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:empirical_likelihood"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:large_deviations"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:HEAS"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://doi.org/10.1111/1368-423X.t01-1-00071">
    <title>Model selection tests for nonlinear dynamic models | The Econometrics Journal | Oxford Academic</title>
    <dc:date>2020-05-16T18:00:25+00:00</dc:date>
    <link>https://doi.org/10.1111/1368-423X.t01-1-00071</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This paper generalizes Vuong (1989) asymptotically normal tests for model selection in several important directions. First, it allows for incompletely parametrized models such as econometric models defined by moment conditions. Second, it allows for a broad class of estimation methods that includes most estimators currently used in practice. Third, it considers model selection criteria other than the models’ likelihoods such as the mean squared errors of prediction. Fourth, the proposed tests are applicable to possibly misspecified nonlinear dynamic models with weakly dependent heterogeneous data. Cases where the estimation methods optimize the model selection criteria are distinguished from cases where they do not. We also consider the estimation of the asymptotic variance of the difference between the competing models’ selection criteria, which is necessary to our tests. Finally, we discuss conditions under which our tests are valid. It is seen that the competing models must be essentially nonnested."]]></description>
<dc:subject>have_read to_reread model_selection hypothesis_testing statistics misspecification time_series statistical_inference_for_stochastic_processes vuong.quang to_teach:data_over_space_and_time re:HEAS in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:513c1caa4d37/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_reread"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:model_selection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:misspecification"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistical_inference_for_stochastic_processes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:vuong.quang"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data_over_space_and_time"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:HEAS"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2001.03039">
    <title>[2001.03039] Minimax Optimal Conditional Independence Testing</title>
    <dc:date>2020-01-30T23:50:41+00:00</dc:date>
    <link>https://arxiv.org/abs/2001.03039</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We consider the problem of conditional independence testing of X and Y given Z where X,Y and Z are three real random variables and Z is continuous. We focus on two main cases -- when X and Y are both discrete, and when X and Y are both continuous. In view of recent results on conditional independence testing (Shah and Peters 2018), one cannot hope to design non-trivial tests, which control the type I error for all absolutely continuous conditionally independent distributions, while still ensuring power against interesting alternatives. Consequently, we identify various, natural smoothness assumptions on the conditional distributions of X,Y|Z=z as z varies in the support of Z, and study the hardness of conditional independence testing under these smoothness assumptions. We derive matching lower and upper bounds on the critical radius of separation between the null and alternative hypotheses in the total variation metric. The tests we consider are easily implementable and rely on binning the support of the continuous variable Z. To complement these results, we provide a new proof of the hardness result of Shah and Peters and show that in the absence of smoothness assumptions conditional independence testing remains difficult even when X,Y are discrete variables of finite (and not scaling with the sample-size) support."]]></description>
<dc:subject>to:NB kith_and_kin dependence_measures hypothesis_testing statistics wasserman.larry neykov.matey balakrishnan.sivaraman</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:e2ba29bee637/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dependence_measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:wasserman.larry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neykov.matey"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:balakrishnan.sivaraman"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://ieeexplore.ieee.org/document/8736279">
    <title>Anonymous Heterogeneous Distributed Detection: Optimal Decision Rules, Error Exponents, and the Price of Anonymity - IEEE Journals &amp; Magazine</title>
    <dc:date>2019-10-24T14:29:16+00:00</dc:date>
    <link>https://ieeexplore.ieee.org/document/8736279</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We explore the fundamental limits of heterogeneous distributed detection in an anonymous sensor network with $n$ sensors and a single fusion center. The fusion center collects the single observation from each of the $n$ sensors to detect a binary parameter. The sensors are clustered into multiple groups, and different groups follow different distributions under a given hypothesis. The key challenge for the fusion center is the anonymity of sensors—although it knows the exact number of sensors and the distribution of observations in each group, it does not know which group each sensor belongs to. It is hence natural to consider it as a composite hypothesis testing problem. First, we propose an optimal test called mixture likelihood ratio test, which is a randomized threshold test based on the ratio of the uniform mixture of all the possible distributions under one hypothesis to that under the other hypothesis. Optimality is shown by first arguing that there exists an optimal test that is symmetric, that is, it does not depend on the order of observations across the sensors, and then proving that the mixture likelihood ratio test is optimal among all symmetric tests. Second, we focus on the Neyman–Pearson setting and characterize the error exponent of the worst-case type-II error probability as $n$ tends to infinity, assuming the number of sensors in each group is proportional to $n$. Finally, we generalize our result to find the collection of all achievable type-I and type-II error exponents, showing that the boundary of the region can be obtained by solving an optimization problem. Our results elucidate the price of anonymity in heterogeneous distributed detection, and can be extended to $M$-ary hypothesis testing with heterogeneous observations generated according to hidden latent variables."]]></description>
<dc:subject>to:NB hypothesis_testing distributed_systems statistics information_theory</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:1c2d0aec4d4a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:distributed_systems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:information_theory"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1910.08883">
    <title>[1910.08883] The Exact Equivalence of Independence Testing and Two-Sample Testing</title>
    <dc:date>2019-10-22T13:44:52+00:00</dc:date>
    <link>https://arxiv.org/abs/1910.08883</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Testing independence and testing equality of distributions are two tightly related statistical hypotheses. Several distance and kernel-based statistics are recently proposed to achieve universally consistent testing for either hypothesis. On the distance side, the distance correlation is proposed for independence testing, and the energy statistic is proposed for two-sample testing. On the kernel side, the Hilbert-Schmidt independence criterion is proposed for independence testing and the maximum mean discrepancy is proposed for two-sample testing. In this paper, we show that two-sample testing are special cases of independence testing via an auxiliary label vector, and prove that distance correlation is exactly equivalent to the energy statistic in terms of the population statistic, the sample statistic, and the testing p-value via permutation test. The equivalence can be further generalized to K-sample testing and extended to the kernel regime. As a consequence, it suffices to always use an independence statistic to test equality of distributions, which enables better interpretability of the test statistic and more efficient testing."]]></description>
<dc:subject>to:NB two-sample_tests dependence_measures statistics hypothesis_testing</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:348b88959e6d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:two-sample_tests"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dependence_measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://rss.onlinelibrary.wiley.com/doi/full/10.1111/rssb.12340">
    <title>The conditional permutation test for independence while controlling for confounders - Berrett - - Journal of the Royal Statistical Society: Series B (Statistical Methodology) - Wiley Online Library</title>
    <dc:date>2019-10-22T13:28:01+00:00</dc:date>
    <link>https://rss.onlinelibrary.wiley.com/doi/full/10.1111/rssb.12340</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We propose a general new method, the conditional permutation test, for testing the conditional independence of variables X and Y given a potentially high dimensional random vector Z that may contain confounding factors. The test permutes entries of X non‐uniformly, to respect the existing dependence between X and Z and thus to account for the presence of these confounders. Like the conditional randomization test of Candès and co‐workers in 2018, our test relies on the availability of an approximation to the distribution of X|Z—whereas their test uses this estimate to draw new X‐values, for our test we use this approximation to design an appropriate non‐uniform distribution on permutations of the X‐values already seen in the true data. We provide an efficient Markov chain Monte Carlo sampler for the implementation of our method and establish bounds on the type I error in terms of the error in the approximation of the conditional distribution of X|Z, finding that, for the worst‐case test statistic, the inflation in type I error of the conditional permutation test is no larger than that of the conditional randomization test. We validate these theoretical results with experiments on simulated data and on the Capital Bikeshare data set."]]></description>
<dc:subject>to:NB dependence_measures hypothesis_testing statistics samworth.richard_j.</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:af2e30e28e98/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dependence_measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:samworth.richard_j."/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1910.01692">
    <title>[1910.01692] Algebraic statistics, tables, and networks: The Fienberg advantage</title>
    <dc:date>2019-10-11T22:17:53+00:00</dc:date>
    <link>https://arxiv.org/abs/1910.01692</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Stephen Fienberg's affinity for contingency table problems and reinterpreting models with a fresh look gave rise to a new approach for hypothesis testing of network models that are linear exponential families. We outline his vision and influence in this fundamental problem, as well as generalizations to multigraphs and hypergraphs."]]></description>
<dc:subject>to:NB statistics network_data_analysis hypothesis_testing fienberg.stephen_e.</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:dd3a575c9e7c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:fienberg.stephen_e."/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1909.13031">
    <title>[1909.13031] Nonzero-sum Adversarial Hypothesis Testing Games</title>
    <dc:date>2019-10-01T17:13:55+00:00</dc:date>
    <link>https://arxiv.org/abs/1909.13031</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We study nonzero-sum hypothesis testing games that arise in the context of adversarial classification, in both the Bayesian as well as the Neyman-Pearson frameworks. We first show that these games admit mixed strategy Nash equilibria, and then we examine some interesting concentration phenomena of these equilibria. Our main results are on the exponential rates of convergence of classification errors at equilibrium, which are analogous to the well-known Chernoff-Stein lemma and Chernoff information that describe the error exponents in the classical binary hypothesis testing problem, but with parameters derived from the adversarial model. The results are validated through numerical experiments."]]></description>
<dc:subject>to:NB low-regret_learning hypothesis_testing statistics information_theory</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:a6d033616ec5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:low-regret_learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:information_theory"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1909.13464">
    <title>[1909.13464] Network Differential Connectivity Analysis</title>
    <dc:date>2019-10-01T16:17:41+00:00</dc:date>
    <link>https://arxiv.org/abs/1909.13464</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Identifying differences in networks has become a canonical problem in many biological applications. Here, we focus on testing whether two Gaussian graphical models are the same. Existing methods try to accomplish this goal by either directly comparing their estimated structures, or testing the null hypothesis that the partial correlation matrices are equal. However, estimation approaches do not provide measures of uncertainty, e.g., p-values, which are crucial in drawing scientific conclusions. On the other hand, existing testing approaches could lead to misleading results in some cases. To address these shortcomings, we propose a qualitative hypothesis testing framework, which tests whether the connectivity patterns in the two networks are the same. Our framework is especially appropriate if the goal is to identify nodes or edges that are differentially connected. No existing approach could test such hypotheses and provide corresponding measures of uncertainty, e.g., p-values. We investigate theoretical and numerical properties of our proposal and illustrate its utility in biological applications. Theoretically, we show that under appropriate conditions, our proposal correctly controls the type-I error rate in testing the qualitative hypothesis. Empirically, we demonstrate the performance of our proposal using simulation datasets and applications in cancer genetics and brain imaging studies."]]></description>
<dc:subject>to:NB network_data_analysis hypothesis_testing two-sample_tests statistics re:network_differences</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:612fcc0d1d37/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:two-sample_tests"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:network_differences"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1902.00080">
    <title>[1902.00080] Minimax Testing of Identity to a Reference Ergodic Markov Chain</title>
    <dc:date>2019-09-26T18:19:59+00:00</dc:date>
    <link>https://arxiv.org/abs/1902.00080</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We exhibit an efficient procedure for testing, based on a single long state sequence, whether an unknown Markov chain is identical to or ε-far from a given reference chain. We obtain nearly matching (up to logarithmic factors) upper and lower sample complexity bounds for our notion of distance, which is based on total variation. Perhaps surprisingly, we discover that the sample complexity depends solely on the properties of the known reference chain and does not involve the unknown chain at all, which is not even assumed to be ergodic."]]></description>
<dc:subject>to:NB time_series markov_models hypothesis_testing goodness-of-fit kontorovich.aryeh kith_and_kin minimax</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:da3ba3029e00/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:markov_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:goodness-of-fit"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kontorovich.aryeh"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:minimax"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1904.04052">
    <title>[1904.04052] Practical tests for significance in Markov Chains</title>
    <dc:date>2019-09-15T14:47:32+00:00</dc:date>
    <link>https://arxiv.org/abs/1904.04052</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We give qualitative and quantitative improvements to theorems which enable significance testing in Markov Chains, with a particular eye toward the goal of enabling strong, interpretable, and statistically rigorous claims of political gerrymandering. Our results can be used to demonstrate at a desired significance level that a given Markov Chain state (e.g., a districting) is extremely unusual (rather than just atypical) with respect to the fragility of its characteristics in the chain. We also provide theorems specialized to leverage quantitative improvements when there is a product structure in the underlying probability space, as can occur due to geographical constraints on districtings."]]></description>
<dc:subject>to:NB markov_models hypothesis_testing anomaly_detection color_me_skeptical</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:0f4e495420df/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:markov_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:anomaly_detection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:color_me_skeptical"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1909.03302">
    <title>[1909.03302] On the Optimality of Gaussian Kernel Based Nonparametric Tests against Smooth Alternatives</title>
    <dc:date>2019-09-15T14:35:28+00:00</dc:date>
    <link>https://arxiv.org/abs/1909.03302</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Nonparametric tests via kernel embedding of distributions have witnessed a great deal of practical successes in recent years. However, statistical properties of these tests are largely unknown beyond consistency against a fixed alternative. To fill in this void, we study here the asymptotic properties of goodness-of-fit, homogeneity and independence tests using Gaussian kernels, arguably the most popular and successful among such tests. Our results provide theoretical justifications for this common practice by showing that tests using Gaussian kernel with an appropriately chosen scaling parameter are minimax optimal against smooth alternatives in all three settings. In addition, our analysis also pinpoints the importance of choosing a diverging scaling parameter when using Gaussian kernels and suggests a data-driven choice of the scaling parameter that yields tests optimal, up to an iterated logarithmic factor, over a wide range of smooth alternatives. Numerical experiments are also presented to further demonstrate the practical merits of the methodology."]]></description>
<dc:subject>to:NB kernel_methods hilbert_space goodness-of-fit hypothesis_testing statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:3f7d02a8db5b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kernel_methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hilbert_space"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:goodness-of-fit"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1902.06441">
    <title>[1902.06441] Aggregated test of independence based on HSIC measures</title>
    <dc:date>2019-08-30T20:15:21+00:00</dc:date>
    <link>https://arxiv.org/abs/1902.06441</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Dependence measures based on reproducing kernel Hilbert spaces, also known as Hilbert-Schmidt Independence Criterion and denoted HSIC, are widely used to statistically decide whether or not two random vectors are dependent. Recently, non-parametric HSIC-based statistical tests of independence have been performed. However, these tests lead to the question of the choice of the kernels associated to the HSIC. In particular, there is as yet no method to objectively select specific kernels with theoretical guarantees in terms of first and second kind errors. One of the main contributions of this work is to develop a new HSIC-based aggregated procedure which avoids such a kernel choice, and to provide theoretical guarantees for this procedure. To achieve this, we first introduce non-asymptotic single tests based on Gaussian kernels with a given bandwidth, which are of prescribed level α∈(0,1). From a theoretical point of view, we upper-bound their uniform separation rate of testing over Sobolev and Nikol'skii balls. Then, we aggregate several single tests, and obtain similar upper-bounds for the uniform separation rate of the aggregated procedure over the same regularity spaces. Another main contribution is that we provide a lower-bound for the non-asymptotic minimax separation rate of testing over Sobolev balls, and deduce that the aggregated procedure is adaptive in the minimax sense over such regularity spaces. Finally, from a practical point of view, we perform numerical studies in order to assess the efficiency of our aggregated procedure and compare it to existing independence tests in the literature."]]></description>
<dc:subject>to:NB dependence_measures hypothesis_testing statistics hilbert_space minimax</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c57bbe81672d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dependence_measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hilbert_space"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:minimax"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://amstat.tandfonline.com/doi/full/10.1080/10618600.2019.1637749">
    <title>Testing Sparsity-Inducing Penalties: Journal of Computational and Graphical Statistics: Vol 0, No 0</title>
    <dc:date>2019-08-20T16:07:38+00:00</dc:date>
    <link>https://amstat.tandfonline.com/doi/full/10.1080/10618600.2019.1637749</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Many penalized maximum likelihood estimators correspond to posterior mode estimators under specific prior distributions. Appropriateness of a particular class of penalty functions can therefore be interpreted as the appropriateness of a prior for the parameters. For example, the appropriateness of a lasso penalty for regression coefficients depends on the extent to which the empirical distribution of the regression coefficients resembles a Laplace distribution. We give a testing procedure of whether or not a Laplace prior is appropriate and accordingly, whether or not using a lasso penalized estimate is appropriate. This testing procedure is designed to have power against exponential power priors which correspond to ℓqℓq penalties. Via simulations, we show that this testing procedure achieves the desired level and has enough power to detect violations of the Laplace assumption when the numbers of observations and unknown regression coefficients are large. We then introduce an adaptive procedure that chooses a more appropriate prior and corresponding penalty from the class of exponential power priors when the null hypothesis is rejected. We show that this can improve estimation of the regression coefficients both when they are drawn from an exponential power distribution and when they are drawn from a spike-and-slab distribution. Supplementary materials for this article are available online."

--- I feel like I fundamentally disagree with this approach.  Those priors are merely (to quote Jamie Robins and Larry Wasserman) "frequentist pursuit", and have no bearing on whether (say) the Lasso will give a good sparse, linear approximation to the underlying regression function (see https://normaldeviate.wordpress.com/2013/09/11/consistency-sparsistency-and-presistency/).  All of which said, Hoff is always worth listening to, so the last tag applies with special force.]]></description>
<dc:subject>to:NB model_checking sparsity regression hypothesis_testing bayesianism re:phil-of-bayes_paper hoff.peter to_besh</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:aaba8d8a838f/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:model_checking"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sparsity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bayesianism"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:phil-of-bayes_paper"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hoff.peter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_besh"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1908.06486">
    <title>[1908.06486] A Consistent Independence Test for Multivariate Time-Series</title>
    <dc:date>2019-08-20T15:32:37+00:00</dc:date>
    <link>https://arxiv.org/abs/1908.06486</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["A fundamental problem in statistical data analysis is testing whether two phenomena are related. When the phenomena in question are time series, many challenges emerge. The first is defining a dependence measure between time series at the population level, as well as a sample level test statistic. The second is computing or estimating the distribution of this test statistic under the null, as the permutation test procedure is invalid for most time series structures. This work aims to address these challenges by combining distance correlation and multiscale graph correlation (MGC) from independence testing literature and block permutation testing from time series analysis. Two hypothesis tests for testing the independence of time series are proposed. These procedures also characterize whether the dependence relationship between the series is linear or nonlinear, and the time lag at which this dependence is maximized. For strictly stationary auto-regressive moving average (ARMA) processes, the proposed independence tests are proven valid and consistent. Finally, neural connectivity in the brain is analyzed using fMRI data, revealing linear dependence of signals within the visual network and default mode network, and nonlinear relationships in other regions. This work opens up new theoretical and practical directions for many modern time series analysis problems."]]></description>
<dc:subject>to:NB dependence_measures time_series hypothesis_testing statistics to_teach:data_over_space_and_time</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:f68eefa3e6d6/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dependence_measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:data_over_space_and_time"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://academic.oup.com/biomet/article/106/3/547/5511208?rss=1">
    <title>Nonparametric independence testing via mutual information | Biometrika | Oxford Academic</title>
    <dc:date>2019-08-14T19:14:17+00:00</dc:date>
    <link>https://academic.oup.com/biomet/article/106/3/547/5511208?rss=1</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We propose a test of independence of two multivariate random vectors, given a sample from the underlying population. Our approach is based on the estimation of mutual information, whose decomposition into joint and marginal entropies facilitates the use of recently developed efficient entropy estimators derived from nearest neighbour distances. The proposed critical values may be obtained by simulation in the case where an approximation to one marginal is available or by permuting the data otherwise. This facilitates size guarantees, and we provide local power analyses, uniformly over classes of densities whose mutual information satisfies a lower bound. Our ideas may be extended to provide new goodness-of-fit tests for normal linear models based on assessing the independence of our vector of covariates and an appropriately defined notion of an error vector. The theory is supported by numerical studies on both simulated and real data."]]></description>
<dc:subject>to:NB dependence_measures hypothesis_testing information_theory statistics entropy_estimation samworth.richard_j.</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:b5d546c7c070/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dependence_measures"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:information_theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:entropy_estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:samworth.richard_j."/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1906.06615">
    <title>[1906.06615] Detecting new signals under background mismodelling</title>
    <dc:date>2019-08-09T13:37:39+00:00</dc:date>
    <link>https://arxiv.org/abs/1906.06615</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Searches for new astrophysical phenomena often involve several sources of non-random uncertainties which can lead to highly misleading results. Among these, model-uncertainty arising from background mismodelling can dramatically compromise the sensitivity of the experiment under study. Specifically, overestimating the background distribution in the signal region increases the chances of missing new physics. Conversely, underestimating the background outside the signal region leads to an artificially enhanced sensitivity and a higher likelihood of claiming false discoveries. The aim of this work is to provide a unified statistical strategy to perform modelling, estimation, inference, and signal characterization under background mismodelling. The method proposed allows to incorporate the (partial) scientific knowledge available on the background distribution and provides a data-updated version of it in a purely nonparametric fashion without requiring the specification of prior distributions. Applications in the context of dark matter searches and radio surveys show how the tools presented in this article can be used to incorporate non-stochastic uncertainty due to instrumental noise and to overcome violations of classical distributional assumptions in stacking experiments."]]></description>
<dc:subject>to:NB anomaly_detection hypothesis_testing physics particle_physics statistics misspecification</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:e14915d272b7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:anomaly_detection"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:physics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:particle_physics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:misspecification"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://global.oup.com/academic/product/non-standard-parametric-statistical-inference-9780198505044?cc=us&amp;lang=en#">
    <title>Non-Standard Parametric Statistical Inference - Russell Cheng - Oxford University Press</title>
    <dc:date>2019-08-05T18:37:11+00:00</dc:date>
    <link>https://global.oup.com/academic/product/non-standard-parametric-statistical-inference-9780198505044?cc=us&amp;lang=en#</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This book discusses the fitting of parametric statistical models to data samples. Emphasis is placed on: (i) how to recognize situations where the problem is non-standard when parameter estimates behave unusually, and (ii) the use of parametric bootstrap resampling methods in analyzing such problems.
"A frequentist likelihood-based viewpoint is adopted, for which there is a well-established and very practical theory. The standard situation is where certain widely applicable regularity conditions hold. However, there are many apparently innocuous situations where standard theory breaks down, sometimes spectacularly. Most of the departures from regularity are described geometrically, with only sufficient mathematical detail to clarify the non-standard nature of a problem and to allow formulation of practical solutions.
"The book is intended for anyone with a basic knowledge of statistical methods, as is typically covered in a university statistical inference course, wishing to understand or study how standard methodology might fail. Easy to understand statistical methods are presented which overcome these difficulties, and demonstrated by detailed examples drawn from real applications. Simple and practical model-building is an underlying theme.
"Parametric bootstrap resampling is used throughout for analyzing the properties of fitted models, illustrating its ease of implementation even in non-standard situations. Distributional properties are obtained numerically for estimators or statistics not previously considered in the literature because their theoretical distributional properties are too hard to obtain theoretically. Bootstrap results are presented mainly graphically in the book, providing an accessible demonstration of the sampling behaviour of estimators."]]></description>
<dc:subject>to:NB bootstrap estimation hypothesis_testing statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:3db28c9164d7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bootstrap"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1907.07582">
  <!-- Bookmark record for arXiv 1907.07582. The space-separated dc:subject
       tag list is repeated below as one rdf:li per tag inside taxo:topics. -->
  <title>[1907.07582] Testing for Unobserved Heterogeneity via k-means Clustering</title>
  <dc:date>2019-07-18T10:55:01+00:00</dc:date>
  <link>https://arxiv.org/abs/1907.07582</link>
  <dc:creator>cshalizi</dc:creator>
  <!-- The description is text content: keep it on one line so no indentation leaks into the data. -->
  <description><![CDATA["Clustering methods such as k-means have found widespread use in a variety of applications. This paper proposes a formal testing procedure to determine whether a null hypothesis of a single cluster, indicating homogeneity of the data, can be rejected in favor of multiple clusters. The test is simple to implement, valid under relatively mild conditions (including non-normality, and heterogeneity of the data in aspects beyond those in the clustering analysis), and applicable in a range of contexts (including clustering when the time series dimension is small, or clustering on parameters other than the mean). We verify that the test has good size control in finite samples, and we illustrate the test in applications to clustering vehicle manufacturers and U.S. mutual funds."]]></description>
  <dc:subject>hypothesis_testing model_selection model_checking clustering statistics in_NB time_series have_read</dc:subject>
  <dc:source>https://pinboard.in/</dc:source>
  <dc:identifier>https://pinboard.in/u:cshalizi/b:1cda8c06d404/</dc:identifier>
  <taxo:topics>
    <rdf:Bag>
      <rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hypothesis_testing"/>
      <rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:model_selection"/>
      <rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:model_checking"/>
      <rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:clustering"/>
      <rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
      <rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
      <rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
      <rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
    </rdf:Bag>
  </taxo:topics>
</item>
</rdf:RDF>