<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (cshalizi)</title>
    <link>https://pinboard.in/u:cshalizi/public/</link>
    <description>recent bookmarks from cshalizi</description>
    <items>
      <rdf:Seq>	<rdf:li rdf:resource="https://spiral.imperial.ac.uk/entities/publication/5e4c3fee-816e-474a-a452-b7c74376aa69"/>
	<rdf:li rdf:resource="https://www.sciencedirect.com/science/article/pii/S0378873325000668?via%3Dihub"/>
	<rdf:li rdf:resource="https://www.nature.com/articles/s41593-025-02196-7"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2510.06136"/>
	<rdf:li rdf:resource="https://onlinelibrary.wiley.com/doi/abs/10.1111/jtsa.70023?campaign=wolearlyview"/>
	<rdf:li rdf:resource="https://osf.io/xra56"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2506.22946"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2411.10908"/>
	<rdf:li rdf:resource="https://www.annualreviews.org/content/journals/10.1146/annurev-biodatasci-080917-013444"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2310.20609"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2503.09299"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2503.03047"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2503.16959"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2110.15886"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2211.13000"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2404.11464"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2402.07999"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2402.15489"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2401.01404v2"/>
	<rdf:li rdf:resource="https://projecteuclid.org/journals/annals-of-statistics/volume-51/issue-6/Projective-sparse-and-learnable-latent-position-network-models/10.1214/23-AOS2340.short"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2203.14223"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2309.03969"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2306.07252"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2206.05081"/>
	<rdf:li rdf:resource="https://www.tandfonline.com/doi/full/10.1080/01621459.2023.2213466?casa_token=5YCQmnR5dvgAAAAA%3ALmyxVAGqVmvF-xgzm8Xk3b0CoQY3O9T4_YAJ-EL3djAUMPgoV855ee62aIIWj5oINszLh5cqdEA"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2305.06353"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2305.10380"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2305.14814"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2305.12470"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2305.00156"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2304.03331"/>
	<rdf:li rdf:resource="https://www.jstor.org/stable/44965462"/>
	<rdf:li rdf:resource="https://academic.oup.com/jid/article/191/Supplement_1/S42/934023"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2303.16598"/>
	<rdf:li rdf:resource="https://www.tandfonline.com/doi/abs/10.1080/01621459.2023.2183133"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2302.10095"/>
	<rdf:li rdf:resource="https://www.cambridge.org/core/journals/network-science/article/abs/network-community-detection-method-with-integration-of-data-from-multiple-layers-and-node-attributes/4381E20BCA49A082C27A6F607F3D561F"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2303.04871"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2303.07023"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2303.05909"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2301.03630"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2212.12839"/>
	<rdf:li rdf:resource="https://www.nature.com/articles/s41567-022-01866-8"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2210.07491"/>
	<rdf:li rdf:resource="https://www.annualreviews.org/doi/abs/10.1146/annurev-soc-030320-102100"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2206.05614"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2008.05337"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2206.01553"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2203.07230"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2203.06948"/>
	<rdf:li rdf:resource="https://www.jmlr.org/papers/v23/19-1056.html"/>
	<rdf:li rdf:resource="https://projecteuclid.org/journals/bernoulli/volume-28/issue-2/Asymptotically-efficient-estimators-for-stochastic-blockmodels--The-naive-MLE/10.3150/21-BEJ1376.short"/>
	<rdf:li rdf:resource="https://journals.sagepub.com/doi/full/10.1177/2053951720949577"/>
	<rdf:li rdf:resource="https://osf.io/preprints/socarxiv/4mp6x/"/>
	<rdf:li rdf:resource="https://doi.org/10.1017/9781108774116"/>
	<rdf:li rdf:resource="https://doi.org/10.1017/9781108865791"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2112.00183"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2107.00248"/>
	<rdf:li rdf:resource="https://onlinelibrary.wiley.com/doi/abs/10.1002/sta4.428"/>
	<rdf:li rdf:resource="https://onlinelibrary.wiley.com/doi/abs/10.1002/sta4.426"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2003.04235"/>
	<rdf:li rdf:resource="https://dl.acm.org/doi/abs/10.1145/3447548.3470821"/>
	<rdf:li rdf:resource="https://dl.acm.org/doi/abs/10.1145/3447548.3470795"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2107.07489"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2108.01149"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2007.02156"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2107.14677"/>
	<rdf:li rdf:resource="https://sociologicalscience.com/articles-v8-14-285/"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/2107.04224"/>
	<rdf:li rdf:resource="https://icml.cc/Conferences/2021/ScheduleMultitrack?event=9083"/>
      </rdf:Seq>
    </items>
  </channel><item rdf:about="https://spiral.imperial.ac.uk/entities/publication/5e4c3fee-816e-474a-a452-b7c74376aa69">
    <title>Sparse and partially observed large-scale networks: analytic statistics, behaviour, and structural inference</title>
    <dc:date>2026-02-17T16:02:02+00:00</dc:date>
    <link>https://spiral.imperial.ac.uk/entities/publication/5e4c3fee-816e-474a-a452-b7c74376aa69</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Most real-world systems are networked, composed of nodes connected to each other via edges, whose overall behaviour can depend on the underlying connectivity structure in non-trivial ways. They are also of a large scale, consisting of numerous nodes and edges not all of which can be viably observed, making the study of key structural properties computationally intractable. This thesis responds to the challenge by leveraging sparsity of edges to determine (1) structural statistics analytically, and (2) the relevance of structure in describing behaviour, based on (3) an inferred statistical model of the structure of sparse and partially observed large-scale networks. First, we determine higher-order structural statistics of networks: in particular, the length of a shortest path connecting any two nodes in the network. Assuming a statistical model for the network structure, we establish an analytic distribution of shortest path lengths, whose approximate closed-form has a natural interpretation of traversing independent walks. This formalism yields new results for both network- and node-level properties like percolation phenomena and distance-based centralities of closeness and betweenness, for a large family of statistical network models. Then, we contextualise behaviour in a network to health behaviours in a social network: in particular, we consider how misinformation influences vaccination outcomes. Through a randomised experiment we find that exposure to misinformation about COVID-19 vaccines negatively impacts individuals' intentions to vaccinate against the disease. To address one possible psycho-social mechanism, we use a psychometric test to show that the ability to detect misinformation positively predicts regional vaccine uptake. Assuming a statistical model for the network structure, we observe that the ability is mediated by the social network. 
Finally, we infer a statistical model for social networks from a source of partial observations. We use publicly available data from spatially aggregated friendship counts to infer a Bayesian socio-physical connectivity model. The approach is grounded in a microscopic process of friendship formation describing how people in a society connect to each other jointly in social and physical space, that explains the empirically observed decay of connection probabilities in physical space. It provides a multidimensional social inequality measure quantifying the surprisal of social connections that we term the "social Gini" index."]]></description>
<dc:subject>to:NB network_data_analysis social_networks</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:8be9d1ba3093/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_networks"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.sciencedirect.com/science/article/pii/S0378873325000668?via%3Dihub">
    <title>Use of aggregated relational data in agent-based modeling - ScienceDirect</title>
    <dc:date>2026-02-17T16:00:59+00:00</dc:date>
    <link>https://www.sciencedirect.com/science/article/pii/S0378873325000668?via%3Dihub</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Aggregated relational data (ARD) provides valuable information for inferring structural features of personal social networks at scale. Following recent ARD studies, we suggest a formal parameter for agent-based modeling (ABM) that helps reflect multiple structural features of extended social networks (e.g., size; variation; distribution) and apply it to a widely known classic ABM—Axelrod’s cultural dynamic model. Results show that when incorporating realistic network features estimated from ARD, the model generates outcomes substantially different from its original results. Our study highlights ARD's potential to enrich ABM in reflecting more realistic networks that better connect micro-processes with macro-phenomena."]]></description>
<dc:subject>to:NB agent-based_models networks axelrod_model network_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:f3b72b309c49/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:agent-based_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:axelrod_model"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.nature.com/articles/s41593-025-02196-7">
    <title>Investigating the methodological foundation of lesion network mapping | Nature Neuroscience</title>
    <dc:date>2026-02-07T20:26:58+00:00</dc:date>
    <link>https://www.nature.com/articles/s41593-025-02196-7</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Lesion network mapping (LNM) is a neuroimaging framework that uses normative functional connectivity (FC) data to link heterogeneous brain lesions and functional alterations to brain networks implicated in neurological and psychiatric conditions. However, many of the networks identified by LNM and related methods appear to be highly similar across diverse conditions such as addiction, depression, psychosis and epilepsy. To understand this similarity, we re-examined the data from multiple LNM studies and assessed the methodological roots of the method. Our findings reveal a foundational limitation: at its core, LNM involves a repetitive sampling of one and the same FC matrix. As a result, it systematically maps sets of local brain changes—whether they are patient lesions, magnetic resonance imaging-derived alterations, synthetic or random—onto the same nonspecific properties of the used FC data, producing highly similar networks across conditions. This central limitation cautions the use of LNM as a method for studying distinct biological networks underlying brain disorders. Our work may aid the development of a new generation of network-mapping methods from first principles."

--- Utterly devastating.]]></description>
<dc:subject>to:NB neuroscience network_data_analysis evisceration functional_connectivity via:? have_read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:2dc87b1031bb/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neuroscience"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:evisceration"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:functional_connectivity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:?"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2510.06136">
    <title>[2510.06136] Geometric Model Selection for Latent Space Network Models: Hypothesis Testing via Multidimensional Scaling and Resampling Techniques</title>
    <dc:date>2026-01-30T12:09:41+00:00</dc:date>
    <link>https://arxiv.org/abs/2510.06136</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Latent space models assume that network ties are more likely between nodes that are closer together in an underlying latent space. Euclidean space is a popular choice for the underlying geometry, but hyperbolic geometry can mimic more realistic patterns of ties in complex networks. To identify the underlying geometry, past research has applied non-Euclidean extensions of multidimensional scaling (MDS) to the observed geodesic distances: the shortest path lengths between nodes. The difference in stress, a standard goodness-of-fit metric for MDS, across the geometries is then used to select a latent geometry with superior model fit (lower stress). The effectiveness of this method is assessed through simulations of latent space networks in Euclidean and hyperbolic geometries. To better account for uncertainty, we extend permutation-based hypothesis tests for MDS to the latent network setting. However, these tests do not incorporate any network structure. We propose a parametric bootstrap distribution of networks, conditioned on observed geodesic distances and the Gaussian Latent Position Model (GLPM). Our method extends the Davidson-MacKinnon J-test to latent space network models with differing latent geometries. We pay particular attention to large and sparse networks, and both the permutation test and the bootstrapping methods show an improvement in detecting the underlying geometry."]]></description>
<dc:subject>to:NB network_data_analysis multidimensional_scaling re:hyperbolic_networks inference_to_latent_objects</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:9dfccc0379ec/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:multidimensional_scaling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:hyperbolic_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:inference_to_latent_objects"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://onlinelibrary.wiley.com/doi/abs/10.1111/jtsa.70023?campaign=wolearlyview">
    <title>Online Network Change Point Detection With Missing Values and Temporal Dependence - Xu - Journal of Time Series Analysis - Wiley Online Library</title>
    <dc:date>2025-10-25T20:04:01+00:00</dc:date>
    <link>https://onlinelibrary.wiley.com/doi/abs/10.1111/jtsa.70023?campaign=wolearlyview</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In this paper, we study online change point detection in dynamic networks with time-heterogeneous missing patterns within networks and dependence across both nodes and time. The missingness probabilities, the entrywise sparsity of networks, the rank of networks and the jump size in terms of the Frobenius norm are all allowed to vary as functions of the pre-change sample size. On top of a thorough handling of all the model parameters, we notably allow the edges and missingness to be temporally dependent. To the best of our knowledge, such a general framework has not been rigorously or systematically studied before in the literature. We propose a polynomial-time change point detection algorithm, with a version of the soft-impute algorithm as the imputation sub-routine. By piecing up these established sub-routines, our proposed algorithm achieves sharp detection delay while controlling the overall Type-I error. Extensive numerical experiments support our theoretical findings and demonstrate the effectiveness of our proposed method in practice."]]></description>
<dc:subject>to:NB time_series network_data_analysis change-point_problem re:network_differences</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:6d46bde9739c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:change-point_problem"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:network_differences"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://osf.io/xra56">
    <title>Community Competition and Political Extremism</title>
    <dc:date>2025-09-05T16:02:43+00:00</dc:date>
    <link>https://osf.io/xra56</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Online extremist movements, although appearing monolithic from the outside, are composed of competing ideologies and strategies. Commitment to and promotion of extremist violence varies widely between the communities that make up the wider movement. What explains this variation? I argue that community-level extremism is driven by competition between online communities for attention and engagement on social media platforms. To support this argument, I construct a theoretical framework for understanding social media platforms as sites of political contestation and distribution of public goods. I gather two novel datasets, using an overlapping snowball chain sampling algorithm and transformer-based classifier to capture community competition and extremist content. I show that inter-movement competition between communities drives the share of extremism expressed in communities, as well as the level of out-group-focused extremism."]]></description>
<dc:subject>to:NB networked_life sociology social_life_of_the_mind network_data_analysis re:actually-dr-internet-is-the-name-of-the-monsters-creator</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:605b6b96aca1/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:networked_life"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sociology"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_life_of_the_mind"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:actually-dr-internet-is-the-name-of-the-monsters-creator"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2506.22946">
    <title>[2506.22946] Modular versus Hierarchical: A Structural Signature of Topic Popularity in Mathematical Research</title>
    <dc:date>2025-08-05T12:59:14+00:00</dc:date>
    <link>https://arxiv.org/abs/2506.22946</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Mathematical researchers, especially those in early-career positions, face critical decisions about topic specialization with limited information about the collaborative environments of different research areas. The aim of this paper is to study how the popularity of a research topic is associated with the structure of that topic's collaboration network, as observed by a suite of measures capturing organizational structure at several scales. We apply these measures to 1,938 algorithmically discovered topics across 121,391 papers sourced from arXiv metadata during the period 2020--2025. Our analysis, which controls for the confounding effects of network size, reveals a structural dichotomy--we find that popular topics organize into modular "schools of thought," while niche topics maintain hierarchical core-periphery structures centered around established experts. This divide is not an artifact of scale, but represents a size-independent structural pattern correlated with popularity. We also document a "constraint reversal": after controlling for size, researchers in popular fields face greater structural constraints on collaboration opportunities, contrary to conventional expectations. Our findings suggest that topic selection is an implicit choice between two fundamentally different collaborative environments, each with distinct implications for a researcher's career. To make these structural patterns transparent to the research community, we developed the Math Research Compass (this https URL), an interactive platform providing data on topic popularity and collaboration patterns across mathematical topics."]]></description>
<dc:subject>to:NB sociology_of_science bibliometry network_data_analysis mathematics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:631e8471e78a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sociology_of_science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bibliometry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:mathematics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2411.10908">
    <title>[2411.10908] The Conflict Graph Design: Estimating Causal Effects under Arbitrary Neighborhood Interference</title>
    <dc:date>2025-04-28T02:05:56+00:00</dc:date>
    <link>https://arxiv.org/abs/2411.10908</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["A fundamental problem in network experiments is selecting an appropriate experimental design in order to precisely estimate a given causal effect of interest. In this work, we propose the Conflict Graph Design, a general approach for constructing experiment designs under network interference with the goal of precisely estimating a pre-specified causal effect. A central aspect of our approach is the notion of a conflict graph, which captures the fundamental unobservability associated with the causal effect and the underlying network. In order to estimate effects, we propose a modified Horvitz--Thompson estimator. We show that its variance under the Conflict Graph Design is bounded as O(λ(H)/n), where λ(H) is the largest eigenvalue of the adjacency matrix of the conflict graph. These rates depend on both the underlying network and the particular causal effect under investigation. Not only does this yield the best known rates of estimation for several well-studied causal effects (e.g. the global and direct effects) but it also provides new methods for effects which have received less attention from the perspective of experiment design (e.g. spill-over effects). Finally, we construct conservative variance estimators which facilitate asymptotically valid confidence intervals for the causal effect of interest."]]></description>
<dc:subject>to:NB to_read experimental_design causal_inference network_data_analysis re:do_not_adjust_your_receiver</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:d8cfab94679b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:experimental_design"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:causal_inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:do_not_adjust_your_receiver"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.annualreviews.org/content/journals/10.1146/annurev-biodatasci-080917-013444">
    <title>Network Analysis as a Grand Unifier in Biomedical Data Science | Annual Reviews</title>
    <dc:date>2025-04-09T14:54:58+00:00</dc:date>
    <link>https://www.annualreviews.org/content/journals/10.1146/annurev-biodatasci-080917-013444</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Biomedical data scientists study many types of networks, ranging from those formed by neurons to those created by molecular interactions. People often criticize these networks as uninterpretable diagrams termed hairballs; however, here we show that molecular biological networks can be interpreted in several straightforward ways. First, we can break down a network into smaller components, focusing on individual pathways and modules. Second, we can compute global statistics describing the network as a whole. Third, we can compare networks. These comparisons can be within the same context (e.g., between two gene regulatory networks) or cross-disciplinary (e.g., between regulatory networks and governmental hierarchies). The latter comparisons can transfer a formalism, such as that for Markov chains, from one context to another or relate our intuitions in a familiar setting (e.g., social networks) to the relatively unfamiliar molecular context. Finally, key aspects of molecular networks are dynamics and evolution, i.e., how they evolve over time and how genetic variants affect them. By studying the relationships between variants in networks, we can begin to interpret many common diseases, such as cancer and heart disease."]]></description>
<dc:subject>to:NB network_data_analysis networks biochemical_networks via:aks</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:30e36c1f0a02/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:biochemical_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:aks"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2310.20609">
    <title>[2310.20609] Graph Matching via convex relaxation to the simplex</title>
    <dc:date>2025-04-09T14:54:27+00:00</dc:date>
    <link>https://arxiv.org/abs/2310.20609</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This paper addresses the Graph Matching problem, which consists of finding the best possible alignment between two input graphs, and has many applications in computer vision, network deanonymization and protein alignment. A common approach to tackle this problem is through convex relaxations of the NP-hard Quadratic Assignment Problem (QAP).
"Here, we introduce a new convex relaxation onto the unit simplex and develop an efficient mirror descent scheme with closed-form iterations for solving this problem. Under the correlated Gaussian Wigner model, we show that the simplex relaxation admits a unique solution with high probability. In the noiseless case, this is shown to imply exact recovery of the ground truth permutation. Additionally, we establish a novel sufficiency condition for the input matrix in standard greedy rounding methods, which is less restrictive than the commonly used `diagonal dominance' condition. We use this condition to show exact one-step recovery of the ground truth (holding almost surely) via the mirror descent scheme, in the noiseless setting. We also use this condition to obtain significantly improved conditions for the GRAMPA algorithm [Fan et al. 2019] in the noiseless setting."]]></description>
<dc:subject>to:NB network_data_analysis re:network_differences optimization</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:5332420e2d77/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:network_differences"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:optimization"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2503.09299">
    <title>[2503.09299] Low-Rank Graphon Estimation: Theory and Applications to Graphon Games</title>
    <dc:date>2025-04-09T14:17:10+00:00</dc:date>
    <link>https://arxiv.org/abs/2503.09299</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This paper tackles the challenge of estimating a low-rank graphon from sampled network data, employing a singular value thresholding (SVT) estimator to create a piecewise-constant graphon based on the network's adjacency matrix. Under certain assumptions about the graphon's structural properties, we establish bounds on the operator norm distance between the true graphon and its estimator, as well as on the rank of the estimated graphon. In the second part of the paper, we apply our estimator to graphon games. We derive bounds on the suboptimality of interventions in the social welfare problem in graphon games when the intervention is based on the estimated graphon. These bounds are expressed in terms of the operator norm of the difference between the true and estimated graphons. We also emphasize the computational benefits of using the low-rank estimated graphon to solve these problems."]]></description>
<dc:subject>to:NB network_data_analysis re:smoothing_adjacency_matrices low-rank_approximation</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:4a5fdd1f720b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:smoothing_adjacency_matrices"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:low-rank_approximation"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2503.03047">
    <title>[2503.03047] Stochastic block models with many communities and the Kesten--Stigum bound</title>
    <dc:date>2025-04-09T14:14:39+00:00</dc:date>
    <link>https://arxiv.org/abs/2503.03047</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We study the inference of communities in stochastic block models with a growing number of communities. For block models with n vertices and a fixed number of communities q, it was predicted in Decelle et al. (2011) that there are computationally efficient algorithms for recovering the communities above the Kesten--Stigum (KS) bound and that efficient recovery is impossible below the KS bound. This conjecture has since stimulated a lot of interest, with the achievability side proven in a line of research that culminated in the work of Abbe and Sandon (2018). Conversely, recent work by Sohn and Wein (2025) provides evidence for the hardness part using the low-degree paradigm.
"In this paper we investigate community recovery in the regime q = q_n → ∞ as n → ∞ where no such predictions exist. We show that efficient inference of communities remains possible above the KS bound. Furthermore, we show that recovery of block models is low-degree hard below the KS bound when the number of communities satisfies q ≪ √n. Perhaps surprisingly, we find that when q ≫ √n, there is an efficient algorithm based on non-backtracking walks for recovery even below the KS bound. We identify a new threshold and ask if it is the threshold for efficient recovery in this regime. Finally, we show that detection is easy and identify (up to a constant) the information-theoretic threshold for community recovery as the number of communities q diverges.
"Our low-degree hardness results also naturally have consequences for graphon estimation, improving results of Luo and Gao (2024)."]]></description>
<dc:subject>to:NB community_discovery network_data_analysis graphons re:smoothing_adjacency_matrices</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:2fa03337b383/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:community_discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:graphons"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:smoothing_adjacency_matrices"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2503.16959">
    <title>[2503.16959] Higher-order dissimilarity measures for hypergraph comparison</title>
    <dc:date>2025-03-31T23:37:14+00:00</dc:date>
    <link>https://arxiv.org/abs/2503.16959</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In recent years, networks with higher-order interactions have emerged as a powerful tool to model complex systems. Comparing these higher-order systems remains however a challenge. Traditional similarity measures designed for pairwise networks fail indeed to capture salient features of hypergraphs, hence potentially neglecting important information. To address this issue, here we introduce two novel measures, Hyper NetSimile and Hyperedge Portrait Divergence, specifically designed for comparing hypergraphs. These measures take explicitly into account the properties of multi-node interactions, using complementary approaches. They are defined for any arbitrary pair of hypergraphs, of potentially different sizes, thus being widely applicable. We illustrate the effectiveness of these metrics through clustering experiments on synthetic and empirical higher-order networks, showing their ability to correctly group hypergraphs generated by different models and to distinguish real-world systems coming from different contexts. Our results highlight the advantages of using higher-order dissimilarity measures over traditional pairwise representations in capturing the full structural complexity of the systems considered."]]></description>
<dc:subject>to:NB network_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:5249779be399/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2110.15886">
    <title>[2110.15886] A probabilistic view of latent space graphs and phase transitions</title>
    <dc:date>2025-01-13T05:10:37+00:00</dc:date>
    <link>https://arxiv.org/abs/2110.15886</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We study random graphs with latent geometric structure, where the probability of each edge depends on the underlying random positions corresponding to the two endpoints. We focus on the setting where this conditional probability is a general monotone increasing function of the inner product of two vectors; such a function can naturally be viewed as the cumulative distribution function of some independent random variable. We consider a one-parameter family of random graphs, characterized by the variance of this random variable, that smoothly interpolates between a random dot product graph and an Erdős--Rényi random graph. We prove phase transitions of detecting geometry in these graphs, in terms of the dimension of the underlying geometric space and the variance parameter of the conditional probability. When the dimension is high or the variance is large, the graph is similar to an Erdős--Rényi graph with the same edge density that does not possess geometry; in other parameter regimes, there is a computationally efficient signed triangle statistic that distinguishes them. The proofs make use of information-theoretic inequalities and concentration of measure phenomena."]]></description>
<dc:subject>to:NB network_data_analysis sds_icsd_search phase_transitions</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:08090b3d5fd9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sds_icsd_search"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:phase_transitions"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2211.13000">
    <title>[2211.13000] A Network Classification Method based on Density Time Evolution Patterns Extracted from Network Automata</title>
    <dc:date>2024-07-23T14:56:27+00:00</dc:date>
    <link>https://arxiv.org/abs/2211.13000</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Network modeling has proven to be an efficient tool for many interdisciplinary areas, including social, biological, transport, and many other real world complex systems. In addition, cellular automata (CA) are a formalism that has been studied in the last decades as a model for exploring patterns in the dynamic spatio-temporal behavior of these systems based on local rules. Some studies explore the use of cellular automata to analyze the dynamic behavior of networks, denominating them as network automata (NA). Recently, NA proved to be efficient for network classification, since it uses a time-evolution pattern (TEP) for the feature extraction. However, the TEPs explored by previous studies are composed of binary values, which does not represent detailed information on the network analyzed. Therefore, in this paper, we propose alternate sources of information to use as descriptor for the classification task, which we denominate as density time-evolution pattern (D-TEP) and state density time-evolution pattern (SD-TEP). We explore the density of alive neighbors of each node, which is a continuous value, and compute feature vectors based on histograms of the TEPs. Our results show a significant improvement compared to previous studies at five synthetic network databases and also seven real world databases. Our proposed method demonstrates not only a good approach for pattern recognition in networks, but also shows great potential for other kinds of data, such as images."]]></description>
<dc:subject>to:NB to_read network_data_analysis cellular_automata classifiers via:vaguery</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c05ac37b45b9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:cellular_automata"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:vaguery"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2404.11464">
    <title>[2404.11464] Rates of convergence and normal approximations for estimators of local dependence random graph models</title>
    <dc:date>2024-04-24T14:12:23+00:00</dc:date>
    <link>https://arxiv.org/abs/2404.11464</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Local dependence random graph models are a class of block models for network data which allow for dependence among edges under a local dependence assumption defined around the block structure of the network. Since being introduced by Schweinberger and Handcock (2015), research in the statistical network analysis and network science literatures have demonstrated the potential and utility of this class of models. In this work, we provide the first statistical disclaimers which provide conditions under which estimation and inference procedures can be expected to provide accurate and valid inferences. This is accomplished by deriving convergence rates of inference procedures for local dependence random graph models based on a single observation of the graph, allowing both the number of model parameters and the sizes of blocks to tend to infinity. First, we derive the first non-asymptotic bounds on the ℓ2-error of maximum likelihood estimators, along with convergence rates. Second, and more importantly, we derive the first non-asymptotic bounds on the error of the multivariate normal approximation. In so doing, we introduce the first principled approach to providing statistical disclaimers through quantifying the uncertainty about statistical conclusions based on data."

--- I kind of like the phrase "statistical disclaimer", but I'm pretty sure it's just good old fashioned probably-approximately-correct, a.k.a. consistency.]]></description>
<dc:subject>statistics network_data_analysis exponential_family_random_graphs stochastic_block_models in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:42f4a77dbcd5/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:exponential_family_random_graphs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:stochastic_block_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2402.07999">
    <title>[2402.07999] NetInfoF Framework: Measuring and Exploiting Network Usable Information</title>
    <dc:date>2024-03-12T01:33:46+00:00</dc:date>
    <link>https://arxiv.org/abs/2402.07999</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Given a node-attributed graph, and a graph task (link prediction or node classification), can we tell if a graph neural network (GNN) will perform well? More specifically, do the graph structure and the node features carry enough usable information for the task? Our goals are (1) to develop a fast tool to measure how much information is in the graph structure and in the node features, and (2) to exploit the information to solve the task, if there is enough. We propose NetInfoF, a framework including NetInfoF_Probe and NetInfoF_Act, for the measurement and the exploitation of network usable information (NUI), respectively. Given a graph data, NetInfoF_Probe measures NUI without any model training, and NetInfoF_Act solves link prediction and node classification, while two modules share the same backbone. In summary, NetInfoF has following notable advantages: (a) General, handling both link prediction and node classification; (b) Principled, with theoretical guarantee and closed-form solution; (c) Effective, thanks to the proposed adjustment to node similarity; (d) Scalable, scaling linearly with the input size. In our carefully designed synthetic datasets, NetInfoF correctly identifies the ground truth of NUI and is the only method being robust to all graph scenarios. Applied on real-world datasets, NetInfoF wins in 11 out of 12 times on link prediction compared to general GNN baselines."]]></description>
<dc:subject>to:NB network_data_analysis classifiers entropy_estimation information_theory faloutsos.christos</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:d6e3be34531e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:classifiers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:entropy_estimation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:information_theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:faloutsos.christos"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2402.15489">
    <title>[2402.15489] On inference for modularity statistics in structured networks</title>
    <dc:date>2024-03-05T16:40:57+00:00</dc:date>
    <link>https://arxiv.org/abs/2402.15489</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This paper revisits the classical concept of network modularity and its spectral relaxations used throughout graph data analysis. We formulate and study several modularity statistic variants for which we establish asymptotic distributional results in the large-network limit for networks exhibiting nodal community structure. Our work facilitates testing for network differences and can be used in conjunction with existing theoretical guarantees for stochastic blockmodel random graphs. Our results are enabled by recent advances in the study of low-rank truncations of large network adjacency matrices. We provide confirmatory simulation studies and real data analysis pertaining to the network neuroscience study of psychosis, specifically schizophrenia. Collectively, this paper contributes to the limited existing literature to date on statistical inference for modularity-based network analysis. Supplemental materials for this article are available online."]]></description>
<dc:subject>community_discovery network_data_analysis stochastic_block_models schizophrenia in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:0f780eabaf26/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:community_discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:stochastic_block_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:schizophrenia"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2401.01404v2">
    <title>[2401.01404v2] Scalable network reconstruction in subquadratic time</title>
    <dc:date>2024-01-11T01:12:15+00:00</dc:date>
    <link>https://arxiv.org/abs/2401.01404v2</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Network reconstruction consists in determining the unobserved pairwise couplings between N nodes given only observational data on the resulting behavior that is conditioned on those couplings -- typically a time-series or independent samples from a graphical model. A major obstacle to the scalability of algorithms proposed for this problem is a seemingly unavoidable quadratic complexity of O(N^2), corresponding to the requirement of each possible pairwise coupling being contemplated at least once, despite the fact that most networks of interest are sparse, with a number of non-zero couplings that is only O(N). Here we present a general algorithm applicable to a broad range of reconstruction problems that achieves its result in subquadratic time, with a data-dependent complexity loosely upper bounded by O(N^{3/2} log N), but with a more typical log-linear complexity of O(N log^2 N). Our algorithm relies on a stochastic second neighbor search that produces the best edge candidates with high probability, thus bypassing an exhaustive quadratic search. In practice, our algorithm achieves a performance that is many orders of magnitude faster than the quadratic baseline, allows for easy parallelization, and thus enables the reconstruction of networks with hundreds of thousands and even millions of nodes and edges."]]></description>
<dc:subject>to:NB network_data_analysis inference_to_latent_objects piexoto.tiago_p. computational_statistics via:rvenkat</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:8414c5373c7d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:inference_to_latent_objects"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:piexoto.tiago_p."/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:computational_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:via:rvenkat"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://projecteuclid.org/journals/annals-of-statistics/volume-51/issue-6/Projective-sparse-and-learnable-latent-position-network-models/10.1214/23-AOS2340.short">
    <title>Projective, sparse and learnable latent position network models</title>
    <dc:date>2023-12-22T21:02:52+00:00</dc:date>
    <link>https://projecteuclid.org/journals/annals-of-statistics/volume-51/issue-6/Projective-sparse-and-learnable-latent-position-network-models/10.1214/23-AOS2340.short</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["When modeling network data using a latent position model, it is typical to assume that the nodes’ positions are independently and identically distributed. However, this assumption implies the average node degree grows linearly with the number of nodes, which is inappropriate when the graph is thought to be sparse. We propose an alternative assumption—that the latent positions are generated according to a Poisson point process—and show that it is compatible with various levels of sparsity. Unlike other notions of sparse latent position models in the literature, our framework also defines a projective sequence of probability models, thus ensuring consistency of statistical inference across networks of different sizes. We establish conditions for consistent estimation of the latent positions, and compare our results to existing frameworks for modeling sparse networks."

--- Ungated: http://arxiv.org/abs/1709.09702
--- Comments: (2023-12) - (2017-09) = referee #2, man, referee #2]]></description>
<dc:subject>in_NB self-promotion network_data_analysis spencer.neil to:blog re:geographons_project graphons</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:8e9df0f8cb85/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:self-promotion"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:spencer.neil"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:blog"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:geographons_project"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:graphons"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2203.14223">
    <title>[2203.14223] Identifying Peer Influence in Therapeutic Communities</title>
    <dc:date>2023-11-16T03:04:55+00:00</dc:date>
    <link>https://arxiv.org/abs/2203.14223</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We investigate if there is a peer influence or role model effect on successful graduation from Therapeutic Communities (TCs). We analyze anonymized individual-level observational data from 3 TCs that kept records of written exchanges of affirmations and corrections among residents, and their precise entry and exit dates. The affirmations allow us to form peer networks, and the entry and exit dates allow us to define a causal effect of interest. We conceptualize the causal role model effect as measuring the difference in the expected outcome of a resident (ego) who can observe one of their social contacts (e.g., peers who gave affirmations), to be successful in graduating before the ego's exit vs not successfully graduating before the ego's exit. Since peer influence is usually confounded with unobserved homophily in observational data, we model the network with a latent variable model to estimate homophily and include it in the outcome equation. We provide a theoretical guarantee that the bias of our peer influence estimator decreases with sample size. Our results indicate there is an effect of peers' graduation on the graduation of residents. The magnitude of peer influence differs based on gender, race, and the definition of the role model effect. A counterfactual exercise quantifies the potential benefits of intervention of assigning a buddy to "at-risk" individuals directly on the treated resident and indirectly on their peers through network propagation."

--- OK, maybe we should have written out the more general "assume you can estimate latent locations in an arbitrary graphon at such-and-such a rate" theorem...]]></description>
<dc:subject>to:NB to_read have_skimmed network_data_analysis homophily social_influence re:community_control</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:b0381fb50e84/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_skimmed"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:homophily"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_influence"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:community_control"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2309.03969">
    <title>[2309.03969] Estimating the prevalence of peer effects and other spillovers</title>
    <dc:date>2023-11-02T01:17:18+00:00</dc:date>
    <link>https://arxiv.org/abs/2309.03969</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In settings where interference between units is possible, we define the prevalence of indirect effects to be the number of units who are affected by the treatment of others. This quantity does not fully identify an indirect effect, but may be used to show whether such effects are widely prevalent. Given a randomized experiment with binary-valued outcomes, methods are presented for conservative point estimation and one-sided interval estimation. No assumptions beyond randomization of treatment are required, allowing for usage in settings where models or assumptions on interference might be questionable. To show asymptotic coverage of our intervals in settings not covered by existing results, we provide a central limit theorem that combines local dependence and sampling without replacement. Consistency and minimax properties of the point estimator are shown as well. The approach is demonstrated on an experiment in which students were treated for a highly transmissible parasitic infection, for which we find that a significant fraction of students were affected by the treatment of schools other than their own."]]></description>
<dc:subject>to:NB causal_inference experiments_on_networks network_data_analysis kith_and_kin choi.david_s.</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:4f870f4f26ad/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:causal_inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:experiments_on_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:choi.david_s."/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2306.07252">
    <title>[2306.07252] On the Validity of Conformal Prediction for Network Data Under Non-Uniform Sampling</title>
    <dc:date>2023-06-28T01:22:37+00:00</dc:date>
    <link>https://arxiv.org/abs/2306.07252</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We study the properties of conformal prediction for network data under various sampling mechanisms that commonly arise in practice but often result in a non-representative sample of nodes. We interpret these sampling mechanisms as selection rules applied to a superpopulation and study the validity of conformal prediction conditional on an appropriate selection event. We show that the sampled subarray is exchangeable conditional on the selection event if the selection rule satisfies a permutation invariance property and a joint exchangeability condition holds for the superpopulation. Our result implies the finite-sample validity of conformal prediction for certain selection events related to ego networks and snowball sampling. We also show that when data are sampled via a random walk on a graph, a variant of weighted conformal prediction yields asymptotically valid prediction sets for an independently selected node from the population."]]></description>
<dc:subject>conformal_prediction network_data_analysis kith_and_kin lunde.robert in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:7666bfb36eb0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:conformal_prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lunde.robert"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2206.05081">
    <title>[2206.05081] The Evolution Of Centralisation on Cryptocurrency Platforms</title>
    <dc:date>2023-06-24T20:37:58+00:00</dc:date>
    <link>https://arxiv.org/abs/2206.05081</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["More than ten years ago the blockchain was acclaimed as the solution to overcome centralised trusted third parties for online payments. Through the years the crypto-movement changed and evolved, although decentralisation remained the core ideology and the necessary feature every new crypto-project should provide. In this paper we study the concept of centralisation in cryptocurrencies using a wide array of methodologies from the complex systems literature, on a comparative collection of blockchains, in order to define the many different levels a blockchain system may display (de-)centralisation and to question whether the present state of cryptocurrencies is, in a technological and economical sense, actually decentralised."]]></description>
<dc:subject>in_NB network_data_analysis cryptocurrencies re:no_one_makes_you_push_to_github have_skimmed</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:fad8d993f1a9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:cryptocurrencies"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:no_one_makes_you_push_to_github"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_skimmed"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.tandfonline.com/doi/full/10.1080/01621459.2023.2213466?casa_token=5YCQmnR5dvgAAAAA%3ALmyxVAGqVmvF-xgzm8Xk3b0CoQY3O9T4_YAJ-EL3djAUMPgoV855ee62aIIWj5oINszLh5cqdEA">
    <title>Full article: Survival Mixed Membership Blockmodel</title>
    <dc:date>2023-06-08T15:33:28+00:00</dc:date>
    <link>https://www.tandfonline.com/doi/full/10.1080/01621459.2023.2213466?casa_token=5YCQmnR5dvgAAAAA%3ALmyxVAGqVmvF-xgzm8Xk3b0CoQY3O9T4_YAJ-EL3djAUMPgoV855ee62aIIWj5oINszLh5cqdEA</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Whenever we send a message via a channel such as e-mail, Facebook, WhatsApp, WeChat, or LinkedIn, we care about the response rate—the probability that our message will receive a response—and the response time—how long it will take to receive a reply. Recent studies have made considerable efforts to model the sending behaviors of messages in social networks with point processes. However, statistical research on modeling response rates and response times on social networks is still lacking. Compared with sending behaviors, which are often determined by the sender’s characteristics, response rates and response times further depend on the relationship between the sender and the receiver. Here, we develop a survival mixed membership blockmodel (SMMB) that integrates semiparametric cure rate models with a mixed membership stochastic blockmodel to analyze time-to-event data observed for node pairs in a social network, and we are able to prove its model identifiability without the pure node assumption. We develop a Markov chain Monte Carlo algorithm to conduct posterior inference and select the number of social clusters in the network according to the conditional deviance information criterion. The application of the SMMB to the Enron e-mail corpus offers novel insights into the company’s organization and power relations."]]></description>
<dc:subject>to:NB stochastic_block_models point_processes network_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:2158c75ce0e7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:stochastic_block_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:point_processes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2305.06353">
    <title>[2305.06353] An Overview of Asymptotic Normality in Stochastic Blockmodels: Cluster Analysis and Inference</title>
    <dc:date>2023-06-08T15:30:22+00:00</dc:date>
    <link>https://arxiv.org/abs/2305.06353</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This paper provides a selective review of the statistical network analysis literature focused on clustering and inference problems for stochastic blockmodels and their variants. We survey asymptotic normality results for stochastic blockmodels as a means of thematically linking classical statistical concepts to contemporary research in network data analysis. Of note, multiple different forms of asymptotically Gaussian behavior arise in stochastic blockmodels and are useful for different purposes, pertaining to estimation and testing, the characterization of cluster structure in community detection, and understanding latent space geometry. This paper concludes with a discussion of open problems and ongoing research activities addressing asymptotic normality and its implications for statistical network modeling."]]></description>
<dc:subject>to:NB stochastic_block_models network_data_analysis central_limit_theorem to_teach:baby-nets</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:5e2b399628cb/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:stochastic_block_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:central_limit_theorem"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:baby-nets"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2305.10380">
    <title>[2305.10380] Goodness of fit testing based on graph functionals for homogeneous Erdös Renyi graphs</title>
    <dc:date>2023-06-08T15:25:49+00:00</dc:date>
    <link>https://arxiv.org/abs/2305.10380</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The Erdös Renyi graph is a popular choice to model network data as it is parsimoniously parametrized, straightforward to interpret and easy to estimate. However, it has limited suitability in practice, since it often fails to capture crucial characteristics of real-world networks. To check the adequacy of this model, we propose a novel class of goodness-of-fit tests for homogeneous Erdös Renyi models against heterogeneous alternatives that allow for nonconstant edge probabilities. We allow for asymptotically dense and sparse networks. The tests are based on graph functionals that cover a broad class of network statistics for which we derive limiting distributions in a unified manner. The resulting class of asymptotic tests includes several existing tests as special cases. Further, we propose a parametric bootstrap and prove its consistency, which allows for performance improvements particularly for small network sizes and avoids the often tedious variance estimation for asymptotic tests. Moreover, we analyse the sensitivity of different goodness-of-fit test statistics that rely on popular choices of subgraphs. We evaluate the proposed class of tests and illustrate our theoretical findings by extensive simulations."

--- On the one hand, I don't think anyone ever regards E-R as a serious option.  OTOH, this might be a step towards goodness-of-fit testing for models that are serious candidates...]]></description>
<dc:subject>goodness-of-fit network_data_analysis in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:8d1e90a8b88c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:goodness-of-fit"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2305.14814">
    <title>[2305.14814] What functions can Graph Neural Networks compute on random graphs? The role of Positional Encoding</title>
    <dc:date>2023-06-05T03:07:54+00:00</dc:date>
    <link>https://arxiv.org/abs/2305.14814</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We aim to deepen the theoretical understanding of Graph Neural Networks (GNNs) on large graphs, with a focus on their expressive power. Existing analyses relate this notion to the graph isomorphism problem, which is mostly relevant for graphs of small sizes, or studied graph classification or regression tasks, while prediction tasks on nodes are far more relevant on large graphs. Recently, several works showed that, on very general random graphs models, GNNs converge to certain functions as the number of nodes grows. In this paper, we provide a more complete and intuitive description of the function space generated by equivariant GNNs for node-tasks, through general notions of convergence that encompass several previous examples. We emphasize the role of input node features, and study the impact of node Positional Encodings (PEs), a recent line of work that has been shown to yield state-of-the-art results in practice. Through the study of several examples of PEs on large random graphs, we extend previously known universality results to significantly more general models. Our theoretical results hint at some normalization tricks, which are shown numerically to have a positive impact on GNN generalization on synthetic and real data. Our proofs contain new concentration inequalities of independent interest."]]></description>
<dc:subject>to:NB neural_networks network_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:de570450713c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neural_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2305.12470">
    <title>[2305.12470] Quasi-Monte Carlo Graph Random Features</title>
    <dc:date>2023-05-28T13:45:37+00:00</dc:date>
    <link>https://arxiv.org/abs/2305.12470</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We present a novel mechanism to improve the accuracy of the recently-introduced class of graph random features (GRFs). Our method induces negative correlations between the lengths of the algorithm's random walks by imposing antithetic termination: a procedure to sample more diverse random walks which may be of independent interest. It has a trivial drop-in implementation. We derive strong theoretical guarantees on the properties of these quasi-Monte Carlo GRFs (q-GRFs), proving that they yield lower-variance estimators of the 2-regularised Laplacian kernel under mild conditions. Remarkably, our results hold for any graph topology. We demonstrate empirical accuracy improvements on a variety of tasks including a new practical application: time-efficient approximation of the graph diffusion process. To our knowledge, q-GRFs constitute the first rigorously studied quasi-Monte Carlo scheme for kernels defined on combinatorial objects, inviting new research on correlations between graph random walks."]]></description>
<dc:subject>random_features network_data_analysis monte_carlo graph_theory to_read re:codename:catherine_wheel in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:98485ad59efc/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:random_features"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:monte_carlo"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:graph_theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:codename:catherine_wheel"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2305.00156">
    <title>[2305.00156] Taming graph kernels with random features</title>
    <dc:date>2023-05-05T01:42:21+00:00</dc:date>
    <link>https://arxiv.org/abs/2305.00156</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We introduce in this paper the mechanism of graph random features (GRFs). GRFs can be used to construct unbiased randomized estimators of several important kernels defined on graphs' nodes, in particular the regularized Laplacian kernel. As regular RFs for non-graph kernels, they provide means to scale up kernel methods defined on graphs to larger networks. Importantly, they give substantial computational gains also for smaller graphs, while applied in downstream applications. Consequently, GRFs address the notoriously difficult problem of cubic (in the number of the nodes of the graph) time complexity of graph kernels algorithms. We provide a detailed theoretical analysis of GRFs and an extensive empirical evaluation: from speed tests, through Frobenius relative error analysis to kmeans graph-clustering with graph kernels. We show that the computation of GRFs admits an embarrassingly simple distributed algorithm that can be applied if the graph under consideration needs to be split across several machines. We also introduce a (still unbiased) quasi Monte Carlo variant of GRFs, q-GRFs, relying on the so-called reinforced random walks, that might be used to optimize the variance of GRFs. As a byproduct, we obtain a novel approach to solve certain classes of linear equations with positive and symmetric matrices."]]></description>
<dc:subject>kernel_methods random_features network_data_analysis re:codename:catherine_wheel to_read in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:ea626ea336b0/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kernel_methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:random_features"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:codename:catherine_wheel"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2304.03331">
    <title>[2304.03331] A Socio-Demographic Latent Space Approach to Spatial Data When Geography is Important but Not All-Important</title>
    <dc:date>2023-04-27T14:47:55+00:00</dc:date>
    <link>https://arxiv.org/abs/2304.03331</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Many models for spatial and spatio-temporal data assume that "near things are more related than distant things," which is known as the first law of geography. While geography may be important, it may not be all-important, for at least two reasons. First, technology helps bridge distance, so that regions separated by large distances may be more similar than would be expected based on geographical distance. Second, geographical, political, and social divisions can make neighboring regions dissimilar. We develop a flexible Bayesian approach for learning from spatial data which units are close in an unobserved socio-demographic space and hence which units are similar. As a by-product, the Bayesian approach helps quantify the relative importance of socio-demographic space relative to geographical space. To demonstrate the proposed approach, we present simulations along with an application to county-level data on median household income in the U.S. state of Florida."]]></description>
<dc:subject>network_data_analysis social_networks in_NB spatial_statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:e5a0327fabe9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:spatial_statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.jstor.org/stable/44965462">
    <title>Sampling Individuals With Large Sexual Networks: An Evaluation of Four Approaches on JSTOR</title>
    <dc:date>2023-04-24T21:39:01+00:00</dc:date>
    <link>https://www.jstor.org/stable/44965462</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Background: Methods for accessing large sexual networks are essential for investigating the mechanisms for the spread of sexually transmitted infections. Goal: Four samples of cases were compared with the total population to determine which identified the largest networks. Study Design: Individuals with positive test results for chlamydia during a 6-month period were selected from a laboratory database and linked with sex partner information from a notifiable disease registry. Sexual networks were constructed for a random sample, people with positive results from two or more tests for chlamydia, people with positive tests results for both gonorrhea and chlamydia, and the preceding two groups combined. Results: The coinfected people combined with the repeaters yielded the highest proportion (47.8%) of large networks (>10 people), followed by the coinfected people, the repeaters, and finally the random sample. Conclusions: People coinfected with chlamydia and gonorrhea and those with repeated chlamydial infection present ideal opportunities for both research and prevention."

--- Background reading for a collaboration that never (as it were) came to fruition.]]></description>
<dc:subject>to_reread social_networks network_data_analysis social_measurement practices_relating_to_the_transmission_of_genetic_information to_teach:baby-nets cleaning_out_the_filing_cabinet_for_the_first_time_since_2005 have_read in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:11d02cd222d1/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_reread"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_measurement"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:practices_relating_to_the_transmission_of_genetic_information"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:baby-nets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:cleaning_out_the_filing_cabinet_for_the_first_time_since_2005"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://academic.oup.com/jid/article/191/Supplement_1/S42/934023">
    <title>Determinants and Consequences of Sexual Networks as They Affect the Spread of Sexually Transmitted Infections | The Journal of Infectious Diseases | Oxford Academic</title>
    <dc:date>2023-04-24T21:36:46+00:00</dc:date>
    <link>https://academic.oup.com/jid/article/191/Supplement_1/S42/934023</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Because pathogens spread only within the unique context of a sexual union between people when one person is infectious, the other is susceptible to new infection, and condoms are not used to prevent transmission, the epidemiological study of sexually transmitted infections (STIs) is particularly challenging. Social network analysis entails the study of ties among people and how the structure and quality of such ties affect individuals and overall group dynamics. Although ascertaining complete sexual networks is difficult, application of this approach has provided unique insights into the spread of STIs that traditional individual-based epidemiological methods do not capture. This article provides a brief background on the design and assessments of studies of social networks, to illustrate how these methods have been applied to understanding the distribution of STIs, to inform the development of interventions for STI control."

--- Background reading for a collaboration that never (as it were) came to fruition.
--- Is Sevgi Aral any relation of Sinan Aral?]]></description>
<dc:subject>to_reread social_networks network_data_analysis social_measurement practices_relating_to_the_transmission_of_genetic_information cleaning_out_the_filing_cabinet_for_the_first_time_since_2005 to_teach:baby-nets have_read in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:6034c3cb1dd6/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_reread"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_measurement"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:practices_relating_to_the_transmission_of_genetic_information"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:cleaning_out_the_filing_cabinet_for_the_first_time_since_2005"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:baby-nets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2303.16598">
    <title>[2303.16598] Robust recovery of Robinson $L^p$-graphons</title>
    <dc:date>2023-04-22T13:54:22+00:00</dc:date>
    <link>https://arxiv.org/abs/2303.16598</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In this paper, we study the Robinson graphon completion/recovery problem for the class of $L^p$-graphons. We introduce a function Λ on the space of $L^p$-graphons, which measures the extent to which a graphon w exhibits the Robinson property: for all x<y<z, w(x,z)≤min{w(x,y),w(y,z)}. We prove that the function Λ satisfies the following: (1) Λ is compatible with the cut-norm, in the sense that if two graphons are close in the cut-norm, then their Λ values are close; and (2) when p>5, every $L^p$-graphon w can be approximated by a Robinson graphon, with error of the approximation bounded in terms of Λ(w). When w is a noisy version of a Robinson graphon, our method provides a concrete formula for recovering the Robinson graphon approximating w in cut-norm."]]></description>
<dc:subject>re:smoothing_adjacency_matrices graphons network_data_analysis scooped? to_read in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:2651f1bebb02/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:smoothing_adjacency_matrices"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:graphons"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:scooped?"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.tandfonline.com/doi/abs/10.1080/01621459.2023.2183133">
    <title>Network Inference Using the Hub Model and Variants: Journal of the American Statistical Association: Vol 0, No ja</title>
    <dc:date>2023-03-18T13:57:06+00:00</dc:date>
    <link>https://www.tandfonline.com/doi/abs/10.1080/01621459.2023.2183133</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Statistical network analysis primarily focuses on inferring the parameters of an observed network. In many applications, especially in the social sciences, the observed data is the groups formed by individual subjects. In these applications, the network is itself a parameter of a statistical model. Zhao and Weko (2019) propose a model-based approach, called the hub model, to infer implicit networks from grouping behavior. The hub model assumes that each member of the group is brought together by a member of the group called the hub. The set of members which can serve as a hub is called the hub set. The hub model belongs to the family of Bernoulli mixture models. Identifiability of Bernoulli mixture model parameters is a notoriously difficult problem. This paper proves identifiability of the hub model parameters and estimation consistency under mild conditions. Furthermore, this paper generalizes the hub model by introducing a model component that allows hubless groups in which individual nodes spontaneously appear independent of any other individual. We refer to this additional component as the null component. The new model bridges the gap between the hub model and the degenerate case of the mixture model – the Bernoulli product. Identifiability and consistency are also proved for the new model. In addition, a penalized likelihood approach is proposed to estimate the hub set when it is unknown."]]></description>
<dc:subject>inference_to_latent_objects functional_connectivity network_data_analysis time_series to_read bickel.peter in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:fdee119e04f3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:inference_to_latent_objects"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:functional_connectivity"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:time_series"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:bickel.peter"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2302.10095">
    <title>[2302.10095] Conformal Prediction for Network-Assisted Regression</title>
    <dc:date>2023-03-18T13:55:08+00:00</dc:date>
    <link>https://arxiv.org/abs/2302.10095</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["An important problem in network analysis is predicting a node attribute using both network covariates, such as graph embedding coordinates or local subgraph counts, and conventional node covariates, such as demographic characteristics. While standard regression methods that make use of both types of covariates may be used for prediction, statistical inference is complicated by the fact that the nodal summary statistics are often dependent in complex ways. We show that under a mild joint exchangeability assumption, a network analog of conformal prediction achieves finite sample validity for a wide range of network covariates. We also show that a form of asymptotic conditional validity is achievable. The methods are illustrated on both simulated networks and a citation network dataset."]]></description>
<dc:subject>network_data_analysis prediction regression conformal_prediction kith_and_kin lunde.robert levina.elizaveta zhu.ji in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:cd205e9fff43/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:conformal_prediction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lunde.robert"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:levina.elizaveta"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:zhu.ji"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.cambridge.org/core/journals/network-science/article/abs/network-community-detection-method-with-integration-of-data-from-multiple-layers-and-node-attributes/4381E20BCA49A082C27A6F607F3D561F">
    <title>A network community detection method with integration of data from multiple layers and node attributes | Network Science | Cambridge Core</title>
    <dc:date>2023-03-18T13:52:18+00:00</dc:date>
    <link>https://www.cambridge.org/core/journals/network-science/article/abs/network-community-detection-method-with-integration-of-data-from-multiple-layers-and-node-attributes/4381E20BCA49A082C27A6F607F3D561F</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Multilayer networks are in the focus of the current complex network study. In such networks, multiple types of links may exist as well as many attributes for nodes. To fully use multilayer—and other types of complex networks in applications, the merging of various data with topological information renders a powerful analysis. First, we suggest a simple way of representing network data in a data matrix where rows correspond to the nodes and columns correspond to the data items. The number of columns is allowed to be arbitrary, so that the data matrix can be easily expanded by adding columns. The data matrix can be chosen according to targets of the analysis and may vary a lot from case to case. Next, we partition the rows of the data matrix into communities using a method which allows maximal compression of the data matrix. For compressing a data matrix, we suggest to extend so-called regular decomposition method for non-square matrices. We illustrate our method for several types of data matrices, in particular, distance matrices, and matrices obtained by augmenting a distance matrix by a column of node degrees, or by concatenating several distance matrices corresponding to layers of a multilayer network. We illustrate our method with synthetic power-law graphs and two real networks: an Internet autonomous systems graph and a world airline graph. We compare the outputs of different community recovery methods on these graphs and discuss how incorporating node degrees as a separate column to the data matrix leads our method to identify community structures well-aligned with tiered hierarchical structures commonly encountered in complex scale-free networks."]]></description>
<dc:subject>to:NB community_discovery network_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:1cf2365d0f22/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:community_discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2303.04871">
    <title>[2303.04871] Discovering a change point in a time series of organoid networks via the iso-mirror</title>
    <dc:date>2023-03-17T18:12:06+00:00</dc:date>
    <link>https://arxiv.org/abs/2303.04871</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Recent advancements have been made in the development of cell-based in-vitro neuronal networks, or organoids. In order to better understand the network structure of these organoids, [6] propose a method for inferring effective connectivity networks from multi-electrode array data. In this paper, a novel statistical method called spectral mirror estimation [2] is applied to a time series of inferred effective connectivity organoid networks. This method produces a one-dimensional iso-mirror representation of the dynamics of the time series of the networks. A classical change point algorithm is then applied to this representation, which successfully detects a neuroscientifically significant change point coinciding with the time inhibitory neurons start appearing and the percentage of astrocytes increases dramatically [9]. This finding demonstrates the potential utility of applying the iso-mirror dynamic structure discovery method to inferred effective connectivity time series of organoid networks."]]></description>
<dc:subject>to:NB neural_data_analysis network_data_analysis change-point_problem re:network_differences dimension_reduction</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:5859e41f08a4/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:neural_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:change-point_problem"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:network_differences"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:dimension_reduction"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2303.07023">
    <title>[2303.07023] Strong, weak or no balance? Testing structural hypotheses against real networks</title>
    <dc:date>2023-03-17T17:42:57+00:00</dc:date>
    <link>https://arxiv.org/abs/2303.07023</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The abundance of data about social, economic and political relationships has opened an era in which social theories can be tested against empirical evidence, allowing human behaviour to be analyzed just as any other natural phenomenon. The present contribution focuses on balance theory, stating that social agents tend to avoid the formation of `unbalanced', or `frustrated', cycles, i.e. cycles with an odd number of negative links. Such a statement can be made statistically rigorous only after a comparison with a null model. Since the existing ones cannot account for the heterogeneity of individual actors, we, first, extend the Exponential Random Graphs framework to binary, undirected, signed networks with local constraints and, then, employ both homogeneous and heterogeneous benchmarks to compare the empirical abundance of short cycles with its expected value, on several, real-world systems. What emerges is that the level of balance in real-world networks crucially depends on (at least) three factors, i.e. the measure adopted to quantify it, the nature of the data, the null model employed for the analysis. As an example, the study of triangles reveals that homogeneous null models with global constraints tend to favour the weak version of balance theory, according to which only the triangle with one negative link should be under-represented in real, social and political networks; on the other hand, heterogeneous null models with local constraints tend to favour the strong version of balance theory, according to which also the triangle with all negative links should be under-represented in real, social networks. Biological networks, instead, are found to be significantly frustrated under any benchmark considered here."]]></description>
<dc:subject>network_data_analysis exponential_family_random_graphs social_balance social_networks in_NB of_course_its_really_a_spin_glass</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:9bad33ad8977/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:exponential_family_random_graphs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_balance"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:of_course_its_really_a_spin_glass"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2303.05909">
    <title>[2303.05909] A pseudo-likelihood approach to community detection in weighted networks</title>
    <dc:date>2023-03-17T17:42:02+00:00</dc:date>
    <link>https://arxiv.org/abs/2303.05909</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Community structure is common in many real networks, with nodes clustered in groups sharing the same connection patterns. While many community detection methods have been developed for networks with binary edges, few of them are applicable to networks with weighted edges, which are common in practice. We propose a pseudo-likelihood community estimation algorithm derived under the weighted stochastic block model for networks with normally distributed edge weights, extending the pseudo-likelihood algorithm for binary networks, which offers some of the best combinations of accuracy and computational efficiency. We prove that the estimates obtained by the proposed method are consistent under the assumption of homogeneous networks, a weighted analogue of the planted partition model, and show that they work well in practice for both homogeneous and heterogeneous networks. We illustrate the method on simulated networks and on an fMRI dataset, where edge weights represent connectivity between brain regions and are expected to be close to normal in distribution by construction."]]></description>
<dc:subject>to:NB community_discovery network_data_analysis levina.elizaveta</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:df0c9e68ab6c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:community_discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:levina.elizaveta"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2301.03630">
    <title>[2301.03630] Hierarchical core-periphery structure in networks</title>
    <dc:date>2023-01-19T04:14:18+00:00</dc:date>
    <link>https://arxiv.org/abs/2301.03630</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We study core-periphery structure in networks using inference methods based on a flexible network model that allows for traditional onion-like cores within cores, but also for hierarchical tree-like structures and more general non-nested types of structure. We propose an efficient Monte Carlo scheme for fitting the model to observed networks and report results for a selection of real-world data sets. Among other things, we observe an empirical distinction between networks showing traditional core-periphery structure with a dense core weakly connected to a sparse periphery, and an alternative structure in which the core is strongly connected both within itself and to the periphery. Networks vary in whether they are better represented by one type of structure or the other. We also observe structures that are a hybrid between core-periphery structure and community structure, in which networks have a set of non-overlapping cores that correspond roughly to communities, surrounded by a single undifferentiated periphery. Computer code implementing our methods is available."]]></description>
<dc:subject>to:NB network_data_analysis newman.mark kith_and_kin re:fractal_networks to_read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:8847520c9000/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:newman.mark"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:fractal_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2212.12839">
    <title>[2212.12839] Escape times for subgraph detection and graph partitioning</title>
    <dc:date>2023-01-18T03:12:24+00:00</dc:date>
    <link>https://arxiv.org/abs/2212.12839</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We provide a rearrangement based algorithm for fast detection of subgraphs of k vertices with long escape times for directed or undirected networks. Complementing other notions of densest subgraphs and graph cuts, our method is based on the mean hitting time required for a random walker to leave a designated set and hit the complement. We provide a new relaxation of this notion of hitting time on a given subgraph and use that relaxation to construct a fast subgraph detection algorithm and a generalization to K-partitioning schemes. Using a modification of the subgraph detector on each component, we propose a graph partitioner that identifies regions where random walks live for comparably large times. Importantly, our method implicitly respects the directed nature of the data for directed graphs while also being applicable to undirected graphs. We apply the partitioning method for community detection to a large class of model and real-world data sets."]]></description>
<dc:subject>to:NB networks network_data_analysis community_discovery stochastic_processes mucha.peter</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:f14fb18e0235/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:community_discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:stochastic_processes"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:mucha.peter"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.nature.com/articles/s41567-022-01866-8">
    <title>Laplacian renormalization group for heterogeneous networks | Nature Physics</title>
    <dc:date>2023-01-17T05:01:26+00:00</dc:date>
    <link>https://www.nature.com/articles/s41567-022-01866-8</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The renormalization group is the cornerstone of the modern theory of universality and phase transitions and it is a powerful tool to scrutinize symmetries and organizational scales in dynamical systems. However, its application to complex networks has proven particularly challenging, owing to correlations between intertwined scales. To date, existing approaches have been based on hidden geometries hypotheses, which rely on the embedding of complex networks into underlying hidden metric spaces. Here we propose a Laplacian renormalization group diffusion-based picture for complex networks, which is able to identify proper spatiotemporal scales in heterogeneous networks. In analogy with real-space renormalization group procedures, we first introduce the concept of Kadanoff supernodes as block nodes across multiple scales, which helps to overcome detrimental small-world effects that are responsible for cross-scale correlations. We then rigorously define the momentum space procedure to progressively integrate out fast diffusion modes and generate coarse-grained graphs. We validate the method through application to several real-world networks, demonstrating its ability to perform network reduction keeping crucial properties of the systems intact."]]></description>
<dc:subject>to:NB network_data_analysis re:fractal_networks to_read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:a6c4fb2e11be/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:fractal_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2210.07491">
    <title>[2210.07491] Latent process models for functional network data</title>
    <dc:date>2022-12-09T20:01:58+00:00</dc:date>
    <link>https://arxiv.org/abs/2210.07491</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Network data are often sampled with auxiliary information or collected through the observation of a complex system over time, leading to multiple network snapshots indexed by a continuous variable. Many methods in statistical network analysis are traditionally designed for a single network, and can be applied to an aggregated network in this setting, but that approach can miss important functional structure. Here we develop an approach to estimating the expected network explicitly as a function of a continuous index, be it time or another indexing variable. We parameterize the network expectation through low dimensional latent processes, whose components we represent with a fixed, finite-dimensional functional basis. We derive a gradient descent estimation algorithm, establish theoretical guarantees for recovery of the low-dimensional structure, compare our method to competitors, and apply it to a dataset of international political interactions over time, showing our proposed method to adapt well to data, outperform competitors, and provide interpretable and meaningful results."]]></description>
<dc:subject>to:NB network_data_analysis statistics inference_to_latent_objects functional_data_analysis levina.liza to_read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:df335b19ec43/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:inference_to_latent_objects"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:functional_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:levina.liza"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.annualreviews.org/doi/abs/10.1146/annurev-soc-030320-102100">
    <title>Causal Network Analysis | Annual Review of Sociology</title>
    <dc:date>2022-08-06T16:26:17+00:00</dc:date>
    <link>https://www.annualreviews.org/doi/abs/10.1146/annurev-soc-030320-102100</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Fueled by recent advances in statistical modeling and the rapid growth of network data, social network analysis has become increasingly popular in sociology and related disciplines. However, a significant amount of work in the field has been descriptive and correlational, which prevents the findings from being more rigorously translated into practices and policies. This article provides a review of the popular models and methods for causal network analysis, with a focus on causal inference threats (such as measurement error, missing data, network endogeneity, contextual confounding, simultaneity, and collinearity) and potential solutions (such as instrumental variables, specialized experiments, and leveraging longitudinal data). It covers major models and methods for both network formation and network effects and for both sociocentric networks and egocentric networks. Lastly, this review also discusses future directions for causal network analysis."

--- Last tag applies to the proposed fixes.]]></description>
<dc:subject>to:NB to_read causal_inference network_data_analysis re:homophily_and_confounding social_networks color_me_skeptical</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c844f4b17fb8/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:causal_inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:homophily_and_confounding"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:color_me_skeptical"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2206.05614">
    <title>[2206.05614] Information theory of spatial network ensembles</title>
    <dc:date>2022-07-14T18:26:24+00:00</dc:date>
    <link>https://arxiv.org/abs/2206.05614</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This chapter provides a comprehensive and self-contained discussion of the most recent developments of information theory of networks. Maximum entropy models of networks are the least biased ensembles enforcing a set of constraints and are used in a number of application to produce null model of networks. Here maximum entropy ensembles of networks are introduced by distinguishing between microcanonical and canonical ensembles revealing the the non-equivalence of these two classes of ensembles in the case in which an extensive number of constraints is imposed. It is very common that network data includes also meta-data describing feature of the nodes such as their position in a real or in an abstract space. The features of the nodes can be treated as latent variables that determine the cost associated to each link. Maximum entropy network ensembles with latent variables include spatial networks and their generalisation. In this chapter we cover the case of transportation networks including airport and rail networks. Maximum entropy network ensemble satisfy a given set of constraints. However traditional approaches do not provide any insight on the origin of such constraints. We use information theory principles to find the optimal distribution of latent variables in the framework of the classical information theory of networks. This theory explains the "blessing of non-uniformity" of data guaranteeing the efficiency of inference algorithms."]]></description>
<dc:subject>exponential_family_random_graphs network_data_analysis in_NB</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:ac3c8cad1b21/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:exponential_family_random_graphs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2008.05337">
    <title>[2008.05337] Inference of a universal social scale and segregation measures using social connectivity kernels</title>
    <dc:date>2022-07-02T14:15:55+00:00</dc:date>
    <link>https://arxiv.org/abs/2008.05337</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["How people connect with one another is a fundamental question in the social sciences, and the resulting social networks can have a profound impact on our daily lives. Blau offered a powerful explanation: people connect with one another based on their positions in a social space. Yet a principled measure of social distance, allowing comparison within and between societies, remains elusive. We use the connectivity kernel of conditionally-independent edge models to develop a family of segregation statistics with desirable properties: they offer an intuitive and universal characteristic scale on social space (facilitating comparison across datasets and societies), are applicable to multivariate and mixed node attributes, and capture segregation at the level of individuals, pairs of individuals, and society as a whole. We show that the segregation statistics can induce a metric on Blau space (a space spanned by the attributes of the members of society) and provide maps of two societies. Under a Bayesian paradigm, we infer the parameters of the connectivity kernel from eleven ego-network datasets collected in four surveys in the United Kingdom and United States. The importance of different dimensions of Blau space is similar across time and location, suggesting a macroscopically stable social fabric. Physical separation and age differences have the most significant impact on segregation within friendship networks with implications for intergenerational mixing and isolation in later stages of life."]]></description>
<dc:subject>to:NB network_data_analysis kernel_methods</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c48b6bbc70b3/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kernel_methods"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2206.01553">
    <title>[2206.01553] Detecting hyperbolic geometry in networks: why triangles are not enough</title>
    <dc:date>2022-06-09T08:40:37+00:00</dc:date>
    <link>https://arxiv.org/abs/2206.01553</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In the past decade, geometric network models have received vast attention in the literature. These models formalize the natural idea that similar vertices are likely to connect. Because of that, these models are able to adequately capture many common structural properties of real-world networks, such as self-invariance and high clustering. Indeed, many real-world networks can be accurately modeled by positioning vertices of a network graph in hyperbolic spaces. Nevertheless, if one observes only the network connections, the presence of geometry is not always evident. Currently, triangle counts and clustering coefficients are the standard statistics to signal the presence of geometry. In this paper we show that triangle counts or clustering coefficients are insufficient because they fail to detect geometry induced by hyperbolic spaces. We therefore introduce a novel triangle-based statistic, which weighs triangles based on their strength of evidence for geometry. We show analytically, as well as on synthetic and real-world data, that this is a powerful statistic to detect hyperbolic geometry in networks."]]></description>
<dc:subject>to:NB network_data_analysis hyperbolic_geometry re:hyperbolic_networks</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:83164d4efa1e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hyperbolic_geometry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:hyperbolic_networks"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2203.07230">
    <title>[2203.07230] Laplacian Renormalization Group for heterogeneous networks</title>
    <dc:date>2022-06-06T12:56:59+00:00</dc:date>
    <link>https://arxiv.org/abs/2203.07230</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["The renormalization group is the cornerstone of the modern theory of universality and phase transitions, a powerful tool to scrutinize symmetries and organizational scales in dynamical systems. However, its network counterpart is particularly challenging due to correlations between intertwined scales. To date, the explorations are based on hidden geometries hypotheses. Here, we propose a Laplacian RG diffusion-based picture in complex networks, defining both the Kadanoff supernodes' concept, the momentum space procedure, \emph{á la Wilson}, and applying this RG scheme to real networks in a natural and parsimonious way."]]></description>
<dc:subject>to:NB renormalization graph_theory network_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:9aea199820a2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:renormalization"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:graph_theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2203.06948">
    <title>[2203.06948] Continuous Time Graph Processes with Known ERGM Equilibria: Contextual Review, Extensions, and Synthesis</title>
    <dc:date>2022-06-05T22:18:06+00:00</dc:date>
    <link>https://arxiv.org/abs/2203.06948</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Graph processes that unfold in continuous time are of obvious theoretical and practical interest. Particularly useful are those whose long-term behavior converges to a graph distribution of known form. Here, we review some of the conditions for such convergence, and provide examples of novel and/or known processes that do so. These include subfamilies of the well-known stochastic actor oriented models, as well as continuum extensions of temporal and separable temporal exponential family random graph models. We also comment on some related threads in the broader work on network dynamics, which provide additional context for the continuous time case."]]></description>
<dc:subject>exponential_family_random_graphs networks_in_and_over_time network_data_analysis in_NB butts.carter_t.</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:9da48de8e600/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:exponential_family_random_graphs"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:networks_in_and_over_time"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:butts.carter_t."/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://www.jmlr.org/papers/v23/19-1056.html">
    <title>Recovering shared structure from multiple networks with unknown edge distributions</title>
    <dc:date>2022-03-27T15:55:13+00:00</dc:date>
    <link>https://www.jmlr.org/papers/v23/19-1056.html</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In increasingly many settings, data sets consist of multiple samples from a population of networks, with vertices aligned across networks; for example, brain connectivity networks in neuroscience. We consider the setting where the observed networks have a shared expectation, but may differ in the noise structure on their edges. Our approach exploits the shared mean structure to denoise edge-level measurements of the observed networks and estimate the underlying population-level parameters. We also explore the extent to which edge-level errors influence estimation and downstream inference. In the process, we establish a finite-sample concentration inequality for the low-rank eigenvalue truncation of a random weighted adjacency matrix, which may be of independent interest. The proposed approach is illustrated on synthetic networks and on data from an fMRI study of schizophrenia."]]></description>
<dc:subject>to:NB network_data_analysis levina.liza</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:4254f5793b91/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:levina.liza"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://projecteuclid.org/journals/bernoulli/volume-28/issue-2/Asymptotically-efficient-estimators-for-stochastic-blockmodels--The-naive-MLE/10.3150/21-BEJ1376.short">
    <title>Asymptotically efficient estimators for stochastic blockmodels: The naive MLE, the rank-constrained MLE, and the spectral estimator</title>
    <dc:date>2022-03-08T14:51:26+00:00</dc:date>
    <link>https://projecteuclid.org/journals/bernoulli/volume-28/issue-2/Asymptotically-efficient-estimators-for-stochastic-blockmodels--The-naive-MLE/10.3150/21-BEJ1376.short</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We establish asymptotic normality results for estimation of the block probability matrix B in stochastic blockmodel graphs using spectral embedding when the average degrees grows at the rate of ω(√n) in n, the number of vertices. As a corollary, we show that when B is of full-rank, estimates of B obtained from spectral embedding are asymptotically efficient. When B is singular the estimates obtained from spectral embedding can have smaller mean square error than those obtained from maximizing the log-likelihood under no rank assumption, and furthermore, can be almost as efficient as the true MLE that assumes the rank of B is known. Our results indicate, in the context of stochastic blockmodel graphs, that spectral embedding is not just computationally tractable, but that the resulting estimates are also admissible, even when compared to the purportedly optimal but computationally intractable maximum likelihood estimation under no rank assumption."]]></description>
<dc:subject>to:NB network_data_analysis stochastic_block_models spectral_methods priebe.carey_e.</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:693e844626fb/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:stochastic_block_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:spectral_methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:priebe.carey_e."/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://journals.sagepub.com/doi/full/10.1177/2053951720949577">
    <title>Epistemic clashes in network science: Mapping the tensions between idiographic and nomothetic subcultures - Mathieu Jacomy, 2020</title>
    <dc:date>2022-03-01T02:45:07+00:00</dc:date>
    <link>https://journals.sagepub.com/doi/full/10.1177/2053951720949577</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This article maps a controversy in network science over the last 15 years, dividing the field about the epistemic status of a central notion, scale-freeness. The article accounts for the two main disputes, in 2005 and in 2018, as they unfolded in academic publications and on social media. This article analyzes the conflict, and the reasons why it reignited in 2018, to the surprise of many. It is argued that (1) the concept of complex networks is shared by the distinct subcultures of theorists and experimentalists; and that (2) these subcultures have incompatible approaches to knowledge: nomothetic (scale-freeness is the sign of a universal law) and idiographic (scale-freeness is an empirical characterization). Following Galison, this article contends that network science is a trading zone where theorists and experimentalists can trade knowledge across the epistemic divide."

--- Decorum precludes comment.]]></description>
<dc:subject>to:NB to_read heavy_tails network_data_analysis re:scale_free_networks_controversy</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:dd1171d1a56e/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:heavy_tails"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:scale_free_networks_controversy"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://osf.io/preprints/socarxiv/4mp6x/">
    <title>SocArXiv Papers | Human-Network Regions as Effective Geographic Units for Disease Mitigation</title>
    <dc:date>2022-01-11T15:34:24+00:00</dc:date>
    <link>https://osf.io/preprints/socarxiv/4mp6x/</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Susceptibility to infectious diseases such as COVID-19 depends on how they spread, and many studies have captured the decrease in COVID-19 spread due to reduction in travel. However, less is known about practical geographic boundaries for that limit the spread of COVID-19 to adjacent places. To detect such boundaries, we apply community-detection algorithms to large networks of mobility and social-media connections to construct geographic regions that reflect natural human movement and relationships at the county level for the continental United States. We measure COVID-19 cases, case rates, and case-rate variations across adjacent counties and examine how often COVID-19 crosses the boundaries of these functional regions. We find that regions that we construct using GPS-trace networks and especially commuter networks have the smallest rates of COVID-19 case rates along the boundaries, so these regions may reflect natural partitions in COVID-19 transmission. Conversely, regions that we construct from geolocated Facebook friendships and Twitter connections yield the least effective partitions. Our analysis reveals that regions that are derived from movement flows are more appropriate geographic units than states for making policy decisions about opening areas for activity, assessing vulnerability of populations, and allocating resources. Our insights are also relevant for policy decisions and public messaging in future emergency situations."

--- Cf. Asta (2012): biological contagions are distinguishable from social ones because they spread geographically rather than within online-social-network communities.]]></description>
<dc:subject>to:NB spatio-temporal_statistics network_data_analysis porter.mason_a. community_discovery</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:610570028f53/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:spatio-temporal_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:porter.mason_a."/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:community_discovery"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://doi.org/10.1017/9781108774116">
    <title>Modularity and Dynamics on Complex Networks</title>
    <dc:date>2022-01-10T20:06:19+00:00</dc:date>
    <link>https://doi.org/10.1017/9781108774116</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Complex networks are typically not homogeneous, as they tend to display an array of structures at different scales. A feature that has attracted a lot of research is their modular organisation, i.e., networks may often be considered as being composed of certain building blocks, or modules. In this Element, the authors discuss a number of ways in which this idea of modularity can be conceptualised, focusing specifically on the interplay between modular network structure and dynamics taking place on a network. They discuss, in particular, how modular structure and symmetries may impact on network dynamics and, vice versa, how observations of such dynamics may be used to infer the modular structure. They also revisit several other notions of modularity that have been proposed for complex networks and show how these can be related to and interpreted from the point of view of dynamical processes on networks."]]></description>
<dc:subject>to:NB to_read network_data_analysis community_discovery</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:088275d9040b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:community_discovery"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://doi.org/10.1017/9781108865791">
    <title>The Shortest Path to Network Geometry</title>
    <dc:date>2022-01-10T20:04:38+00:00</dc:date>
    <link>https://doi.org/10.1017/9781108865791</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Real networks comprise from hundreds to millions of interacting elements and permeate all contexts, from technology to biology to society. All of them display non-trivial connectivity patterns, including the small-world phenomenon, making nodes to be separated by a small number of intermediate links. As a consequence, networks present an apparent lack of metric structure and are difficult to map. Yet, many networks have a hidden geometry that enables meaningful maps in the two-dimensional hyperbolic plane. The discovery of such hidden geometry and the understanding of its role have become fundamental questions in network science giving rise to the field of network geometry. This Element reviews fundamental models and methods for the geometric description of real networks with a focus on applications of real network maps, including decentralized routing protocols, geometric community detection, and the self-similar multiscale unfolding of networks by geometric renormalization."]]></description>
<dc:subject>to:NB network_data_analysis hyperbolic_geometry geometry inference_to_latent_objects re:hyperbolic_networks to_read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:b1527a8324c7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:hyperbolic_geometry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:geometry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:inference_to_latent_objects"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:hyperbolic_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2112.00183">
    <title>[2112.00183] Descriptive vs. inferential community detection: pitfalls, myths and half-truths</title>
    <dc:date>2021-12-07T14:37:42+00:00</dc:date>
    <link>https://arxiv.org/abs/2112.00183</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Community detection is one of the most important methodological fields of network science, and one which has attracted a significant amount of attention over the past decades. This area deals with the automated division of a network into fundamental building blocks, with the objective of providing a summary of its large-scale structure. Despite its importance and widespread adoption, there is a noticeable gap between what is considered the state-of-the-art and the methods that are actually used in practice in a variety of fields. Here we attempt to address this discrepancy by dividing existing methods according to whether they have a "descriptive" or an "inferential" goal. While descriptive methods find patterns in networks based on intuitive notions of community structure, inferential methods articulate a precise generative model, and attempt to fit it to data. In this way, they are able to provide insights into the mechanisms of network formation, and separate structure from randomness in a manner supported by statistical evidence. We review how employing descriptive methods with inferential aims is riddled with pitfalls and misleading answers, and thus should be in general avoided. We argue that inferential methods are more typically aligned with clearer scientific questions, yield more robust results, and should be in general preferred. We attempt to dispel some myths and half-truths often believed when community detection is employed in practice, in an effort to improve both the use of such methods as well as the interpretation of their results."]]></description>
<dc:subject>to:NB to_read community_discovery network_data_analysis inference_to_latent_objects peixoto.tiago to_teach:baby-nets</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:17d3ef93eb8c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:community_discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:inference_to_latent_objects"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:peixoto.tiago"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_teach:baby-nets"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2107.00248">
    <title>[2107.00248] Randomization-only Inference in Experiments with Interference</title>
    <dc:date>2021-10-25T20:04:21+00:00</dc:date>
    <link>https://arxiv.org/abs/2107.00248</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In experiments that study social phenomena, such as peer influence or herd immunity, the treatment of one unit may influence the outcomes of others. Such "interference between units" violates traditional approaches for causal inference, so that additional assumptions are required to model the underlying social mechanism. We propose an approach that requires no such assumptions, allowing for interference that is both unmodeled and strong, with confidence intervals found using only the randomization of treatment. Additionally, the approach allows for the usage of regression, matching, or weighting, as may best fit the application at hand. Inference is done by bounding the distribution of the estimation error over all possible values of the unknown counterfactual, using an integer program. Examples are shown using a vaccine trial and two experiments investigating social influence."

--- I enjoyed hearing David talk about this, but I continue to feel like there's something fundamental I'm not getting here.]]></description>
<dc:subject>to:NB have_skimmed heard_the_talk experimental_design experiments_on_networks network_data_analysis statistics kith_and_kin to_read choi.david_s.</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:4d7e366be396/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:have_skimmed"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:heard_the_talk"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:experimental_design"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:experiments_on_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:choi.david_s."/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://onlinelibrary.wiley.com/doi/abs/10.1002/sta4.428">
    <title>Community detection with nodal information: likelihood and its variational approximation - Weng - - Stat - Wiley Online Library</title>
    <dc:date>2021-10-18T13:55:08+00:00</dc:date>
    <link>https://onlinelibrary.wiley.com/doi/abs/10.1002/sta4.428</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Community detection is one of the fundamental problems in the study of network data. Most existing community detection approaches only consider edge information as inputs, and the output could be suboptimal when nodal information is available. In such cases, it is desirable to leverage nodal information for the improvement of community detection accuracy. Towards this goal, we propose a flexible network model incorporating nodal information, and develop likelihood-based inference methods. For the proposed methods, we establish favorable asymptotic properties as well as efficient algorithms for computation. Numerical experiments show the effectiveness of our methods in utilizing nodal information across a variety of simulated and real network data sets."

--- Not sure yet how this improves on the nine-and-sixty other ways of doing this appearing in the recent literature...]]></description>
<dc:subject>to:NB community_discovery network_data_analysis inference_to_latent_objects</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:14a346f001cd/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:community_discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:inference_to_latent_objects"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://onlinelibrary.wiley.com/doi/abs/10.1002/sta4.426">
    <title>Consistent Estimation of Number of Communities in Stochastic Block Models using Cross‐Validation - Qin - - Stat - Wiley Online Library</title>
    <dc:date>2021-10-11T19:50:46+00:00</dc:date>
    <link>https://onlinelibrary.wiley.com/doi/abs/10.1002/sta4.426</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Stochastic block model (SBM) and its variants constitute an important family of probabilistic tools for studying network data. There is a rich literature on methods for estimating the block labels and model parameters of stochastic block models. Most of these studies would require the number of communities K as an input, making the estimation of K an important problem. Cross-validation is a natural option for this problem since it is a widely used generic method for evaluating model fitting. However, cross-validation is known to be inconsistent and prone to over-fitting unless impractical split ratios are used. Cross-validation with confidence (CVC) is proposed with better theoretical guarantees in conventional settings. We study the properties of CVC for stochastic block models. Our theoretical studies show that CVC, unlike the standard cross-validation, can consistently pick the optimal K under suitable conditions. We implement this method and check its performance against other established methods on both synthetic and real datasets."

--- Pretty sure I bookmarked the preprint.]]></description>
<dc:subject>to:NB stochastic_block_models community_discovery cross-validation network_data_analysis kith_and_kin lei.jing heard_the_talk</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:23e1965abcdf/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:stochastic_block_models"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:community_discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:cross-validation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:lei.jing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:heard_the_talk"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2003.04235">
    <title>[2003.04235] Differential Network Analysis: A Statistical Perspective</title>
    <dc:date>2021-09-23T19:09:44+00:00</dc:date>
    <link>https://arxiv.org/abs/2003.04235</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA[Networks effectively capture interactions among components of complex systems, and have thus become a mainstay in many scientific disciplines. Growing evidence, especially from biology, suggests that networks undergo changes over time, and in response to external stimuli. In biology and medicine, these changes have been found to be predictive of complex diseases. They have also been used to gain insight into mechanisms of disease initiation and progression. Primarily motivated by biological applications, this article provides a review of recent statistical machine learning methods for inferring networks and identifying changes in their structures.]]></description>
<dc:subject>to:NB re:network_differences shojaie.ali network_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:4406cafc2c4d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:network_differences"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:shojaie.ali"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://dl.acm.org/doi/abs/10.1145/3447548.3470821">
    <title>Fairness in Networks | Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery &amp; Data Mining</title>
    <dc:date>2021-09-06T13:23:33+00:00</dc:date>
    <link>https://dl.acm.org/doi/abs/10.1145/3447548.3470821</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["As ML systems have become more broadly adopted in high-stakes settings, our scrutiny of them should reflect their greater impact on real lives. The field of fairness in data mining and machine learning has blossomed in the last decade, but most of the attention has been directed at tabular and image data. In this tutorial, we will discuss recent advances in network fairness. Specifically, we focus on problems where one's position in a network holds predictive value (e.g., in a classification or regression setting) and favorable network position can lead to a cascading loop of positive outcomes, leading to increased inequality. We start by reviewing important sociological notions such as social capital, information access, and influence, as well as the now-standard definitions of fairness in ML settings. We will discuss the formalizations of these concepts in the network fairness setting, presenting recent work in the field, and future directions."]]></description>
<dc:subject>to:NB network_data_analysis social_networks algorithmic_fairness kith_and_kin clauset.aaron to_read</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:5cfc6bc2769a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:social_networks"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:algorithmic_fairness"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:clauset.aaron"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://dl.acm.org/doi/abs/10.1145/3447548.3470795">
    <title>Causal Inference from Network Data | Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery &amp; Data Mining</title>
    <dc:date>2021-09-06T13:22:45+00:00</dc:date>
    <link>https://dl.acm.org/doi/abs/10.1145/3447548.3470795</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This tutorial presents state-of-the-art research on causal inference from network data in the presence of interference. We start by motivating research in this area with real-world applications, such as measuring influence in social networks and market experimentation. We discuss the challenges of applying existing causal inference techniques designed for independent and identically distributed (i.i.d.) data to relational data, some of the solutions that currently exist and the gaps and opportunities for future research. We present existing network experiment designs for measuring different possible effects of interest. Then we focus on causal inference from observational data, its representation, identification, and estimation. We conclude with research on causal discovery in networks."]]></description>
<dc:subject>to:NB to_read network_data_analysis causal_inference causal_discovery re:homophily_and_confounding</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:c59868f8ecfb/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:causal_inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:causal_discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:homophily_and_confounding"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2107.07489">
    <title>[2107.07489] Clustering of heterogeneous populations of networks</title>
    <dc:date>2021-08-11T18:42:50+00:00</dc:date>
    <link>https://arxiv.org/abs/2107.07489</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Statistical methods for reconstructing networks from repeated measurements typically assume that all measurements are generated from the same underlying network structure. This need not be the case, however. People's social networks might be different on weekdays and weekends, for instance. Brain networks may differ between healthy patients and those with dementia or other conditions. Here we describe a Bayesian analysis framework for such data that allows for the fact that network measurements may be reflective of multiple possible structures. We define a finite mixture model of the measurement process and derive a fast Gibbs sampling procedure that samples exactly from the full posterior distribution of model parameters. The end result is a clustering of the measured networks into groups with similar structure. We demonstrate the method on both real and synthetic network populations."]]></description>
<dc:subject>in_NB clustering network_data_analysis re:network_differences newman.mark kith_and_kin statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:15a8e80b2b7b/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:in_NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:re:network_differences"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:newman.mark"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:kith_and_kin"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2108.01149">
    <title>[2108.01149] Linking the Network Centrality Measures Closeness and Degree</title>
    <dc:date>2021-08-06T15:45:37+00:00</dc:date>
    <link>https://arxiv.org/abs/2108.01149</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["We propose a non-linear relationship between two of the most important measures of centrality in a network: degree and closeness. Based on a shortest-path tree approximation, we give an analytic derivation that shows the inverse of closeness is linearly dependent on the logarithm of degree. We show that our hypothesis works well for a range of networks produced from stochastic network models including the Erdős-Rényi and Barabási-Albert models. We then test our relation on networks derived from a wide range of real-world data including social networks, communication networks, citation networks, co-author networks, and hyperlink networks. We find our relationship holds true within a few percent in most, but not all, cases. We suggest some ways that this relationship can be used to enhance network analysis."]]></description>
<dc:subject>to:NB network_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:f98e7a4a3e59/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2007.02156">
    <title>[2007.02156] On spectral algorithms for community detection in stochastic blockmodel graphs with vertex covariates</title>
    <dc:date>2021-08-06T15:35:20+00:00</dc:date>
    <link>https://arxiv.org/abs/2007.02156</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["In network inference applications, it is often desirable to detect community structure, namely to cluster vertices into groups, or blocks, according to some measure of similarity. Beyond mere adjacency matrices, many real networks also involve vertex covariates that carry key information about underlying block structure in graphs. To assess the effects of such covariates on block recovery, we present a comparative analysis of two model-based spectral algorithms for clustering vertices in stochastic blockmodel graphs with vertex covariates. The first algorithm uses only the adjacency matrix, and directly estimates the block assignments. The second algorithm incorporates both the adjacency matrix and the vertex covariates into the estimation of block assignments, and moreover quantifies the explicit impact of the vertex covariates on the resulting estimate of the block assignments. We employ Chernoff information to analytically compare the algorithms' performance and derive the information-theoretic Chernoff ratio for certain models of interest. Analytic results and simulations suggest that the second algorithm is often preferred: we can often better estimate the induced block assignments by first estimating the effect of vertex covariates. In addition, real data examples also indicate that the second algorithm has the advantages of revealing underlying block structure and taking observed vertex heterogeneity into account in real applications. Our findings emphasize the importance of distinguishing between observed and unobserved factors that can affect block structure in graphs."]]></description>
<dc:subject>to:NB community_discovery network_data_analysis spectral_clustering statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:9371f0ecd0fc/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:community_discovery"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:spectral_clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2107.14677">
    <title>[2107.14677] Inference for Dependent Data with Learned Clusters</title>
    <dc:date>2021-08-03T04:44:31+00:00</dc:date>
    <link>https://arxiv.org/abs/2107.14677</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["This paper presents and analyzes an approach to cluster-based inference for dependent data. The primary setting considered here is with spatially indexed data in which the dependence structure of observed random variables is characterized by a known, observed dissimilarity measure over spatial indices. Observations are partitioned into clusters with the use of an unsupervised clustering algorithm applied to the dissimilarity measure. Once the partition into clusters is learned, a cluster-based inference procedure is applied to a statistical hypothesis testing procedure. The procedure proposed in the paper allows the number of clusters to depend on the data, which gives researchers a principled method for choosing an appropriate clustering level. The paper gives conditions under which the proposed procedure asymptotically attains correct size. A simulation study shows that the proposed procedure attains near nominal size in finite samples in a variety of statistical testing problems with dependent data."]]></description>
<dc:subject>to:NB spatial_statistics clustering network_data_analysis to_read statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:a2dcdacdd605/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:spatial_statistics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to_read"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://sociologicalscience.com/articles-v8-14-285/">
    <title>Estimating Homophily in Social Networks Using Dyadic Predictions | Sociological Science</title>
    <dc:date>2021-08-03T04:38:11+00:00</dc:date>
    <link>https://sociologicalscience.com/articles-v8-14-285/</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Predictions of node categories are commonly used to estimate homophily and other relational properties in networks. However, little is known about the validity of using predictions for this task. We show that estimating homophily in a network is a problem of predicting categories of dyads (edges) in the graph. Homophily estimates are unbiased when predictions of dyad categories are unbiased. Node-level prediction models, such as the use of names to classify ethnicity or gender, do not generally produce unbiased predictions of dyad categories and therefore produce biased homophily estimates. Bias comes from three sources: sampling bias, correlation between model errors and node degree, and correlation between node-level model errors along dyads. We examine three methods for estimating homophily: predicting node categories, predicting dyad categories, and a hybrid “ego–alter” approach. This analysis indicates that only the dyadic prediction approach is unbiased, whereas the node-level approach produces both high bias and high overall error. We find that node-level classification performance is not a reliable indicator of accuracy for homophily. Although this article focuses on a particular version of homophily, results generalize to heterophilous cases and other dyadic measures."]]></description>
<dc:subject>to:NB homophily network_data_analysis</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:17e7deffa866/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:homophily"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/2107.04224">
    <title>[2107.04224] Causal Inference for Influence Propagation -- Identifiability of the Independent Cascade Model</title>
    <dc:date>2021-07-12T14:48:36+00:00</dc:date>
    <link>https://arxiv.org/abs/2107.04224</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Independent cascade (IC) model is a widely used influence propagation model for social networks. In this paper, we incorporate the concept and techniques from causal inference to study the identifiability of parameters from observational data in extended IC model with unobserved confounding factors, which models more realistic propagation scenarios but is rarely studied in influence propagation modeling before. We provide the conditions for the identifiability or unidentifiability of parameters for several special structures including the Markovian IC model, semi-Markovian IC model, and IC model with a global unobserved variable. Parameter identifiability is important for other tasks such as influence maximization under the diffusion networks with unobserved confounding factors."

--- The word "homophily" does not appear in the MS.]]></description>
<dc:subject>to:NB information_cascades causal_inference network_data_analysis color_me_skeptical statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:ee4bb288ef49/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:information_cascades"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:causal_inference"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:color_me_skeptical"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://icml.cc/Conferences/2021/ScheduleMultitrack?event=9083">
    <title>Consistent Nonparametric Methods for Network Assisted Covariate Estimation</title>
    <dc:date>2021-07-02T15:44:26+00:00</dc:date>
    <link>https://icml.cc/Conferences/2021/ScheduleMultitrack?event=9083</link>
    <dc:creator>cshalizi</dc:creator><description><![CDATA["Networks with node covariates are commonplace: for example, people in a social network have interests, or product preferences, etc. If we know the covariates for some nodes, can we infer them for the remaining nodes? In this paper we propose a new similarity measure between two nodes based on the patterns of their 2-hop neighborhoods. We show that a simple algorithm (CN-VEC) like nearest neighbor regression with this metric is consistent for a wide range of models when the degree grows faster than n^(1/3) up to logarithmic factors, where n is the number of nodes. For "low-rank" latent variable models, the natural contender will be to estimate the latent variables using SVD and use them for non-parametric regression. While we show consistency of this method under less stringent sparsity conditions, our experimental results suggest that the simple local CN-VEC method either outperforms the global SVD-RBF method, or has comparable performance for low rank models. We also present simulated and real data experiments to show the effectiveness of our algorithms compared to the state of the art."]]></description>
<dc:subject>to:NB network_data_analysis smoothing sarkar.purnamrita statistics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:cshalizi/b:8adffe044a85/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:to:NB"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:network_data_analysis"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:smoothing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:sarkar.purnamrita"/>
	<rdf:li rdf:resource="https://pinboard.in/u:cshalizi/t:statistics"/>
</rdf:Bag></taxo:topics>
</item>
</rdf:RDF>