Source code for isweep.outgroups

# imports
import numpy as np
import networkx as nx
from copy import deepcopy

[docs]def make_ibd_graph(ibd_segments): '''Create IBD graph from pairwise segments Parameters ---------- ibd_segments : tuple Collection of pairwise segments Returns ------- networkx.Graph Graph w/ edges if haplotypes have a detectable IBD segment ''' ibdgraph=nx.Graph() edgelist=[] for edge in ibd_segments: edgelist.append((edge[0],edge[1])) ibdgraph.add_edges_from(edgelist) return ibdgraph
[docs]def diameter_communities(graph, K=3, max_communities=np.inf): '''Method to find connected communities with max diameter 2*K Parameters ---------- graph : networkx.Graph K : int Default is max diameter 2*3 max_communities : int Default is to find all communities Returns ------- list Node sets for fully connected subgraphs with max diameter 2*K ''' # initialize community detection loop copygraph = deepcopy(graph) communities = [] stayput=True if max_communities >= np.inf: max_communities = copygraph.number_of_nodes() # initialize degree centrality M = copygraph.number_of_nodes() - 1 degrees = nx.degree_centrality(copygraph) degrees = {key:round(val*M,0) for key, val in degrees.items()} sorted_degrees = sorted([(key, val) for key, val in degrees.items()], key=lambda x:x[1], reverse=True) for i in range(max_communities): # find community center community = dict() while stayput: try: center = sorted_degrees[0][0] if copygraph.has_node(center): stayput=False else: sorted_degrees.pop(0) except: return communities current_neighbors = [center] community[center] = 0 # find up to K-order neighbors k = 1 while k <= K: # up to K-order neighbors new_neighbors = [] for neighbor in current_neighbors: adjacent_neighbors = copygraph.neighbors(neighbor) for adjacent_neighbor in adjacent_neighbors: if adjacent_neighbor not in community: community[adjacent_neighbor] = 0 new_neighbors.append(adjacent_neighbor) copygraph.remove_node(neighbor) current_neighbors = new_neighbors k += 1 for neighbor in current_neighbors: if copygraph.has_node(neighbor): copygraph.remove_node(neighbor) communities.append(set(community.keys())) stayput=True return communities
def community_list(graph, communities): '''Return list of subgraphs Parameters ---------- graph : networkx.Graph communities : list of sets ''' return [graph.subgraph(comm) for comm in communities] def largest_community(communities): ''' Return haploids in largest community Parameters ---------- list Haplotype/sample IDs in the largest community ''' return list(communities[0]) def outlier_communities(communities, scalar=5): '''Combine outlier (or single largest) excess IBD cluster Parameters ---------- communities : list scalar : float Multiplier of numpy.std() Returns ------- list IDs for haploids in outlier communities ''' communities2 = deepcopy(communities) communities2 = sorted(communities2, key=len, reverse=True) community_sizes = [len(community) for community in communities] community_sizes = np.array(community_sizes) cutoff = community_sizes.mean() + community_sizes.std() * scalar oindices = (community_sizes >= cutoff).nonzero() outliers = set() for i in oindices[0]: community = communities[i] outliers = community.union(outliers) outliers = list(outliers) if len(outliers) == 0: return list(communities2[0]) else: return outliers