Source code for isweep.coalescent

# imports
import numpy as np
import pandas as pd
import networkx as nx
from scipy.stats import binom
from random import randint, sample
from math import floor, exp, log, ceil, sqrt
from numpy.random import exponential as Exp
from copy import deepcopy
from .utilities import *

H1 = nx.Graph()
H0 = nx.Graph()
H = nx.Graph()
ldist = []
ldist1 = []
ldist0 = []
tdist = []
tdist1 = []
tdist0 = []
ddist1 = []
ddist0 = []
ddist = []
pairwise_segments = []

### random walks ###

[docs]def walk_variant_backward(s, p0, Ne, random_walk = False, one_step_model = 'a', tau0 = 0, sv=-0.01, ploidy = 2): '''Variant frequencies backward in time Parameters ---------- s : float Selection coefficient p0 : float Variant frequency at generation 0 Ne : dict Effective population sizes random_walk : bool True for random walk one_step_model : str 'm', 'a', 'd', or 'r' tau0 : int Generation when neutrality begins sv: float Allele frequency of standing variation (Default -0.01 will assume de novo sweep) ploidy : int 1 for haploid or 2 for diploid Returns ------- tuple NumPy arrays for frequencies and sizes ''' # local functions assert ploidy in [1,2] assert one_step_model in ['m','a','r','d'] assert p0 <= 1 assert p0 >= 0 assert sv < 1 def haploid_bwd(p, s): # haploid is same as multiplicative (Felsenstein, 2017) return p / (1 + s - s * p) def multiplicative_bwd(p, s): num = 1 dnm = 1 + (1 - p) * s return p * num / dnm def dominant_bwd(p, s): if s <= 0: return p a = p * s b = 1 + s - 2 * p * s c = - p qf = - b + sqrt((b ** 2) - 4 * a * c) qf = qf / 2 / a return qf def additive_bwd(p, s): if s <= 0: return p a = s b = 1 + s - 2 * p * s c = - p qf = - b + sqrt((b ** 2) - 4 * a * c) qf = qf / 2 / a return qf def recessive_bwd(p, s): if s <= 0: return p a = (1 - p) * s b = 1 c = - p qf = - b + sqrt((b ** 2) - 4 * a * c) qf = qf / 2 / a return qf # one step calculation if ploidy == 1: one_step = haploid_bwd else: if one_step_model == 'a': one_step = additive_bwd elif one_step_model == 'r': one_step = recessive_bwd elif one_step_model == 'd': one_step = dominant_bwd else: one_step = multiplicative_bwd # initialize ps = [] # frequencies xs = [] # variants Ns = [] # sizes t = floor(tau0) p = p0 N = Ne[0] x = floor(p * ploidy * N) Ns.append(N) xs.append(x) ps.append(p) if random_walk: # random walk for G in range(1, max(Ne.keys())+1): try: # population size change N = Ne[G] except KeyError: pass if G > t: p = one_step(p, s) x = int(binom.rvs(int(ploidy * N), p)) p = x / ploidy / N if x < 1: break if p >= 1: break if p <= sv: s = 0 ps.append(p) xs.append(x) Ns.append(N) return np.array([ps, Ns, xs], dtype=float) # numpy-ify else: # deterministic for G in range(1, max(Ne.keys())+1): try: # population size change N = Ne[G] except KeyError: pass if G > t: p = one_step(p, s) x = floor(p * ploidy * N) if x < 1: break if p >= 1: break if p <= sv: s = 0 Ns.append(N) xs.append(x) ps.append(p) return np.array([np.array(ps), np.array(Ns), np.array(xs)]) # numpy-ify
[docs]def walk_variant_forward(s, pG, Ne, random_walk = False, one_step_model = 'a', tau0 = 0, ploidy = 2): '''Variant frequencies forward in time Parameters ---------- s : float Selection coefficient pG : float Variant frequency at maximum generation Ne : dict Effective population sizes random_walk : bool True for random walk one_step_model : str 'm', 'a', 'd', or 'r' tau0 : int Generation when neutrality begins ploidy : int 1 for haploid or 2 for diploid Returns ------- tuple NumPy arrays for frequencies and sizes ''' # local functions assert ploidy in [1,2] def haploid_fwd(p, s): # haploid is same as multiplicative return p * (1 + s) / (1 + p * s) def additive_fwd(p, s): num = 1 + s + p * s dnm = 1 + 2 * p * s return p * num / dnm def dominant_fwd(p, s): num = 1 + s dnm = 1 + 2 * p * s * (1 - p) + p * p * s return p * num / dnm def multiplicative_fwd(p, s): num = 1 + s dnm = 1 + p * s return p * num / dnm def recessive_fwd(p, s): num = 1 + p * s dnm = 1 + p * p * s return p * num / dnm # one step calculation if ploidy == 1: one_step = haploid_fwd else: if one_step_model == 'a': one_step = additive_fwd elif one_step_model == 'r': one_step = recessive_fwd elif one_step_model == 'd': one_step = dominant_fwd else: one_step = multiplicative_fwd # initialize ps = [] # frequencies xs = [] # variants Ns = [] # sizes t = floor(tau0) p = pG G = max(Ne.keys()) N = Ne[G] x = ceil(p * ploidy * N) Ns.append(N) xs.append(x) ps.append(p) if random_walk: # random walk while G >= 0: G -= 1 try: N = Ne[G] except KeyError: pass if G > t: p = one_step(p, s) x = int(binom.rvs(int(ploidy * N), p)) p = x / ploidy / N if x < 1: break if p >= 1: break ps.append(p) xs.append(x) Ns.append(N) return np.array([np.array(ps), np.array(Ns), np.array(xs)]) # numpy-ify else: # deterministic while G >= 0: G -= 1 try: N = Ne[G] except KeyError: pass if G > t: p = one_step(p, s) x = floor(p * ploidy * N) if x < 1: break if p >= 1: break Ns.append(N) xs.append(x) ps.append(p) return np.array([np.array(ps), np.array(Ns), np.array(xs)]) # numpy-ify
### simple coalescent simulators ###
[docs]def basic_coalescent(n): '''Simulate times in basic coalescent (scale post-hoc by population size) Parameters ---------- n : int Sample size Returns ------- numpy.array Interarrival times ''' n = int(float(n)) k = n t = np.zeros(n - 1) itr = 0 curr = 0 while k > 1: bcoef = 2 / k / (k - 1) k -= 1 draw = Exp(bcoef) curr += draw t[itr] = curr itr += 1 return t
[docs]def varying_Ne_coalescent(n, Ne, ploidy = 2, to_tmrca=True): '''Simulate times in varying population size coalescent Parameters ---------- n : int Sample size Ne : dict Effective population sizes ploidy : int to_tmrca : bool Go to TMRCA Returns ------- numpy.array Arrival times in generations ''' # initialize Ne = zero_shift_Ne(Ne) maxG = max(Ne.keys()) N = Ne[maxG] Nt = np.array(list(Ne.values())) lambdas = N / Nt # relative (inverse) coalescent rates loglambdas = np.log(lambdas) Lambdas = np.cumsum(lambdas) # piecewise integral Lambdas = np.insert(Lambdas, 0, 0) times = np.log(basic_coalescent(n)) + log(ploidy) + log(N) # convert to generations times = np.exp(times) times = np.insert(times, 0, 0) dtime = np.diff(times) K = len(dtime) Taus = np.zeros(K) k = 0 itr = 0 LambdaNext = 0 vCurr = 0 # while k <= (K-1) means until most recent common ancestor (MRCA) # itr < maxG controls updates with piecewise Ne while k <= (K-1) and itr < maxG: vPrev = vCurr LambdaPrev = LambdaCurr = LambdaNext RHS = dtime[k] + LambdaPrev LambdaNext = RHS steps = 0 try: while LambdaCurr < RHS: itr += 1 steps += 1 LambdaCurr = Lambdas[itr] itr -= 1 boole = int(steps>1) LambdaCurr = Lambdas[itr]*boole + LambdaPrev*(1-boole) if boole: vCurr += exp(log(LambdaCurr-LambdaPrev) - loglambdas[itr-1]) RHS -= LambdaCurr logscalar = loglambdas[itr-1] logsoluti = log(RHS) - logscalar vCurr += exp(logsoluti) Taus[k] = (vCurr - vPrev) k += 1 except IndexError: boole = int(steps>1) LambdaCurr = Lambdas[-1]*boole + LambdaPrev*(1-boole) RHS -= LambdaCurr logscalar = loglambdas[-1] while steps > 1: vCurr += 1 steps -= 1 logsoluti = log(RHS) - logscalar vCurr += exp(logsoluti) Taus[k] = (vCurr - vPrev) k += 1 logscalar = loglambdas[-1] if to_tmrca: while k <= (K-1): # finish Taus[k] = exp(log(dtime[k]) - logscalar) k += 1 Etas = np.cumsum(Taus) Etas = Etas[Etas > 0] # return np.floor(Etas)+1 # round to discrete time return Etas # continuous time
def binomial_step(k, Nei, ploidy=2): '''Take a Binomial(n,p) step for 1 gen of WF process Parameters ---------- k : int Sample size Nei : float Effective size at time i ploidy : int Return ------ int Count from binomial experiments ''' denom = Nei * ploidy numer = int(k * (k - 1) / 2) bstep = binom.rvs(numer, 1/denom, size=1)[0] return bstep def binomial_pmf_leq1(k, Nei, ploidy=2): '''Compute mass P(\leq 1) for Binomial(n,p) for 1 gen of WF process Parameters ---------- k : int Sample size Nei : float Effective size at time i ploidy : int Return ------ float A probability mass ''' numer = int(k * (k - 1) / 2) denom = 1 / (Nei * ploidy) return binom.pmf(0, numer, denom) + binom.pmf(1, numer, denom) def wright_fisher(n, Ne, ploidy = 2, to_tmrca=True): '''Simulate times in Wright Fisher model with varying population size (Assumes that all coalescences are pairwise, i.e., no multiple mergers ) (After last generation in Ne, assume constant size and apply scaled basic_coalescent) Parameters ---------- n : int Sample size Ne : dict Effective population sizes ploidy : int to_tmrca : bool Continue to TMRCA Returns ------- numpy.array Arrival times in generations ''' # initalize n = int(float(n)) k = n itr = 0 gentimes = np.zeros(n-1) Me = deepcopy(Ne) timer = min(Me.keys()) lastG = max(Me.keys()) lastN = Me[lastG] cuml = 0 while k > 1 and timer <= lastG: # record coalescent events size = Me[timer] * ploidy draw = [randint(0, size - 1) for i in range(k)] table = {} for i in range(len(draw)): try: table[draw[i]] += 1 except KeyError: table[draw[i]] = 1 events = [val for key, val in table.items() if val >= 2] l = sum(events) - len(events) cuml += l timer += 1 gentimes[itr:cuml] = timer k -= l itr += l if k > 1: # finish with coalescent if to_tmrca: finish = basic_coalescent(k) * lastN + lastG finish = np.floor(finish) + 1 # round to discrete time gentimes[cuml:] = finish return gentimes[gentimes > 0] def simulate_coalgen(n, Ne, ploidy, binom_cut=0.01, to_tmrca=True, wf_approx=False): '''Simulate times in varying size population with large sample. Use binomial walk, Wright Fisher process, and the coalescent. Parameters ---------- n : int Sample size Ne : dict Effective population sizes ploidy : int binom_cut : float Threshold to conduct binomial update [P(X <= 1) <= \alpha] to_tmrca : bool Go to TMRCA wf_approx : bool Use Binomial approximation for early WF process (default False) Returns ------- numpy.array Arrival times in generations ''' assert n > 1 n = int(float(n)) # use the Binomial approximation for WF process # at the early steps of process if wf_approx: assert binom_cut > 0 assert binom_cut < 1 j = n Taus = np.zeros(n-1) itr = min(Ne.keys()) + 1 k = 0 try: # while loop ends, i.e. condition to switch from binomial walk achieved # binomial walk process while binomial_pmf_leq1(j, Ne[itr], ploidy) <= binom_cut: # i is binomial count # j is nodes in wf process # k is index iterating up i = binomial_step(j, Ne[itr], ploidy) j -= i if j > 1: itr += 1 Taus[k:(k+i)] = itr k += i else: itr += 1 Taus[k:] = itr Taus = Taus[Taus>0] # wright fisher / coalescent process # wright fisher runs a basic coalescent at end of Ne dictionary sNe = {k:v for k,v in Ne.items() if k >= itr} Etas = wright_fisher(j, sNe, ploidy, to_tmrca) Taus = np.concatenate((Taus,Etas)) except KeyError: # while loop does not end, i.e. reach end of defined Ne Taus = Taus[Taus>0] # do not use the Binomial approximation else: Taus = wright_fisher(n, Ne, ploidy, to_tmrca) return Taus ### coalescent network simulator ### import numpy as np import pandas as pd import networkx as nx from scipy.stats import binom from random import randint, sample from math import floor, exp, log, ceil from numpy.random import exponential as Exp ### coalescent network simulator ### class DirectedPaths: def __init__(self, _id, _pairwise_output, _left_length, _right_length): self._left_length = _left_length self._right_length = _right_length self._num_leaves = 1 self._leading_leaf = _id self._pairwise_output = _pairwise_output if _pairwise_output: self._leaves = [_id] return None def add_edge(self, _left_length, _right_length): self._left_length = min(self._left_length, _left_length) self._right_length = min(self._right_length, _right_length) return None class Node: def __init__(self, _id, _time, _pairwise_output, _left_length, _right_length): self._id = _id self._time = _time if _time <= 0: self._paths_list = [DirectedPaths(_id, _pairwise_output, _left_length, _right_length)] self._num_leaves = 1 else: self._paths_list = [] self._num_leaves = 0 self._num_tracts = 0 return None def __repr__(self): return 'Node(_id = ' + str(self._id) + ')' def __str__(self): return 'Node(_id = ' + str(self._id) + ')' def add_incoming_node(self, node): self._num_leaves += node._num_leaves self._num_tracts += node._num_tracts return None def add_incoming_edge(self, scale): if scale != 0: scale = 1 / scale _left_length = Exp(scale=scale, size=1)[0] _right_length = Exp(scale=scale, size=1)[0] for paths in self._paths_list: paths.add_edge(_left_length, _right_length) else: pass return None def merge(self): paths_list = self._paths_list merged = set() # merge set doskip = set() # skip set k = len(paths_list) for i in range(k): if i not in doskip: merged.add(i) for j in range(i+1,k): if j not in doskip: paths1 = paths_list[i] paths2 = paths_list[j] if paths1._left_length == paths2._left_length: if paths1._right_length == paths2._right_length: m1 = paths1._num_leaves m2 = paths2._num_leaves m = m1 + m2 paths_list[i]._num_leaves = m paths_list[i]._leading_leaf = min(paths1._leading_leaf, paths2._leading_leaf) if paths_list[i]._pairwise_output: paths_list[i]._leaves += paths2._leaves doskip.add(j) # merging self._paths_list = [paths_list[i] for i in merged] return None def prune_paths(self, cutoff): self._paths_list = [paths for paths in self._paths_list if paths._left_length + paths._right_length >= cutoff] return None def pair_coalesce(self, node1, node2, cutoff1, cutoff2, subgraph_id=2, record_dist=True, pairwise_output=True): deltas = [self._time - node1._time, self._time - node2._time] rates = [delta / 100 for delta in deltas] node1.add_incoming_edge(rates[0]) node1.prune_paths(cutoff2) node1.merge() node2.add_incoming_edge(rates[1]) node2.prune_paths(cutoff2) node2.merge() _num_tracts, n1, n2 = pairwise_compare(node1, node2, cutoff1, cutoff2, self._time, subgraph_id, record_dist, pairwise_output) node1 = n1 node2 = n2 self._num_tracts += _num_tracts self.add_incoming_node(node1) for paths in node1._paths_list: self._paths_list.append(paths) self.add_incoming_node(node2) for paths in node2._paths_list: self._paths_list.append(paths) return None def pairwise_compare(node1, node2, cutoff1, cutoff2, _time, subgraph_id=2, record_dist=True, pairwise_output=True): '''Compare paths up coalescent tree based in IBD Parameters ---------- cutoff1 : float Long IBD cM threshold cutoff2 : float Long IBD cM threshold _time : int Generation time Returns ------- tuple (int # of long IBD segments, Node class instance, Node class instance) ''' # initializing global H1, H0, H, ldist1, ldist0, ldist, tdist1, tdist0, tdist, ddist1, ddist0, ddist global pairwise_segments _num_tracts = 0 K = len(node1._paths_list) L = len(node2._paths_list) for k in range(K): i = node1._paths_list[k] for l in range(L): j = node2._paths_list[l] _left_length = min(i._left_length, j._left_length) _right_length = min(i._right_length, j._right_length) _length = _right_length + _left_length # networking if _length >= cutoff2: if subgraph_id == 1: H1.add_edge(i._leading_leaf, j._leading_leaf) elif subgraph_id == 0: H0.add_edge(i._leading_leaf, j._leading_leaf) else: H.add_edge(i._leading_leaf, j._leading_leaf) if i._leading_leaf <= j._leading_leaf: node2._paths_list[l]._leading_leaf = node1._paths_list[k]._leading_leaf else: node1._paths_list[k]._leading_leaf = node2._paths_list[l]._leading_leaf # segment counting if _length >= cutoff1: m1 = i._num_leaves m2 = j._num_leaves density = m1 * m2 _num_tracts += density if record_dist: if subgraph_id == 1: ldist1.append(_length) tdist1.append(_time) ddist1.append(density) elif subgraph_id == 0: ldist0.append(_length) tdist0.append(_time) ddist0.append(density) else: ldist.append(_length) tdist.append(_time) ddist.append(density) if i._pairwise_output: for a in i._leaves: for b in j._leaves: pairwise_segments.append((a,b,_length,_time)) return _num_tracts, node1, node2 def two_randint(n): x = randint(0,n) y = randint(0,n) while x == y: y = randint(0,n) return sorted([x,y])
[docs]def simulate_ibd(n, Ne, long_ibd=2.0, short_ibd=1.0, ploidy=2, record_dist=True, pairwise_output=True, left_length=np.inf, right_length=np.inf, wf_approx=False ): '''ibd segments from a coalescent Parameters ---------- n : int Sample size (individuals) Ne : dict Effective population sizes long_ibd: float cM length threshold short_ibd : float cM length threshold ploidy : int 1 for haploid or 2 for diploid record_dist : bool To save tract length and coalescent time distributions or not to (default True) pairwise_output : bool To save pairwise segments or not to (default True) left_length : float Distance to the left chromosome end (default numpy.inf) right_length : float Distance to the right chromosome end (default numpy.inf) wf_approx : bool Use Binomial approximation for early WF process (default False) Returns ------- tuple (number of tracts, group sizes, length distr., time distr., count distr., pairwise segments) ''' # checks assert long_ibd > 0 assert short_ibd > 0 assert long_ibd >= short_ibd n = int(float(n)) ploidy = int(float(ploidy)) # initialize graph network global H, H1, H0, ldist, ldist1, ldist0, tdist1, tdist0, tdist, ddist1, ddist0, ddist global pairwise_segments pairwise_segments = [] H = nx.Graph() ldist = [] ddist = [] tdist = [] # initalize m = ploidy*n interiors = [Node(i, 0, pairwise_output, left_length, right_length) for i in range(m)] itr = m # simulate times Ne=cut_Ne(to_max_Ne(fill_Ne(Ne),500),2000) times = simulate_coalgen(m, Ne, ploidy, to_tmrca=True, wf_approx=False) indxs = [i for i in range(m)] for t in times: m -= 1 new_node = Node(itr, t, pairwise_output, left_length, right_length) sindx = sorted(two_randint(m)) s1 = sindx[0] s2 = sindx[1] new_node.pair_coalesce(interiors[s1], interiors[s2], long_ibd, short_ibd, 2, record_dist) interiors.append(new_node) interiors.pop(s1) interiors.pop(s2-1) indxs.pop(s1) indxs.pop(s2-1) indxs.append(itr) itr += 1 numTracts = sum([node._num_tracts for node in interiors]) ibdGroupSizes = sorted([len(c) for c in nx.connected_components(H)],reverse=True) # tract count, group sizes, distributions: lengths, times, density return (numTracts, np.array(ibdGroupSizes), np.array(ldist), np.array(tdist), np.array(ddist), tuple(pairwise_segments) )
[docs]def simulate_ibd_isweep(n, s, p0, Ne, long_ibd=2.0, short_ibd=1.0, random_walk=True, one_step_model='a', tau0=0, sv=-0.01, ploidy=2, record_dist=True, pairwise_output=True, left_length=np.inf, right_length=np.inf, wf_approx=False ): '''ibd segments from a coalescent with selection Parameters ---------- n : int Sample size (individuals) s : float Selection coefficient p0 : float Variant frequency at generation 0 Ne : dict Effective population sizes long_ibd, short_ibd : float cM length threshold random_walk : bool True for random walk one_step_model : str 'm', 'a', 'd', or 'r' tau0 : int Generation when neutrality begins sv: float Allele frequency of standing variation (Default -0.01 will assume de novo sweep) ploidy : int 1 for haploid or 2 for diploid record_dist : bool To save tract length and coalescent time distributions or not to (default True) pairwise_output : bool To save pairwise segments or not to (default True) left_length : float Distance to the left chromosome end (default numpy.inf) right_length : float Distance to the right chromosome end (default numpy.inf) wf_approx : bool Use Binomial approximation for early WF process (default False) Returns ------- tuple(s) (all, adaptive allele, non-adaptive allele) then pairwise segments Each tuple is (number of tracts, group sizes, length distr., time distr., count distr.) ''' assert ploidy in [1,2] assert long_ibd > 0 assert short_ibd > 0 assert long_ibd >= short_ibd assert one_step_model in ['m','a','r','d'] assert p0 <= 1 assert p0 >= 0 assert sv < 1 global H, H1, H0, ldist, ldist1, ldist0, tdist1, tdist0, tdist, ddist1, ddist0, ddist global pairwise_segments pairwise_segments = [] H1 = nx.Graph() H0 = nx.Graph() ldist1 = [] ldist0 = [] tdist1 = [] tdist0 = [] ddist1 = [] ddist0 = [] # renaming p = p0 t = tau0 mdl = one_step_model stoc = random_walk Ne = cut_Ne(to_max_Ne(fill_Ne(Ne),500),2000) # should p0 be fixed if (p0 == 0) or (p0 == 1): out = simulate_ibd(n, Ne, long_ibd, short_ibd, ploidy, record_dist, pairwise_output, left_length, right_length, wf_approx ) # numpy-ify return (out, (np.nan,np.array([]),np.array([]),np.array([]),np.array([])), (np.nan,np.array([]),np.array([]),np.array([]),np.array([])), tuple(pairwise_segments) ) # calculating structured demographies ps, Ns, xs = walk_variant_backward(s, p, Ne, stoc, mdl, tau0, sv, ploidy) n = int(float(n)) n = n * ploidy G = max(Ne.keys()) n1 = floor(n * ps[0]) n0 = n - n1 N1 = [ceil(i) for i in Ns * ps] N1 = {i:N1[i] for i in range(len(N1))} qs = extend_vector(1 - ps, 1, G) Ns = np.array(list(Ne.values())) N0 = [ceil(i) for i in Ns * qs] N0 = {i:N0[i] for i in range(len(N0))} # arrival times times1 = simulate_coalgen(n1, N1, ploidy, to_tmrca=False, wf_approx=False) interiors1 = [Node(i, 0, pairwise_output, left_length, right_length) for i in range(n1)] interiors0 = [Node(i, 0, pairwise_output, left_length, right_length) for i in range(n1,n1+n0)] # pairwise comparisons pop 1 m = n1 indxs = [i for i in range(m)] itr = n1 + n0 for t in times1: m -= 1 new_node = Node(itr, t, pairwise_output, left_length, right_length) sindx = sorted(two_randint(m)) s1 = sindx[0] s2 = sindx[1] new_node.pair_coalesce(interiors1[s1], interiors1[s2], long_ibd, short_ibd, 1, record_dist, pairwise_output) interiors1.append(new_node) interiors1.pop(s1) interiors1.pop(s2-1) indxs.pop(s1) indxs.pop(s2-1) indxs.append(itr) itr += 1 numTracts1 = sum([node._num_tracts for node in interiors1]) ibdGroupSizes1 = sorted([len(c) for c in nx.connected_components(H1)],reverse=True) # pairwise comparisons p0 maxt1=max([node._time for node in interiors1]) N00 = {key:val for key,val in N0.items() if key <= maxt1} N01 = {key:val for key,val in N0.items() if key > maxt1} times0 = simulate_coalgen(n0, N00, ploidy, to_tmrca=False, wf_approx=False) m = n0 indxs = [i for i in range(m)] for t in times0: m -= 1 new_node = Node(itr, t, pairwise_output, left_length, right_length) sindx = sorted(two_randint(m)) s1 = sindx[0] s2 = sindx[1] new_node.pair_coalesce(interiors0[s1], interiors0[s2], long_ibd, short_ibd, 0, record_dist, pairwise_output) interiors0.append(new_node) interiors0.pop(s1) interiors0.pop(s2-1) indxs.pop(s1) indxs.pop(s2-1) indxs.append(itr) itr += 1 numTracts0 = sum([node._num_tracts for node in interiors0]) ibdGroupSizes0 = sorted([len(c) for c in nx.connected_components(H0)],reverse=True) # arrival times, pairwise comparisons to TMRCA H = nx.compose(H1,H0) ldist = [*ldist1, *ldist0] tdist = [*tdist1, *tdist0] ddist = [*ddist1, *ddist0] interiors = interiors1 + interiors0 m = len(interiors) indxs = [i for i in range(m)] if len(N01.keys()) > 0: times2 = simulate_coalgen(m, N01, ploidy, to_tmrca=True, wf_approx=False) for t in times2: m -= 1 new_node = Node(itr, t, pairwise_output, left_length, right_length) sindx = sorted(two_randint(m)) s1 = sindx[0] s2 = sindx[1] new_node.pair_coalesce(interiors[s1], interiors[s2], long_ibd, short_ibd, 2, record_dist, pairwise_output) interiors.append(new_node) interiors.pop(s1) interiors.pop(s2-1) indxs.pop(s1) indxs.pop(s2-1) indxs.append(itr) itr += 1 numTracts = sum([node._num_tracts for node in interiors]) ibdGroupSizes = sorted([len(c) for c in nx.connected_components(H)],reverse=True) # numpy-ify return ((numTracts,np.array(ibdGroupSizes),np.array(ldist),np.array(tdist),np.array(ddist)), (numTracts1,np.array(ibdGroupSizes1),np.array(ldist1),np.array(tdist1),np.array(ddist1)), (numTracts0,np.array(ibdGroupSizes0),np.array(ldist0),np.array(tdist0),np.array(ddist0)), tuple(pairwise_segments) )
### segments ### # quasi geometric random variables def probability_quasi_geometric(ps, Ns, ploidy = 2): """Probability masses for quasi-geometric coalescent Parameters ---------- ps : numpy.array Variant frequencies (back in time) Ns : numpy.array Effective population sizes ploidy : int 1 for haploid or 2 for diploid Returns ------- numpy.array P(G = g) for generation g; zero element is P(G > g) for some large g """ assert ploidy in [1,2] ones = np.array([1 for i in range(len(ps))]) marg_mass = 1 / (ploidy * ps[:-1] * Ns[:-1]) marg_mass = np.insert(marg_mass, 0, 0) marg_mass = np.minimum(ones, marg_mass) surv_mass = ones - marg_mass surv_mass = np.cumprod(surv_mass) marg_mass = 1 / (ploidy * ps * Ns) marg_mass = np.minimum(ones, marg_mass) full_mass = surv_mass * marg_mass full_mass = np.insert(full_mass, 0, np.maximum(1 - np.sum(full_mass), 0)) return full_mass def simulate_quasi_geometric(n, mass): """Simulator for quasi-geometric coalescent Parameters ---------- n : int Simulation size mass : numpy.array Probability masses Returns ------- numpy.array Coalescent generation times; np.inf for P(G > g) for some large g """ n = int(float(n)) time = np.array([np.inf] + [i for i in range(1, len(mass))]) coal = np.random.choice(time, size = n, p = mass) return coal # erlang random variables def simulate_erlang_segments(geom): """Simulator for Erlang (shape=2) ibd segment lengths Parameters ---------- geom : numpy.array Coalescent generation times (quasi-geometric) Returns ------- numpy.array Independent ibd segment lengths """ geom = geom[geom != np.inf] m = len(geom) left = np.random.exponential(scale = 50 / geom, size = m) right = np.random.exponential(scale = 50 / geom, size = m) return left + right def probability_erlang_segments(ab, mass): '''Interval probabilities for Erlang (shape=2) ibd lengths Parameters ---------- ab : numpy.array Increasing floats in centiMorgans mass : numpy.array P(G = g) for generation g Returns ------- numpy.array P(\ell \in [u,v)) for many [u,v) intervals ''' # local function def puv(u, v, g): # error handling if u < 0: raise ValueError("Left endpoint less than zero") if u >= v: raise ValueError("Left endpoint exceeds right") # integral values U = np.exp(- g / 50 * u) * (u * g + 50) if np.isinf(v): V = 0 else: V = np.exp(- g / 50 * v) * (v * g + 50) return (U - V) / 50 # initialize mass = mass[1:] geom = np.array([i for i in range(1, len(mass) + 1)]) M = len(ab) # double integral puvs = [] for m in range(1, M): pab = puv(ab[m-1], ab[m], geom) puvs.append(sum(mass * pab)) return np.array(puvs) # probability of ibd
[docs]def probability_ibd(ps, Ns, long_ibd = 2, ploidy = 2): """Approximate probability of ibd Parameters ---------- ps : numpy.array Variant frequencies (back in time) Ns : numpy.array Effective population sizes long_ibd : float cM length threshold ploidy : int 1 for haploid or 2 for diploid Returns ------- float approx P(\ell > c) where \ell is ibd length """ assert ploidy in [1,2] # initialize p0 = ps[0] # cumulative products with np.errstate(divide = 'ignore'): dnm1 = 1 / (ploidy * ps[:-1] * Ns[:-1]) dnm1 = np.insert(dnm1, 0, 0) one = np.array([1 for i in range(len(dnm1))]) sub1 = np.minimum(one, dnm1) onesub1 = one - sub1 cp1 = np.cumprod(onesub1) # products with np.errstate(divide = 'ignore'): dnm1 = 1 / (ploidy * ps * Ns) dnm1 = np.minimum(one, dnm1) # partial sum pgs = (p0 ** 2) * cp1 * dnm1 gstar = len(pgs) masses = [] for g in range(1, gstar + 1): masses.append((long_ibd * g / 50 + 1) * exp(- g * long_ibd / 50)) masses = np.array(masses) partialp = sum(pgs * masses) # approximate coalescent if dnm1[-1] >= 0: # check this FRp = - np.inf else: hp = (p0 ** 2) * cp1[-1] * (1 - dnm1[-1]) dp = 1 / ploidy / Ns[-1] / ps[-1] Cp = log(1 - dp) if not np.isinf(dp) else 0 CCp = Cp - long_ibd / 50 FRp = log(-(CCp + long_ibd / 50 * ((gstar + 1) * CCp - 1))) FRp = FRp + CCp * (gstar + 1) - log(CCp ** 2) FRp = FRp + log(hp) + log(dp) - Cp * (gstar + 1) return partialp + exp(FRp)
[docs]def probability_ibd_isweep(s, p0, Ne, long_ibd = 2, one_step_model = 'a', tau0 = 0, sv=-0.01, ploidy = 2): '''Approximate probability of ibd given a sweep model Parameters ---------- s : float Selection coefficient p0 : float Variant frequency at generation 0 Ne : dict Effective population sizes long_ibd : float cM length threshold one_step_model : str 'm', 'a', 'd', or 'r' tau0 : int Generation when neutrality begins sv: float Allele frequency of standing variation (Default -0.01 will assume de novo sweep) ploidy : int 1 for haploid or 2 for diploid Returns ------- float approx P(\ell > c) where \ell is ibd length ''' assert ploidy in [1,2] assert p0 >= 0 assert p0 <= 1 assert sv < 1 if p0==1 or p0==0: Ne = cut_Ne(to_max_Ne(fill_Ne(Ne),500),1000) b = np.array(list(Ne.values())) a = extend_vector(np.array([1]),1,max(Ne.keys())) d = probability_ibd(a, b, long_ibd, ploidy) f = 0 else: Ne = cut_Ne(to_max_Ne(fill_Ne(Ne),500),1000) a, b, c = walk_variant_backward(s, p0, Ne, False, one_step_model, tau0, sv, ploidy) a = extend_vector(a, a[-1], max(Ne.keys())) b = extend_vector(b, b[-1], max(Ne.keys())) d = probability_ibd(a, b, long_ibd, ploidy) f = probability_ibd(1-a, b, long_ibd, ploidy) return d + f
### for asymptotic tests ###
[docs]def simulate_ibd_constant(n, Ne, long_ibd=2.0, short_ibd=1.0, ploidy=2, record_dist=True, pairwise_output=True, left_length=np.inf, right_length=np.inf ): '''ibd segments from a coalescent w/ constant Ne Parameters ---------- n : int Sample size (individuals) Ne : int Constant effective population size long_ibd, short_ibd : float cM length threshold ploidy : int 1 for haploid or 2 for diploid record_dist : bool To save tract length and coalescent time distributions or not to (default True) pairwise_output : bool To save pairwise segments or not to (default True) left_length : float Distance to the left chromosome end (default numpy.inf) right_length : float Distance to the right chromosome end (default numpy.inf) Returns ------- tuple (number of tracts, group sizes, length distr., time distr., count distr., pairwise segments) ''' # checks assert long_ibd > 0 assert short_ibd > 0 assert long_ibd >= short_ibd n = int(float(n)) ploidy = int(float(ploidy)) # initialize graph network global H, H1, H0, ldist, ldist1, ldist0, tdist1, tdist0, tdist, ddist1, ddist0, ddist global pairwise_segments pairwise_segments = [] H = nx.Graph() ldist = [] ddist = [] tdist = [] # initalize m = ploidy*n interiors = [Node(i, 0, pairwise_output, left_length, right_length) for i in range(m)] itr = m # this is the simple change # do basic coalescent instead of wright-fisher times = basic_coalescent(m) * float(int(Ne)*ploidy) indxs = [i for i in range(m)] for t in times: m -= 1 new_node = Node(itr, t, pairwise_output, left_length, right_length) sindx = sorted(two_randint(m)) s1 = sindx[0] s2 = sindx[1] new_node.pair_coalesce(interiors[s1], interiors[s2], long_ibd, short_ibd, 2, record_dist) interiors.append(new_node) interiors.pop(s1) interiors.pop(s2-1) indxs.pop(s1) indxs.pop(s2-1) indxs.append(itr) itr += 1 numTracts = sum([node._num_tracts for node in interiors]) ibdGroupSizes = sorted([len(c) for c in nx.connected_components(H)],reverse=True) # tract count, group sizes, distributions: lengths, times, density return (numTracts, np.array(ibdGroupSizes), np.array(ldist), np.array(tdist), np.array(ddist), tuple(pairwise_segments) )
### time-varying selection ### def walk_variant_forward_tv(s_s, g_s, pG, Ne, random_walk = False, ploidy = 2): '''Variant frequencies forward in time Parameters ---------- s_s : list List of selection coefficients g_s : list List of transition times for selection coefficients This should be aligned with the s_s parameter e.g., s_s=[0.01,0.03,0.02] and g_s=[50,100] means s=0.02 between 100-, s=0.03 between 100-50 and s=0.01 between 50-0 Should be length one less than s_s pG : float Variant frequency at maximum generation Ne : dict Effective population sizes random_walk : bool True for random walk ploidy : int 1 for haploid or 2 for diploid Additive genic selection is assumed Returns ------- tuple NumPy arrays for frequencies and sizes ''' # local functions assert ploidy in [1,2] def haploid_fwd(p, s): # haploid is same as multiplicative return p * (1 + s) / (1 + p * s) def additive_fwd(p, s): num = 1 + s + p * s dnm = 1 + 2 * p * s return p * num / dnm # one step calculation if ploidy == 1: one_step = haploid_fwd else: one_step = additive_fwd # initialize ps = [] # frequencies xs = [] # variants Ns = [] # sizes p = pG G = max(Ne.keys()) assert len(s_s) == (len(g_s) + 1) s_s2 = deepcopy(s_s) g_s2 = deepcopy(g_s) s2 = s_s2[-1] s_s2 = s_s2[:-1] sdict = {g_s2[i]:s_s2[i] for i in range(len(g_s))} N = Ne[G] x = ceil(p * ploidy * N) Ns.append(N) xs.append(x) ps.append(p) if random_walk: # random walk while G >= 0: G -= 1 try: N = Ne[G] except KeyError: pass try: s2 = sdict[G] except KeyError: pass p = one_step(p, s2) x = int(binom.rvs(int(ploidy * N), p)) p = x / ploidy / N if x < 1: break if p >= 1: break ps.append(p) xs.append(x) Ns.append(N) return np.array([np.array(ps), np.array(Ns), np.array(xs)]) # numpy-ify else: # deterministic while G >= 0: G -= 1 try: N = Ne[G] except KeyError: pass try: s2 = sdict[G] except KeyError: pass p = one_step(p, s2) x = floor(p * ploidy * N) if x < 1: break if p >= 1: break Ns.append(N) xs.append(x) ps.append(p) return np.array([np.array(ps), np.array(Ns), np.array(xs)]) # numpy-ify
[docs]def walk_variant_backward_tv(s_s, g_s, p0, Ne, random_walk = False, ploidy = 2): '''Variant frequencies backward in time Parameters ---------- s_s : list List of selection coefficients g_s : list List of transition times for selection coefficients This should be aligned with the s_s parameter e.g., s_s=[0.01,0.03,0.02] and g_s=[50,100] means s=0.01 between 0-50, s=0.03 between 50-100 and s=0.02 between 100- Should be length one less than s_s p0 : float Variant frequency at generation 0 Ne : dict Effective population sizes random_walk : bool True for random walk ploidy : int 1 for haploid or 2 for diploid Returns ------- tuple NumPy arrays for frequencies and sizes ''' # local functions assert ploidy in [1,2] assert p0 <= 1 assert p0 >= 0 def haploid_bwd(p, s): # haploid is same as multiplicative (Felsenstein, 2017) return p / (1 + s - s * p) def additive_bwd(p, s): if s <= 0: return p a = s b = 1 + s - 2 * p * s c = - p qf = - b + sqrt((b ** 2) - 4 * a * c) qf = qf / 2 / a return qf # one step calculation if ploidy == 1: one_step = haploid_bwd else: one_step = additive_bwd # initialize ps = [] # frequencies xs = [] # variants Ns = [] # sizes p = p0 N = Ne[0] assert len(s_s) == (len(g_s)+1) s_s2 = deepcopy(s_s) g_s2 = deepcopy(g_s) s2 = deepcopy(s_s2[0]) s_s2 = s_s2[1:] sdict = {g_s2[i]:s_s2[i] for i in range(len(g_s))} x = floor(p * ploidy * N) Ns.append(N) xs.append(x) ps.append(p) if random_walk: # random walk for G in range(1, max(Ne.keys())+1): try: # population size change N = Ne[G] except KeyError: pass try: s2 = sdict[G] except KeyError: pass p = one_step(p, s2) x = int(binom.rvs(int(ploidy * N), p)) p = x / ploidy / N if x < 1: break if p >= 1: break ps.append(p) xs.append(x) Ns.append(N) return np.array([ps, Ns, xs], dtype=float) # numpy-ify else: # deterministic for G in range(1, max(Ne.keys())+1): try: # population size change N = Ne[G] except KeyError: pass try: s2 = sdict[G] except KeyError: pass p = one_step(p, s2) x = floor(p * ploidy * N) if x < 1: break if p >= 1: break Ns.append(N) xs.append(x) ps.append(p) return np.array([np.array(ps), np.array(Ns), np.array(xs)]) # numpy-ify
[docs]def simulate_ibd_isweep_tv(n, s_s, g_s, p0, Ne, long_ibd=2.0, short_ibd=1.0, random_walk=True, ploidy=2, record_dist=True, pairwise_output=True, left_length=np.inf, right_length=np.inf, wf_approx=False ): '''ibd segments from a coalescent with selection Parameters ---------- n : int Sample size (individuals) s_s : list List of selection coefficients g_s : list List of transition times for selection coefficients This should be aligned with the s_s parameter e.g., s_s=[0.01,0.03,0.02] and g_s=[50,100] means s=0.01 between 0-50, s=0.03 between 50-100 and s=0.02 between 100- Should be length one less than s_s p0 : float Variant frequency at generation 0 Ne : dict Effective population sizes long_ibd, short_ibd : float cM length threshold random_walk : bool True for random walk ploidy : int 1 for haploid or 2 for diploid record_dist : bool To save tract length and coalescent time distributions or not to (default True) pairwise_output : bool To save pairwise segments or not to (default True) left_length : float Distance to the left chromosome end (default numpy.inf) right_length : float Distance to the right chromosome end (default numpy.inf) wf_approx : bool Use Binomial approximation for early WF process (default False) Returns ------- tuple(s) (all, adaptive allele, non-adaptive allele) then pairwise segments Each tuple is (number of tracts, group sizes, length distr., time distr., count distr.) ''' assert ploidy in [1,2] assert long_ibd > 0 assert short_ibd > 0 assert long_ibd >= short_ibd assert p0 <= 1 assert p0 >= 0 global H, H1, H0, ldist, ldist1, ldist0, tdist1, tdist0, tdist, ddist1, ddist0, ddist global pairwise_segments pairwise_segments = [] H1 = nx.Graph() H0 = nx.Graph() ldist1 = [] ldist0 = [] tdist1 = [] tdist0 = [] ddist1 = [] ddist0 = [] # renaming p = p0 stoc = random_walk Ne = cut_Ne(to_max_Ne(fill_Ne(Ne),500),2000) # should p0 be fixed if (p0 == 0) or (p0 == 1): out = simulate_ibd(n, Ne, long_ibd, short_ibd, ploidy, record_dist, pairwise_output, left_length, right_length, wf_approx ) # numpy-ify return (out, (np.nan,np.array([]),np.array([]),np.array([]),np.array([])), (np.nan,np.array([]),np.array([]),np.array([]),np.array([])), tuple(pairwise_segments) ) # calculating structured demographies ps, Ns, xs = walk_variant_backward_tv(s_s, g_s, p, Ne, stoc, ploidy) n = int(float(n)) n = n * ploidy G = max(Ne.keys()) n1 = floor(n * ps[0]) n0 = n - n1 N1 = [ceil(i) for i in Ns * ps] N1 = {i:N1[i] for i in range(len(N1))} qs = extend_vector(1 - ps, 1, G) Ns = np.array(list(Ne.values())) N0 = [ceil(i) for i in Ns * qs] N0 = {i:N0[i] for i in range(len(N0))} # arrival times times1 = simulate_coalgen(n1, N1, ploidy, to_tmrca=False, wf_approx=False) interiors1 = [Node(i, 0, pairwise_output, left_length, right_length) for i in range(n1)] interiors0 = [Node(i, 0, pairwise_output, left_length, right_length) for i in range(n1,n1+n0)] # pairwise comparisons pop 1 m = n1 indxs = [i for i in range(m)] itr = n1 + n0 for t in times1: m -= 1 new_node = Node(itr, t, pairwise_output, left_length, right_length) sindx = sorted(two_randint(m)) s1 = sindx[0] s2 = sindx[1] new_node.pair_coalesce(interiors1[s1], interiors1[s2], long_ibd, short_ibd, 1, record_dist, pairwise_output) interiors1.append(new_node) interiors1.pop(s1) interiors1.pop(s2-1) indxs.pop(s1) indxs.pop(s2-1) indxs.append(itr) itr += 1 numTracts1 = sum([node._num_tracts for node in interiors1]) ibdGroupSizes1 = sorted([len(c) for c in nx.connected_components(H1)],reverse=True) # pairwise comparisons p0 maxt1=max([node._time for node in interiors1]) N00 = {key:val for key,val in N0.items() if key <= maxt1} N01 = {key:val for key,val in N0.items() if key > maxt1} times0 = simulate_coalgen(n0, N00, ploidy, to_tmrca=False, wf_approx=False) m = n0 indxs = [i for i in range(m)] for t in times0: m -= 1 new_node = Node(itr, t, pairwise_output, left_length, right_length) sindx = sorted(two_randint(m)) s1 = sindx[0] s2 = sindx[1] new_node.pair_coalesce(interiors0[s1], interiors0[s2], long_ibd, short_ibd, 0, record_dist, pairwise_output) interiors0.append(new_node) interiors0.pop(s1) interiors0.pop(s2-1) indxs.pop(s1) indxs.pop(s2-1) indxs.append(itr) itr += 1 numTracts0 = sum([node._num_tracts for node in interiors0]) ibdGroupSizes0 = sorted([len(c) for c in nx.connected_components(H0)],reverse=True) # arrival times, pairwise comparisons to TMRCA H = nx.compose(H1,H0) ldist = [*ldist1, *ldist0] tdist = [*tdist1, *tdist0] ddist = [*ddist1, *ddist0] interiors = interiors1 + interiors0 m = len(interiors) indxs = [i for i in range(m)] if len(N01.keys()) > 0: times2 = simulate_coalgen(m, N01, ploidy, to_tmrca=True, wf_approx=False) for t in times2: m -= 1 new_node = Node(itr, t, pairwise_output, left_length, right_length) sindx = sorted(two_randint(m)) s1 = sindx[0] s2 = sindx[1] new_node.pair_coalesce(interiors[s1], interiors[s2], long_ibd, short_ibd, 2, record_dist, pairwise_output) interiors.append(new_node) interiors.pop(s1) interiors.pop(s2-1) indxs.pop(s1) indxs.pop(s2-1) indxs.append(itr) itr += 1 numTracts = sum([node._num_tracts for node in interiors]) ibdGroupSizes = sorted([len(c) for c in nx.connected_components(H)],reverse=True) # numpy-ify return ((numTracts,np.array(ibdGroupSizes),np.array(ldist),np.array(tdist),np.array(ddist)), (numTracts1,np.array(ibdGroupSizes1),np.array(ldist1),np.array(tdist1),np.array(ddist1)), (numTracts0,np.array(ibdGroupSizes0),np.array(ldist0),np.array(tdist0),np.array(ddist0)), tuple(pairwise_segments) )
[docs]def simulate_ibd_split(n, q, split_gen, Ne, long_ibd=2.0, short_ibd=1.0, ploidy=2, record_dist=True, pairwise_output=True, left_length=np.inf, right_length=np.inf, wf_approx=False ): '''ibd segments from a coalescent with selection Parameters ---------- n : int Sample size (individuals) q : float Split proportion split_gen : int Time ago of the population split Ne : dict Effective population sizes long_ibd, short_ibd : float cM length threshold ploidy : int 1 for haploid or 2 for diploid record_dist : bool To save tract length and coalescent time distributions or not to (default True) pairwise_output : bool To save pairwise segments or not to (default True) left_length : float Distance to the left chromosome end (default numpy.inf) right_length : float Distance to the right chromosome end (default numpy.inf) wf_approx : bool Use Binomial approximation for early WF process (default False) Returns ------- tuple(s) (all, first admix allele, second admix allele) then pairwise segments Each tuple is (number of tracts, group sizes, length distr., time distr., count distr.) ''' p0 = q assert ploidy in [1,2] assert long_ibd > 0 assert short_ibd > 0 assert long_ibd >= short_ibd assert p0 <= 1 assert p0 >= 0 split_gen = int(float(split_gen)) global H, H1, H0, ldist, ldist1, ldist0, tdist1, tdist0, tdist, ddist1, ddist0, ddist global pairwise_segments pairwise_segments = [] H1 = nx.Graph() H0 = nx.Graph() ldist1 = [] ldist0 = [] tdist1 = [] tdist0 = [] ddist1 = [] ddist0 = [] # renaming p = p0 Ne = cut_Ne(to_max_Ne(fill_Ne(Ne),500),2000) # should p0 be fixed if (p0 == 0) or (p0 == 1): out = simulate_ibd(n, Ne, long_ibd, short_ibd, ploidy, record_dist, pairwise_output, left_length, right_length, wf_approx ) # numpy-ify return (out, (np.nan,np.array([]),np.array([]),np.array([]),np.array([])), (np.nan,np.array([]),np.array([]),np.array([]),np.array([])), tuple(pairwise_segments) ) # calculating structured demographies n = int(float(n)) n = n * ploidy G = max(Ne.keys()) n1 = floor(n * p0) n0 = n - n1 N1 = {i:floor(Ne[i]*p0) for i in range(split_gen)} N00 = {i:(Ne[i]-N1[i]) for i in range(split_gen)} N01 = {i:Ne[i] for i in range(split_gen, G+1)} # arrival times times1 = simulate_coalgen(n1, N1, ploidy, to_tmrca=False, wf_approx=False) interiors1 = [Node(i, 0, pairwise_output, left_length, right_length) for i in range(n1)] interiors0 = [Node(i, 0, pairwise_output, left_length, right_length) for i in range(n1,n1+n0)] # pairwise comparisons pop 1 m = n1 indxs = [i for i in range(m)] itr = n1 + n0 for t in times1: m -= 1 new_node = Node(itr, t, pairwise_output, left_length, right_length) sindx = sorted(two_randint(m)) s1 = sindx[0] s2 = sindx[1] new_node.pair_coalesce(interiors1[s1], interiors1[s2], long_ibd, short_ibd, 1, record_dist, pairwise_output) interiors1.append(new_node) interiors1.pop(s1) interiors1.pop(s2-1) indxs.pop(s1) indxs.pop(s2-1) indxs.append(itr) itr += 1 numTracts1 = sum([node._num_tracts for node in interiors1]) ibdGroupSizes1 = sorted([len(c) for c in nx.connected_components(H1)],reverse=True) # pairwise comparisons p0 maxt1=max([node._time for node in interiors1]) times0 = simulate_coalgen(n0, N00, ploidy, to_tmrca=False, wf_approx=False) m = n0 indxs = [i for i in range(m)] for t in times0: m -= 1 new_node = Node(itr, t, pairwise_output, left_length, right_length) sindx = sorted(two_randint(m)) s1 = sindx[0] s2 = sindx[1] new_node.pair_coalesce(interiors0[s1], interiors0[s2], long_ibd, short_ibd, 0, record_dist, pairwise_output) interiors0.append(new_node) interiors0.pop(s1) interiors0.pop(s2-1) indxs.pop(s1) indxs.pop(s2-1) indxs.append(itr) itr += 1 numTracts0 = sum([node._num_tracts for node in interiors0]) ibdGroupSizes0 = sorted([len(c) for c in nx.connected_components(H0)],reverse=True) # arrival times, pairwise comparisons to TMRCA H = nx.compose(H1,H0) ldist = [*ldist1, *ldist0] tdist = [*tdist1, *tdist0] ddist = [*ddist1, *ddist0] interiors = interiors1 + interiors0 m = len(interiors) indxs = [i for i in range(m)] if len(N01.keys()) > 0: times2 = simulate_coalgen(m, N01, ploidy, to_tmrca=True, wf_approx=False) for t in times2: m -= 1 new_node = Node(itr, t, pairwise_output, left_length, right_length) sindx = sorted(two_randint(m)) s1 = sindx[0] s2 = sindx[1] new_node.pair_coalesce(interiors[s1], interiors[s2], long_ibd, short_ibd, 2, record_dist, pairwise_output) interiors.append(new_node) interiors.pop(s1) interiors.pop(s2-1) indxs.pop(s1) indxs.pop(s2-1) indxs.append(itr) itr += 1 numTracts = sum([node._num_tracts for node in interiors]) ibdGroupSizes = sorted([len(c) for c in nx.connected_components(H)],reverse=True) # numpy-ify return ((numTracts,np.array(ibdGroupSizes),np.array(ldist),np.array(tdist),np.array(ddist)), (numTracts1,np.array(ibdGroupSizes1),np.array(ldist1),np.array(tdist1),np.array(ddist1)), (numTracts0,np.array(ibdGroupSizes0),np.array(ldist0),np.array(tdist0),np.array(ddist0)), tuple(pairwise_segments) )