Source code for isweep.coalescent

# imports
import numpy as np
import pandas as pd
import networkx as nx
from scipy.stats import binom
from random import randint, sample
from math import floor, exp, log, ceil, sqrt
from numpy.random import exponential as Exp
from copy import deepcopy
from .utilities import *

H1 = nx.Graph()
H0 = nx.Graph()
H = nx.Graph()
ldist = []
ldist1 = []
ldist0 = []
tdist = []
tdist1 = []
tdist0 = []
ddist1 = []
ddist0 = []
ddist = []
pairwise_segments = []

### random walks ###

[docs]def walk_variant_backward(s, p0, Ne, random_walk = False, one_step_model = 'a', tau0 = 0, sv=-0.01, ploidy = 2):
    '''Variant frequencies backward in time

    Parameters
    ----------
    s : float
        Selection coefficient
    p0 : float
        Variant frequency at generation 0
    Ne : dict
        Effective population sizes
    random_walk : bool
        True for random walk
    one_step_model : str
        'm', 'a', 'd', or 'r'
    tau0 : int
        Generation when neutrality begins
    sv: float
        Allele frequency of standing variation
        (Default -0.01 will assume de novo sweep)
    ploidy : int
        1 for haploid or 2 for diploid


    Returns
    -------
    tuple
        NumPy arrays for frequencies and sizes
    '''

    # local functions
    assert ploidy in [1,2]
    assert one_step_model in ['m','a','r','d']
    assert p0 <= 1
    assert p0 >= 0
    assert sv < 1
    def haploid_bwd(p, s): # haploid is same as multiplicative (Felsenstein, 2017)
        return p / (1 + s - s * p)
    def multiplicative_bwd(p, s):
        num = 1
        dnm = 1 + (1 - p) * s
        return p * num / dnm
    def dominant_bwd(p, s):
        if s <= 0:
            return p
        a = p * s
        b = 1 + s - 2 * p * s
        c = - p
        qf = - b + sqrt((b ** 2) - 4 * a * c)
        qf = qf / 2 / a
        return qf
    def additive_bwd(p, s):
        if s <= 0:
            return p
        a = s
        b = 1 + s - 2 * p * s
        c = - p
        qf = - b + sqrt((b ** 2) - 4 * a * c)
        qf = qf / 2 / a
        return qf
    def recessive_bwd(p, s):
        if s <= 0:
            return p
        a = (1 - p) * s
        b = 1
        c = - p
        qf = - b + sqrt((b ** 2) - 4 * a * c)
        qf = qf / 2 / a
        return qf

    # one step calculation
    if ploidy == 1:
        one_step = haploid_bwd
    else:
        if one_step_model == 'a':
            one_step = additive_bwd
        elif one_step_model == 'r':
            one_step = recessive_bwd
        elif one_step_model == 'd':
            one_step = dominant_bwd
        else:
            one_step = multiplicative_bwd

    # initialize
    ps = [] # frequencies
    xs = [] # variants
    Ns = [] # sizes
    t = floor(tau0)
    p = p0
    N = Ne[0]
    x = floor(p * ploidy * N)
    Ns.append(N)
    xs.append(x)
    ps.append(p)

    if random_walk: # random walk

        for G in range(1, max(Ne.keys())+1):
            try: # population size change
                N = Ne[G]
            except KeyError:
                pass
            if G > t:
                p = one_step(p, s)
            x = int(binom.rvs(int(ploidy * N), p))
            p = x / ploidy / N
            if x < 1:
                break
            if p >= 1:
                break
            if p <= sv:
                s = 0
            ps.append(p)
            xs.append(x)
            Ns.append(N)

        return np.array([ps, Ns, xs], dtype=float) # numpy-ify

    else: # deterministic

        for G in range(1, max(Ne.keys())+1):
            try: # population size change
                N = Ne[G]
            except KeyError:
                pass
            if G > t:
                p = one_step(p, s)
            x = floor(p * ploidy * N)
            if x < 1:
                break
            if p >= 1:
                break
            if p <= sv:
                s = 0
            Ns.append(N)
            xs.append(x)
            ps.append(p)

        return np.array([np.array(ps), np.array(Ns), np.array(xs)]) # numpy-ify

[docs]def walk_variant_forward(s, pG, Ne, random_walk = False, one_step_model = 'a', tau0 = 0, ploidy = 2):
    '''Variant frequencies forward in time

    Parameters
    ----------
    s : float
        Selection coefficient
    pG : float
        Variant frequency at maximum generation
    Ne : dict
        Effective population sizes
    random_walk : bool
        True for random walk
    one_step_model : str
        'm', 'a', 'd', or 'r'
    tau0 : int
        Generation when neutrality begins
    ploidy : int
        1 for haploid or 2 for diploid

    Returns
    -------
    tuple
        NumPy arrays for frequencies and sizes
    '''

    # local functions
    assert ploidy in [1,2]
    def haploid_fwd(p, s): # haploid is same as multiplicative
        return p * (1 + s) / (1 + p * s)
    def additive_fwd(p, s):
        num = 1 + s + p * s
        dnm = 1 + 2 * p * s
        return p * num / dnm
    def dominant_fwd(p, s):
        num = 1 + s
        dnm = 1 + 2 * p * s * (1 - p) + p * p * s
        return p * num / dnm
    def multiplicative_fwd(p, s):
        num = 1 + s
        dnm = 1 + p * s
        return p * num / dnm
    def recessive_fwd(p, s):
        num = 1 + p * s
        dnm = 1 + p * p * s
        return p * num / dnm

    # one step calculation
    if ploidy == 1:
        one_step = haploid_fwd
    else:
        if one_step_model == 'a':
            one_step = additive_fwd
        elif one_step_model == 'r':
            one_step = recessive_fwd
        elif one_step_model == 'd':
            one_step = dominant_fwd
        else:
            one_step = multiplicative_fwd

    # initialize
    ps = [] # frequencies
    xs = [] # variants
    Ns = [] # sizes
    t = floor(tau0)
    p = pG
    G = max(Ne.keys())
    N = Ne[G]
    x = ceil(p * ploidy * N)
    Ns.append(N)
    xs.append(x)
    ps.append(p)

    if random_walk: # random walk

        while G >= 0:
            G -= 1
            try:
                N = Ne[G]
            except KeyError:
                pass
            if G > t:
                p = one_step(p, s)
            x = int(binom.rvs(int(ploidy * N), p))
            p = x / ploidy / N
            if x < 1:
                break
            if p >= 1:
                break
            ps.append(p)
            xs.append(x)
            Ns.append(N)

        return np.array([np.array(ps), np.array(Ns), np.array(xs)]) # numpy-ify

    else: # deterministic

        while G >= 0:
            G -= 1
            try:
                N = Ne[G]
            except KeyError:
                pass
            if G > t:
                p = one_step(p, s)
            x = floor(p * ploidy * N)
            if x < 1:
                break
            if p >= 1:
                break
            Ns.append(N)
            xs.append(x)
            ps.append(p)

        return np.array([np.array(ps), np.array(Ns), np.array(xs)]) # numpy-ify

### simple coalescent simulators ###

[docs]def basic_coalescent(n):
    '''Simulate times in basic coalescent (scale post-hoc by population size)

    Parameters
    ----------
    n : int
        Sample size

    Returns
    -------
    numpy.array
        Interarrival times
    '''
    n = int(float(n))
    k = n
    t = np.zeros(n - 1)
    itr = 0
    curr = 0
    while k > 1:
        bcoef = 2 / k / (k - 1)
        k -= 1
        draw = Exp(bcoef)
        curr += draw
        t[itr] = curr
        itr += 1
    return t

[docs]def varying_Ne_coalescent(n, Ne, ploidy = 2, to_tmrca=True):
    '''Simulate times in varying population size coalescent

    Parameters
    ----------
    n : int
        Sample size
    Ne : dict
        Effective population sizes
    ploidy : int
    to_tmrca : bool
        Go to TMRCA

    Returns
    -------
    numpy.array
        Arrival times in generations
    '''

    # initialize
    Ne = zero_shift_Ne(Ne)
    maxG = max(Ne.keys())
    N = Ne[maxG]
    Nt = np.array(list(Ne.values()))
    lambdas = N / Nt # relative (inverse) coalescent rates
    loglambdas = np.log(lambdas)
    Lambdas = np.cumsum(lambdas) # piecewise integral
    Lambdas = np.insert(Lambdas, 0, 0)
    times = np.log(basic_coalescent(n)) + log(ploidy) + log(N) # convert to generations
    times = np.exp(times)
    times = np.insert(times, 0, 0)
    dtime = np.diff(times)
    K = len(dtime)
    Taus = np.zeros(K)
    k = 0
    itr = 0
    LambdaNext = 0
    vCurr = 0
    # while k <= (K-1) means until most recent common ancestor (MRCA)
    # itr < maxG controls updates with piecewise Ne
    while k <= (K-1) and itr < maxG:
        vPrev = vCurr
        LambdaPrev = LambdaCurr = LambdaNext
        RHS = dtime[k] + LambdaPrev
        LambdaNext = RHS
        steps = 0
        try:
            while LambdaCurr < RHS:
                itr += 1
                steps += 1
                LambdaCurr = Lambdas[itr]
            itr -= 1
            boole = int(steps>1)
            LambdaCurr = Lambdas[itr]*boole + LambdaPrev*(1-boole)
            if boole:
                vCurr += exp(log(LambdaCurr-LambdaPrev) - loglambdas[itr-1])
            RHS -= LambdaCurr
            logscalar = loglambdas[itr-1]
            logsoluti = log(RHS) - logscalar
            vCurr += exp(logsoluti)
            Taus[k] = (vCurr - vPrev)
            k += 1
        except IndexError:
            boole = int(steps>1)
            LambdaCurr = Lambdas[-1]*boole + LambdaPrev*(1-boole)
            RHS -= LambdaCurr
            logscalar = loglambdas[-1]
            while steps > 1:
                vCurr += 1
                steps -= 1
            logsoluti = log(RHS) - logscalar
            vCurr += exp(logsoluti)
            Taus[k] = (vCurr - vPrev)
            k += 1
    logscalar = loglambdas[-1]
    if to_tmrca:
        while k <= (K-1): # finish
            Taus[k] = exp(log(dtime[k]) - logscalar)
            k += 1
    Etas = np.cumsum(Taus)
    Etas = Etas[Etas > 0]
#     return np.floor(Etas)+1 # round to discrete time
    return Etas # continuous time

def binomial_step(k, Nei, ploidy=2):
    '''Take a Binomial(n,p) step for 1 gen of WF process

    Parameters
    ----------
    k : int
        Sample size
    Nei : float
        Effective size at time i
    ploidy : int

    Return
    ------
    int
        Count from binomial experiments
    '''
    denom = Nei * ploidy
    numer = int(k * (k - 1) / 2)
    bstep = binom.rvs(numer, 1/denom, size=1)[0]
    return bstep

def binomial_pmf_leq1(k, Nei, ploidy=2):
    '''Compute mass P(\leq 1) for Binomial(n,p) for 1 gen of WF process

    Parameters
    ----------
    k : int
        Sample size
    Nei : float
        Effective size at time i
    ploidy : int

    Return
    ------
    float
        A probability mass
    '''
    numer = int(k * (k - 1) / 2)
    denom = 1 / (Nei * ploidy)
    return binom.pmf(0, numer, denom) + binom.pmf(1, numer, denom)

def wright_fisher(n, Ne, ploidy = 2, to_tmrca=True):
    '''Simulate times in Wright Fisher model with varying population size
    (Assumes that all coalescences are pairwise, i.e., no multiple mergers )
    (After last generation in Ne, assume constant size
    and apply scaled basic_coalescent)

    Parameters
    ----------
    n : int
        Sample size
    Ne : dict
        Effective population sizes
    ploidy : int
    to_tmrca : bool
        Continue to TMRCA

    Returns
    -------
    numpy.array
        Arrival times in generations
    '''
    # initalize
    n = int(float(n))
    k = n
    itr = 0
    gentimes = np.zeros(n-1)
    Me = deepcopy(Ne)
    timer = min(Me.keys())
    lastG = max(Me.keys())
    lastN = Me[lastG]
    cuml = 0
    while k > 1 and timer <= lastG:
        # record coalescent events
        size = Me[timer] * ploidy
        draw = [randint(0, size - 1) for i in range(k)]
        table = {}
        for i in range(len(draw)):
            try:
                table[draw[i]] += 1
            except KeyError:
                table[draw[i]] = 1
        events = [val for key, val in table.items() if val >= 2]
        l = sum(events) - len(events)
        cuml += l
        timer += 1
        gentimes[itr:cuml] = timer
        k -= l
        itr += l
    if k > 1: # finish with coalescent
        if to_tmrca:
            finish = basic_coalescent(k) * lastN + lastG
            finish = np.floor(finish) + 1 # round to discrete time
            gentimes[cuml:] = finish
    return gentimes[gentimes > 0]

def simulate_coalgen(n, Ne, ploidy, binom_cut=0.01, to_tmrca=True, wf_approx=False):
    '''Simulate times in varying size population with large sample.
    Use binomial walk, Wright Fisher process, and the coalescent.

    Parameters
    ----------
    n : int
        Sample size
    Ne : dict
        Effective population sizes
    ploidy : int
    binom_cut : float
        Threshold to conduct binomial update [P(X <= 1) <= \alpha]
    to_tmrca : bool
        Go to TMRCA
    wf_approx : bool
        Use Binomial approximation for early WF process (default False)

    Returns
    -------
    numpy.array
        Arrival times in generations
    '''

    assert n > 1
    n = int(float(n))

    # use the Binomial approximation for WF process
    # at the early steps of process
    if wf_approx:

        assert binom_cut > 0
        assert binom_cut < 1
        j = n
        Taus = np.zeros(n-1)
        itr = min(Ne.keys()) + 1
        k = 0

        try: # while loop ends, i.e. condition to switch from binomial walk achieved

            # binomial walk process
            while binomial_pmf_leq1(j, Ne[itr], ploidy) <= binom_cut:
                # i is binomial count
                # j is nodes in wf process
                # k is index iterating up
                i = binomial_step(j, Ne[itr], ploidy)
                j -= i
                if j > 1:
                    itr += 1
                    Taus[k:(k+i)] = itr
                    k += i
                else:
                    itr += 1
                    Taus[k:] = itr
            Taus = Taus[Taus>0]

            # wright fisher / coalescent process
            # wright fisher runs a basic coalescent at end of Ne dictionary
            sNe = {k:v for k,v in Ne.items() if k >= itr}
            Etas = wright_fisher(j, sNe, ploidy, to_tmrca)
            Taus = np.concatenate((Taus,Etas))

        except KeyError: # while loop does not end, i.e. reach end of defined Ne
            Taus = Taus[Taus>0]

    # do not use the Binomial approximation
    else:
        Taus = wright_fisher(n, Ne, ploidy, to_tmrca)

    return Taus

### coalescent network simulator ###

import numpy as np
import pandas as pd
import networkx as nx
from scipy.stats import binom
from random import randint, sample
from math import floor, exp, log, ceil
from numpy.random import exponential as Exp

### coalescent network simulator ###

class DirectedPaths:

    def __init__(self, _id, _pairwise_output, _left_length, _right_length):
        self._left_length = _left_length
        self._right_length = _right_length
        self._num_leaves = 1
        self._leading_leaf = _id
        self._pairwise_output = _pairwise_output
        if _pairwise_output:
            self._leaves = [_id]
        return None

    def add_edge(self, _left_length, _right_length):
        self._left_length = min(self._left_length, _left_length)
        self._right_length = min(self._right_length, _right_length)
        return None

class Node:

    def __init__(self, _id, _time, _pairwise_output, _left_length, _right_length):
        self._id = _id
        self._time = _time
        if _time <= 0:
            self._paths_list = [DirectedPaths(_id, _pairwise_output, _left_length, _right_length)]
            self._num_leaves = 1
        else:
            self._paths_list = []
            self._num_leaves = 0
        self._num_tracts = 0
        return None
    def __repr__(self):
        return 'Node(_id = ' + str(self._id) + ')'
    def __str__(self):
        return 'Node(_id = ' + str(self._id) + ')'

    def add_incoming_node(self, node):
        self._num_leaves += node._num_leaves
        self._num_tracts += node._num_tracts
        return None

    def add_incoming_edge(self, scale):
        if scale != 0:
            scale = 1 / scale
            _left_length = Exp(scale=scale, size=1)[0]
            _right_length = Exp(scale=scale, size=1)[0]
            for paths in self._paths_list:
                paths.add_edge(_left_length, _right_length)
        else:
            pass
        return None

    def merge(self):
        paths_list = self._paths_list
        merged = set() # merge set
        doskip = set() # skip set
        k = len(paths_list)
        for i in range(k):
            if i not in doskip:
                merged.add(i)
                for j in range(i+1,k):
                    if j not in doskip:
                        paths1 = paths_list[i]
                        paths2 = paths_list[j]
                        if paths1._left_length == paths2._left_length:
                            if paths1._right_length == paths2._right_length:
                                m1 = paths1._num_leaves
                                m2 = paths2._num_leaves
                                m = m1 + m2
                                paths_list[i]._num_leaves = m
                                paths_list[i]._leading_leaf = min(paths1._leading_leaf, paths2._leading_leaf)
                                if paths_list[i]._pairwise_output:
                                    paths_list[i]._leaves += paths2._leaves
                                doskip.add(j)
        # merging
        self._paths_list = [paths_list[i] for i in merged]
        return None

    def prune_paths(self, cutoff):
        self._paths_list = [paths for paths in self._paths_list if paths._left_length + paths._right_length >= cutoff]
        return None

    def pair_coalesce(self, node1, node2, cutoff1, cutoff2, subgraph_id=2, record_dist=True, pairwise_output=True):
        deltas = [self._time - node1._time, self._time - node2._time]
        rates = [delta / 100 for delta in deltas]
        node1.add_incoming_edge(rates[0])
        node1.prune_paths(cutoff2)
        node1.merge()
        node2.add_incoming_edge(rates[1])
        node2.prune_paths(cutoff2)
        node2.merge()
        _num_tracts, n1, n2 = pairwise_compare(node1, node2, cutoff1, cutoff2, self._time, subgraph_id, record_dist, pairwise_output)
        node1 = n1
        node2 = n2
        self._num_tracts += _num_tracts
        self.add_incoming_node(node1)
        for paths in node1._paths_list:
            self._paths_list.append(paths)
        self.add_incoming_node(node2)
        for paths in node2._paths_list:
            self._paths_list.append(paths)
        return None

def pairwise_compare(node1, node2, cutoff1, cutoff2, _time, subgraph_id=2, record_dist=True, pairwise_output=True):
    '''Compare paths up coalescent tree based in IBD

    Parameters
    ----------
    cutoff1 : float
        Long IBD cM threshold
    cutoff2 : float
        Long IBD cM threshold
    _time : int
        Generation time

    Returns
    -------
    tuple
        (int # of long IBD segments, Node class instance, Node class instance)
    '''
    # initializing
    global H1, H0, H, ldist1, ldist0, ldist, tdist1, tdist0, tdist, ddist1, ddist0, ddist
    global pairwise_segments
    _num_tracts = 0
    K = len(node1._paths_list)
    L = len(node2._paths_list)
    for k in range(K):
        i = node1._paths_list[k]
        for l in range(L):
            j = node2._paths_list[l]
            _left_length = min(i._left_length, j._left_length)
            _right_length = min(i._right_length, j._right_length)
            _length = _right_length + _left_length
            # networking
            if _length >= cutoff2:
                if subgraph_id == 1:
                    H1.add_edge(i._leading_leaf, j._leading_leaf)
                elif subgraph_id == 0:
                    H0.add_edge(i._leading_leaf, j._leading_leaf)
                else:
                    H.add_edge(i._leading_leaf, j._leading_leaf)
                if i._leading_leaf <= j._leading_leaf:
                    node2._paths_list[l]._leading_leaf = node1._paths_list[k]._leading_leaf
                else:
                    node1._paths_list[k]._leading_leaf = node2._paths_list[l]._leading_leaf
                # segment counting
                if _length >= cutoff1:
                    m1 = i._num_leaves
                    m2 = j._num_leaves
                    density = m1 * m2
                    _num_tracts += density
                    if record_dist:
                        if subgraph_id == 1:
                            ldist1.append(_length)
                            tdist1.append(_time)
                            ddist1.append(density)
                        elif subgraph_id == 0:
                            ldist0.append(_length)
                            tdist0.append(_time)
                            ddist0.append(density)
                        else:
                            ldist.append(_length)
                            tdist.append(_time)
                            ddist.append(density)
                    if i._pairwise_output:
                        for a in i._leaves:
                            for b in j._leaves:
                                pairwise_segments.append((a,b,_length,_time))

    return _num_tracts, node1, node2

def two_randint(n):
    x = randint(0,n)
    y = randint(0,n)
    while x == y:
        y = randint(0,n)
    return sorted([x,y])

[docs]def simulate_ibd(n, Ne, 
                 long_ibd=2.0, short_ibd=1.0, ploidy=2, 
                 record_dist=True, pairwise_output=True,
                 left_length=np.inf, right_length=np.inf,
                 wf_approx=False
                 ):
    '''ibd segments from a coalescent

    Parameters
    ----------
    n : int
        Sample size (individuals)
    Ne : dict
        Effective population sizes
    long_ibd: float
        cM length threshold
    short_ibd : float
        cM length threshold
    ploidy : int
        1 for haploid or 2 for diploid
    record_dist : bool
        To save tract length and coalescent time distributions or not to (default True)
    pairwise_output : bool
        To save pairwise segments or not to (default True)
    left_length : float
        Distance to the left chromosome end (default numpy.inf)
    right_length : float
        Distance to the right chromosome end (default numpy.inf)
    wf_approx : bool
        Use Binomial approximation for early WF process (default False)

    Returns
    -------
    tuple
        (number of tracts, group sizes, length distr., time distr., count distr., pairwise segments)
    '''

    # checks
    assert long_ibd > 0
    assert short_ibd > 0
    assert long_ibd >= short_ibd
    n = int(float(n))
    ploidy = int(float(ploidy))

    # initialize graph network
    global H, H1, H0, ldist, ldist1, ldist0, tdist1, tdist0, tdist, ddist1, ddist0, ddist
    global pairwise_segments
    pairwise_segments = []
    H = nx.Graph()
    ldist = []
    ddist = []
    tdist = []

    # initalize
    m = ploidy*n
    interiors = [Node(i, 0, pairwise_output, left_length, right_length) for i in range(m)]
    itr = m

    # simulate times
    Ne=cut_Ne(to_max_Ne(fill_Ne(Ne),500),2000)
    times = simulate_coalgen(m, Ne, ploidy, to_tmrca=True, wf_approx=False)

    indxs = [i for i in range(m)]
    for t in times:
        m -= 1
        new_node = Node(itr, t, pairwise_output, left_length, right_length)
        sindx = sorted(two_randint(m))
        s1 = sindx[0]
        s2 = sindx[1]
        new_node.pair_coalesce(interiors[s1], interiors[s2], long_ibd, short_ibd, 2, record_dist)
        interiors.append(new_node)
        interiors.pop(s1)
        interiors.pop(s2-1)
        indxs.pop(s1)
        indxs.pop(s2-1)
        indxs.append(itr)
        itr += 1

    numTracts = sum([node._num_tracts for node in interiors])
    ibdGroupSizes = sorted([len(c) for c in nx.connected_components(H)],reverse=True)

    # tract count, group sizes, distributions: lengths, times, density
    return (numTracts,
            np.array(ibdGroupSizes),
            np.array(ldist),
            np.array(tdist),
            np.array(ddist),
            tuple(pairwise_segments)
           )

[docs]def simulate_ibd_isweep(n, s, p0, Ne, 
                        long_ibd=2.0, short_ibd=1.0, 
                        random_walk=True, one_step_model='a', 
                        tau0=0, sv=-0.01, ploidy=2, 
                        record_dist=True, pairwise_output=True,
                        left_length=np.inf, right_length=np.inf,
                        wf_approx=False
                        ):
    '''ibd segments from a coalescent with selection

    Parameters
    ----------
    n : int
        Sample size (individuals)
    s : float
        Selection coefficient
    p0 : float
        Variant frequency at generation 0
    Ne : dict
        Effective population sizes
    long_ibd, short_ibd : float
        cM length threshold
    random_walk : bool
        True for random walk
    one_step_model : str
        'm', 'a', 'd', or 'r'
    tau0 : int
        Generation when neutrality begins
    sv: float
        Allele frequency of standing variation
        (Default -0.01 will assume de novo sweep)
    ploidy : int
        1 for haploid or 2 for diploid
    record_dist : bool
        To save tract length and coalescent time distributions or not to (default True)
    pairwise_output : bool
        To save pairwise segments or not to (default True)
    left_length : float
        Distance to the left chromosome end (default numpy.inf)
    right_length : float
        Distance to the right chromosome end (default numpy.inf)
    wf_approx : bool
        Use Binomial approximation for early WF process (default False)

    Returns
    -------
    tuple(s)
        (all, adaptive allele, non-adaptive allele) then pairwise segments
        Each tuple is (number of tracts, group sizes, length distr., time distr., count distr.)
    '''

    assert ploidy in [1,2]
    assert long_ibd > 0
    assert short_ibd > 0
    assert long_ibd >= short_ibd
    assert one_step_model in ['m','a','r','d']
    assert p0 <= 1
    assert p0 >= 0
    assert sv < 1

    global H, H1, H0, ldist, ldist1, ldist0, tdist1, tdist0, tdist, ddist1, ddist0, ddist
    global pairwise_segments
    pairwise_segments = []
    H1 = nx.Graph()
    H0 = nx.Graph()
    ldist1 = []
    ldist0 = []
    tdist1 = []
    tdist0 = []
    ddist1 = []
    ddist0 = []

    # renaming
    p = p0
    t = tau0
    mdl = one_step_model
    stoc = random_walk
    Ne = cut_Ne(to_max_Ne(fill_Ne(Ne),500),2000)

    # should p0 be fixed
    if (p0 == 0) or (p0 == 1):
        out = simulate_ibd(n, Ne,
                           long_ibd, 
                           short_ibd,
                           ploidy,
                           record_dist, 
                           pairwise_output,
                           left_length,
                           right_length,
                           wf_approx
                          )
        # numpy-ify
        return (out,
                (np.nan,np.array([]),np.array([]),np.array([]),np.array([])),
                (np.nan,np.array([]),np.array([]),np.array([]),np.array([])),
                tuple(pairwise_segments)
               )

    # calculating structured demographies
    ps, Ns, xs = walk_variant_backward(s, p, Ne, stoc, mdl, tau0, sv, ploidy)
    n = int(float(n))
    n = n * ploidy
    G = max(Ne.keys())
    n1 = floor(n * ps[0])
    n0 = n - n1
    N1 = [ceil(i) for i in Ns * ps]
    N1 = {i:N1[i] for i in range(len(N1))}
    qs = extend_vector(1 - ps, 1, G)
    Ns = np.array(list(Ne.values()))
    N0 = [ceil(i) for i in Ns * qs]
    N0 = {i:N0[i] for i in range(len(N0))}

    # arrival times
    times1 = simulate_coalgen(n1, N1, ploidy, to_tmrca=False, wf_approx=False)
    interiors1 = [Node(i, 0, pairwise_output, left_length, right_length) for i in range(n1)]
    interiors0 = [Node(i, 0, pairwise_output, left_length, right_length) for i in range(n1,n1+n0)]

    # pairwise comparisons pop 1
    m = n1
    indxs = [i for i in range(m)]
    itr = n1 + n0
    for t in times1:
        m -= 1
        new_node = Node(itr, t, pairwise_output, left_length, right_length)
        sindx = sorted(two_randint(m))
        s1 = sindx[0]
        s2 = sindx[1]
        new_node.pair_coalesce(interiors1[s1], interiors1[s2], long_ibd, short_ibd, 1, record_dist, pairwise_output)
        interiors1.append(new_node)
        interiors1.pop(s1)
        interiors1.pop(s2-1)
        indxs.pop(s1)
        indxs.pop(s2-1)
        indxs.append(itr)
        itr += 1
    numTracts1 = sum([node._num_tracts for node in interiors1])
    ibdGroupSizes1 = sorted([len(c) for c in nx.connected_components(H1)],reverse=True)

    # pairwise comparisons p0
    maxt1=max([node._time for node in interiors1])
    N00 = {key:val for key,val in N0.items() if key <= maxt1}
    N01 = {key:val for key,val in N0.items() if key > maxt1}
    times0 = simulate_coalgen(n0, N00, ploidy, to_tmrca=False, wf_approx=False)
    m = n0
    indxs = [i for i in range(m)]
    for t in times0:
        m -= 1
        new_node = Node(itr, t, pairwise_output, left_length, right_length)
        sindx = sorted(two_randint(m))
        s1 = sindx[0]
        s2 = sindx[1]
        new_node.pair_coalesce(interiors0[s1], interiors0[s2], long_ibd, short_ibd, 0, record_dist, pairwise_output)
        interiors0.append(new_node)
        interiors0.pop(s1)
        interiors0.pop(s2-1)
        indxs.pop(s1)
        indxs.pop(s2-1)
        indxs.append(itr)
        itr += 1
    numTracts0 = sum([node._num_tracts for node in interiors0])
    ibdGroupSizes0 = sorted([len(c) for c in nx.connected_components(H0)],reverse=True)

    # arrival times, pairwise comparisons to TMRCA
    H = nx.compose(H1,H0)
    ldist = [*ldist1, *ldist0]
    tdist = [*tdist1, *tdist0]
    ddist = [*ddist1, *ddist0]
    interiors = interiors1 + interiors0
    m = len(interiors)
    indxs = [i for i in range(m)]
    if len(N01.keys()) > 0:
        times2 = simulate_coalgen(m, N01, ploidy, to_tmrca=True, wf_approx=False)
        for t in times2:
            m -= 1
            new_node = Node(itr, t, pairwise_output, left_length, right_length)
            sindx = sorted(two_randint(m))
            s1 = sindx[0]
            s2 = sindx[1]
            new_node.pair_coalesce(interiors[s1], interiors[s2], long_ibd, short_ibd, 2, record_dist, pairwise_output)
            interiors.append(new_node)
            interiors.pop(s1)
            interiors.pop(s2-1)
            indxs.pop(s1)
            indxs.pop(s2-1)
            indxs.append(itr)
            itr += 1
    numTracts = sum([node._num_tracts for node in interiors])
    ibdGroupSizes = sorted([len(c) for c in nx.connected_components(H)],reverse=True)

    # numpy-ify
    return ((numTracts,np.array(ibdGroupSizes),np.array(ldist),np.array(tdist),np.array(ddist)),
            (numTracts1,np.array(ibdGroupSizes1),np.array(ldist1),np.array(tdist1),np.array(ddist1)),
            (numTracts0,np.array(ibdGroupSizes0),np.array(ldist0),np.array(tdist0),np.array(ddist0)),
            tuple(pairwise_segments)
           )

### segments ###

# quasi geometric random variables

def probability_quasi_geometric(ps, Ns, ploidy = 2):
    """Probability masses for quasi-geometric coalescent

    Parameters
    ----------
    ps : numpy.array
        Variant frequencies (back in time)
    Ns : numpy.array
        Effective population sizes
    ploidy : int
        1 for haploid or 2 for diploid

    Returns
    -------
    numpy.array
        P(G = g) for generation g; zero element is P(G > g) for some large g
    """

    assert ploidy in [1,2]
    ones = np.array([1 for i in range(len(ps))])
    marg_mass = 1 / (ploidy * ps[:-1] * Ns[:-1])
    marg_mass = np.insert(marg_mass, 0, 0)
    marg_mass = np.minimum(ones, marg_mass)
    surv_mass = ones - marg_mass
    surv_mass = np.cumprod(surv_mass)
    marg_mass = 1 / (ploidy * ps * Ns)
    marg_mass = np.minimum(ones, marg_mass)
    full_mass = surv_mass * marg_mass
    full_mass = np.insert(full_mass, 0, np.maximum(1 - np.sum(full_mass), 0))

    return full_mass

def simulate_quasi_geometric(n, mass):
    """Simulator for quasi-geometric coalescent

    Parameters
    ----------
    n : int
        Simulation size
    mass : numpy.array
        Probability masses

    Returns
    -------
    numpy.array
        Coalescent generation times; np.inf for P(G > g) for some large g
    """

    n = int(float(n))
    time = np.array([np.inf] + [i for i in range(1, len(mass))])
    coal = np.random.choice(time, size = n, p = mass)

    return coal

# erlang random variables

def simulate_erlang_segments(geom):
    """Simulator for Erlang (shape=2) ibd segment lengths

    Parameters
    ----------
    geom : numpy.array
        Coalescent generation times (quasi-geometric)

    Returns
    -------
    numpy.array
        Independent ibd segment lengths
    """

    geom = geom[geom != np.inf]
    m = len(geom)
    left = np.random.exponential(scale = 50 / geom, size = m)
    right = np.random.exponential(scale = 50 / geom, size = m)

    return left + right

def probability_erlang_segments(ab, mass):
    '''Interval probabilities for Erlang (shape=2) ibd lengths

    Parameters
    ----------
    ab : numpy.array
        Increasing floats in centiMorgans
    mass : numpy.array
        P(G = g) for generation g

    Returns
    -------
    numpy.array
        P(\ell \in [u,v)) for many [u,v) intervals
    '''

    # local function
    def puv(u, v, g):

        # error handling
        if u < 0:
            raise ValueError("Left endpoint less than zero")
        if u >= v:
            raise ValueError("Left endpoint exceeds right")

        # integral values
        U = np.exp(- g / 50 * u) * (u * g + 50)
        if np.isinf(v):
            V = 0
        else:
            V = np.exp(- g / 50 * v) * (v * g + 50)

        return (U - V) / 50

    # initialize
    mass = mass[1:]
    geom = np.array([i for i in range(1, len(mass) + 1)])
    M = len(ab)

    # double integral
    puvs = []
    for m in range(1, M):
        pab = puv(ab[m-1], ab[m], geom)
        puvs.append(sum(mass * pab))

    return np.array(puvs)

# probability of ibd

[docs]def probability_ibd(ps, Ns, long_ibd = 2, ploidy = 2):
    """Approximate probability of ibd

    Parameters
    ----------
    ps : numpy.array
        Variant frequencies (back in time)
    Ns : numpy.array
        Effective population sizes
    long_ibd : float
        cM length threshold
    ploidy : int
        1 for haploid or 2 for diploid

    Returns
    -------
    float
        approx P(\ell > c) where \ell is ibd length
    """

    assert ploidy in [1,2]

    # initialize
    p0 = ps[0]

    # cumulative products
    with np.errstate(divide = 'ignore'):
        dnm1 = 1 / (ploidy * ps[:-1] * Ns[:-1])
    dnm1 = np.insert(dnm1, 0, 0)
    one = np.array([1 for i in range(len(dnm1))])
    sub1 = np.minimum(one, dnm1)
    onesub1 = one - sub1
    cp1 = np.cumprod(onesub1)

    # products
    with np.errstate(divide = 'ignore'):
        dnm1 = 1 / (ploidy * ps * Ns)
    dnm1 = np.minimum(one, dnm1)

    # partial sum
    pgs = (p0 ** 2) * cp1 * dnm1
    gstar = len(pgs)

    masses = []
    for g in range(1, gstar + 1):
        masses.append((long_ibd * g / 50 + 1) * exp(- g * long_ibd / 50))
    masses = np.array(masses)
    partialp = sum(pgs * masses)

    # approximate coalescent
    if dnm1[-1] >= 0: # check this
        FRp = - np.inf
    else:
        hp = (p0 ** 2) * cp1[-1] * (1 - dnm1[-1])
        dp = 1 / ploidy / Ns[-1] / ps[-1]
        Cp = log(1 - dp) if not np.isinf(dp) else 0
        CCp = Cp - long_ibd / 50
        FRp = log(-(CCp + long_ibd / 50 * ((gstar + 1) * CCp - 1)))
        FRp = FRp + CCp * (gstar + 1) - log(CCp ** 2)
        FRp = FRp + log(hp) + log(dp) - Cp * (gstar + 1)

    return partialp + exp(FRp)

[docs]def probability_ibd_isweep(s, p0, Ne, long_ibd = 2, one_step_model = 'a', tau0 = 0, sv=-0.01, ploidy = 2):
    '''Approximate probability of ibd given a sweep model

    Parameters
    ----------
    s : float
        Selection coefficient
    p0 : float
        Variant frequency at generation 0
    Ne : dict
        Effective population sizes
    long_ibd : float
        cM length threshold
    one_step_model : str
        'm', 'a', 'd', or 'r'
    tau0 : int
        Generation when neutrality begins
    sv: float
        Allele frequency of standing variation
        (Default -0.01 will assume de novo sweep)
    ploidy : int
        1 for haploid or 2 for diploid

    Returns
    -------
    float
        approx P(\ell > c) where \ell is ibd length
    '''
    assert ploidy in [1,2]
    assert p0 >= 0
    assert p0 <= 1
    assert sv < 1
    if p0==1 or p0==0:
        Ne = cut_Ne(to_max_Ne(fill_Ne(Ne),500),1000)
        b = np.array(list(Ne.values()))
        a = extend_vector(np.array([1]),1,max(Ne.keys()))
        d = probability_ibd(a, b, long_ibd, ploidy)
        f = 0
    else:
        Ne = cut_Ne(to_max_Ne(fill_Ne(Ne),500),1000)
        a, b, c = walk_variant_backward(s, p0, Ne, False, one_step_model, tau0, sv, ploidy)
        a = extend_vector(a, a[-1], max(Ne.keys()))
        b = extend_vector(b, b[-1], max(Ne.keys()))
        d = probability_ibd(a, b, long_ibd, ploidy)
        f = probability_ibd(1-a, b, long_ibd, ploidy)
    return d + f

### for asymptotic tests ###

[docs]def simulate_ibd_constant(n, Ne, 
                          long_ibd=2.0, short_ibd=1.0, 
                          ploidy=2, 
                          record_dist=True, pairwise_output=True,
                          left_length=np.inf, right_length=np.inf
                          ):
    '''ibd segments from a coalescent w/ constant Ne

    Parameters
    ----------
    n : int
        Sample size (individuals)
    Ne : int
        Constant effective population size
    long_ibd, short_ibd : float
        cM length threshold
    ploidy : int
        1 for haploid or 2 for diploid
    record_dist : bool
        To save tract length and coalescent time distributions or not to (default True)
    pairwise_output : bool
        To save pairwise segments or not to (default True)
    left_length : float
        Distance to the left chromosome end (default numpy.inf)
    right_length : float
        Distance to the right chromosome end (default numpy.inf)

    Returns
    -------
    tuple
        (number of tracts, group sizes, length distr., time distr., count distr., pairwise segments)
    '''

    # checks
    assert long_ibd > 0
    assert short_ibd > 0
    assert long_ibd >= short_ibd
    n = int(float(n))
    ploidy = int(float(ploidy))

    # initialize graph network
    global H, H1, H0, ldist, ldist1, ldist0, tdist1, tdist0, tdist, ddist1, ddist0, ddist
    global pairwise_segments
    pairwise_segments = []
    H = nx.Graph()
    ldist = []
    ddist = []
    tdist = []

    # initalize
    m = ploidy*n
    interiors = [Node(i, 0, pairwise_output, left_length, right_length) for i in range(m)]
    itr = m

    # this is the simple change
    # do basic coalescent instead of wright-fisher
    times = basic_coalescent(m) * float(int(Ne)*ploidy)

    indxs = [i for i in range(m)]
    for t in times:
        m -= 1
        new_node = Node(itr, t, pairwise_output, left_length, right_length)
        sindx = sorted(two_randint(m))
        s1 = sindx[0]
        s2 = sindx[1]
        new_node.pair_coalesce(interiors[s1], interiors[s2], long_ibd, short_ibd, 2, record_dist)
        interiors.append(new_node)
        interiors.pop(s1)
        interiors.pop(s2-1)
        indxs.pop(s1)
        indxs.pop(s2-1)
        indxs.append(itr)
        itr += 1

    numTracts = sum([node._num_tracts for node in interiors])
    ibdGroupSizes = sorted([len(c) for c in nx.connected_components(H)],reverse=True)

    # tract count, group sizes, distributions: lengths, times, density
    return (numTracts,
            np.array(ibdGroupSizes),
            np.array(ldist),
            np.array(tdist),
            np.array(ddist),
            tuple(pairwise_segments)
           )


### time-varying selection ###

def walk_variant_forward_tv(s_s, g_s, pG, Ne, random_walk = False, ploidy = 2):
    '''Variant frequencies forward in time

    Parameters
    ----------
    s_s : list
        List of selection coefficients
    g_s : list
        List of transition times for selection coefficients
        This should be aligned with the s_s parameter
            e.g., s_s=[0.01,0.03,0.02] and g_s=[50,100] means 
            s=0.02 between 100-, s=0.03 between 100-50 and s=0.01 between 50-0
        Should be length one less than s_s
    pG : float
        Variant frequency at maximum generation
    Ne : dict
        Effective population sizes
    random_walk : bool
        True for random walk
    ploidy : int
        1 for haploid or 2 for diploid

    Additive genic selection is assumed

    Returns
    -------
    tuple
        NumPy arrays for frequencies and sizes
    '''

    # local functions
    assert ploidy in [1,2]
    def haploid_fwd(p, s): # haploid is same as multiplicative
        return p * (1 + s) / (1 + p * s)
    def additive_fwd(p, s):
        num = 1 + s + p * s
        dnm = 1 + 2 * p * s
        return p * num / dnm

    # one step calculation
    if ploidy == 1:
        one_step = haploid_fwd
    else:
        one_step = additive_fwd

    # initialize
    ps = [] # frequencies
    xs = [] # variants
    Ns = [] # sizes
    p = pG
    G = max(Ne.keys())
    assert len(s_s) == (len(g_s) + 1)
    s_s2 = deepcopy(s_s)
    g_s2 = deepcopy(g_s)
    s2 = s_s2[-1]
    s_s2 = s_s2[:-1]
    sdict = {g_s2[i]:s_s2[i] for i in range(len(g_s))}
    N = Ne[G]
    x = ceil(p * ploidy * N)
    Ns.append(N)
    xs.append(x)
    ps.append(p)

    if random_walk: # random walk

        while G >= 0:
            G -= 1
            try:
                N = Ne[G]
            except KeyError:
                pass
            try:
                s2 = sdict[G]
            except KeyError:
                pass
            p = one_step(p, s2)
            x = int(binom.rvs(int(ploidy * N), p))
            p = x / ploidy / N
            if x < 1:
                break
            if p >= 1:
                break
            ps.append(p)
            xs.append(x)
            Ns.append(N)

        return np.array([np.array(ps), np.array(Ns), np.array(xs)]) # numpy-ify

    else: # deterministic

        while G >= 0:
            G -= 1
            try:
                N = Ne[G]
            except KeyError:
                pass
            try:
                s2 = sdict[G]
            except KeyError:
                pass
            p = one_step(p, s2)
            x = floor(p * ploidy * N)
            if x < 1:
                break
            if p >= 1:
                break
            Ns.append(N)
            xs.append(x)
            ps.append(p)

        return np.array([np.array(ps), np.array(Ns), np.array(xs)]) # numpy-ify

[docs]def walk_variant_backward_tv(s_s, g_s, p0, Ne, random_walk = False, ploidy = 2):
    '''Variant frequencies backward in time

    Parameters
    ----------
    s_s : list
        List of selection coefficients
    g_s : list
        List of transition times for selection coefficients
        This should be aligned with the s_s parameter
            e.g., s_s=[0.01,0.03,0.02] and g_s=[50,100] means 
            s=0.01 between 0-50, s=0.03 between 50-100 and s=0.02 between 100-
        Should be length one less than s_s
    p0 : float
        Variant frequency at generation 0
    Ne : dict
        Effective population sizes
    random_walk : bool
        True for random walk
    ploidy : int
        1 for haploid or 2 for diploid


    Returns
    -------
    tuple
        NumPy arrays for frequencies and sizes
    '''

    # local functions
    assert ploidy in [1,2]
    assert p0 <= 1
    assert p0 >= 0
    def haploid_bwd(p, s): # haploid is same as multiplicative (Felsenstein, 2017)
        return p / (1 + s - s * p)
    def additive_bwd(p, s):
        if s <= 0:
            return p
        a = s
        b = 1 + s - 2 * p * s
        c = - p
        qf = - b + sqrt((b ** 2) - 4 * a * c)
        qf = qf / 2 / a
        return qf

    # one step calculation
    if ploidy == 1:
        one_step = haploid_bwd
    else:
        one_step = additive_bwd

    # initialize
    ps = [] # frequencies
    xs = [] # variants
    Ns = [] # sizes
    p = p0
    N = Ne[0]
    assert len(s_s) == (len(g_s)+1)
    s_s2 = deepcopy(s_s)
    g_s2 = deepcopy(g_s) 
    s2 = deepcopy(s_s2[0])
    s_s2 = s_s2[1:]
    sdict = {g_s2[i]:s_s2[i] for i in range(len(g_s))}
    x = floor(p * ploidy * N)
    Ns.append(N)
    xs.append(x)
    ps.append(p)

    if random_walk: # random walk

        for G in range(1, max(Ne.keys())+1):
            try: # population size change
                N = Ne[G]
            except KeyError:
                pass
            try:
                s2 = sdict[G]
            except KeyError:
                pass
            p = one_step(p, s2)
            x = int(binom.rvs(int(ploidy * N), p))
            p = x / ploidy / N
            if x < 1:
                break
            if p >= 1:
                break
            ps.append(p)
            xs.append(x)
            Ns.append(N)

        return np.array([ps, Ns, xs], dtype=float) # numpy-ify

    else: # deterministic

        for G in range(1, max(Ne.keys())+1):
            try: # population size change
                N = Ne[G]
            except KeyError:
                pass
            try:
                s2 = sdict[G]
            except KeyError:
                pass
            p = one_step(p, s2)
            x = floor(p * ploidy * N)
            if x < 1:
                break
            if p >= 1:
                break
            Ns.append(N)
            xs.append(x)
            ps.append(p)

        return np.array([np.array(ps), np.array(Ns), np.array(xs)]) # numpy-ify
    
[docs]def simulate_ibd_isweep_tv(n, 
                           s_s, 
                           g_s, 
                           p0, 
                           Ne, 
                           long_ibd=2.0, 
                           short_ibd=1.0, 
                           random_walk=True, 
                           ploidy=2, 
                           record_dist=True, 
                           pairwise_output=True,
                           left_length=np.inf, right_length=np.inf,
                           wf_approx=False
                           ):
    '''ibd segments from a coalescent with selection

    Parameters
    ----------
    n : int
        Sample size (individuals)
    s_s : list
        List of selection coefficients
    g_s : list
        List of transition times for selection coefficients
        This should be aligned with the s_s parameter
            e.g., s_s=[0.01,0.03,0.02] and g_s=[50,100] means 
            s=0.01 between 0-50, s=0.03 between 50-100 and s=0.02 between 100-
        Should be length one less than s_s
    p0 : float
        Variant frequency at generation 0
    Ne : dict
        Effective population sizes
    long_ibd, short_ibd : float
        cM length threshold
    random_walk : bool
        True for random walk
    ploidy : int
        1 for haploid or 2 for diploid
    record_dist : bool
        To save tract length and coalescent time distributions or not to (default True)
    pairwise_output : bool
        To save pairwise segments or not to (default True)
    left_length : float
        Distance to the left chromosome end (default numpy.inf)
    right_length : float
        Distance to the right chromosome end (default numpy.inf)
    wf_approx : bool
        Use Binomial approximation for early WF process (default False)

    Returns
    -------
    tuple(s)
        (all, adaptive allele, non-adaptive allele) then pairwise segments
        Each tuple is (number of tracts, group sizes, length distr., time distr., count distr.)
    '''

    assert ploidy in [1,2]
    assert long_ibd > 0
    assert short_ibd > 0
    assert long_ibd >= short_ibd
    assert p0 <= 1
    assert p0 >= 0

    global H, H1, H0, ldist, ldist1, ldist0, tdist1, tdist0, tdist, ddist1, ddist0, ddist
    global pairwise_segments
    pairwise_segments = []
    H1 = nx.Graph()
    H0 = nx.Graph()
    ldist1 = []
    ldist0 = []
    tdist1 = []
    tdist0 = []
    ddist1 = []
    ddist0 = []

    # renaming
    p = p0
    stoc = random_walk
    Ne = cut_Ne(to_max_Ne(fill_Ne(Ne),500),2000)

    # should p0 be fixed
    if (p0 == 0) or (p0 == 1):
        out = simulate_ibd(n, Ne,
                           long_ibd, 
                           short_ibd,
                           ploidy,
                           record_dist, 
                           pairwise_output,
                           left_length,
                           right_length,
                           wf_approx
                          )
        # numpy-ify
        return (out,
                (np.nan,np.array([]),np.array([]),np.array([]),np.array([])),
                (np.nan,np.array([]),np.array([]),np.array([]),np.array([])),
                tuple(pairwise_segments)
               )

    # calculating structured demographies
    ps, Ns, xs = walk_variant_backward_tv(s_s, g_s, p, Ne, stoc, ploidy)
    n = int(float(n))
    n = n * ploidy
    G = max(Ne.keys())
    n1 = floor(n * ps[0])
    n0 = n - n1
    N1 = [ceil(i) for i in Ns * ps]
    N1 = {i:N1[i] for i in range(len(N1))}
    qs = extend_vector(1 - ps, 1, G)
    Ns = np.array(list(Ne.values()))
    N0 = [ceil(i) for i in Ns * qs]
    N0 = {i:N0[i] for i in range(len(N0))}

    # arrival times
    times1 = simulate_coalgen(n1, N1, ploidy, to_tmrca=False, wf_approx=False)
    interiors1 = [Node(i, 0, pairwise_output, left_length, right_length) for i in range(n1)]
    interiors0 = [Node(i, 0, pairwise_output, left_length, right_length) for i in range(n1,n1+n0)]

    # pairwise comparisons pop 1
    m = n1
    indxs = [i for i in range(m)]
    itr = n1 + n0
    for t in times1:
        m -= 1
        new_node = Node(itr, t, pairwise_output, left_length, right_length)
        sindx = sorted(two_randint(m))
        s1 = sindx[0]
        s2 = sindx[1]
        new_node.pair_coalesce(interiors1[s1], interiors1[s2], long_ibd, short_ibd, 1, record_dist, pairwise_output)
        interiors1.append(new_node)
        interiors1.pop(s1)
        interiors1.pop(s2-1)
        indxs.pop(s1)
        indxs.pop(s2-1)
        indxs.append(itr)
        itr += 1
    numTracts1 = sum([node._num_tracts for node in interiors1])
    ibdGroupSizes1 = sorted([len(c) for c in nx.connected_components(H1)],reverse=True)

    # pairwise comparisons p0
    maxt1=max([node._time for node in interiors1])
    N00 = {key:val for key,val in N0.items() if key <= maxt1}
    N01 = {key:val for key,val in N0.items() if key > maxt1}
    times0 = simulate_coalgen(n0, N00, ploidy, to_tmrca=False, wf_approx=False)
    m = n0
    indxs = [i for i in range(m)]
    for t in times0:
        m -= 1
        new_node = Node(itr, t, pairwise_output, left_length, right_length)
        sindx = sorted(two_randint(m))
        s1 = sindx[0]
        s2 = sindx[1]
        new_node.pair_coalesce(interiors0[s1], interiors0[s2], long_ibd, short_ibd, 0, record_dist, pairwise_output)
        interiors0.append(new_node)
        interiors0.pop(s1)
        interiors0.pop(s2-1)
        indxs.pop(s1)
        indxs.pop(s2-1)
        indxs.append(itr)
        itr += 1
    numTracts0 = sum([node._num_tracts for node in interiors0])
    ibdGroupSizes0 = sorted([len(c) for c in nx.connected_components(H0)],reverse=True)

    # arrival times, pairwise comparisons to TMRCA
    H = nx.compose(H1,H0)
    ldist = [*ldist1, *ldist0]
    tdist = [*tdist1, *tdist0]
    ddist = [*ddist1, *ddist0]
    interiors = interiors1 + interiors0
    m = len(interiors)
    indxs = [i for i in range(m)]
    if len(N01.keys()) > 0:
        times2 = simulate_coalgen(m, N01, ploidy, to_tmrca=True, wf_approx=False)
        for t in times2:
            m -= 1
            new_node = Node(itr, t, pairwise_output, left_length, right_length)
            sindx = sorted(two_randint(m))
            s1 = sindx[0]
            s2 = sindx[1]
            new_node.pair_coalesce(interiors[s1], interiors[s2], long_ibd, short_ibd, 2, record_dist, pairwise_output)
            interiors.append(new_node)
            interiors.pop(s1)
            interiors.pop(s2-1)
            indxs.pop(s1)
            indxs.pop(s2-1)
            indxs.append(itr)
            itr += 1
    numTracts = sum([node._num_tracts for node in interiors])
    ibdGroupSizes = sorted([len(c) for c in nx.connected_components(H)],reverse=True)

    # numpy-ify
    return ((numTracts,np.array(ibdGroupSizes),np.array(ldist),np.array(tdist),np.array(ddist)),
            (numTracts1,np.array(ibdGroupSizes1),np.array(ldist1),np.array(tdist1),np.array(ddist1)),
            (numTracts0,np.array(ibdGroupSizes0),np.array(ldist0),np.array(tdist0),np.array(ddist0)),
            tuple(pairwise_segments)
           )

[docs]def simulate_ibd_split(n, q, split_gen, Ne, 
                        long_ibd=2.0, short_ibd=1.0, ploidy=2, 
                        record_dist=True, pairwise_output=True,
                        left_length=np.inf, right_length=np.inf,
                        wf_approx=False
                        ):
    '''ibd segments from a coalescent with selection

    Parameters
    ----------
    n : int
        Sample size (individuals)
    q : float
        Split proportion
    split_gen : int
        Time ago of the population split 
    Ne : dict
        Effective population sizes
    long_ibd, short_ibd : float
        cM length threshold
    ploidy : int
        1 for haploid or 2 for diploid
    record_dist : bool
        To save tract length and coalescent time distributions or not to (default True)
    pairwise_output : bool
        To save pairwise segments or not to (default True)
    left_length : float
        Distance to the left chromosome end (default numpy.inf)
    right_length : float
        Distance to the right chromosome end (default numpy.inf)
    wf_approx : bool
        Use Binomial approximation for early WF process (default False)

    Returns
    -------
    tuple(s)
        (all, first admix allele, second admix allele) then pairwise segments
        Each tuple is (number of tracts, group sizes, length distr., time distr., count distr.)
    '''

    p0 = q
    assert ploidy in [1,2]
    assert long_ibd > 0
    assert short_ibd > 0
    assert long_ibd >= short_ibd
    assert p0 <= 1
    assert p0 >= 0
    split_gen = int(float(split_gen))

    global H, H1, H0, ldist, ldist1, ldist0, tdist1, tdist0, tdist, ddist1, ddist0, ddist
    global pairwise_segments
    pairwise_segments = []
    H1 = nx.Graph()
    H0 = nx.Graph()
    ldist1 = []
    ldist0 = []
    tdist1 = []
    tdist0 = []
    ddist1 = []
    ddist0 = []

    # renaming
    p = p0
    Ne = cut_Ne(to_max_Ne(fill_Ne(Ne),500),2000)

    # should p0 be fixed
    if (p0 == 0) or (p0 == 1):
        out = simulate_ibd(n, Ne,
                           long_ibd, 
                           short_ibd,
                           ploidy,
                           record_dist, 
                           pairwise_output,
                           left_length,
                           right_length,
                           wf_approx
                          )
        # numpy-ify
        return (out,
                (np.nan,np.array([]),np.array([]),np.array([]),np.array([])),
                (np.nan,np.array([]),np.array([]),np.array([]),np.array([])),
                tuple(pairwise_segments)
               )

    # calculating structured demographies
    n = int(float(n))
    n = n * ploidy
    G = max(Ne.keys())
    n1 = floor(n * p0)
    n0 = n - n1
    N1 = {i:floor(Ne[i]*p0) for i in range(split_gen)}
    N00 = {i:(Ne[i]-N1[i]) for i in range(split_gen)}
    N01 = {i:Ne[i] for i in range(split_gen, G+1)}

    # arrival times
    times1 = simulate_coalgen(n1, N1, ploidy, to_tmrca=False, wf_approx=False)
    interiors1 = [Node(i, 0, pairwise_output, left_length, right_length) for i in range(n1)]
    interiors0 = [Node(i, 0, pairwise_output, left_length, right_length) for i in range(n1,n1+n0)]

    # pairwise comparisons pop 1
    m = n1
    indxs = [i for i in range(m)]
    itr = n1 + n0
    for t in times1:
        m -= 1
        new_node = Node(itr, t, pairwise_output, left_length, right_length)
        sindx = sorted(two_randint(m))
        s1 = sindx[0]
        s2 = sindx[1]
        new_node.pair_coalesce(interiors1[s1], interiors1[s2], long_ibd, short_ibd, 1, record_dist, pairwise_output)
        interiors1.append(new_node)
        interiors1.pop(s1)
        interiors1.pop(s2-1)
        indxs.pop(s1)
        indxs.pop(s2-1)
        indxs.append(itr)
        itr += 1
    numTracts1 = sum([node._num_tracts for node in interiors1])
    ibdGroupSizes1 = sorted([len(c) for c in nx.connected_components(H1)],reverse=True)

    # pairwise comparisons p0
    maxt1=max([node._time for node in interiors1])
    times0 = simulate_coalgen(n0, N00, ploidy, to_tmrca=False, wf_approx=False)
    m = n0
    indxs = [i for i in range(m)]
    for t in times0:
        m -= 1
        new_node = Node(itr, t, pairwise_output, left_length, right_length)
        sindx = sorted(two_randint(m))
        s1 = sindx[0]
        s2 = sindx[1]
        new_node.pair_coalesce(interiors0[s1], interiors0[s2], long_ibd, short_ibd, 0, record_dist, pairwise_output)
        interiors0.append(new_node)
        interiors0.pop(s1)
        interiors0.pop(s2-1)
        indxs.pop(s1)
        indxs.pop(s2-1)
        indxs.append(itr)
        itr += 1
    numTracts0 = sum([node._num_tracts for node in interiors0])
    ibdGroupSizes0 = sorted([len(c) for c in nx.connected_components(H0)],reverse=True)

    # arrival times, pairwise comparisons to TMRCA
    H = nx.compose(H1,H0)
    ldist = [*ldist1, *ldist0]
    tdist = [*tdist1, *tdist0]
    ddist = [*ddist1, *ddist0]
    interiors = interiors1 + interiors0
    m = len(interiors)
    indxs = [i for i in range(m)]
    if len(N01.keys()) > 0:
        times2 = simulate_coalgen(m, N01, ploidy, to_tmrca=True, wf_approx=False)
        for t in times2:
            m -= 1
            new_node = Node(itr, t, pairwise_output, left_length, right_length)
            sindx = sorted(two_randint(m))
            s1 = sindx[0]
            s2 = sindx[1]
            new_node.pair_coalesce(interiors[s1], interiors[s2], long_ibd, short_ibd, 2, record_dist, pairwise_output)
            interiors.append(new_node)
            interiors.pop(s1)
            interiors.pop(s2-1)
            indxs.pop(s1)
            indxs.pop(s2-1)
            indxs.append(itr)
            itr += 1
    numTracts = sum([node._num_tracts for node in interiors])
    ibdGroupSizes = sorted([len(c) for c in nx.connected_components(H)],reverse=True)

    # numpy-ify
    return ((numTracts,np.array(ibdGroupSizes),np.array(ldist),np.array(tdist),np.array(ddist)),
            (numTracts1,np.array(ibdGroupSizes1),np.array(ldist1),np.array(tdist1),np.array(ddist1)),
            (numTracts0,np.array(ibdGroupSizes0),np.array(ldist0),np.array(tdist0),np.array(ddist0)),
            tuple(pairwise_segments)
           )