# Source code for transmission_models.classes.host

import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
from random import choice,randint,random,sample,choices
from scipy.stats import nbinom, gamma, binom, expon, norm
from matplotlib.lines import Line2D
import pandas as pd
from networkx.drawing.nx_pydot import graphviz_layout



try:
    import imageio
except ImportError:
    pass


#CLASSES
class host:
    """
    Represent a host that has been infected with a virus.

    A host object bundles the data attached to one infected individual:
    its identifier, its numeric index, its genetic data, its infection
    time and (optionally) its sampling time.

    Attributes
    ----------
    index : int
        The index of the host (the ``index`` argument converted with ``int``).
    sampled : bool
        ``True`` when a sampling time was supplied (``t_sample`` is not
        ``None``), ``False`` otherwise.
    genetic_data : list
        The genetic data of the host.
    dict_attributes : dict
        A dictionary to store additional attributes. Starts empty.
    t_inf : int
        Time of infection (property backed by ``_t_inf``).
    t_sample : int or None
        The time the host was sampled.
    id : str
        The identifier of the host.

    Methods
    -------
    t_inf : property
        Getter and setter for the time of infection attribute.
    get_genetic_str() : str
        Returns the genetic data as a string.
    __str__() : str
        Returns a string with the id of the host.
    __int__() : int
        Returns the index of the host.

    Examples
    --------
    >>> h = host('host1', 1, ['A', 'T', 'C', 'G'], 10, t_sample=15)
    >>> print(h.t_inf)
    10
    >>> h.t_inf = 20
    >>> print(h.t_inf)
    20
    >>> print(h.get_genetic_str())
    ATCG
    >>> print(h)
    host1

    Notes
    -----
    The class name is lowercase (``host``) rather than PascalCase; it is
    left unchanged so that existing callers keep working.
    """

    def __init__(self, id, index, genetic_data=None, t_inf=0, t_sample=None):
        """
        Initialize a new host instance.

        Parameters
        ----------
        id : str
            The id of the host.
        index : int
            The index of the host. Converted with ``int``.
        genetic_data : list, optional
            The genetic data of the host. When omitted (or ``None``) a new
            empty list is created for this instance. A list passed in is
            stored as is, not copied.
        t_inf : int, optional
            Time of infection. Defaults to 0.
        t_sample : int, optional
            The time the host was sampled. Defaults to None.
        """
        # A host counts as sampled exactly when a sampling time was given.
        self.sampled = t_sample is not None
        self.t_sample = t_sample
        # Build the empty default per instance: a ``[]`` default in the
        # signature would be one list shared by every host created without
        # explicit genetic data.
        self.genetic_data = [] if genetic_data is None else genetic_data
        self._t_inf = t_inf
        self.index = int(index)
        self.id = id
        self.dict_attributes = {}

    @property
    def t_inf(self):
        """
        Getter for the time of infection attribute.

        Returns
        -------
        int
            The time of infection.
        """
        return self._t_inf

    @t_inf.setter
    def t_inf(self, t_inf):
        """
        Setter for the time of infection attribute.

        Parameters
        ----------
        t_inf : int
            The time of infection.
        """
        self._t_inf = t_inf

    def get_genetic_str(self):
        """
        Return the genetic data of the host as a string.

        The elements of ``genetic_data`` are concatenated with no separator,
        so they must all be strings.

        Returns
        -------
        str
            The genetic data as a string.
        """
        return "".join(self.genetic_data)

    def __str__(self):
        """
        Return a string with the id of the host.

        Returns
        -------
        str
            The id of the host.
        """
        return str(self.id)

    def __int__(self):
        """
        Return the index of the host.

        Returns
        -------
        int
            The index of the host.
        """
        return self.index


#Functions
def create_genome(chain_length):
    """
    Create a random genome sequence of specified length.

    Each position is drawn independently with ``random.choice`` from the
    four nucleotides ``A``, ``G``, ``C`` and ``T``.

    Parameters
    ----------
    chain_length : int
        The length of the genome sequence to create. A value of 0 (or any
        negative value) yields an empty list.

    Returns
    -------
    list
        A list of random nucleotides (A, G, C, T) of length chain_length.

    Examples
    --------
    The output is random; the sequence below is only illustrative.

    >>> genome = create_genome(10)
    >>> len(genome)
    10
    >>> print(genome)  # doctest: +SKIP
    ['A', 'T', 'C', 'G', 'A', 'T', 'C', 'G', 'A', 'T']
    """
    return [choice("AGCT") for _ in range(chain_length)]

def binom_mutation(chain_length, p, genome):
    """
    Perform binomial mutation on a given genome.

    The number of mutated positions, ``k``, is drawn from a binomial
    distribution with ``chain_length`` trials and success probability ``p``.
    ``k`` distinct positions are then picked at random and the nucleotide at
    each of them is replaced by a different, randomly chosen nucleotide.

    Parameters
    ----------
    chain_length : int
        The length of the genome chain. Positions are sampled from
        ``range(chain_length)``, so it should not exceed ``len(genome)``.
    p : float
        The probability of mutation for each element in the chain.
    genome : str or list
        The original genome sequence. It is copied, never modified.

    Returns
    -------
    list
        The mutated genome sequence.

    Notes
    -----
    The function operates as follows:

    1. Samples ``k`` from a binomial distribution with ``chain_length``
       trials and success probability ``p``.

    2. Randomly selects ``k`` positions from the range [0, chain_length)
       without replacement.

    3. Creates a new list ``new_genome`` from the original genome.

    4. Replaces the element at each selected position according to the
       nucleotide found there:

       - 'A' is replaced with a randomly chosen nucleotide from 'CTG'.
       - 'C' is replaced with a randomly chosen nucleotide from 'ATG'.
       - 'T' is replaced with a randomly chosen nucleotide from 'ACG'.
       - 'G' is replaced with a randomly chosen nucleotide from 'ACT'.

       Any other symbol at a selected position is left unchanged.

    5. Returns the mutated genome sequence as ``new_genome``.

    Examples
    --------
    The output is random; the sequence below is only illustrative.

    >>> genome = ['A', 'T', 'C', 'G', 'G', 'A', 'T', 'C', 'G', 'A']
    >>> mutated_genome = binom_mutation(len(genome), 0.2, genome)
    >>> print(mutated_genome)  # doctest: +SKIP
    ['A', 'T', 'C', 'A', 'G', 'A', 'T', 'C', 'G', 'A']

    See Also
    --------
    one_mutation : Perform a single mutation on a genome
    """
    # For every nucleotide, the three bases it may turn into (never itself).
    substitutions = {"A": "CTG", "C": "ATG", "T": "ACG", "G": "ACT"}

    # Cast to a plain int so the standard-library sampler receives a
    # built-in integer rather than a NumPy scalar.
    n_sites = int(np.random.binomial(n=chain_length, p=p, size=1)[0])
    positions = sample(range(chain_length), n_sites)

    new_genome = list(genome)
    for pos in positions:
        alternatives = substitutions.get(new_genome[pos])
        if alternatives is not None:
            new_genome[pos] = choice(alternatives)
    return new_genome


def one_mutation(chain_length, p, genome):
    """
    Perform one mutation on a given genome.

    One position is picked at random from ``range(chain_length)`` and the
    nucleotide found there is replaced by a different, randomly chosen
    nucleotide.

    Parameters
    ----------
    chain_length : int
        The length of the genome chain. Must be at least 1, otherwise
        ``random.sample`` raises ``ValueError``.
    p : float
        Not used by this function; accepted so that the signature matches
        ``binom_mutation``.
    genome : str or list
        The original genome sequence. It is copied, never modified.

    Returns
    -------
    list
        The mutated genome sequence.

    Notes
    -----
    The function operates as follows:

    1. Randomly selects one position from the range [0, chain_length).

    2. Creates a new list ``new_genome`` from the original genome.

    3. Replaces the element at the selected position according to the
       nucleotide found there:

       - 'A' is replaced with a randomly chosen nucleotide from 'CTG'.
       - 'C' is replaced with a randomly chosen nucleotide from 'ATG'.
       - 'T' is replaced with a randomly chosen nucleotide from 'ACG'.
       - 'G' is replaced with a randomly chosen nucleotide from 'ACT'.

       Any other symbol at the selected position is left unchanged.

    4. Returns the mutated genome sequence as ``new_genome``.

    Examples
    --------
    The output is random; the sequence below is only illustrative.

    >>> genome = ['A', 'T', 'C', 'G', 'G', 'A', 'T', 'C', 'G', 'A']
    >>> mutated_genome = one_mutation(len(genome), 0.2, genome)
    >>> print(mutated_genome)  # doctest: +SKIP
    ['A', 'T', 'C', 'G', 'G', 'A', 'T', 'C', 'G', 'T']

    See Also
    --------
    binom_mutation : Perform binomial mutation on a genome
    """
    # For every nucleotide, the three bases it may turn into (never itself).
    substitutions = {"A": "CTG", "C": "ATG", "T": "ACG", "G": "ACT"}

    pos = sample(range(chain_length), 1)[0]
    new_genome = list(genome)
    alternatives = substitutions.get(new_genome[pos])
    if alternatives is not None:
        new_genome[pos] = choice(alternatives)
    return new_genome


def average_mutations(mu, P_mut, tau, Dt, host_genetic):
    """
    Generate a chain of single mutations spread evenly over a time interval.

    The number of mutations is ``int(mu * Dt / P_mut)``. Each mutation is
    produced by ``one_mutation`` applied to the previous sequence in the
    chain (the first one is applied to ``host_genetic``), and consecutive
    mutations are separated in time by ``Dt / n_mut``.

    Parameters
    ----------
    mu : float
        The mutation rate.
    P_mut : float
        The probability of mutation. Used as the divisor when computing the
        number of mutations (so it must be non-zero) and forwarded to
        ``one_mutation``.
    tau : float
        The current time; mutation times are counted from this value.
    Dt : float
        The time interval over which the mutations are spread.
    host_genetic : list
        The genetic sequence of the host.

    Returns
    -------
    tuple
        A tuple containing:

        - mutations : list
            List of mutated genetic sequences, in order of occurrence.
        - t_mutations : list
            List of mutation times, one per entry of ``mutations``.

        When the computed number of mutations is zero or negative the
        function returns ``([host_genetic], [])``: the unmodified sequence
        and no mutation times.

    Raises
    ------
    ZeroDivisionError
        If ``P_mut`` is 0.

    Notes
    -----
    The per-mutation time step is ``Dt / n_mut``, so the last mutation time
    is ``tau + Dt`` up to floating-point rounding.
    """
    n_mut = int(mu * Dt / P_mut)  # number of mutations

    # Decide this before doing any work: with no mutations there is nothing
    # to generate and no time step to divide by.
    if n_mut <= 0:
        return [host_genetic], []

    step = Dt / n_mut
    genome_length = len(host_genetic)

    mutations = []
    t_mutations = []
    current = host_genetic
    for _ in range(n_mut):
        # Each mutation builds on the previously mutated sequence.
        current = one_mutation(genome_length, P_mut, current)
        tau += step
        mutations.append(current)
        t_mutations.append(tau)
    return mutations, t_mutations