Source code for transmission_models.classes.host

import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
from random import choice,randint,random,sample,choices
from scipy.stats import nbinom, gamma, binom, expon, norm
from matplotlib.lines import Line2D
import pandas as pd
from networkx.drawing.nx_pydot import graphviz_layout



try:
    import imageio
except ImportError:
    pass


#CLASSES
class host:
    """
    Represents a host that has been infected with a virus.

    A host object stores information about an infected individual: its
    identifier, its index, its genetic data, its infection time and its
    sampling time, plus a free-form dictionary of extra attributes.

    Attributes
    ----------
    index : int
        The index of the host (``int(index)`` of the constructor argument).
    sampled : bool
        True when a sampling time was given, i.e. ``t_sample is not None``.
    genetic_data : list
        The genetic data of the host.
    dict_attributes : dict
        A dictionary to store additional attributes. Starts empty.
    t_inf : int
        Time of infection (property backed by ``_t_inf``).
    t_sample : int or None
        The time the host was sampled, or None if it was not sampled.
    id : str
        The identifier of the host.

    Examples
    --------
    >>> h = host('host1', 1, ['A', 'T', 'C', 'G'], 10, t_sample=15)
    >>> print(h.t_inf)
    10
    >>> h.t_inf = 20
    >>> print(h.t_inf)
    20
    >>> print(h.get_genetic_str())
    ATCG
    >>> print(h)
    host1

    Notes
    -----
    The class name is lowercase (``host``) rather than PascalCase; it is
    kept as is so that existing callers keep working.
    """

    def __init__(self, id, index, genetic_data=None, t_inf=0, t_sample=None):
        """
        Initialize a new host.

        Parameters
        ----------
        id : str
            The id of the host.
        index : int
            The index of the host. Converted with ``int()``.
        genetic_data : list, optional
            The genetic data of the host. When omitted (or None) a new empty
            list is created for this instance.
        t_inf : int, optional
            Time of infection. Defaults to 0.
        t_sample : int, optional
            The time the host was sampled. Defaults to None (not sampled).
        """
        # A literal ``[]`` default would be created once and shared by every
        # host built without genetic data, so mutating one host's list would
        # silently change all the others. Build a fresh list per instance.
        if genetic_data is None:
            genetic_data = []

        self.sampled = t_sample is not None
        self.t_sample = t_sample
        self.genetic_data = genetic_data
        self._t_inf = t_inf
        self.index = int(index)
        self.id = id
        self.dict_attributes = {}

    @property
    def t_inf(self):
        """
        Getter for the time of infection attribute.

        Returns
        -------
        int
            The time of infection.
        """
        return self._t_inf

    @t_inf.setter
    def t_inf(self, t_inf):
        """
        Setter for the time of infection attribute.

        Parameters
        ----------
        t_inf : int
            The time of infection.
        """
        self._t_inf = t_inf

    def get_genetic_str(self):
        """
        Return the genetic data of the host as a string.

        Returns
        -------
        str
            The elements of ``genetic_data`` concatenated without separator.
        """
        return "".join(self.genetic_data)

    def __str__(self):
        """
        Return a string with the id of the host.

        Returns
        -------
        str
            ``str(self.id)``.
        """
        return str(self.id)

    def __int__(self):
        """
        Return the index of the host.

        Returns
        -------
        int
            The index of the host.
        """
        return self.index
#Functions
def create_genome(chain_length):
    """
    Build a random genome of the requested length.

    Parameters
    ----------
    chain_length : int
        Number of nucleotides to draw.

    Returns
    -------
    list
        A list of ``chain_length`` single-character strings, each drawn
        independently with ``random.choice`` from ``"AGCT"``.

    Examples
    --------
    >>> genome = create_genome(10)
    >>> len(genome)
    10
    >>> set(genome) <= set("AGCT")
    True
    """
    alphabet = "AGCT"
    genome = []
    for _ in range(chain_length):
        genome.append(choice(alphabet))
    return genome
def binom_mutation(chain_length, p, genome):
    """
    Mutate a binomially distributed number of positions of a genome.

    The number of mutated positions is drawn from a binomial distribution
    with ``chain_length`` trials and success probability ``p``. That many
    distinct positions are then picked from ``range(chain_length)`` and the
    nucleotide at each one is replaced by a different nucleotide.

    Parameters
    ----------
    chain_length : int
        The length of the genome chain.
    p : float
        The probability of mutation for each element in the chain.
    genome : str or list
        The original genome sequence. It is copied, not modified.

    Returns
    -------
    list
        The mutated genome sequence.

    Notes
    -----
    Substitution rules for a selected position:

    - 'A' is replaced by a random choice from 'CTG'.
    - 'C' is replaced by a random choice from 'ATG'.
    - 'T' is replaced by a random choice from 'ACG'.
    - 'G' is replaced by a random choice from 'ACT'.

    Any other symbol at a selected position is left untouched.

    Examples
    --------
    >>> genome = ['A', 'T', 'C', 'G', 'G', 'A', 'T', 'C', 'G', 'A']
    >>> mutated_genome = binom_mutation(len(genome), 0.2, genome)
    >>> len(mutated_genome) == len(genome)
    True

    See Also
    --------
    one_mutation : Perform a single mutation on a genome
    """
    # For each nucleotide, the candidates it may turn into.
    substitutions = {"A": "CTG", "C": "ATG", "T": "ACG", "G": "ACT"}

    n_changes = np.random.binomial(n=chain_length, p=p, size=1)[0]
    positions = sample(range(chain_length), n_changes)

    mutated = list(genome)
    for position in positions:
        candidates = substitutions.get(mutated[position])
        if candidates is not None:
            mutated[position] = choice(candidates)
    return mutated
def one_mutation(chain_length, p, genome):
    """
    Mutate exactly one randomly chosen position of a genome.

    One position is drawn from ``range(chain_length)`` and the nucleotide
    found there is replaced by a different nucleotide.

    Parameters
    ----------
    chain_length : int
        The length of the genome chain.
    p : float
        The probability of mutation for each element in the chain. It is
        accepted for signature parity with ``binom_mutation`` but is not
        used by this function.
    genome : str or list
        The original genome sequence. It is copied, not modified.

    Returns
    -------
    list
        The mutated genome sequence.

    Notes
    -----
    Substitution rules for the selected position:

    - 'A' is replaced by a random choice from 'CTG'.
    - 'C' is replaced by a random choice from 'ATG'.
    - 'T' is replaced by a random choice from 'ACG'.
    - 'G' is replaced by a random choice from 'ACT'.

    Any other symbol at the selected position is left untouched.

    Examples
    --------
    >>> genome = ['A', 'T', 'C', 'G', 'G', 'A', 'T', 'C', 'G', 'A']
    >>> mutated_genome = one_mutation(len(genome), 0.2, genome)
    >>> sum(a != b for a, b in zip(genome, mutated_genome))
    1

    See Also
    --------
    binom_mutation : Perform binomial mutation on a genome
    """
    # For each nucleotide, the candidates it may turn into.
    substitutions = {"A": "CTG", "C": "ATG", "T": "ACG", "G": "ACT"}

    mutated = list(genome)
    (position,) = sample(range(chain_length), 1)

    candidates = substitutions.get(mutated[position])
    if candidates is not None:
        mutated[position] = choice(candidates)
    return mutated
def average_mutations(mu, P_mut, tau, Dt, host_genetic):
    """
    Generate a chain of single mutations spread evenly over a time interval.

    The number of mutations is ``int(mu * Dt / P_mut)``. Each mutation is
    produced by ``one_mutation`` applied to the previous sequence in the
    chain (the first one to ``host_genetic``), and consecutive mutations are
    separated by ``Dt / n_mut`` starting from ``tau``.

    Parameters
    ----------
    mu : float
        The mutation rate.
    P_mut : float
        The probability of mutation. Must be non-zero.
    tau : float
        The current time, i.e. the start of the interval.
    Dt : float
        The time interval.
    host_genetic : list
        The genetic sequence of the host.

    Returns
    -------
    tuple
        A tuple containing:

        - mutations : list
            The successive mutated sequences. When no mutation is expected
            this is ``[host_genetic]``.
        - t_mutations : list
            The time of each mutation. Empty when no mutation is expected.

    Raises
    ------
    ZeroDivisionError
        If ``P_mut`` is 0.
    """
    n_mut = int(mu * Dt / P_mut)  # number of mutations in the interval

    # Bail out before any division by n_mut: with no expected mutation the
    # genome is returned unchanged and there are no mutation times.
    if n_mut <= 0:
        return [host_genetic], []

    # The mutations are evenly spaced inside the interval Dt.
    step = Dt / n_mut
    chain_length = len(host_genetic)

    mutations = []
    t_mutations = []
    current = host_genetic
    for _ in range(n_mut):
        # Each mutation builds on the previously mutated sequence.
        current = one_mutation(chain_length, P_mut, current)
        tau += step
        mutations.append(current)
        t_mutations.append(tau)
    return mutations, t_mutations