diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6019138..294aeb0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ # pre-commit run --all-files repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 + rev: v4.4.0 hooks: - id: check-merge-conflict - id: debug-statements @@ -9,31 +9,31 @@ repos: - id: check-case-conflict - id: check-yaml - repo: https://github.com/asottile/reorder_python_imports - rev: v3.9.0 + rev: v3.10.0 hooks: - id: reorder-python-imports args: [--application-directories=python, ] - repo: https://github.com/asottile/pyupgrade - rev: v3.2.2 + rev: v3.10.1 hooks: - id: pyupgrade args: [--py3-plus, --py37-plus] - repo: https://github.com/psf/black - rev: 22.10.0 + rev: 23.7.0 hooks: - id: black language_version: python3 - repo: https://github.com/pycqa/flake8 - rev: 5.0.4 + rev: 6.1.0 hooks: - id: flake8 args: [--config=.flake8] additional_dependencies: ["flake8-bugbear==22.10.27", "flake8-builtins==2.0.1"] - - repo: https://github.com/asottile/blacken-docs - rev: v1.12.1 + - repo: https://github.com/adamchainz/blacken-docs + rev: 1.16.0 hooks: - id: blacken-docs args: [--skip-errors] - additional_dependencies: [black==22.3.0] + additional_dependencies: [black==23.7.0] language_version: python3 \ No newline at end of file diff --git a/stats_tests.py b/stats_tests.py deleted file mode 100644 index f852a33..0000000 --- a/stats_tests.py +++ /dev/null @@ -1,484 +0,0 @@ -import inspect -import itertools -import pathlib -import sys - -import matplotlib.pyplot as plt -import numpy as np -import scipy.stats as stats -import statsmodels.api as sm -import tskit -import tstrait -from tqdm import tqdm - - -def sim_tree_seq(): - """ - The following tree sequence will be generated: - 10 - ┏━┻┓ - ┃ 9 - ┃ ┏┻━┓ - ┃ ┃ 8 - ┃ ┃ ┏┻━┓ - ┃ ┃ ┃ 7 - ┃ ┃ ┃ ┏┻━┓ - ┃ ┃ ┃ ┃ 6 - ┃ ┃ ┃ ┃ ┏┻━┓ - 0 1 2 3 4 5 - - Individual 0: Node 0 and 1 - Individual 1: Node 2 and 3 - Individual 3: Node 4 and 5 - - Site 0 Ancestral State: "A" - Causal Mutation: Node 8 - Reverse Mutation: Node 4 - - Site 1 Ancestral State: "A" - Causal Mutation: Node 5 - - Site 0 Genotype: - [A, A, T, T, A, T] - Ancestral state: A - Causal allele freq: 0.5 - Individual 0: 0 causal - Individual 1: 2 causal - Individual 2: 1 causal - - Site 1 Genotype: - [A, A, A, A, A, T] - Ancestral state: T - Causal allele freq: 1/6 - Individual 0: 0 causal - Individual 1: 0 causal - Individual 2: 1 causal - - Effect size distribution: - - SD Formula: - trait_sd / sqrt(2) * [sqrt(2 * freq * (1 - freq))] ^ alpha - sqrt(2) from 2 causal sites - """ - ts = tskit.Tree.generate_comb(6, span=2).tree_sequence - - tables = ts.dump_tables() - for j in range(2): - tables.sites.add_row(j, "A") - - tables.individuals.add_row() - tables.individuals.add_row() - tables.individuals.add_row() - individuals = tables.nodes.individual - individuals[0] = 0 - individuals[1] = 0 - individuals[2] = 1 - individuals[3] = 1 - individuals[4] = 2 - individuals[5] = 2 - tables.nodes.individual = individuals - tables.mutations.add_row(site=0, node=8, derived_state="T") - tables.mutations.add_row(site=0, node=4, derived_state="A", parent=0) - tables.mutations.add_row(site=1, node=5, derived_state="T") - ts = tables.tree_sequence() - return ts - - -def sim_tree_seq_freq(alpha): - const1 = np.sqrt(pow(2 * 0.5 * (1 - 0.5), alpha)) - const2 = np.sqrt(pow(2 * 1 / 6 * (1 - 1 / 6), alpha)) - - return const1, const2 - - -class Test: - def __init__(self, basedir, cl_name): - self.basedir = basedir - self.name = cl_name - self.set_output_dir(cl_name) - - def set_output_dir(self, cl_name): - output_dir = pathlib.Path(self.basedir) / cl_name - output_dir.mkdir(parents=True, exist_ok=True) - self.output_dir = output_dir - - def require_output_dir(self, folder_name): - output_dir = self.output_dir / folder_name - output_dir.mkdir(parents=True, exist_ok=True) - - def _get_tests(self): - return [ - value - for name, value in inspect.getmembers(self) - if name.startswith("test_") - ] - - def _run_tests(self): - all_results = self._get_tests() - print(f"[+] Running: {self.name}.") - print(f"[+] Collected {len(all_results)} subtest(s).") - for method in all_results: - method() - - def _build_filename(self, filename, extension=".png"): - return self.output_dir / (filename + extension) - - def plot_qq_compare(self, v1, v2, x_label, y_label, filename, title=""): - sm.qqplot_2samples(v1, v2, x_label, y_label, line="45") - plt.title(title) - f = self._build_filename(filename) - plt.savefig(f, dpi=72) - plt.close("all") - - def plot_qq_normal(self, data, loc, scale, filename, title=""): - sm.qqplot(data, stats.norm, loc=loc, scale=scale, line="45") - plt.title(title) - f = self._build_filename(filename) - plt.savefig(f, dpi=72) - plt.close("all") - - def plot_qq_exponential(self, data, scale, filename, title=""): - sm.qqplot(data, stats.expon, scale=scale, line="45") - plt.title(title) - f = self._build_filename(filename) - plt.savefig(f, dpi=72) - plt.close("all") - - -class TestNormal(Test): - def test_normal(self): - - alpha_array = np.array([0, -0.3, 1]) - mean_array = np.array([0, 1]) - var_array = np.array([1, 2]) - - ts = sim_tree_seq() - - count = 0 - - prod = itertools.product(alpha_array, mean_array, var_array) - for element in tqdm(prod, total=12): - alpha = element[0] - mean = element[1] - var = element[2] - sd = np.sqrt(var) - const0, const1 = sim_tree_seq_freq(alpha) - model = tstrait.trait_model(distribution="normal", mean=mean, var=var) - effect_size0 = np.zeros(1000) - effect_size1 = np.zeros(1000) - for i in range(1000): - sim_result = tstrait.sim_trait( - ts=ts, - num_causal=2, - model=model, - alpha=alpha, - random_seed=i + 1000 * count, - ) - effect_size0[i] = sim_result["effect_size"][0] - effect_size1[i] = sim_result["effect_size"][1] - self.plot_qq_normal( - data=effect_size0, - loc=mean * const0 / 2, - scale=sd * const0 / 2, - filename=f"effect_size_0_{count}", - title=f"EffectSize0, Normal, alpha = {alpha}, " - f"mean = {mean}, var = {var}", - ) - self.plot_qq_normal( - data=effect_size1, - loc=mean * const1 / 2, - scale=sd * const1 / 2, - filename=f"effect_size_1_{count}", - title=f"EffectSize1, Normal, alpha = {alpha}, " - f"mean = {mean}, var = {var}", - ) - count += 1 - - -class TestExponential(Test): - def test_exponential(self): - - alpha_array = np.array([0, -0.3, 1]) - scale_array = np.array([1, 2.5]) - - ts = sim_tree_seq() - - count = 0 - - prod = itertools.product(alpha_array, scale_array) - for element in tqdm(prod, total=6): - alpha = element[0] - scale = element[1] - const0, const1 = sim_tree_seq_freq(alpha) - model = tstrait.trait_model( - distribution="exponential", scale=scale, negative=False - ) - effect_size0 = np.zeros(1000) - effect_size1 = np.zeros(1000) - for i in range(1000): - sim_result = tstrait.sim_trait( - ts=ts, - num_causal=2, - model=model, - alpha=alpha, - random_seed=i + 1000 * count, - ) - effect_size0[i] = sim_result["effect_size"][0] - effect_size1[i] = sim_result["effect_size"][1] - self.plot_qq_exponential( - data=effect_size0, - scale=scale / 2 * const0, - filename=f"effect_size_0_{count}", - title=f"EffectSize0, Exponential, alpha = {alpha}, " - f"scale = {scale}, negative = False", - ) - self.plot_qq_exponential( - data=effect_size1, - scale=scale / 2 * const1, - filename=f"effect_size_1_{count}", - title=f"EffectSize1, Expnential, alpha = {alpha}, " - f"scale = {scale}, negative = False", - ) - count += 1 - - def test_exponential_negative(self): - - alpha_array = np.array([0, -0.3, 1]) - scale_array = np.array([1, 2.5]) - - ts = sim_tree_seq() - - count = 0 - - prod = itertools.product(alpha_array, scale_array) - for element in tqdm(prod, total=6): - alpha = element[0] - scale = element[1] - const0, const1 = sim_tree_seq_freq(alpha) - model = tstrait.trait_model( - distribution="exponential", scale=scale, negative=True - ) - effect_size0 = np.zeros(1000) - effect_size1 = np.zeros(1000) - for i in range(1000): - sim_result = tstrait.sim_trait( - ts=ts, - num_causal=2, - model=model, - alpha=alpha, - random_seed=i + 1000 * count, - ) - effect_size0[i] = sim_result["effect_size"][0] - effect_size1[i] = sim_result["effect_size"][1] - rng = np.random.default_rng(count + 1) - exponential = rng.exponential(scale=scale / 2, size=1000) - exponential *= rng.choice([-1, 1], size=1000) - - self.plot_qq_compare( - v1=effect_size0, - v2=exponential * const0, - x_label="Simulated Effect Size", - y_label="Exponential Distribution", - filename=f"effect_size_0_negative_{count}", - title=f"EffectSize0, Exponential, alpha = {alpha}, " - f"scale = {scale}, negative = True", - ) - self.plot_qq_compare( - v1=effect_size1, - v2=exponential * const1, - x_label="Simulated Effect Size", - y_label="Exponential Distribution", - filename=f"effect_size_1_negative_{count}", - title=f"EffectSize1, Exponential, alpha = {alpha}, " - f"scale = {scale}, negative = True", - ) - - count += 1 - - -class TestT(Test): - def test_t(self): - - alpha_array = np.array([0, -0.3, 1]) - mean_array = np.array([0, 1]) - var_array = np.array([1, 2]) - df_array = np.array([10, 13.5]) - - ts = sim_tree_seq() - - count = 0 - - prod = itertools.product(alpha_array, mean_array, var_array, df_array) - for element in tqdm(prod, total=24): - alpha = element[0] - mean = element[1] - var = element[2] - df = element[3] - sd = np.sqrt(var) - const0, const1 = sim_tree_seq_freq(alpha) - model = tstrait.trait_model(distribution="t", mean=mean, var=var, df=df) - effect_size0 = np.zeros(1000) - effect_size1 = np.zeros(1000) - for i in range(1000): - sim_result = tstrait.sim_trait( - ts=ts, - num_causal=2, - model=model, - alpha=alpha, - random_seed=i + 1000 * count, - ) - effect_size0[i] = sim_result["effect_size"][0] - effect_size1[i] = sim_result["effect_size"][1] - rng = np.random.default_rng(count + 1) - simulated_t = rng.standard_t(df, size=1000) - simulated_t = simulated_t * sd + mean - simulated_t /= 2 - - self.plot_qq_compare( - v1=effect_size0, - v2=simulated_t * const0, - x_label="Simulated Effect Size", - y_label="T Distribution", - filename=f"effect_size_0_{count}", - title=f"EffectSize0, T, alpha = {alpha}, mean = {mean}, " - f"var = {var}, df = {df}", - ) - self.plot_qq_compare( - v1=effect_size1, - v2=simulated_t * const1, - x_label="Simulated Effect Size", - y_label="T Distribution", - filename=f"effect_size_1_{count}", - title=f"EffectSize1, T, alpha = {alpha}, mean = {mean}, " - f"var = {var}, df = {df}", - ) - count += 1 - - -class TestGamma(Test): - def test_gamma(self): - - alpha_array = np.array([0, -0.3, 1]) - shape_array = np.array([1, 3.5]) - scale_array = np.array([1, 2.5]) - - ts = sim_tree_seq() - - count = 0 - - prod = itertools.product(alpha_array, shape_array, scale_array) - for element in tqdm(prod, total=12): - alpha = element[0] - shape = element[1] - scale = element[2] - const0, const1 = sim_tree_seq_freq(alpha) - model = tstrait.trait_model( - distribution="gamma", shape=shape, scale=scale, negative=False - ) - effect_size0 = np.zeros(1000) - effect_size1 = np.zeros(1000) - for i in range(1000): - sim_result = tstrait.sim_trait( - ts=ts, - num_causal=2, - model=model, - alpha=alpha, - random_seed=i + 1000 * count, - ) - effect_size0[i] = sim_result["effect_size"][0] - effect_size1[i] = sim_result["effect_size"][1] - - rng = np.random.default_rng(count + 1) - gamma = rng.gamma(shape=shape, scale=scale, size=1000) - gamma /= 2 - - self.plot_qq_compare( - v1=effect_size0, - v2=gamma * const0, - x_label="Simulated Effect Size", - y_label="Gamma Distribution", - filename=f"effect_size_0_{count}", - title=f"EffectSize0, Gamma, alpha = {alpha}, " - f"shape = {shape}, scale = {scale}, negative = False", - ) - self.plot_qq_compare( - v1=effect_size1, - v2=gamma * const1, - x_label="Simulated Effect Size", - y_label="Gamma Distribution", - filename=f"effect_size_1_{count}", - title=f"EffectSize1, Gamma, alpha = {alpha}, " - f"shape = {shape}, scale = {scale}, negative = False", - ) - count += 1 - - def test_gamma_negative(self): - - alpha_array = np.array([0, -0.3, 1]) - shape_array = np.array([1, 3.5]) - scale_array = np.array([1, 2.5]) - - ts = sim_tree_seq() - - count = 0 - - prod = itertools.product(alpha_array, shape_array, scale_array) - for element in tqdm(prod, total=12): - alpha = element[0] - shape = element[1] - scale = element[2] - const0, const1 = sim_tree_seq_freq(alpha) - model = tstrait.trait_model( - distribution="gamma", shape=shape, scale=scale, negative=True - ) - effect_size0 = np.zeros(1000) - effect_size1 = np.zeros(1000) - for i in range(1000): - sim_result = tstrait.sim_trait( - ts=ts, - num_causal=2, - model=model, - alpha=alpha, - random_seed=i + 1000 * count, - ) - effect_size0[i] = sim_result["effect_size"][0] - effect_size1[i] = sim_result["effect_size"][1] - - rng = np.random.default_rng(count + 1) - gamma = rng.gamma(shape=shape, scale=scale, size=1000) - gamma /= 2 - gamma *= rng.choice([-1, 1], size=1000) - - self.plot_qq_compare( - v1=effect_size0, - v2=gamma * const0, - x_label="Simulated Effect Size", - y_label="Gamma Distribution", - filename=f"effect_size_0_negative_{count}", - title=f"EffectSize0, Gamma, alpha = {alpha}, " - f"shape = {shape}, scale = {scale}, negative = True", - ) - self.plot_qq_compare( - v1=effect_size1, - v2=gamma * const1, - x_label="Simulated Effect Size", - y_label="Gamma Distribution", - filename=f"effect_size_1_negative_{count}", - title=f"EffectSize1, Gamma, alpha = {alpha}, " - f"shape = {shape}, scale = {scale}, negative = True", - ) - count += 1 - - -def run_tests(suite, output_dir): - print(f"[+] Test suite contains {len(suite)} tests.") - for cl_name in suite: - instance = getattr(sys.modules[__name__], cl_name)(output_dir, cl_name) - instance._run_tests() - - -if __name__ == "__main__": - run_tests( - ["TestNormal", "TestExponential", "TestT", "TestGamma"], - "_output/stats_tests_output", - ) diff --git a/tests/test_base.py b/tests/test_base.py index a84d537..6eed76f 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -31,7 +31,7 @@ class TestVal: ) def test_val(self, n): n = _check_val(n, "n") - assert type(n) == float + assert isinstance(n, float) @pytest.mark.parametrize("n", [4j, np.array([4]), "1"]) def test_val_bad(self, n): diff --git a/tests/test_genetic_value.py b/tests/test_genetic_value.py index 685adb7..ed3233c 100644 --- a/tests/test_genetic_value.py +++ b/tests/test_genetic_value.py @@ -96,6 +96,7 @@ def check_dimensions(self, result, nrow, input_df): "effect_size", "trait_id", "causal_state", + "allele_frequency", ] assert len(input_df) == len(effect_size_df) @@ -193,7 +194,9 @@ def test_binary_tree(self): ts = binary_tree() tree = ts.first() - genetic = tstrait.GeneticValue(ts, trait_df, alpha=-1, random_seed=1) + genetic = tstrait.genetic_value._GeneticValue( + ts, trait_df, alpha=-1, random_seed=1 + ) g0 = genetic._individual_genotype(tree, ts.site(0), "T", ts.num_nodes) g1 = genetic._individual_genotype(tree, ts.site(1), "T", ts.num_nodes) g2 = genetic._individual_genotype(tree, ts.site(2), "C", ts.num_nodes) @@ -225,6 +228,7 @@ def test_binary_tree(self): "effect_size": [first, second], "trait_id": [0, 0], "causal_state": ["T", "C"], + "allele_frequency": [1 / 2, 1 / 4], } ) genetic_df = pd.DataFrame( @@ -253,7 +257,9 @@ def test_diff_ind_tree(self): ts = diff_ind_tree() tree = ts.first() - genetic = tstrait.GeneticValue(ts, trait_df, alpha=0, random_seed=1) + genetic = tstrait.genetic_value._GeneticValue( + ts, trait_df, alpha=0, random_seed=1 + ) g0 = genetic._individual_genotype(tree, ts.site(0), "T", ts.num_nodes) g1 = genetic._individual_genotype(tree, ts.site(1), "T", ts.num_nodes) g2 = genetic._individual_genotype(tree, ts.site(2), "C", ts.num_nodes) @@ -285,6 +291,7 @@ def test_diff_ind_tree(self): "effect_size": [first, second], "trait_id": [0, 0], "causal_state": ["T", "C"], + "allele_frequency": [1 / 2, 1 / 4], } ) genetic_df = pd.DataFrame( @@ -313,7 +320,9 @@ def test_non_binary_tree(self): ts = non_binary_tree() tree = ts.first() - genetic = tstrait.GeneticValue(ts, trait_df, alpha=-1, random_seed=0) + genetic = tstrait.genetic_value._GeneticValue( + ts, trait_df, alpha=-1, random_seed=0 + ) g0 = genetic._individual_genotype(tree, ts.site(0), "T", ts.num_nodes) g1 = genetic._individual_genotype(tree, ts.site(1), "C", ts.num_nodes) @@ -335,6 +344,7 @@ def test_non_binary_tree(self): "effect_size": [freqdep(-1, 1 / 2)], "trait_id": [0], "causal_state": ["T"], + "allele_frequency": [1 / 2], } ) genetic_df = pd.DataFrame( @@ -363,7 +373,9 @@ def test_triploid(self, sample_df): ts = triploid_tree() tree = ts.first() - genetic = tstrait.GeneticValue(ts, sample_df, alpha=-1, random_seed=1) + genetic = tstrait.genetic_value._GeneticValue( + ts, sample_df, alpha=-1, random_seed=1 + ) g0 = genetic._individual_genotype(tree, ts.site(0), "T", ts.num_nodes) g1 = genetic._individual_genotype(tree, ts.site(1), "C", ts.num_nodes) @@ -385,6 +397,7 @@ def test_triploid(self, sample_df): "effect_size": [freqdep(-1, 1 / 2)], "trait_id": [0], "causal_state": ["T"], + "allele_frequency": [1 / 2], } ) genetic_df = pd.DataFrame( @@ -420,7 +433,13 @@ def test_allele_freq_one(self, sample_trait_model): ts=ts, trait_df=trait_df, alpha=-1, random_seed=2 ) effect_size_df = pd.DataFrame( - {"site_id": [4], "effect_size": [0], "trait_id": [0], "causal_state": ["A"]} + { + "site_id": [4], + "effect_size": [0], + "trait_id": [0], + "causal_state": ["A"], + "allele_frequency": [1], + } ) genetic_df = pd.DataFrame( { @@ -463,6 +482,7 @@ def test_tree_seq(self): "effect_size": [first, second], "trait_id": [0, 0], "causal_state": ["T", "C"], + "allele_frequency": [3 / 4, 1 / 2], } ) genetic_df = pd.DataFrame( @@ -502,6 +522,7 @@ def test_tree_seq_multiple_trait(self): "effect_size": [first, first * 2, second, second * 2], "trait_id": np.tile([0, 1], 2), "causal_state": np.repeat(["T", "C"], 2), + "allele_frequency": np.repeat([3 / 4, 1 / 2], 2), } ) genetic_df = pd.DataFrame( diff --git a/tstrait/__init__.py b/tstrait/__init__.py index fb1f3a7..8dccf45 100644 --- a/tstrait/__init__.py +++ b/tstrait/__init__.py @@ -9,7 +9,6 @@ from .provenance import __version__ # NOQA from .simulate_effect_size import ( sim_trait, - TraitSimulator, ) # noreorder from .simulate_phenotype import ( PhenotypeResult, @@ -28,17 +27,14 @@ from .genetic_value import ( sim_genetic, GeneticResult, - GeneticValue, ) # noreorder from .simulate_environment import ( sim_env, - EnvSimulator, ) # noreorder __all__ = [ "__version__", "sim_trait", - "TraitSimulator", "PhenotypeResult", "sim_phenotype", "trait_model", @@ -51,7 +47,5 @@ "TraitModelT", "sim_genetic", "GeneticResult", - "GeneticValue", "sim_env", - "EnvSimulator", ] diff --git a/tstrait/genetic_value.py b/tstrait/genetic_value.py index 1c5d021..7ce5d2f 100644 --- a/tstrait/genetic_value.py +++ b/tstrait/genetic_value.py @@ -14,7 +14,7 @@ class GeneticResult: """ Data class that contains effect size and genetic value dataframe. - Parameters + Attributes ---------- effect_size : pandas.DataFrame Dataframe that includes simulated effect sizes. @@ -70,7 +70,7 @@ def _traversal_genotype( return genotype -class GeneticValue: +class _GeneticValue: """GeneticValue class to compute genetic values of individuals. Parameters @@ -150,7 +150,7 @@ def _individual_genotype(self, tree, site, causal_state, num_nodes): return genotype - def compute_genetic_value(self): + def _compute_genetic_value(self): """Computes genetic values of individuals. Returns @@ -165,6 +165,7 @@ def compute_genetic_value(self): genetic_val_array = np.zeros((num_trait, num_ind)) causal_state_array = np.zeros(len(self.trait_df), dtype=object) freq_dep = np.zeros(len(self.trait_df)) + allele_freq_array = np.zeros(len(self.trait_df)) num_nodes = self.ts.num_nodes tree = tskit.Tree(self.ts) @@ -176,6 +177,7 @@ def compute_genetic_value(self): causal_state_array[i] = causal_state allele_freq = counts[causal_state] / num_samples freq_dep[i] = self._frequency_dependence(allele_freq) + allele_freq_array[i] = allele_freq individual_genotype = self._individual_genotype( tree=tree, @@ -197,6 +199,7 @@ def compute_genetic_value(self): self.trait_df["effect_size"] = np.multiply(self.trait_df.effect_size, freq_dep) self.trait_df["causal_state"] = causal_state_array + self.trait_df["allele_frequency"] = allele_freq_array genetic_result = GeneticResult(effect_size=self.trait_df, genetic=df) @@ -248,11 +251,11 @@ def sim_genetic(ts, trait_df, alpha=0, random_seed=None): 2. Data requirements - * Site IDs in **site_id** column must be sorted in an ascending order. Please refer - to :func:`pandas.DataFrame.sort_values` for details on sorting values in a - :class:`pandas.DataFrame`. + * Site IDs in **site_id** column must be sorted in an ascending order. Please + refer to :py:meth:`pandas.DataFrame.sort_values` for details on sorting + values in a :class:`pandas.DataFrame`. - * Trait IDs in **trait_id** column must start from zero and be consecutive. + * Trait IDs in **trait_id** column must start from zero and be consecutive. The simulation outputs of effect sizes and phenotypes are given as a :py:class:`pandas.DataFrame`. @@ -261,9 +264,10 @@ def sim_genetic(ts, trait_df, alpha=0, random_seed=None): resulting object and contains the following columns: * **site_id**: ID of sites that have causal mutation. - * **causal_state**: Causal state. * **effect_size**: Simulated effect size of causal mutation. * **trait_id**: Trait ID. + * **causal_state**: Causal state. + * **allele_frequency**: Allele frequency of causal mutation. The genetic value dataframe can be extracted by using ``.genetic`` in the resulting object and contains the following columns: @@ -288,10 +292,10 @@ def sim_genetic(ts, trait_df, alpha=0, random_seed=None): if np.min(trait_id) != 0 or np.max(trait_id) != len(trait_id) - 1: raise ValueError("trait_id must be consecutive and start from 0") - genetic = GeneticValue( + genetic = _GeneticValue( ts=ts, trait_df=trait_df, alpha=alpha, random_seed=random_seed ) - genetic_result = genetic.compute_genetic_value() + genetic_result = genetic._compute_genetic_value() return genetic_result diff --git a/tstrait/simulate_effect_size.py b/tstrait/simulate_effect_size.py index fd07cb6..fcc9c5c 100644 --- a/tstrait/simulate_effect_size.py +++ b/tstrait/simulate_effect_size.py @@ -6,7 +6,7 @@ from .base import _check_instance -class TraitSimulator: +class _TraitSimulator: """Simulator class to select causal alleles and simulate effect sizes of causal mutations. @@ -42,7 +42,7 @@ def _choose_causal_site(self): return site_id - def sim_causal_mutation(self): + def _sim_causal_mutation(self): """This method randomly chooses causal sites and the corresponding causal state based on the `num_causal` input. Afterwards, effect size of each causal site is simulated based on the trait model given by the `model` input. @@ -105,8 +105,8 @@ def sim_trait(ts, num_causal, model, random_seed=None): sim_genetic : The trait dataframe output can be used as an input to simulate genetic values. - Note - ---- + Notes + ----- The simulation output is given as a :py:class:`pandas.DataFrame` and contains the following columns: @@ -128,12 +128,12 @@ def sim_trait(ts, num_causal, model, random_seed=None): "num_causal must be an integer not greater than the number of sites in ts" ) - simulator = TraitSimulator( + simulator = _TraitSimulator( ts=ts, num_causal=num_causal, model=model, random_seed=random_seed, ) - trait_df = simulator.sim_causal_mutation() + trait_df = simulator._sim_causal_mutation() return trait_df diff --git a/tstrait/simulate_environment.py b/tstrait/simulate_environment.py index f71c9af..2db7333 100644 --- a/tstrait/simulate_environment.py +++ b/tstrait/simulate_environment.py @@ -5,7 +5,7 @@ from .base import _check_dataframe -class EnvSimulator: +class _EnvSimulator: """Simulator class to simulate environmental noise of individuals. Parameters @@ -33,7 +33,7 @@ def _sim_env(self, var, h2): return env_noise - def sim_environment(self): + def _sim_environment(self): """Simulate environmental values based on genetic values of individuals and narrow-sense heritability """ @@ -59,8 +59,8 @@ def sim_env(genetic_df, h2, random_seed=None): genetic_df : pandas.DataFrame Genetic value dataframe. h2 : float or array-like - Narrow-sense heritability. The dimension of `h2` must match the number of traits - to be simulated. + Narrow-sense heritability. When it is 0, environmental noise will be a vector of + zeros. The dimension of `h2` must match the number of traits to be simulated. random_seed : int, default None Random seed of simulation. If None, simulation will be conducted randomly. @@ -105,7 +105,7 @@ def sim_env(genetic_df, h2, random_seed=None): Examples -------- - See :ref:`env_simulation` for worked examples. + See :ref:`environment` for worked examples. """ genetic_df = _check_dataframe( genetic_df, ["trait_id", "individual_id", "genetic_value"], "genetic_df" @@ -125,12 +125,12 @@ def sim_env(genetic_df, h2, random_seed=None): if np.min(h2) <= 0 or np.max(h2) > 1: raise ValueError("Narrow-sense heritability must be 0 < h2 <= 1") - simulator = EnvSimulator( + simulator = _EnvSimulator( genetic_df=genetic_df, h2=h2, random_seed=random_seed, ) - phenotype_df = simulator.sim_environment() + phenotype_df = simulator._sim_environment() return phenotype_df diff --git a/tstrait/simulate_phenotype.py b/tstrait/simulate_phenotype.py index 0def1f9..6faf166 100644 --- a/tstrait/simulate_phenotype.py +++ b/tstrait/simulate_phenotype.py @@ -9,7 +9,7 @@ class PhenotypeResult: """ Dataclass that contains effect size dataframe and phenotype dataframe. - Parameters + Attributes ---------- effect_size : pandas.DataFrame Dataframe that includes simulated effect sizes. @@ -45,8 +45,8 @@ def sim_phenotype(ts, num_causal, model, h2, alpha=0, random_seed=None): model : tstrait.TraitModel Trait model that will be used to simulate effect sizes. h2 : float or array-like - Narrow-sense heritability. The dimension of `h2` must match the number of - traits to be simulated. + Narrow-sense heritability. When it is 0, environmental noise will be a vector of + zeros. The dimension of `h2` must match the number of traits to be simulated. alpha : float, default 0 Parameter that determines the degree of the frequency dependence model. Please see :ref:`frequency_dependence` for details on how this parameter influences @@ -79,10 +79,11 @@ def sim_phenotype(ts, num_causal, model, h2, alpha=0, random_seed=None): The effect size dataframe can be extracted by using ``.effect_size`` in the resulting object and contains the following columns: - * **site_id**: Site IDs that have causal mutation. + * **site_id**: ID of sites that have causal mutation + * **effect_size**: Genetic effect size of causal mutation + * **trait_id**: Trait ID and will be used in multi-trait simulation. * **causal_state**: Causal state. - * **effect_size**: Simulated effect size of causal mutation. - * **trait_id**: Trait ID. + * **allele_frequency**: Allele frequency of causal mutation. The phenotype dataframe can be extracted by using ``.phenotype`` in the resulting object and contains the following columns: diff --git a/tstrait/trait_model.py b/tstrait/trait_model.py index 6669c83..c172119 100644 --- a/tstrait/trait_model.py +++ b/tstrait/trait_model.py @@ -18,6 +18,8 @@ class TraitModel(metaclass=ABCMeta): ---------- name : str Name of the trait model. + num_trait : int + Number of traits to be simulated. See Also -------- @@ -30,8 +32,8 @@ class TraitModel(metaclass=ABCMeta): TraitModelMultivariateNormal : Return a multivariate normal distribution trait model. - Note - ---- + Notes + ----- This is the base class for all trait models in tstrait. All trait models should set all parameters in their ``__init__`` as arguments. """ @@ -72,13 +74,13 @@ class TraitModelNormal(TraitModel): trait_model : Construct a trait model. numpy.random.Generator.normal : Details on the input parameters and distribution. - Note - ---- - This is a trait model built on top of :py:func:`numpy.random.Generator.normal`, so + Notes + ----- + This is a trait model built on top of :py:meth:`numpy.random.Generator.normal`, so please see its documentation for the details of the normal distribution simulation. - Example - ------- + Examples + -------- Please see the docstring example of :func:`trait_model` for constructing a normal distribution trait model. """ @@ -134,16 +136,16 @@ class TraitModelExponential(TraitModel): -------- trait_model : Construct a trait model. numpy.random.Generator.exponential : Details on the input parameters and - distribution. + distribution. - Note - ---- - This is a trait model built on top of :py:func:`numpy.random.Generator.exponential`, + Notes + ----- + This is a trait model built on top of :py:meth:`numpy.random.Generator.exponential`, so please see its documentation for the details of the exponential distribution simulation. - Example - ------- + Examples + -------- Please see the docstring example of :func:`trait_model` for constructing an exponential distribution trait model. """ @@ -194,13 +196,13 @@ class TraitModelFixed(TraitModel): -------- trait_model : Construct a trait model. - Note - ---- + Notes + ----- This is a trait model that only gives the fixed value that is specified in `value`. - Example - ------- + Examples + -------- Please see the docstring example of :func:`trait_model` for constructing a fixed value trait model. """ @@ -253,14 +255,14 @@ class TraitModelT(TraitModel): trait_model : Construct a trait model. numpy.random.Generator.standard_t : Details on the input parameters and distribution. - Note - ---- - This is a trait model built on top of :py:func:`numpy.random.Generator.standard_t`, + Notes + ----- + This is a trait model built on top of :py:meth:`numpy.random.Generator.standard_t`, so please see its documentation for the details of the normal distribution simulation. - Example - ------- + Examples + -------- Please see the docstring example of :func:`trait_model` for constructing a student's t distribution trait model. """ @@ -318,14 +320,14 @@ class TraitModelGamma(TraitModel): trait_model : Construct a trait model. numpy.random.Generator.gamma : Details on the input parameters and distribution. - Note - ---- - This is a trait model built on top of :py:func:`numpy.random.Generator.gamma`, + Notes + ----- + This is a trait model built on top of :py:meth:`numpy.random.Generator.gamma`, so please see its documentation for the details of the gamma distribution simulation. - Example - ------- + Examples + -------- Please see the docstring example of :func:`trait_model` for constructing an gamma distribution trait model. """ @@ -382,17 +384,18 @@ class TraitModelMultivariateNormal(TraitModel): numpy.random.Generator.multivariate_normal : Details on the input parameters and distribution. - Note - ---- + Notes + ----- Multivariate normal distribution simulation is used in multi-trait simulation, which is described in :ref:`multi_trait`. This is a trait model built on top of - :py:func:`random.Generator.multivariate_normal`, so please see its documentation - for the details of the multivariate normal distribution simulation. + :py:meth:`numpy.random.Generator.multivariate_normal`, so please see its + documentation for the details of the multivariate normal distribution + simulation. - Example - ------- + Examples + -------- Please see the docstring example of :func:`trait_model` for constructing a multivariate normal distribution trait model. """