Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sample mask #896

Merged
merged 11 commits into from
May 15, 2024
57 changes: 16 additions & 41 deletions tests/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import msprime
import pytest
from test_sgkit import make_ts_and_zarr
import tsutil

import tsinfer

Expand Down Expand Up @@ -43,48 +43,23 @@ def test_dask_match_ancestors(tmpdir):


# NOTE(review): this span is a flattened diff hunk -- the +/- markers and all
# indentation were lost in extraction, so removed and added lines appear
# back to back. The page header reports "16 additions & 41 deletions", which
# is consistent with the old/new split annotated below -- confirm against the
# actual pull request before relying on it.

# Skip the test on Windows; the stated reason is that cyvcf2 is unavailable.
@pytest.mark.skipif(sys.platform == "win32", reason="No cyvcf2 on Windows")
# Presumably the removed (old) signature, superseded by the next line.
def test_dask_match_samples(tmp_path):
# Presumably the added (new) signature; it takes both tmp_path and tmpdir,
# which are forwarded to the tsutil helper below.
def test_dask_match_with_mask(tmp_path, tmpdir):
# dask.distributed is imported inside the test rather than at module level.
from dask.distributed import Client
from dask.distributed import LocalCluster

# Local cluster with processes=False, two workers, one thread per worker.
cluster = LocalCluster(processes=False, threads_per_worker=1, n_workers=2)
# The client object is never referenced again (hence the lint suppression).
# NOTE(review): flake8 documents the form "# noqa: F841" (with a colon);
# confirm this directive is interpreted as intended. Neither the client nor
# the cluster is closed in the visible lines.
client = Client(cluster) # noqa F841
# --- Presumably the removed (old) body starts here ---
# It runs match_ancestors and match_samples once with use_dask=True and once
# without, using otherwise identical arguments, and asserts that the
# resulting tables are equal when provenance is ignored.
ts, zarr_path = make_ts_and_zarr(tmp_path)
sd = tsinfer.SgkitSampleData(zarr_path)
anc = tsinfer.generate_ancestors(sd, path=str(tmp_path / "anc"), num_threads=2)
anc_ts_dask = tsinfer.match_ancestors(
sd,
anc,
recombination_rate=2e-8,
precision=13,
path_compression=True,
num_threads=1,
use_dask=True,
)
anc_ts = tsinfer.match_ancestors(
sd,
anc,
recombination_rate=2e-8,
precision=13,
path_compression=True,
num_threads=1,
)
anc_ts.tables.assert_equals(anc_ts_dask.tables, ignore_provenance=True)
inf_ts_dask = tsinfer.match_samples(
sd,
anc_ts,
recombination_rate=2e-8,
precision=13,
path_compression=True,
num_threads=1,
use_dask=True,
)
inf_ts = tsinfer.match_samples(
sd,
anc_ts,
recombination_rate=2e-8,
precision=13,
path_compression=True,
num_threads=1,
)
inf_ts.tables.assert_equals(inf_ts_dask.tables, ignore_provenance=True)
# --- Presumably the added (new) body starts here ---
# The helper returns four values; samples_mask and variant_mask are unpacked
# but not used in the visible lines.
(
mat_sd,
mask_sd,
samples_mask,
variant_mask,
) = tsutil.make_materialized_and_masked_sampledata(tmp_path, tmpdir)
# Ancestors are generated separately for each of the two sample-data objects.
mat_anc = tsinfer.generate_ancestors(mat_sd, path=str(tmp_path / "mat_anc"))
mask_anc = tsinfer.generate_ancestors(mask_sd, path=str(tmp_path / "mask_anc"))
# Ancestor matching is requested with use_dask=True for both inputs.
mat_anc_ts = tsinfer.match_ancestors(mat_sd, mat_anc, use_dask=True)
mask_anc_ts = tsinfer.match_ancestors(mask_sd, mask_anc, use_dask=True)
# NOTE(review): unlike the old body, match_samples is called here without
# use_dask=True -- confirm that dropping it from this test is intentional.
mat_ts = tsinfer.match_samples(mat_sd, mat_anc_ts)
mask_ts = tsinfer.match_samples(mask_sd, mask_anc_ts)

# The two inferred tree sequences must have equal tables, ignoring provenance.
mat_ts.tables.assert_equals(mask_ts.tables, ignore_provenance=True)
Loading
Loading