Skip to content

Commit

Permalink
Merge branch 'update_sourmash_latest' into ctb_misc2
Browse files (browse the repository at this point in the history)
  • Loading branch information
ctb committed Oct 12, 2024
2 parents 4f5fefd + 4778862 commit bd18277
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 6 deletions.
5 changes: 2 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ crate-type = ["cdylib"]
pyo3 = { version = "0.22.3", features = ["extension-module", "anyhow"] }
rayon = "1.10.0"
serde = { version = "1.0.210", features = ["derive"] }
#sourmash = { version = "0.15.2", features = ["branchwater"] }
sourmash = { git = "https://github.com/sourmash-bio/sourmash.git", branch = "refactor_rs_downsample", features = ["branchwater"] }
serde_json = "1.0.128"
sourmash = { version = "0.15.2", features = ["branchwater"] }
niffler = "2.4.0"
Expand Down
1 change: 1 addition & 0 deletions src/manysearch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ fn downsample_and_inflate_abundances(
// avoid downsampling if we can
if against_scaled != query_scaled {
let against_ds = against
.clone()
.downsample_scaled(query.scaled())
.expect("cannot downsample sketch");
(abunds, sum_weighted) = query.inflated_abundances(&against_ds)?;
Expand Down
4 changes: 2 additions & 2 deletions src/python/tests/test_fastmultigather.py
Original file line number Diff line number Diff line change
Expand Up @@ -992,15 +992,15 @@ def test_indexed_full_output(runtmp):
# check a few columns
average_ani = set(df['average_containment_ani'])
avg_ani = set([round(x, 4) for x in average_ani])
assert avg_ani == {0.8502, 0.8584, 0.8602}
assert avg_ani == {0.9221, 0.9306, 0.9316} # @CTB check against py gather

f_unique_weighted = set(df['f_unique_weighted'])
f_unique_weighted = set([round(x, 4) for x in f_unique_weighted])
assert f_unique_weighted == {0.0063, 0.002, 0.0062}

unique_intersect_bp = set(df['unique_intersect_bp'])
unique_intersect_bp = set([round(x,4) for x in unique_intersect_bp])
assert unique_intersect_bp == {44000, 18000, 22000}
assert unique_intersect_bp == {4400000, 1800000, 2200000}


def test_nonindexed_full_vs_sourmash_gather(runtmp):
Expand Down
5 changes: 4 additions & 1 deletion src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,10 @@ pub fn load_sketches_above_threshold(
if let Ok(against_sig) = coll.sig_from_record(against_record) {
if let Some(against_mh) = against_sig.minhash() {
// downsample against_mh, but keep original md5sum
let against_mh_ds = against_mh.downsample_scaled(query.scaled()).unwrap();
let against_mh_ds = against_mh
.clone()
.downsample_scaled(query.scaled())
.unwrap();
if let Ok(overlap) = against_mh_ds.count_common(query, false) {
if overlap >= threshold_hashes {
let result = PrefetchResult {
Expand Down

0 comments on commit bd18277

Please sign in to comment.