gather: save next_query directly for --output-unassigned

In gather(), the "nothing left to save" check now inspects
next_query.minhash instead of query.minhash, and the hand-built MinHash
copy is dropped in favour of writing next_query itself. Two tests are
added: one where nothing is left unassigned (no output file is written
and a notice appears on stderr), and one using protein signatures.

NOTE(review): this patch text was reflowed from a whitespace-collapsed
copy. Indentation, blank lines and continuation alignment were re-derived
from the hunk line counts and Python syntax, and runs of spaces inside
string literals (e.g. '4.9 Mbp 33.2% 100.0%') may have been collapsed.
Confirm against the repository before applying.

diff --git a/sourmash/commands.py b/sourmash/commands.py
index 1969d3491f..91479ac55a 100644
--- a/sourmash/commands.py
+++ b/sourmash/commands.py
@@ -694,22 +694,13 @@ def gather(args):
             sig.save_signatures([ r.match for r in found ], fp)
 
     if args.output_unassigned:
-        if not len(query.minhash):
-            notify('no unassigned hashes! not saving.')
+        if not len(next_query.minhash):
+            notify('no unassigned hashes to save with --output-unassigned!')
         else:
             notify('saving unassigned hashes to "{}"', args.output_unassigned)
 
-            with_abundance = next_query.minhash.track_abundance
-            e = MinHash(ksize=query.minhash.ksize, n=0, max_hash=new_max_hash,
-                        track_abundance=with_abundance)
-            if with_abundance:
-                abunds = next_query.minhash.hashes
-                e.set_abundances(abunds)
-            else:
-                e.add_many(next_query.minhash.hashes)
-
             with FileOutput(args.output_unassigned, 'wt') as fp:
-                sig.save_signatures([ sig.SourmashSignature(e) ], fp)
+                sig.save_signatures([ next_query ], fp)
 
 
 def multigather(args):
diff --git a/tests/test_sourmash.py b/tests/test_sourmash.py
index 22d53040b8..acab15e51f 100644
--- a/tests/test_sourmash.py
+++ b/tests/test_sourmash.py
@@ -3216,6 +3216,35 @@ def test_gather_metagenome_output_unassigned():
                     'NC_011294.1' in out))
 
 
+def test_gather_metagenome_output_unassigned_none():
+    # test what happens when there's nothing unassigned to output
+    with utils.TempDirectory() as location:
+        testdata_glob = utils.get_test_data('gather/GCF_*.sig')
+        testdata_sigs = glob.glob(testdata_glob)
+
+        query_sig = utils.get_test_data('gather/combined.sig')
+
+        cmd = 'gather {} {} -k 21'.format(query_sig, " ".join(testdata_sigs))
+        cmd += ' --output-unassigned=unassigned.sig'
+        cmd += ' --threshold=0'
+        status, out, err = utils.runscript('sourmash', cmd.split(' '),
+                                           in_directory=location)
+
+        print(out)
+        print(err)
+
+        assert 'found 12 matches total' in out
+        assert 'the recovered matches hit 100.0% of the query' in out
+        assert all(('4.9 Mbp 33.2% 100.0%' in out,
+                    'NC_003198.1 Salmonella enterica subsp...' in out))
+        assert all(('4.5 Mbp 0.1% 0.4%' in out,
+                    'NC_004631.1 Salmonella enterica subsp...' in out))
+
+        # now examine unassigned
+        assert not os.path.exists(os.path.join(location, 'unassigned.sig'))
+        assert 'no unassigned hashes to save with --output-unassigned!' in err
+
+
 @utils.in_tempdir
 def test_gather_metagenome_output_unassigned_nomatches(c):
     # test --output-unassigned when there are no matches
@@ -3234,6 +3263,28 @@ def test_gather_metagenome_output_unassigned_nomatches(c):
     assert x.minhash == y.minhash
 
 
+@utils.in_tempdir
+def test_gather_metagenome_output_unassigned_nomatches_protein(c):
+    # test --output-unassigned with protein signatures
+    query_sig = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig')
+    against_sig = utils.get_test_data('prot/protein/GCA_001593935.1_ASM159393v1_protein.faa.gz.sig')
+
+    c.run_sourmash('gather', query_sig, against_sig,
+                   '--output-unassigned', 'foo.sig')
+
+    print(c.last_result.out)
+    assert 'found 0 matches total;' in c.last_result.out
+
+    c.run_sourmash('sig', 'describe', c.output('foo.sig'))
+    print(c.last_result.out)
+
+    x = sourmash.load_one_signature(query_sig, ksize=57)
+    y = sourmash.load_one_signature(c.output('foo.sig'))
+
+    assert x.minhash == y.minhash
+    assert y.minhash.moltype == "protein"
+
+
 def test_gather_metagenome_downsample():
     # downsample w/scaled of 100,000
     with utils.TempDirectory() as location: