Skip to content

Commit

Permalink
Add uchime2_denovo
Browse files Browse the repository at this point in the history
  • Loading branch information
colinbrislawn committed Sep 29, 2024
1 parent 475bfd2 commit 31dcbc3
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 0 deletions.
32 changes: 32 additions & 0 deletions q2_vsearch/_chimera.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,35 @@ def _uchime_denovo(sequences, table, dn, mindiffs, mindiv, minh, xn):
run_command(cmd)

return cmd, chimeras, nonchimeras, uchime_stats


def uchime2_denovo(sequences: DNAFASTAFormat,
table: biom.Table,
dn: float = _uchime_defaults['dn'],
xn: float = _uchime_defaults['xn']) \
-> (DNAFASTAFormat, DNAFASTAFormat, UchimeStatsFmt):
cmd, chimeras, nonchimeras, uchime_stats = \
_uchime2_denovo(sequences, table, dn, xn)
return chimeras, nonchimeras, uchime_stats


def _uchime2_denovo(sequences, table, dn, xn):
# this function only exists to simplify testing
chimeras = DNAFASTAFormat()
nonchimeras = DNAFASTAFormat()
uchime_stats = UchimeStatsFmt()
with tempfile.NamedTemporaryFile() as fasta_with_sizes:
_fasta_with_sizes(str(sequences), fasta_with_sizes.name, table)
cmd = ['vsearch',
'--uchime2_denovo', fasta_with_sizes.name,
'--uchimeout', str(uchime_stats),
'--nonchimeras', str(nonchimeras),
'--chimeras', str(chimeras),
'--dn', str(dn),
'--xn', str(xn),
'--qmask', 'none', # ensures no lowercase DNA chars
'--xsize',
'--fasta_width', '0']
run_command(cmd)

return cmd, chimeras, nonchimeras, uchime_stats
99 changes: 99 additions & 0 deletions q2_vsearch/tests/test_chimera.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from qiime2.util import redirected_stdio
from q2_types.feature_data import DNAFASTAFormat
from q2_vsearch._chimera import (uchime_denovo, _uchime_denovo,
uchime2_denovo, _uchime2_denovo,
uchime_ref, _uchime_ref)
from q2_vsearch._format import UchimeStatsFmt
from .test_cluster_features import _read_seqs
Expand Down Expand Up @@ -114,6 +115,104 @@ def test_uchime_denovo_no_chimeras_alt_params(self):
self.assertTrue('--xn 9.0' in cmd)


class Uchime2DenovoTests(TestPluginBase):

package = 'q2_vsearch.tests'

def setUp(self):
super().setUp()
input_sequences_fp = self.get_data_path('dna-sequences-3.fasta')
self.input_sequences = DNAFASTAFormat(input_sequences_fp, mode='r')
self.input_sequences_list = _read_seqs(self.input_sequences)

self.input_table = biom.Table(np.array([[100, 101, 103],
[99, 98, 99],
[4, 5, 6],
[2, 2, 2]]),
['feature1', 'feature2', 'feature3',
'feature4'],
['sample1', 'sample2', 'sample3'])

def test_uchime2_denovo(self):
with redirected_stdio(stderr=os.devnull):
chime, nonchime, stats = uchime2_denovo(
sequences=self.input_sequences, table=self.input_table)

obs_chime = _read_seqs(chime)
# >feature4 is the chimera!
exp_chime = [self.input_sequences_list[3]]
self.assertEqual(obs_chime, exp_chime)

# sequences are reverse-sorted by abundance in output
obs_nonchime = _read_seqs(nonchime)
exp_nonchime = [self.input_sequences_list[0],
self.input_sequences_list[1],
self.input_sequences_list[2]]
# Note how >feature4 is gone!
self.assertEqual(obs_nonchime, exp_nonchime)

with stats.open() as stats_fh:
stats_text = stats_fh.read()
self.assertTrue('feature1' in stats_text)
self.assertTrue('feature2' in stats_text)
self.assertTrue('feature3' in stats_text)
self.assertTrue('feature4' in stats_text)
stats_lines = [e for e in stats_text.split('\n')
if len(e) > 0]
self.assertEqual(len(stats_lines), 4)

def test_uchime2_denovo_no_chimeras(self):
input_table = biom.Table(np.array([[3, 4, 2],
[1, 0, 0],
[4, 5, 6],
[2, 2, 2]]),
['feature1', 'feature2', 'feature3',
'feature4'],
['sample1', 'sample2', 'sample3'])
with redirected_stdio(stderr=os.devnull):
chime, nonchime, stats = uchime2_denovo(
sequences=self.input_sequences, table=input_table)

obs_chime = _read_seqs(chime)
exp_chime = []
self.assertEqual(obs_chime, exp_chime)

# sequences are reverse-sorted by abundance in output
obs_nonchime = _read_seqs(nonchime)
exp_nonchime = [self.input_sequences_list[2],
self.input_sequences_list[0],
self.input_sequences_list[3],
self.input_sequences_list[1]]
self.assertEqual(obs_nonchime, exp_nonchime)

with stats.open() as stats_fh:
stats_text = stats_fh.read()
self.assertTrue('feature1' in stats_text)
self.assertTrue('feature2' in stats_text)
self.assertTrue('feature3' in stats_text)
self.assertTrue('feature4' in stats_text)
stats_lines = [e for e in stats_text.split('\n')
if len(e) > 0]
self.assertEqual(len(stats_lines), 4)

# Is also needed for this flavor of the function?
# Removing it still keeps coverage at 100%
# def test_uchime2_denovo_no_chimeras_alt_params(self):
# with redirected_stdio(stderr=os.devnull):
# cmd, chime, nonchime, stats = _uchime2_denovo(
# sequences=self.input_sequences, table=self.input_table,
# dn=9999.42, xn=9.01)
# cmd = ' '.join(cmd)
# self.assertTrue('--dn 9999.42' in cmd)
# self.assertTrue('--xn 9.01' in cmd)

# obs_chime = _read_seqs(chime)
# # >feature4 is the chimera!
# exp_chime = [self.input_sequences_list[3]]
# self.assertEqual(obs_chime, exp_chime)



class UchimeRefTests(TestPluginBase):

package = 'q2_vsearch.tests'
Expand Down

0 comments on commit 31dcbc3

Please sign in to comment.