Skip to content

Commit

Permalink
ENH/API: add in minimum count filtering on search (#74)
Browse files: browse the repository at this point in the history
* ENH/API: add in minimum count filtering on search

* Addressing @antgonza's comment
  • Loading branch information
wasade authored and antgonza committed Mar 6, 2019
1 parent b2901ec commit e599d9b
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 9 deletions.
18 changes: 12 additions & 6 deletions redbiom/commands/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from . import cli


def _axis_search(from_, exact, context, ids, axis):
def _axis_search(from_, exact, context, ids, axis, min_count):
import redbiom._requests
import redbiom.util

Expand All @@ -12,7 +12,7 @@ def _axis_search(from_, exact, context, ids, axis):
it = redbiom.util.from_or_nargs(from_, ids)

# determine the opposite axis ids associated with query ids
observed = redbiom.util.ids_from(it, exact, axis, context)
observed = redbiom.util.ids_from(it, exact, axis, context, min_count)

for id_ in observed:
click.echo(id_)
Expand All @@ -32,10 +32,13 @@ def search():
help="All found samples must contain all specified features")
@click.option('--context', required=True, type=str,
help="The context to search within.")
@click.option('--min-count', required=False,
type=click.IntRange(min=1), default=1,
help="The minimum number of times the feature was observed.")
@click.argument('features', nargs=-1)
def search_features(from_, exact, context, features):
def search_features(from_, exact, context, features, min_count):
"""Get samples containing features."""
_axis_search(from_, exact, context, features, 'feature')
_axis_search(from_, exact, context, features, 'feature', min_count)


@search.command(name="samples")
Expand All @@ -47,8 +50,11 @@ def search_features(from_, exact, context, features):
"samples"))
@click.option('--context', required=True, type=str,
help="The context to search within.")
@click.option('--min-count', required=False,
type=click.IntRange(min=1), default=1,
help="The minimum number of times the feature was observed.")
@click.argument('samples', nargs=-1)
def search_samples(from_, exact, context, samples):
def search_samples(from_, exact, context, samples, min_count):
"""Get features present in samples."""
import redbiom
import redbiom._requests
Expand All @@ -58,7 +64,7 @@ def search_samples(from_, exact, context, samples):
get = redbiom._requests.make_get(config)
_, _, _, rb_ids = redbiom.util.resolve_ambiguities(context, samples, get)
rb_ids = list(rb_ids)
_axis_search(from_, exact, context, iter(rb_ids), 'sample')
_axis_search(from_, exact, context, iter(rb_ids), 'sample', min_count)


@search.command(name='metadata')
Expand Down
14 changes: 14 additions & 0 deletions redbiom/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,20 @@ def test_from_or_nargs(self):
# deferring validation of inference of stdin to integration tests
# as it would require overriding that standard file descriptor.

def test_ids_from_filter(self):
    """Check that ``min_count`` narrows what ``ids_from`` returns."""
    # Populate a fresh 'test' context from the module-level fixtures.
    redbiom.admin.create_context('test', 'foo')
    redbiom.admin.load_sample_metadata(metadata)
    redbiom.admin.ScriptManager.load_scripts(read_only=False)
    redbiom.admin.load_sample_data(table, 'test', tag=None)

    # Query with only the first observation ID of the fixture table.
    first_observation = table.ids(axis='observation')[0]
    query = [first_observation]

    # With a threshold of 3, fewer IDs are expected to come back.
    filtered = ids_from(query, False, 'feature', ['test'], min_count=3)
    self.assertEqual(len(filtered), 3)

    # Without an explicit threshold, the unfiltered result is returned.
    unfiltered = ids_from(query, False, 'feature', ['test'])
    self.assertEqual(len(unfiltered), 4)

def test_ids_from_multicontext(self):
redbiom.admin.create_context('test', 'foo')
redbiom.admin.create_context('test2', 'foo')
Expand Down
11 changes: 8 additions & 3 deletions redbiom/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def from_or_nargs(from_, nargs_variable):
return iter((s.strip() for s in nargs_variable))


def ids_from(it, exact, axis, contexts):
def ids_from(it, exact, axis, contexts, min_count=1):
"""Grab samples from an iterable of IDs
Parameters
Expand All @@ -44,6 +44,8 @@ def ids_from(it, exact, axis, contexts):
The axis to operate over.
contexts : list of str
The contexts to search in
min_count : int, optional
The minimum count (inclusive) to retain an observation.
Notes
-----
Expand All @@ -53,7 +55,7 @@ def ids_from(it, exact, axis, contexts):
Returns
-------
set
The sample IDs associated with the search IDs.
The IDs associated with the search IDs.
"""
import redbiom
Expand All @@ -70,12 +72,15 @@ def ids_from(it, exact, axis, contexts):
if not isinstance(contexts, (list, set, tuple)):
contexts = [contexts]

def min_count_filter(dat):
return {k: v for k, v in dat.items() if v >= min_count}

it = list(it)
fetcher = redbiom.admin.ScriptManager.get('fetch-%s' % axis)
for context in contexts:
context_ids = None
for id_ in it:
block = se(fetcher, 0, context, id_)
block = min_count_filter(se(fetcher, 0, context, id_))
if not exact:
if context_ids is None:
context_ids = set()
Expand Down

0 comments on commit e599d9b

Please sign in to comment.