Skip to content

Commit

Permalink
Merge pull request #149 from maxibor/download
Browse files Browse the repository at this point in the history
feat: add download subcommand
  • Loading branch information
maxibor authored Apr 8, 2024
2 parents 2c459ec + 1ed1e3d commit 95694a7
Show file tree
Hide file tree
Showing 7 changed files with 167 additions and 6 deletions.
2 changes: 1 addition & 1 deletion AMDirT/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.5.0"
__version__ = "1.6.0"
43 changes: 42 additions & 1 deletion AMDirT/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
from AMDirT.validate import run_validation
from AMDirT.viewer import run_app
from AMDirT.convert import run_convert
from AMDirT.core import get_json_path
from AMDirT.core import get_json_path, get_amdir_tags, get_latest_tag
from AMDirT.autofill import run_autofill
from AMDirT.merge import merge_new_df
from AMDirT.download import download as download_amdir
from json import load


Expand Down Expand Up @@ -294,5 +295,45 @@ def merge(ctx, no_args_is_help=True, **kwargs):
merge_new_df(**kwargs, **ctx.obj)


# `download` subcommand: fetch one AncientMetagenomeDir table to disk.
# NOTE: the decorator arguments below (get_table_list(), get_amdir_tags(),
# get_latest_tag(...)) are evaluated once, when this module is imported.
@cli.command()
@click.option(
    "-t",
    "--table",
    help="AncientMetagenomeDir table to download",
    type=click.Choice(get_table_list()),
    default="ancientmetagenome-hostassociated",
    show_default=True,
)
@click.option(
    "-y",
    "--table_type",
    help="Type of table to download",
    type=click.Choice(["samples", "libraries"]),
    default="samples",
    show_default=True,
)
@click.option(
    "-r",
    "--release",
    help="Release tag to download",
    type=click.Choice(get_amdir_tags()),
    default=get_latest_tag(get_amdir_tags()),
    show_default=True,
)
@click.option(
    "-o",
    "--output",
    help="Output directory",
    # The option names a directory, so reject an existing regular file at
    # argument-parsing time instead of accepting any writable path.
    type=click.Path(writable=True, file_okay=False, dir_okay=True),
    default=".",
    show_default=True,
)
def download(no_args_is_help=True, **kwargs):
    """\b
    Download a table from the AMDirT repository
    """
    # kwargs holds the parsed options (table, table_type, release, output),
    # forwarded unchanged to the library-level download function.
    download_amdir(**kwargs)


if __name__ == "__main__":
cli()
30 changes: 29 additions & 1 deletion AMDirT/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pandas as pd
import streamlit as st
from packaging import version
from packaging.version import InvalidVersion
from importlib.resources import files as get_module_dir
import os
import logging
Expand Down Expand Up @@ -65,7 +66,34 @@ def get_amdir_tags():
if version.parse(tag["name"]) >= version.parse("v22.09")
]
else:
return []
logger.warning(
"Could not fetch tags from AncientMetagenomeDir. Defaulting to master. Metadata may not yet be officially released."
)
return ["master"]


@st.cache_data
def get_latest_tag(tags):
    """
    Return the most recent release tag among the given tags.

    Args:
        tags(list): Tag names to choose from
    Returns:
        str: The tag with the highest version. If any tag cannot be parsed
            as a version and "master" is present, "master" is returned.
    Raises:
        InvalidVersion: If tags is empty, or if a tag cannot be parsed as a
            version and "master" is not among the tags
    """
    if not tags:
        # Without this guard an empty list fails with an opaque IndexError
        # from the [-1] lookup below.
        raise InvalidVersion("No valid tags found")
    try:
        return sorted(tags, key=version.Version)[-1]
    except InvalidVersion as err:
        if "master" in tags:
            return "master"
        # Keep the parsing failure attached as the explicit cause.
        raise InvalidVersion("No valid tags found") from err


def check_allowed_values(ref: list, test: str):
    """
    Report whether a value is one of the allowed values.

    Args:
        ref(list): List of allowed values
        test(str): value to check
    Returns:
        bool: True when test is found in ref, False otherwise
    """
    return test in ref


def get_colour_chemistry(instrument: str) -> int:
Expand Down
62 changes: 62 additions & 0 deletions AMDirT/download/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from AMDirT.core import (
logger,
get_amdir_tags,
get_remote_resources,
check_allowed_values,
)
import requests


def download(table: str, table_type: str, release: str, output: str = ".") -> str:
    """
    Download a table from the AMDirT repository.

    Parameters
    ----------
    table : str
        The AncientMetagenomeDir table to download.
    table_type : str
        The type of table to download. Allowed values are ['samples', 'libraries'].
    release : str
        The release of the table to download. Must be a valid release tag.
    output: str
        The output directory to save the table. Default is the current directory.

    Returns
    -------
    str:
        The path to the downloaded table. With the default output directory
        this is just the file name.

    Raises
    ------
    ValueError
        If an invalid table is provided.
    ValueError
        If an invalid table type is provided.
    ValueError
        If an invalid release is provided.
    requests.HTTPError
        If the server answers the table request with an error status.
    """
    # Function-scope import so this block does not depend on the module header.
    from pathlib import Path

    resources = get_remote_resources()
    tags = get_amdir_tags()
    # A tag list of exactly ["master"] is the fallback returned when the release
    # tags could not be fetched; the release cannot be validated in that case.
    if tags != ["master"] and not check_allowed_values(tags, release):
        raise ValueError(f"Invalid release: {release}. Allowed values are {tags}")

    tables = resources["samples"]
    if not check_allowed_values(tables, table):
        # list(...) so the message shows the table names only, not the whole
        # name -> URL mapping.
        raise ValueError(
            f"Invalid table: {table}. Allowed values are {list(tables)}"
        )

    if not check_allowed_values(["samples", "libraries"], table_type):
        raise ValueError(
            f"Invalid table type: {table_type}. Allowed values are ['samples', 'libraries']"
        )

    table_filename = f"{table}_{table_type}_{release}.tsv"
    # Honour the requested output directory. Path(".") / name collapses to the
    # bare file name, so the default call returns the same value as before.
    table_path = Path(output) / table_filename
    logger.info(
        f"Downloading {table} {table_type} table from {release} release, saving to {output}/{table_filename}"
    )
    # The resource URLs point at "master"; swapping in the release tag selects
    # the requested version of the table.
    t = requests.get(resources[table_type][table].replace("master", release))
    # Fail loudly rather than saving an HTTP error page as a .tsv file.
    t.raise_for_status()
    # Explicit encoding so the written file does not depend on the locale.
    with open(table_path, "w", encoding="utf-8") as fh:
        fh.write(t.text)

    return str(table_path)
21 changes: 21 additions & 0 deletions docs/source/how_to/download.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# download

## What

Download a copy of an AncientMetagenomeDir table.

## When

This command would be used when you want to download an AncientMetagenomeDir table locally.

You typically do this if you're planning to use the `convert` command later.

## How

```bash
AMDirT download --table ancientsinglegenome-hostassociated --table_type samples -r v23.12.0 -o .
```

## Output

The example command above will download the `ancientsinglegenome-hostassociated` `samples` table from the `v23.12.0` AncientMetagenomeDir release, and save it locally to `ancientsinglegenome-hostassociated_samples_v23.12.0.tsv`.
5 changes: 2 additions & 3 deletions docs/source/tutorials/convert.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,15 @@ We will take use one of the previous releases of AncientMetagenomeDir as an exam
```bash
mkdir amdirt-convert-tutorial
cd amdirt-convert-tutorial
curl -LO https://github.com/SPAAM-community/AncientMetagenomeDir/releases/download/v23.09.0/AncientMetagenomeDir_v23.09.0.zip
unzip AncientMetagenomeDir_v23.09.0.zip
AMDirT download --table ancientmetagenome-hostassociated --table_type samples -r v23.09.0
```

## Filter a sample metadata table

Next we can filter the ancient metagenome 'host-associated' sample sheet for all dental calculus samples from Germany.

```bash
cat ancientmetagenome-hostassociated/samples/ancientmetagenome-hostassociated_samples.tsv | grep -e '^project_name' -e 'dental calculus' | grep -e '^project_name' -e 'Germany' > germany_dentalcalculus.tsv
cat ancientmetagenome-hostassociated_samples_v23.09.0.tsv | grep -e '^project_name' -e 'dental calculus' | grep -e '^project_name' -e 'Germany' > germany_dentalcalculus.tsv
```

> _The command above is not robust and is only used for system portability and demonstration purposes. For example, the `Germany` string could be in a site name. In practice, you should use more robust filtering methods, such as more specific `grep` expressions, or filter the table in R_.
Expand Down
10 changes: 10 additions & 0 deletions tests/test_download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from AMDirT.download import download


def test_download():
    """Downloading a released samples table returns the expected file name."""
    expected_name = "ancientmetagenome-hostassociated_samples_v23.12.0.tsv"

    returned_name = download(
        "ancientmetagenome-hostassociated",
        "samples",
        "v23.12.0",
        output=".",
    )

    assert returned_name == expected_name

0 comments on commit 95694a7

Please sign in to comment.