diff --git a/micov/_io.py b/micov/_io.py index 7554458..63c2854 100644 --- a/micov/_io.py +++ b/micov/_io.py @@ -10,9 +10,9 @@ import gzip from ._cov import compress, coverage_percent -from ._constants import (BED_COV_SCHEMA, GENOME_COVERAGE_SCHEMA, - COLUMN_GENOME_ID, COLUMN_LENGTH, COLUMN_TAXONOMY, - SAM_SUBSET_SCHEMA, COLUMN_CIGAR, COLUMN_STOP, +from ._constants import (BED_COV_SCHEMA, GENOME_COVERAGE_SCHEMA, + COLUMN_GENOME_ID, COLUMN_LENGTH, COLUMN_TAXONOMY, + SAM_SUBSET_SCHEMA, COLUMN_CIGAR, COLUMN_STOP, COLUMN_START, COLUMN_SAMPLE_ID) from ._convert import cigar_to_lens @@ -177,7 +177,8 @@ def _test_has_header_taxonomy(line): if line.startswith('#'): has_header = True - elif line.split('\t')[0] in genome_id_columns and line.split('\t')[1] in taxonomy_columns: + elif line.split('\t')[0] in genome_id_columns and \ + line.split('\t')[1] in taxonomy_columns: has_header = True else: has_header = False @@ -221,10 +222,10 @@ def parse_taxonomy(taxonomy): genome_ids = df[genome_id_col] if len(genome_ids) != len(set(genome_ids)): raise ValueError(f"'{genome_id_col}' is not unique") - + rename = {genome_id_col: COLUMN_GENOME_ID, taxonomy_col: COLUMN_TAXONOMY} - + return df[[genome_id_col, taxonomy_col]].rename(rename) diff --git a/micov/cli.py b/micov/cli.py index 04ccc08..61f6a9b 100644 --- a/micov/cli.py +++ b/micov/cli.py @@ -7,8 +7,8 @@ import sys import tqdm from ._io import (parse_genome_lengths, parse_taxonomy, set_taxonomy_as_id, - parse_qiita_coverages, parse_sam_to_df, write_qiita_cov, - parse_sample_metadata, compress_from_stream, + parse_qiita_coverages, parse_sam_to_df, write_qiita_cov, + parse_sample_metadata, compress_from_stream, parse_bed_cov_to_df) from ._cov import coverage_percent from ._convert import cigar_to_lens @@ -83,7 +83,8 @@ def qiita_coverage(qiita_coverages, samples_to_keep, samples_to_ignore, @click.option('--lengths', type=click.Path(exists=True), required=False, help='Genome lengths, if provided compute coverage') @click.option('--taxonomy', type=click.Path(exists=True), required=False, - help='Genome taxonomy, if provided show species in coverage percentage. Only works when --length is provided') + help=('Genome taxonomy, if provided show species in coverage ' + 'percentage. Only works when --length is provided')) def compress(data, output, disable_compression, lengths, taxonomy): """Compress BAM/SAM/BED mapping data. @@ -97,7 +98,7 @@ def compress(data, output, disable_compression, lengths, taxonomy): if lengths is not None: lengths = parse_genome_lengths(lengths) - if taxonomy is not None: + if taxonomy is not None: taxonomy = parse_taxonomy(taxonomy) # compress data in blocks to avoid loading full mapping data into memory @@ -115,8 +116,12 @@ def compress(data, output, disable_compression, lengths, taxonomy): if taxonomy is None: genome_coverage.write_csv(output, separator='\t', include_header=True) else: - genome_coverage_with_taxonomy = set_taxonomy_as_id(genome_coverage, taxonomy) - genome_coverage_with_taxonomy.write_csv(output, separator='\t', include_header=True) + genome_coverage_with_taxonomy = set_taxonomy_as_id( + genome_coverage, taxonomy + ) + genome_coverage_with_taxonomy.write_csv( + output, separator='\t', include_header=True + ) @cli.command()