From 345ff50a8af85b70404a652f63529e77020cdba0 Mon Sep 17 00:00:00 2001 From: Daniel Bolin Date: Fri, 8 Mar 2024 14:53:01 -0500 Subject: [PATCH] Raise error in gene-expression if there are to few cells for gene ranking --- containers/gene-expression/context/main.py | 30 +++++++++++++++++++--- containers/gene-expression/pipeline.cwl | 6 ++++- steps/run-one.cwl | 23 +++++++++++++++-- 3 files changed, 52 insertions(+), 7 deletions(-) diff --git a/containers/gene-expression/context/main.py b/containers/gene-expression/context/main.py index 5f5b6f8..fa8ee9e 100644 --- a/containers/gene-expression/context/main.py +++ b/containers/gene-expression/context/main.py @@ -1,5 +1,6 @@ import argparse import json +import traceback import typing as t from pathlib import Path @@ -121,6 +122,9 @@ def get_gene_expr( matrix.raw = matrix # for getting gene names as output for sc.tl.rank_genes_groups filtered_matrix = filter_matrix(matrix, clid_column) filtered_matrix = add_ensemble_data(filtered_matrix, ensemble) + if len(filtered_matrix.obs[clid_column].unique()) <= 1: + raise ValueError("Data has too few unique cells for `rank_genes_groups`") + sc.tl.rank_genes_groups(filtered_matrix, groupby=clid_column, n_genes=10) ct_marker_genes_df = format_marker_genes_df( pd.DataFrame(filtered_matrix.uns["rank_genes_groups"]["names"]), @@ -156,10 +160,22 @@ def main(args: argparse.Namespace): CLI arguments, must contain "matrix", "clid_column", "gene_expr_column", and "output_matrix" """ - matrix = get_gene_expr( - args.matrix, args.ensemble_lookup, args.clid_column, args.gene_expr_column - ) - matrix.write_h5ad(args.output_matrix) + try: + matrix = get_gene_expr( + args.matrix, args.ensemble_lookup, args.clid_column, args.gene_expr_column + ) + matrix.write_h5ad(args.output_matrix) + except Exception as error: + with open(args.output_report, "w") as file: + json.dump( + { + "status": "failure", + "cause": repr(error), + "traceback": traceback.format_tb(error.__traceback__), + }, + file, + indent=4, + ) def _get_arg_parser() -> argparse.ArgumentParser: @@ -180,6 +196,12 @@ def _get_arg_parser() -> argparse.ArgumentParser: default="matrix_with_gene_expr.h5ad", help="matrix with gene expressions output path", ) + parser.add_argument( + "--output-report", + type=Path, + default="report.json", + help="Report file path in case of errors", + ) return parser diff --git a/containers/gene-expression/pipeline.cwl b/containers/gene-expression/pipeline.cwl index 807793c..fdd5d2e 100644 --- a/containers/gene-expression/pipeline.cwl +++ b/containers/gene-expression/pipeline.cwl @@ -23,6 +23,10 @@ inputs: outputs: matrix_with_gene_expr: - type: File + type: File? outputBinding: glob: matrix_with_gene_expr.h5ad + report: + type: File? + outputBinding: + glob: report.json diff --git a/steps/run-one.cwl b/steps/run-one.cwl index 50511b0..a92ffad 100644 --- a/steps/run-one.cwl +++ b/steps/run-one.cwl @@ -72,7 +72,7 @@ steps: options: source: algorithm valueFrom: $(self.geneExpression || {}) - out: [matrix_with_gene_expr] + out: [matrix_with_gene_expr, report] summarize: run: ../containers/extract-summary/pipeline.cwl @@ -84,11 +84,30 @@ steps: valueFrom: $(self.summarize || {}) out: [summary, annotations] + selectReport: + run: + class: ExpressionTool + requirements: + InlineJavascriptRequirement: {} + + inputs: + reports: File[] + outputs: + report: File + + expression: | + ${ return { report: inputs.reports[inputs.reports.length - 1] }; } + in: + reports: + source: [annotate/report, gene_expression/report] + pickValue: all_non_null + out: [report] + collect: run: ./collect-files.cwl in: files: - source: [summarize/summary, summarize/annotations, annotate/report] + source: [summarize/summary, summarize/annotations, selectReport/report] pickValue: all_non_null outputDirectory: source: algorithm