Skip to content

Commit

Permalink
Merge pull request #266 from AlexandrovLab/spe_cli
Browse files Browse the repository at this point in the history
Spe cli
  • Loading branch information
mdbarnesUCSD authored Dec 10, 2024
2 parents e649027 + 7a9774b commit 07c3b4a
Show file tree
Hide file tree
Showing 9 changed files with 384 additions and 53 deletions.
5 changes: 2 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@ python:
- '3.8'

before_install:
if ! [ -f ./src/GRCh37.tar.gz ]; then
wget ftp://alexandrovlab-ftp.ucsd.edu/pub/tools/SigProfilerMatrixGenerator/GRCh37.tar.gz -P ./src/;
fi
- pip install --upgrade setuptools packaging
- if ! [ -f ./src/GRCh37.tar.gz ]; then wget ftp://alexandrovlab-ftp.ucsd.edu/pub/tools/SigProfilerMatrixGenerator/GRCh37.tar.gz -P ./src/; fi

install:
- pip install .
Expand Down
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@

# Changelog

All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

## [1.1.25] - 2024-12-09

### Added
- Introduced a Command-Line Interface (CLI) for SigProfilerExtractor, enabling users to interact with the tool via terminal commands.

### Changed
- Improved the formatting of the parameter table for the `sigProfilerExtractor` function for better readability and consistency.
- The CI/CD badge link has been fixed.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ include SigProfilerExtractor/data/CNVInput/*
include SigProfilerExtractor/data/CSVInput/*
include SigProfilerExtractor/data/MatObjInput/*
include SigProfilerExtractor/data/ReferenceFiles/*
include SigProfilerExtractor/controllers/*

83 changes: 42 additions & 41 deletions README.md

Large diffs are not rendered by default.

266 changes: 266 additions & 0 deletions SigProfilerExtractor/controllers/cli_controller.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
import argparse
from typing import List
from SigProfilerExtractor import sigpro


def str2bool(v):
    """Convert a command-line token to a bool, for use as an argparse ``type``.

    Args:
        v: A bool (returned unchanged) or one of the strings "yes"/"no",
            "true"/"false", "t"/"f", "y"/"n", "1"/"0". Matching ignores case
            and surrounding whitespace.

    Returns:
        The boolean the token stands for.

    Raises:
        argparse.ArgumentTypeError: If ``v`` is neither a bool nor one of the
            recognised strings.
    """
    if isinstance(v, bool):
        return v
    # Only strings can be matched against the keyword lists; anything else
    # falls through to the ArgumentTypeError below so argparse can report it
    # cleanly instead of crashing on a missing ``.lower`` attribute.
    if isinstance(v, str):
        token = v.strip().lower()
        if token in ("yes", "true", "t", "y", "1"):
            return True
        if token in ("no", "false", "f", "n", "0"):
            return False
    raise argparse.ArgumentTypeError("Boolean value expected.")


def parse_arguments_extractor(args: List[str], description: str) -> argparse.Namespace:
    """Parse the command-line arguments of the ``sigprofilerextractor`` command.

    Args:
        args: Argument tokens to parse (program and command names excluded).
        description: Text shown at the top of the ``--help`` output.

    Returns:
        A namespace holding the three positionals (``input_type``, ``output``,
        ``input_data``) and every optional setting, with defaults filled in.

    Note:
        argparse terminates the process (``SystemExit``) when the arguments
        are invalid or a required positional is missing.
    """
    parser = argparse.ArgumentParser(description=description)

    def add_bool_option(flag: str, default: bool, help_text: str) -> None:
        # A bare flag (e.g. ``--gpu``) means True; an explicit value
        # (e.g. ``--gpu false``) is converted by str2bool.
        parser.add_argument(
            flag,
            type=str2bool,
            nargs="?",
            const=True,
            default=default,
            help=help_text,
        )

    # Core required arguments
    input_type_help = (
        "The input file type: 'vcf', 'matrix', 'bedpe', or 'seg:TYPE'. "
        "Accepted callers for TYPE: {'ASCAT', 'ASCAT_NGS', 'SEQUENZA', "
        "'ABSOLUTE', 'BATTENBERG', 'FACETS', 'PURPLE', 'TCGA'}."
    )
    parser.add_argument(
        "input_type",
        help=input_type_help,
    )

    parser.add_argument(
        "output",
        help="Path to the output folder.",
    )

    input_data_help = (
        "Path to input data. For 'vcf' or 'bedpe', provide an input folder. "
        "For 'matrix' or 'seg:TYPE', provide an input file."
    )
    parser.add_argument(
        "input_data",
        help=input_data_help,
    )

    # Optional arguments with defaults
    parser.add_argument(
        "--reference_genome",
        default="GRCh37",
        help="Reference genome (default: 'GRCh37'). This parameter is applicable only if the input_type is 'vcf'.",
    )
    parser.add_argument(
        "--opportunity_genome",
        default="GRCh37",
        # The help previously named 'opportunity_genome' as its own
        # replacement; for 'vcf' input it is 'reference_genome' that applies.
        help="The build or version of the reference genome for the reference signatures (default: 'GRCh37'). When the input type is 'vcf' the value for 'reference_genome' will be used instead.",
    )
    parser.add_argument(
        "--context_type",
        default="default",
        help="Mutational context types (default: '96,DINUC,ID').",
    )
    add_bool_option("--exome", False, "Extract exomes (default: False).")
    parser.add_argument(
        "--minimum_signatures",
        type=int,
        default=1,
        help="Minimum number of signatures to be extracted (default: 1).",
    )
    parser.add_argument(
        "--maximum_signatures",
        type=int,
        default=10,
        help="Maximum number of signatures to be extracted (default: 10).",
    )
    parser.add_argument(
        "--nmf_replicates",
        type=int,
        default=100,
        help="Number of NMF replicates to be performed at each rank using W and H (default: 100).",
    )
    add_bool_option(
        "--resample",
        True,
        "Add poisson noise to samples by resampling (default: True).",
    )
    parser.add_argument(
        "--seeds",
        default="random",
        help="Seeds for reproducible resamples, file path or 'random' (default: 'random').",
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=1,
        help="Batch size is for GPU only and defines the number of NMF replicates to be performed by each CPU during parallel processing (default: 1).",
    )
    parser.add_argument(
        "--cpu",
        type=int,
        default=-1,
        help="Number of processors to use (default: all available).",
    )
    add_bool_option(
        "--gpu",
        False,
        "Use GPU if available (default: False). note: All available CPU processors are used by default, which may cause a memory error. This error can be resolved by reducing the number of CPU processes through the 'cpu' parameter.",
    )
    parser.add_argument(
        "--nmf_init",
        default="random",
        help="The initialization algorithm for W and H matrix of NMF (default: 'random'). Options are 'random', 'nndsvd', 'nndsvda', 'nndsvdar' and 'nndsvd_min'.",
    )
    parser.add_argument(
        "--precision",
        default="single",
        help="Precision for calculations (default: 'single'). Options are 'single' and 'double'.",
    )
    parser.add_argument(
        "--matrix_normalization",
        default="gmm",
        help="Method of normalizing the genome matrix before it is analyzed by NMF (default: 'gmm'). Options are 'custom', 'gmm', 'log2', or 'none'.",
    )
    parser.add_argument(
        "--min_nmf_iterations",
        type=int,
        default=10000,
        help="Minimum NMF iterations (default: 10000).",
    )
    parser.add_argument(
        "--max_nmf_iterations",
        type=int,
        default=1000000,
        help="Maximum NMF iterations (default: 1000000).",
    )
    parser.add_argument(
        "--nmf_test_conv",
        type=int,
        default=10000,
        help="Test convergence every X iterations (default: 10000).",
    )
    parser.add_argument(
        "--nmf_tolerance",
        type=float,
        default=1e-15,
        help="NMF tolerance for convergence (default: 1e-15).",
    )
    add_bool_option(
        "--get_all_signature_matrices",
        False,
        "Get all NMF matrices (default: False).",
    )
    add_bool_option(
        "--export_probabilities",
        True,
        "Export probability matrix (default: True).",
    )
    parser.add_argument(
        "--stability",
        type=float,
        default=0.8,
        help="Average stability cutoff (default: 0.8).",
    )
    parser.add_argument(
        "--min_stability",
        type=float,
        default=0.2,
        help="Minimum stability cutoff (default: 0.2).",
    )
    parser.add_argument(
        "--combined_stability",
        type=float,
        default=1.0,
        help="Combined stability cutoff (default: 1.0).",
    )
    add_bool_option(
        "--allow_stability_drop",
        False,
        "Allow stability drop (default: False).",
    )
    parser.add_argument(
        "--cosmic_version",
        type=float,
        default=3.4,
        help="COSMIC version for reference signatures. Valid values are 1, 2, 3, 3.1, 3.2, 3.3, and 3.4 (default: 3.4).",
    )
    add_bool_option(
        "--make_decomposition_plots",
        True,
        "Generate decomposition plots (default: True).",
    )
    add_bool_option(
        "--collapse_to_SBS96",
        True,
        "Collapse to SBS288 and SBS1536 matrices to SBS96. If False, will map reference signatures to the same context as input (default: True).",
    )

    return parser.parse_args(args)


class CliController:
    """Maps CLI sub-commands onto SigProfilerExtractor library calls."""

    def dispatch_sigProfilerExtractor(self, user_args: List[str]) -> None:
        """Parse ``user_args`` and run ``sigpro.sigProfilerExtractor`` with them.

        Args:
            user_args: Command-line tokens that follow the sub-command name.
        """
        options = parse_arguments_extractor(
            user_args, "Extract mutational signatures from input samples."
        )
        # Names of the parsed options handed to the extractor, each passed as
        # the keyword argument of the same name.
        forwarded = (
            "input_type",
            "output",
            "input_data",
            "reference_genome",
            "opportunity_genome",
            "context_type",
            "exome",
            "minimum_signatures",
            "maximum_signatures",
            "nmf_replicates",
            "resample",
            "seeds",
            "batch_size",
            "cpu",
            "gpu",
            "nmf_init",
            "precision",
            "matrix_normalization",
            "min_nmf_iterations",
            "max_nmf_iterations",
            "nmf_test_conv",
            "nmf_tolerance",
            "get_all_signature_matrices",
            "export_probabilities",
            "stability",
            "min_stability",
            "combined_stability",
            "allow_stability_drop",
            "cosmic_version",
            "make_decomposition_plots",
            "collapse_to_SBS96",
        )
        extractor_kwargs = {name: getattr(options, name) for name in forwarded}
        sigpro.sigProfilerExtractor(**extractor_kwargs)
4 changes: 4 additions & 0 deletions SigProfilerExtractor/sigpro.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ def sigProfilerExtractor(
combined_stability=1.0,
allow_stability_drop=False,
get_all_signature_matrices=False,
volume=None,
):
"""
Extracts mutational signatures from an array of samples.
Expand Down Expand Up @@ -642,6 +643,7 @@ def sigProfilerExtractor(
chrom_based=False,
plot=False,
gs=False,
volume=volume,
)
# Selecting the MutationType
if mtype == ["default"]:
Expand Down Expand Up @@ -999,6 +1001,7 @@ def sigProfilerExtractor(
colnames,
wall=wall,
sequence=sequence,
volume=volume,
)
all_similirities_list.append(all_similarities)
current_time_end = datetime.datetime.now()
Expand Down Expand Up @@ -1127,6 +1130,7 @@ def sigProfilerExtractor(
export_probabilities=export_probabilities,
devopts=devopts,
make_metadata=False,
volume=volume,
)

sysdata = open(out_put + "/JOB_METADATA.txt", "a")
Expand Down
34 changes: 34 additions & 0 deletions SigProfilerExtractor/sigprofilerextractor_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env python3

import sys
from SigProfilerExtractor.controllers import cli_controller


def main_function():
    """Entry point of the command-line tool.

    Reads the sub-command from ``sys.argv[1]`` (case-insensitive) and hands
    the remaining arguments to the matching controller method. When the
    sub-command is missing or unknown, prints the usage text and exits with
    status 1.
    """
    commands = {
        "sigprofilerextractor": "Extract mutational signatures from input samples."
    }

    argv = sys.argv
    # None stands for "no sub-command given"; it is never a key of `commands`.
    requested = argv[1].lower() if len(argv) >= 2 else None
    if requested not in commands:
        print_usage(commands)
        sys.exit(1)

    remaining = argv[2:]
    controller = cli_controller.CliController()

    if requested == "sigprofilerextractor":
        controller.dispatch_sigProfilerExtractor(remaining)


def print_usage(commands):
    """Print the usage banner followed by one line per available command.

    Args:
        commands: Mapping of command name to its one-line description.
    """
    lines = ["Usage: SigProfilerExtractor <command> [<args>]\n", "Commands:"]
    lines.extend(f" {name}: {summary}" for name, summary in commands.items())
    print("\n".join(lines))


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main_function()
Loading

0 comments on commit 07c3b4a

Please sign in to comment.