From 774d08f32cc12bb07c1d2997b5adc149ef5da431 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Wed, 11 Sep 2024 14:23:01 -0400 Subject: [PATCH] docs: improve module docs & interlinking --- docs/_quarto.yml | 20 ++++++--- src/ccbr_tools/GSEA/__init__.py | 5 --- src/ccbr_tools/GSEA/deg2gs.py | 15 +++---- src/ccbr_tools/GSEA/multitext2excel.py | 3 +- src/ccbr_tools/GSEA/ncbr_huse.py | 3 +- src/ccbr_tools/homologfinder/hf.py | 11 ++++- src/ccbr_tools/jobby.py | 4 +- src/ccbr_tools/pipeline/__init__.py | 6 --- src/ccbr_tools/pipeline/cache.py | 5 ++- src/ccbr_tools/pipeline/hpc.py | 57 ++++++++++++++++++++++---- src/ccbr_tools/pipeline/nextflow.py | 8 +--- src/ccbr_tools/pipeline/util.py | 11 +++-- src/ccbr_tools/pyproject.toml | 3 +- src/ccbr_tools/shell.py | 4 +- src/ccbr_tools/templates/__init__.py | 7 +--- 15 files changed, 100 insertions(+), 62 deletions(-) diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 7e33645..eef2a54 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -33,20 +33,30 @@ website: quartodoc: package: ccbr_tools parser: google + render_interlinks: true sidebar: _sidebar.yml + title: "API Reference" sections: - title: Modules contents: - - gb2gtf - - homologfinder - - intersect - jobby - - jobinfo - peek - - pipeline + - pipeline.cache + - pipeline.hpc + - pipeline.nextflow + - pipeline.util - pkg_util - shell - templates + - title: Legacy tools + contents: + - GSEA.deg2gs + - GSEA.multitext2excel + - GSEA.ncbr_huse + - gb2gtf + - homologfinder.hf + - intersect + - jobinfo metadata-files: - _sidebar.yml diff --git a/src/ccbr_tools/GSEA/__init__.py b/src/ccbr_tools/GSEA/__init__.py index d646ae3..cfedbc0 100755 --- a/src/ccbr_tools/GSEA/__init__.py +++ b/src/ccbr_tools/GSEA/__init__.py @@ -1,8 +1,3 @@ """ GSEA tools - -Modules: - - `ccbr_tools.GSEA.deg2gs` - - `ccbr_tools.GSEA.multitext2excel` - - `ccbr_tools.GSEA.ncbr_huse` """ diff --git a/src/ccbr_tools/GSEA/deg2gs.py b/src/ccbr_tools/GSEA/deg2gs.py index 8700770..48c2177 100755 --- 
a/src/ccbr_tools/GSEA/deg2gs.py +++ b/src/ccbr_tools/GSEA/deg2gs.py @@ -1,16 +1,17 @@ """ -deg2gs.py - Reads a rnaseq pipeliner *_DEG_all_genes.txt file - and outputs a prioritized list of Ensembl gene IDs for ToppFun +Reads a rnaseq pipeliner *_DEG_all_genes.txt file and outputs a prioritized list of Ensembl gene IDs for ToppFun + +Author: Susan Huse -Susan Huse NIAID Center for Biological Research + Frederick National Laboratory for Cancer Research + Leidos Biomedical -v 1.0 - initial code version. -v 1.1 - updated for new column headers in pipeliner limma_DEG_all_genes.txt -v 1.2 - top2Excel format is now csv rather than tab-delimited +- v 1.0 - initial code version. +- v 1.1 - updated for new column headers in pipeliner limma_DEG_all_genes.txt +- v 1.2 - top2Excel format is now csv rather than tab-delimited """ diff --git a/src/ccbr_tools/GSEA/multitext2excel.py b/src/ccbr_tools/GSEA/multitext2excel.py index 40b24c8..b8faaa6 100755 --- a/src/ccbr_tools/GSEA/multitext2excel.py +++ b/src/ccbr_tools/GSEA/multitext2excel.py @@ -1,6 +1,5 @@ """ -multitext2excel.py - Reads a list of files to import as separate tabs in Excel +Reads a list of files to import as separate tabs in Excel Created on Mon Aug 6 14:59:13 2018 diff --git a/src/ccbr_tools/GSEA/ncbr_huse.py b/src/ccbr_tools/GSEA/ncbr_huse.py index 93e9d55..ac72b58 100755 --- a/src/ccbr_tools/GSEA/ncbr_huse.py +++ b/src/ccbr_tools/GSEA/ncbr_huse.py @@ -1,6 +1,5 @@ """ -ncbr_huse.py - Set of functions supporting the FNL NCBR work +Set of functions supporting the FNL NCBR work Author: Susan Huse diff --git a/src/ccbr_tools/homologfinder/hf.py b/src/ccbr_tools/homologfinder/hf.py index a67f561..13b7e8f 100755 --- a/src/ccbr_tools/homologfinder/hf.py +++ b/src/ccbr_tools/homologfinder/hf.py @@ -1,15 +1,22 @@ #!/usr/bin/env python3 """ +Finds homologs in human and mouse. + About: hf or HomologFinder finds homologs in human and mouse. 
if the input gene or genelist is human, then it returns mouse homolog(s) and vice versa -USAGE: + +Usage: $ hf -h -Example: + +Examples: $ hf -g ZNF365 + $ hf -l Wdr53,Zfp365 + $ hf -f genelist.txt + """ __version__ = "v1.0.0" diff --git a/src/ccbr_tools/jobby.py b/src/ccbr_tools/jobby.py index 672d25f..fdcc026 100755 --- a/src/ccbr_tools/jobby.py +++ b/src/ccbr_tools/jobby.py @@ -10,9 +10,9 @@ getting job information to return to the user in a standardized format and unified cli. - Many thanks to the original author: Skyler Kuhn (@skchronicles) + Many thanks to the original author: Skyler Kuhn ([@skchronicles](https://github.com/skchronicles)) - Original source: https://raw.githubusercontent.com/OpenOmics/mr-seek/2ecbbb2628b7102bf2cc23bc946858de2e09929f/workflow/scripts/jobby + Original source: [OpenOmics/mr-seek](https://raw.githubusercontent.com/OpenOmics/mr-seek/2ecbbb2628b7102bf2cc23bc946858de2e09929f/workflow/scripts/jobby) REQUIRES: - python>=3.5 diff --git a/src/ccbr_tools/pipeline/__init__.py b/src/ccbr_tools/pipeline/__init__.py index c156e18..302f21f 100755 --- a/src/ccbr_tools/pipeline/__init__.py +++ b/src/ccbr_tools/pipeline/__init__.py @@ -1,9 +1,3 @@ """ Helpers for bioinformatics pipelines - -Modules: - - `ccbr_tools.pipeline.cache` - - `ccbr_tools.pipeline.hpc` - - `ccbr_tools.pipeline.nextflow` - - `ccbr_tools.pipeline.util` """ diff --git a/src/ccbr_tools/pipeline/cache.py b/src/ccbr_tools/pipeline/cache.py index 62cc2b2..9c5a785 100755 --- a/src/ccbr_tools/pipeline/cache.py +++ b/src/ccbr_tools/pipeline/cache.py @@ -1,3 +1,6 @@ +""" +Functions for singularity cache management +""" import json import os import sys @@ -95,7 +98,7 @@ def check_cache(parser, cache, *args, **kwargs): # Check that the user owns the child cache directory # May revert to os.getuid() if user id is not sufficient if ( - exists(os.path.join(cache, "cache")) + os.path.exists(os.path.join(cache, "cache")) and os.stat(os.path.join(cache, "cache")).st_uid != os.getuid() ): # 
User does NOT own the cache directory, raise error diff --git a/src/ccbr_tools/pipeline/hpc.py b/src/ccbr_tools/pipeline/hpc.py index 9c08ae3..fe5cde1 100755 --- a/src/ccbr_tools/pipeline/hpc.py +++ b/src/ccbr_tools/pipeline/hpc.py @@ -1,13 +1,8 @@ """ -This module defines classes for working with different HPC clusters. +Classes for working with different HPC clusters. -Classes: - Cluster: Parent class for HPC clusters, which evaluates to None. - Biowulf: Represents the Biowulf HPC cluster. - FRCE: Represents the FRCE HPC cluster. - -Functions: - get_hpc: Returns an instance of the appropriate HPC cluster based on the current environment. +Use [](`ccbr_tools.pipeline.hpc.get_hpc`) to retrieve an HPC Cluster instance, +which contains default attributes for supported clusters. """ from .util import get_hpcname @@ -15,6 +10,17 @@ class Cluster: + """ + Base class for an HPC cluster - evaluates to None + + Attributes: + name (str): The name of the cluster. + modules (dict): A dictionary containing the modules installed on the cluster. + The keys are the module names and the values are the corresponding versions. + singularity_sif_dir (str): The directory where Singularity SIF files are stored. + env_vars (str): A string representing the environment variables to be set on the cluster. + """ + def __init__(self): self.name = None self.modules = {"nxf": None, "smk": None} @@ -33,6 +39,16 @@ def __bool__(self): class Biowulf(Cluster): + """ + The Biowulf cluster -- child of [](`ccbr_tools.pipeline.hpc.Cluster`) + + Attributes: + name (str): The name of the cluster. + modules (dict): A dictionary mapping module names to their corresponding commands. + singularity_sif_dir (str): The directory path for Singularity SIF files. + env_vars (str): A string representing the environment variables to be set on the cluster. 
+ """ + def __init__(self): super().__init__() self.name = "biowulf" @@ -44,6 +60,16 @@ def __init__(self): class FRCE(Cluster): + """ + The FRCE cluster -- child of [](`ccbr_tools.pipeline.hpc.Cluster`) + + Attributes: + name (str): The name of the cluster. + modules (dict): A dictionary mapping module names to their corresponding commands. + singularity_sif_dir (str): The directory path for Singularity SIF files. + env_vars (str): A string representing the environment variables to be set on the cluster. + """ + def __init__(self): super().__init__() self.name = "frce" @@ -58,6 +84,21 @@ def __init__(self): def get_hpc(debug=False): + """ + Returns an instance of the High-Performance Computing (HPC) cluster based on the specified HPC name. + + If the HPC is not known or supported, an instance of the base `Cluster` class is returned. + + Args: + debug (bool or str, optional): If truthy, the value of `debug` is used as the HPC name instead of the detected one. Defaults to False. + + Returns: + cluster (Cluster): An instance of the HPC cluster. + + Examples: + >>> get_hpc() + >>> get_hpc(debug=True) + """ hpc_options = {"biowulf": Biowulf, "frce": FRCE} hpc_name = get_hpcname() if not debug else debug return hpc_options.get(hpc_name, Cluster)() diff --git a/src/ccbr_tools/pipeline/nextflow.py b/src/ccbr_tools/pipeline/nextflow.py index 9ec9da7..7a8343d 100755 --- a/src/ccbr_tools/pipeline/nextflow.py +++ b/src/ccbr_tools/pipeline/nextflow.py @@ -1,7 +1,5 @@ """ -Module: nextflow - -This module provides functions for running Nextflow workflows in local and HPC environments. +Run Nextflow workflows in local and HPC environments. Functions: - run(nextfile_path=None, nextflow_args=None, mode="local", pipeline_name=None, debug=False, hpc_options={}) @@ -29,13 +27,9 @@ def run( nextfile_path (str, optional): Path to the Nextflow file. nextflow_args (list, optional): Additional Nextflow arguments. Defaults to an empty list. mode (str, optional): Execution mode. Defaults to "local". - hpc_options (dict, optional): HPC options. 
Defaults to {"biowulf": {"profile": "biowulf", "slurm": "assets/slurm_header_biowulf.sh"}, "fnlcr": {"profile": "frce", "slurm": "assets/slurm_header_frce.sh"}}. Raises: ValueError: If mode is 'slurm' but no HPC environment was detected. - - Returns: - None """ nextflow_command = ["nextflow", "run", nextfile_path] diff --git a/src/ccbr_tools/pipeline/util.py b/src/ccbr_tools/pipeline/util.py index 6e542b6..b148bb6 100755 --- a/src/ccbr_tools/pipeline/util.py +++ b/src/ccbr_tools/pipeline/util.py @@ -1,7 +1,6 @@ -#!/usr/bin/env python3 -# -*- coding: UTF-8 -*- - -# Python standard library +""" +Pipeline utility functions +""" import collections import datetime import shutil @@ -21,10 +20,10 @@ def scontrol_show(): """ - Run scontrol show config and parse the output as a dictionary + Run `scontrol show config` and parse the output as a dictionary Returns: - scontrol_dict : + scontrol_dict (dict): dictionary containing the output of `scontrol show config` """ scontrol_dict = dict() scontrol_out = subprocess.run( diff --git a/src/ccbr_tools/pyproject.toml b/src/ccbr_tools/pyproject.toml index bc0022c..a7cfab6 100644 --- a/src/ccbr_tools/pyproject.toml +++ b/src/ccbr_tools/pyproject.toml @@ -54,8 +54,7 @@ test = [ "pytest-cov" ] docs = [ - "quartodoc >= v0.7.5", - "griffe >= 0.33, < 1.0.0" + "quartodoc >= v0.7.6", ] [project.scripts] diff --git a/src/ccbr_tools/shell.py b/src/ccbr_tools/shell.py index d36dfd2..9601cac 100755 --- a/src/ccbr_tools/shell.py +++ b/src/ccbr_tools/shell.py @@ -47,7 +47,7 @@ def exec_in_context(func: callable, *args: str, **kwargs: str): **kwargs: Arbitrary keyword arguments to be passed to the function. Returns: - str: The combined output from both stdout and stderr. + out (str): The combined output from both stdout and stderr. """ with ( contextlib.redirect_stdout(io.StringIO()) as out_f, @@ -66,6 +66,6 @@ def concat_newline(*args: str): *args: Variable length argument list of strings to be concatenated. 
Returns: - str: The concatenated string with newline characters between each non-empty argument. + string (str): The concatenated string with newline characters between each non-empty argument. """ return "\n".join([arg for arg in args if arg]) diff --git a/src/ccbr_tools/templates/__init__.py b/src/ccbr_tools/templates/__init__.py index 4cd882e..c269687 100755 --- a/src/ccbr_tools/templates/__init__.py +++ b/src/ccbr_tools/templates/__init__.py @@ -25,17 +25,14 @@ def read_template(template_name): return template_file.read() -def use_template(template_name, output_filepath=None, **kwargs): +def use_template(template_name, output_filepath=None, **kwargs: str): """ Uses a template, formats variables, and writes it to a file. Args: template_name (str): The name of the template to use. output_filepath (str, optional): The filepath to save the output file. If not provided, it will be written to `template_name` in the current working directory. - **kwargs: Keyword arguments to fill in the template variables. - - Returns: - None + **kwargs (str, optional): Keyword arguments to fill in the template variables. Raises: FileNotFoundError: If the template file is not found.