diff --git a/README.md b/README.md index 4ab542e..402aebe 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,6 @@ **Park** an **arc**hived project tool**kit**! -[![test](https://github.com/CCBR/parkit/actions/workflows/test.yml/badge.svg)](https://github.com/CCBR/parkit/actions/workflows/test.yml) -[![docs](https://github.com/CCBR/parkit/actions/workflows/docs.yml/badge.svg)](https://github.com/CCBR/parkit/actions/workflows/docs.yml) > DISCLAIMERS: > @@ -17,6 +15,8 @@ When a project comes to a completion, most analysts have folders (or `.tar` file The analyst can use `parkit` to park these folders directly on to HPCDME's **CCBR_Archive** object store vault. A typical project, say `ccbrXYZ`, can be parked at `/CCBR_Archive/GRIDFTP/Project_CCBR-XYZ` with collections "Analysis" and "Rawdata". +!!! note `projark` command is preferred for CCBR **proj**ect **ark**iving + ### Prerequisites: - On helix or biowulf you can get access to `parkit` by loading the appropriate conda env @@ -30,7 +30,7 @@ The analyst can use `parkit` to park these folders directly on to HPCDME's **CCB - **HPC_DM_UTILS** environmental variable should be preset before calling `parkit`. It also needs to be passed as an argument to `parkit_folder2hpcdme` and `parkit_tarball2hpcdme` end-to-end workflows. -> If not on helix or biowulf then you will have to **clone** the repo and **pip install** it. Then setup [HPC_DME_APIs](https://github.com/CBIIT/HPC_DME_APIs) appropriately. +!!! warning If not on helix or biowulf then you will have to **clone** the repo and **pip install** it. Then setup [HPC_DME_APIs](https://github.com/CBIIT/HPC_DME_APIs) appropriately. 
### Usage: @@ -69,23 +69,32 @@ options: # create an empty collection on HPC-DME %> parkit createemptycollection --dest /CCBR_Archive/GRIDFTP/Project_CCBR-12345 --projectdesc "testing" --projecttitle "test project 1" +# the above command creates collections: +# - /CCBR_Archive/GRIDFTP/Project_CCBR-12345 +# - /CCBR_Archive/GRIDFTP/Project_CCBR-12345/Analysis +# - /CCBR_Archive/GRIDFTP/Project_CCBR-12345/Rawdata # create required metadata %> parkit createmetadata --tarball /data/CCBR/projects/ccbr_12345.tar --dest /CCBR_Archive/GRIDFTP/Project_CCBR-12345 +# if ccbr_12345.tar is rawdata then "--collectiontype Rawdata" argument needs to be added to the above commandline # deposit the tar into HPC-DME %> parkit deposittar --tarball /data/CCBR/projects/ccbr_12345.tar --dest /CCBR_Archive/GRIDFTP/Project_CCBR-12345 +# if ccbr_12345.tar is rawdata then "--collectiontype Rawdata" argument needs to be added to the above commandline # bunch of extra files are created in the process %> ls /data/CCBR/projects/ccbr_12345.tar* /data/CCBR/projects/ccbr_12345.tar /data/CCBR/projects/ccbr_12345.tar.filelist.md5 /data/CCBR/projects/ccbr_12345.tar.md5 /data/CCBR/projects/ccbr_12345.tar.filelist /data/CCBR/projects/ccbr_12345.tar.filelist.metadata.json /data/CCBR/projects/ccbr_12345.tar.metadata.json -# these extra files can now be deleted -%> rm -f /data/CCBR/projects/ccbr_12345.tar* +# delete the recently parked project folder contents (the `*` glob skips hidden dotfiles; remove those separately) +%> rm -rf /data/CCBR/projects/ccbr_12345/* -# you can also deleted the recently parked project folder -%> rm -rf /data/CCBR/projects/ccbr_12345 +# copy filelist into the empty project folder for future quick reference +%> cp /data/CCBR/projects/ccbr_12345.tar.filelist /data/CCBR/projects/ccbr_12345/ccbr_12345.tar.filelist + +# delete files created by parkit +%> rm -f /data/CCBR/projects/ccbr_12345.tar* # test results with %> dm_get_collection /CCBR_Archive/GRIDFTP/Project_CCBR-12345 @@ -95,10 +104,13 @@ options: We also have 
end-to-end slurm-supported folder-to-hpcdme and tarball-to-hpcdme workflows: - `parkit_folder2hpcdme` -- `parkit_tarball2hpcdme` +- `parkit_tarball2hpcdme` and +- `projark` [ recommended for archiving CCBR projects to GRIDFTP folder under CCBR_Archive ] If run with `--executor slurm` this interfaces with the job scheduler on Biowulf and submitted individual steps of these E2E workflows as interdependent jobs. +### `parkit_folder2hpcdme` + ```bash %> parkit_folder2hpcdme --help usage: parkit_folder2hpcdme [-h] [--restartfrom RESTARTFROM] [--executor EXECUTOR] [--folder FOLDER] [--dest DEST] [--projectdesc PROJECTDESC] @@ -124,12 +136,12 @@ options: --version print version ``` -> NOTE: -> -> `parkit_folder2hpcdme` by default parks files under `/CCBR_Archive/GRIDFTP/Project_CCBR-12345/Analysis` unless the `--rawdata` flag is provided at command line. In that case, the tarball is parked at `/CCBR_Archive/GRIDFTP/Project_CCBR-12345/Rawdata` + + +### `parkit_tarball2hpcdme` ```bash -parkit_tarball2hpcdme --help +%> parkit_tarball2hpcdme --help usage: parkit_tarball2hpcdme [-h] [--restartfrom RESTARTFROM] [--executor EXECUTOR] [--tarball TARBALL] [--dest DEST] [--projectdesc PROJECTDESC] [--projecttitle PROJECTTITLE] [--cleanup] --hpcdmutilspath HPCDMUTILSPATH [--version] @@ -152,3 +164,22 @@ options: what should be the value of env var HPC_DM_UTILS --version print version ``` + +```bash +%> projark --help +usage: projark [-h] --folder FOLDER --projectnumber PROJECTNUMBER + [--executor EXECUTOR] [--rawdata] [--cleanup] + +Wrapper for folder2hpcdme for quick CCBR project archiving! + +options: + -h, --help show this help message and exit + --folder FOLDER Input folder path to archive + --projectnumber PROJECTNUMBER + CCBR project number.. 
destination will be + /CCBR_Archive/GRIDFTP/Project_CCBR- + --executor EXECUTOR slurm or local + --rawdata If tarball is rawdata and needs to go under folder + Rawdata + --cleanup post transfer step to delete local files +``` \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 17000ae..316967d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ test = [ parkit = "parkit.__main__:main" parkit_folder2hpcdme = "parkit.parkit_folder2hpcdme:main" parkit_tarball2hpcdme = "parkit.parkit_tarball2hpcdme:main" +projark = "parkit.projark:main" update_collection_metadata = "parkit.update_collection_metadata:main" [tool.setuptools.dynamic] diff --git a/src/parkit/__main__.py b/src/parkit/__main__.py index 7c64d19..211d28d 100755 --- a/src/parkit/__main__.py +++ b/src/parkit/__main__.py @@ -48,6 +48,12 @@ def main(): help="destination path in vault (Analysis collection goes under here)", required=True, ) + parser_createmetadata.add_argument( + "--collectiontype", + type=str, + help="type of collection ... Analysis[default] or Rawdata", + default="Analysis" # or Rawdata + ) # Create a subcommand for "createemptycollection" parser_createemptycollection = subparsers.add_parser( @@ -82,7 +88,7 @@ def main(): parser_deposittar.add_argument( "--collectiontype", type=str, - help="path to tarball", + help="type of collection ... 
Analysis[default] or Rawdata", default="Analysis" # or Rawdata ) @@ -102,9 +108,9 @@ def main(): args.dest, projectdesc=args.projectdesc, projecttitle=args.projecttitle ) elif args.command == "createmetadata": - tar_json_path = createmetadata(args.tarball, args.dest) + tar_json_path = createmetadata(args.tarball, args.dest, args.collectiontype) files_created.append(tar_json_path) - filelist_json_path = createmetadata(args.tarball + ".filelist", args.dest) + filelist_json_path = createmetadata(args.tarball + ".filelist", args.dest, args.collectiontype) files_created.append(filelist_json_path) elif args.command == "deposittar": deposittocollection(args.tarball, args.dest, args.collectiontype) diff --git a/src/parkit/projark.py b/src/parkit/projark.py new file mode 100644 index 0000000..0b4c0ad --- /dev/null +++ b/src/parkit/projark.py @@ -0,0 +1,19 @@ +import sys, os +import subprocess +from pathlib import Path + + +def main(): + # Path to your bash script + p = Path(__file__).absolute() + pp = str(p.parent) + + # script_path = 'parkit/scripts/parkit_folder2hpcdme' + script_path = os.path.join(pp, "scripts", "projark") # projark ... archive a ccbr project!! 
+ + # Pass all arguments to the bash script and exit with its return code so failures reach the caller + sys.exit(subprocess.run([script_path] + sys.argv[1:]).returncode) + + +if __name__ == "__main__": + main() diff --git a/src/parkit/scripts/parkit_folder2hpcdme b/src/parkit/scripts/parkit_folder2hpcdme index bbff713..897bc88 100755 --- a/src/parkit/scripts/parkit_folder2hpcdme +++ b/src/parkit/scripts/parkit_folder2hpcdme @@ -92,6 +92,7 @@ parser.add_argument('--projectdesc',required=False, help='project description') parser.add_argument('--projecttitle',required=False, help='project title') parser.add_argument('--rawdata',action='store_true', help='If tarball is rawdata and needs to go under folder Rawdata') parser.add_argument('--cleanup',action='store_true', help='post transfer step to delete local files') +parser.add_argument('--makereadme',action='store_true', help='make readme file with destination location on vault') parser.add_argument('--hpcdmutilspath', required=True, help='what should be the value of env var HPC_DM_UTILS') parser.add_argument('--version',action='store_true', help='print version') EOF @@ -104,12 +105,15 @@ fi # folder is required required_argument "$FOLDER" "--folder" TARBALL="${FOLDER}.tar" +README="${FOLDER}.README" # cleanup option if [[ "${CLEANUP}" == "yes" ]];then for f in "${TARBALL}"* *response-header.tmp *response-message.json.tmp do - rm -iv $f + if [[ "$f" != "${TARBALL}.filelist" ]];then + rm -iv "$f" + fi done exit 0 fi @@ -176,7 +180,10 @@ echo "################ Running createtar #############################" cmd="${PARKIT} createtar --folder \"${FOLDER}\"" echo $cmd if [[ "$EXECUTOR" == "local" ]];then - `$cmd` + eval "$cmd" + if [[ "$?" 
!= "0" ]];then + exit 1 + fi else run_sbatch_cmd "$cmd" "$dependency" "createtar" "$HPCDMUTILSPATH" jid="$JID" @@ -196,7 +203,10 @@ echo "############ Running createemptycollection ######################" cmd="${PARKIT} createemptycollection --dest \"${DEST}\" --projectdesc \"${PROJECTDESC}\" --projecttitle \"${PROJECTTITLE}\"" echo $cmd if [[ "$EXECUTOR" == "local" ]];then - `$cmd` + eval "$cmd" + if [[ "$?" != "0" ]];then + exit 1 + fi else dependency="" if [[ "$jobids" != "" ]];then @@ -218,9 +228,15 @@ fi # RUN_createemptycollection ends if [[ "$RUN_createmetadata" == "1" ]];then echo "########### Running createmetadata ##############################" cmd="${PARKIT} createmetadata --tarball \"${TARBALL}\" --dest \"${DEST}\"" + if [[ "${RAWDATA}" == "yes" ]];then + cmd="${cmd} --collectiontype \"Rawdata\"" + fi echo $cmd if [[ "$EXECUTOR" == "local" ]];then - `$cmd` + eval "$cmd" + if [[ "$?" != "0" ]];then + exit 1 + fi else dependency="" if [[ "$jobids" != "" ]];then @@ -247,7 +263,10 @@ echo "############# Running deposittar ###############################" fi echo $cmd if [[ "$EXECUTOR" == "local" ]];then - `$cmd` + eval "$cmd" + if [[ "$?" != "0" ]];then + exit 1 + fi else dependency="" if [[ "$jobids" != "" ]];then @@ -262,5 +281,8 @@ echo "############# Running deposittar ###############################" jobids="$jobids:$jid" fi fi + if [[ "${MAKEREADME}" == "yes" ]];then + echo "${TARBALL} parked at ${DEST} on HPCDME!" 
>> "${README}" + fi echo "################################################################" fi # RUN_deposittar ends diff --git a/src/parkit/scripts/parkit_tarball2hpcdme b/src/parkit/scripts/parkit_tarball2hpcdme index 454de39..d88dc6d 100755 --- a/src/parkit/scripts/parkit_tarball2hpcdme +++ b/src/parkit/scripts/parkit_tarball2hpcdme @@ -85,6 +85,7 @@ parser.add_argument('--dest',required=False, help='vault collection path (Analys parser.add_argument('--projectdesc',required=False, help='project description') parser.add_argument('--projecttitle',required=False, help='project title') parser.add_argument('--cleanup',action='store_true', help='post transfer step to delete local files') +parser.add_argument('--makereadme',action='store_true', help='make readme file with destination location on vault') parser.add_argument('--hpcdmutilspath', required=True, help='what should be the value of env var HPC_DM_UTILS') parser.add_argument('--version',action='store_true', help='print version') EOF @@ -100,11 +101,14 @@ if [ ! -f "${TARBALL}" ];then echo "${TARBALL} does not exist!" exit 1 fi +README="${TARBALL}.README" # cleanup option if [[ "${CLEANUP}" == "yes" ]];then for f in "${TARBALL}"* *response-header.tmp *response-message.json.tmp do - rm -iv $f + if [[ "${TARBALL}.filelist" != "${f}" ]];then + rm -iv $f + fi done exit 0 fi @@ -169,7 +173,10 @@ echo "################ Running tarprep #############################" cmd="${PARKIT} tarprep --tarball \"${TARBALL}\"" echo $cmd if [[ "$EXECUTOR" == "local" ]];then - `$cmd` + eval "$cmd" + if [[ "$?" != "0" ]];then + exit 1 + fi else run_sbatch_cmd "$cmd" "$dependency" "tarprep" "$HPCDMUTILSPATH" jid="$JID" @@ -189,7 +196,10 @@ echo "############ Running createemptycollection ######################" cmd="${PARKIT} createemptycollection --dest \"${DEST}\" --projectdesc \"${PROJECTDESC}\" --projecttitle \"${PROJECTTITLE}\"" echo $cmd if [[ "$EXECUTOR" == "local" ]];then - `$cmd` + eval "$cmd" + if [[ "$?" 
!= "0" ]];then + exit 1 + fi else dependency="" if [[ "$jobids" != "" ]];then @@ -211,9 +221,15 @@ fi # RUN_createemptycollection ends if [[ "$RUN_createmetadata" == "1" ]];then echo "########### Running createmetadata ##############################" cmd="${PARKIT} createmetadata --tarball \"${TARBALL}\" --dest \"${DEST}\"" + if [[ "${RAWDATA}" == "yes" ]];then + cmd="${cmd} --collectiontype \"Rawdata\"" + fi echo $cmd if [[ "$EXECUTOR" == "local" ]];then - `$cmd` + eval "$cmd" + if [[ "$?" != "0" ]];then + exit 1 + fi else dependency="" if [[ "$jobids" != "" ]];then @@ -235,9 +251,15 @@ fi # RUN_createmetadata ends if [[ "$RUN_deposittar" == "1" ]];then echo "############# Running deposittar ###############################" cmd="${PARKIT} deposittar --tarball \"${TARBALL}\" --dest \"${DEST}\"" - echo $cmd + if [[ "${RAWDATA}" == "yes" ]];then + cmd="${cmd} --collectiontype \"Rawdata\"" + fi + echo $cmd if [[ "$EXECUTOR" == "local" ]];then - `$cmd` + eval "$cmd" + if [[ "$?" != "0" ]];then + exit 1 + fi else dependency="" if [[ "$jobids" != "" ]];then @@ -252,5 +274,8 @@ echo "############# Running deposittar ###############################" jobids="$jobids:$jid" fi fi + if [[ "${MAKEREADME}" == "yes" ]];then + echo "${TARBALL} parked at ${DEST} on HPCDME!" 
>> "${README}" + fi echo "################################################################" fi # RUN_deposittar ends diff --git a/src/parkit/scripts/projark b/src/parkit/scripts/projark new file mode 100755 index 0000000..cdaa4ac --- /dev/null +++ b/src/parkit/scripts/projark @@ -0,0 +1,73 @@ +#!/usr/bin/env bash + +SCRIPTNAME="$BASH_SOURCE" +SCRIPTDIRNAME=$(readlink -f $(dirname "$SCRIPTNAME")) + +# add "bin" to PATH +if [[ ":$PATH:" != *":${SCRIPTDIRNAME}:"* ]];then + export PATH=${PATH}:${SCRIPTDIRNAME} +fi + +# rely on redirect to be redirect to the python script +RESOURCEDIR=$(dirname "$SCRIPTDIRNAME") +TOOLDIR="$SCRIPTDIRNAME" +TOOLNAME="parkit" +# PARKIT="${TOOLDIR}/${TOOLNAME}" +PARKIT="parkit" + +# Check if --version is provided as the first argument +if [[ "$1" == "--version" ]]; then + echo "projark is using the following parkit version:" + ${PARKIT} --version + exit 0 +fi + +# Test if parkit is working +${PARKIT} > /dev/null 2>&1 || { echo "${PARKIT} not found or cannot be run!"; exit 1; } + +ARGPARSE_DESCRIPTION="Wrapper for folder2hpcdme for quick CCBR project archiving!" +source ${RESOURCEDIR}/resources/argparse.bash || exit 1 +argparse "$@" <') +parser.add_argument('--executor',required=False,default='slurm', help='slurm or local') +parser.add_argument('--rawdata',required=False,action='store_true', help='If tarball is rawdata and needs to go under folder Rawdata') +parser.add_argument('--cleanup',required=False,action='store_true', help='post transfer step to delete local files') +EOF + +# Destination path for archiving +TITLE="CCBR-${PROJECTNUMBER}" +DEST="/CCBR_Archive/GRIDFTP/Project_${TITLE}" + +# Check if SOURCE_CONDA_CMD is set +if [ -z "${SOURCE_CONDA_CMD}" ];then + echo "SOURCE_CONDA_CMD env variable must be set" + exit 1 +else + echo "SOURCE_CONDA_CMD is set to: $SOURCE_CONDA_CMD" +fi + +# Check if HPC_DM_UTILS is set +if [ -z "$HPC_DM_UTILS" ]; then + echo "HPC_DM_UTILS environment variable is not set." 
+ exit 1 # Exit the script with an error code +else + echo "HPC_DM_UTILS is set to: $HPC_DM_UTILS" +fi + +# Call folder2hpcdme with necessary parameters +cmd="parkit_folder2hpcdme --folder \"$FOLDER\" --dest \"$DEST\" --projecttitle \"$TITLE\" --projectdesc \"$TITLE\" --executor \"$EXECUTOR\" --hpcdmutilspath \"$HPC_DM_UTILS\" --makereadme" +if [[ "${RAWDATA}" == "yes" ]];then + cmd="${cmd} --rawdata" +fi +if [[ "${CLEANUP}" == "yes" ]];then + cmd="${cmd} --cleanup" +fi +echo $cmd +eval "$cmd" +status=$? +if [[ "$status" != "0" ]];then + echo "parkit_folder2hpcdme failed with exit status ${status}" >&2 +fi +# Exit with the same status code as folder2hpcdme +exit "$status" \ No newline at end of file diff --git a/src/parkit/src/VERSION b/src/parkit/src/VERSION index e9307ca..94245d2 100644 --- a/src/parkit/src/VERSION +++ b/src/parkit/src/VERSION @@ -1 +1 @@ -2.0.2 +2.0.2-dev diff --git a/src/parkit/src/createemptycollection.py b/src/parkit/src/createemptycollection.py index 0a19449..841711d 100644 --- a/src/parkit/src/createemptycollection.py +++ b/src/parkit/src/createemptycollection.py @@ -112,4 +112,4 @@ def createemptycollection(collectionpath, projectdesc="", projecttitle=""): ) cmd = f"cat {json_path} && rm -f {json_path}" - run_cmd(cmd=cmd) \ No newline at end of file + run_cmd(cmd=cmd) diff --git a/src/parkit/src/createmetadata.py b/src/parkit/src/createmetadata.py index 25a9a55..94778c2 100644 --- a/src/parkit/src/createmetadata.py +++ b/src/parkit/src/createmetadata.py @@ -4,11 +4,11 @@ from parkit.src.utils import * -def createmetadata(infile, collectionpath): - analysis_collectionpath = collectionpath + "/Analysis" +def createmetadata(infile, collectionpath, collectiontype="Analysis"): # collectiontype="Rawdata" for rawdata or "Analysis" (default, the previous hard-coded behaviour) for analysis p = Path(infile) p = p.absolute() + analysis_collectionpath = collectionpath + "/" + collectiontype filename = p.name vaultpath = analysis_collectionpath + "/" + filename