diff --git a/.DS_Store b/.DS_Store new file mode 100755 index 0000000..e69de29 diff --git a/cfg.yaml b/cfg.yaml index 6ca39c4..60295f7 100644 --- a/cfg.yaml +++ b/cfg.yaml @@ -1,7 +1,7 @@ config-yaml: path-settings: - reference-directory: /gpfs/data/shared/databases/refchef_refs - git-directory: /gpfs/data/shared/databases/cbc-references-refchef + reference-directory: /oscar/data/shared/databases/refchef_refs + git-directory: /oscar/data/shared/databases/refchef_staging/cbc-references-refchef remote-repository: compbiocore/cbc-references-refchef log-settings: log: 'yes' diff --git a/logs/refchef_20240806_104132.log b/logs/refchef_20240806_104132.log new file mode 100644 index 0000000..9e5d05b --- /dev/null +++ b/logs/refchef_20240806_104132.log @@ -0,0 +1,214 @@ +2024-08-06 10:41:32,450 INFO: + =========================================== + REFCHEF 🐶 + ------------------------------------------- + - References will be downloaded to: /oscar/data/shared/databases/refchef_refs + - Remote repository for master.yaml compbiocore/cbc-references-refchef + - Local repository for master.yaml /oscar/data/shared/databases/refchef_staging/cbc-references-refchef + - Logs files: /oscar/data/shared/databases/refchef_staging/cbc-references-refchef/logs/ + ------------------------------------------- + +2024-08-06 10:41:32,450 DEBUG: parse kwarg method: 2 ... +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,964 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:34,965 INFO: + No references to download. + +2024-08-06 10:41:35,406 INFO: References processed: ['spur31', 'cdubliniensis_cd36', 'broad_bundle_hg19', 'drosophila-melanogaster-bdgp6rel9', 'wbps8_ws256', 'grch37_release87', 'grcm38_p5', 'nt_db', 'nr_db', 'Escherichia_coli_K12', 'grch38_release98', 'saccer3', 'spur42', 'spur50', 'grcm38_p6', 'refseq_genomic', 'refseq_rna', 'refseq_protein', 'pr2_db', 'OM-RGC', 'silva_arbfiles', 'uniprot', 'rnor60', 'Genbank_UniVec_2018_11_28', 'Genbank_bacteria_all_2018_11_20_15_34_part1', 'Genbank_bacteria_all_2018_11_20_15_34_part2', 'Genbank_bacteria_all_2018_11_20_15_34_part3', 'cellranger_mm10', 'S_scrofa', 'alphafold_data', 'GRCm38_gencode'] +2024-08-06 10:41:35,406 INFO: Location of references: /oscar/data/shared/databases/refchef_refs diff --git a/master.yaml b/master.yaml index de49ca0..606c40f 100644 --- a/master.yaml +++ b/master.yaml @@ -9128,4 +9128,3 @@ alphafold_data: files: - metadata.txt uuid: af939b94-b1ec-11ec-ac1a-ac1f6b1b78c8 - diff --git a/new_parsed.yaml b/new_parsed.yaml new file mode 100644 index 0000000..ec58341 --- /dev/null +++ b/new_parsed.yaml @@ -0,0 +1,24 @@ +GRCm38_gencode: + metadata: + name: GRCm38_gencode + common_name: mouse + custom: 'No' + description: Gencode mouse genome GRCm38 release M25 primary assembly + downloader: joselynn wallace + ncbi_taxon_id: null + ensembl_release_number: null + accession: + genbank: none + refseq: none + organism: Mus musculus + organization: gencode + category: genomics + levels: + annotations: + - component: null + complete: + status: 'false' + commands: + - wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M25/gencode.vM25.basic.annotation.gtf.gz + - gunzip -c gencode.vM25.basic.annotation.gtf.gz > gencode.vM25.basic.annotation.gtf + - md5sum *.gtf > final_checksums.md5 diff --git a/new_ref.yaml b/new_ref.yaml new file mode 100644 index 0000000..e97f65e --- /dev/null +++ b/new_ref.yaml @@ -0,0 +1 @@ +{"What is your email address?":"joselynn_wallace@brown.edu","name":"GRCm38_gencode","organism":"Mus musculus","common":"mouse","taxon":"10090","organization":"gencode","description":"Gencode mouse genome GRCm38 release M25 primary assembly","genbank":"none","refseq":"none","ensembl":"none","custom":"No","category":"genomics","level":"annotations","annotations_component":"GTF","Commands":"wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M25/gencode.vM25.basic.annotation.gtf.gz\ngunzip -c gencode.vM25.basic.annotation.gtf.gz > gencode.vM25.basic.annotation.gtf\nmd5sum *.gtf > final_checksums.md5"} diff --git a/parser.py b/parser.py index 722969b..e424f6e 100644 --- a/parser.py +++ b/parser.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 - import json +import re import oyaml as yaml @@ -9,7 +9,8 @@ datajson = json.loads(data) # if yaml_level is indices, yaml_level = datajson.get('level') - yaml_commands = datajson.get('Commands') + yaml_commands = datajson.get('Commands').rstrip() + commands = re.split('\n|, ', yaml_commands) if 'src' in datajson: yaml_dict = { datajson.get('name'): @@ -36,7 +37,7 @@ 'complete': {'status': 'false'}, 'src': datajson.get('src'), - 'commands': datajson.get('Commands').split(', ') + 'commands': commands } ] } @@ -67,7 +68,7 @@ 'component': datajson.get('component'), 'complete': {'status': 'false'}, - 'commands': datajson.get('Commands').split(', ') + 'commands': commands } ] }