From 20a670f59e6ed1ed5774ddb3ea071943e6a77379 Mon Sep 17 00:00:00 2001 From: edsu7 <22638361+edsu7@users.noreply.github.com> Date: Wed, 14 Jun 2023 13:20:14 -0400 Subject: [PATCH 1/5] [wfpm v0.8.0] started a new version sanity-check@0.1.2 from sanity-check@0.1.1 which was released --- sanity-check/main.nf | 2 +- sanity-check/pkg.json | 2 +- sanity-check/tests/checker.nf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sanity-check/main.nf b/sanity-check/main.nf index 3709e40..7d36390 100755 --- a/sanity-check/main.nf +++ b/sanity-check/main.nf @@ -24,7 +24,7 @@ /* this block is auto-generated based on info from pkg.json where */ /* changes can be made if needed, do NOT modify this block manually */ nextflow.enable.dsl = 2 -version = '0.1.1' +version = '0.1.2' container = [ 'ghcr.io': 'ghcr.io/icgc-argo/argo-data-submission.sanity-check' diff --git a/sanity-check/pkg.json b/sanity-check/pkg.json index a700cc4..76ec072 100644 --- a/sanity-check/pkg.json +++ b/sanity-check/pkg.json @@ -1,6 +1,6 @@ { "name": "sanity-check", - "version": "0.1.1", + "version": "0.1.2", "description": "Data submission sanity checks including verifying info from Clinical API", "main": "main.nf", "deprecated": false, diff --git a/sanity-check/tests/checker.nf b/sanity-check/tests/checker.nf index 96c1fce..c54bd4f 100755 --- a/sanity-check/tests/checker.nf +++ b/sanity-check/tests/checker.nf @@ -29,7 +29,7 @@ /* this block is auto-generated based on info from pkg.json where */ /* changes can be made if needed, do NOT modify this block manually */ nextflow.enable.dsl = 2 -version = '0.1.1' +version = '0.1.2' container = [ 'ghcr.io': 'ghcr.io/icgc-argo/argo-data-submission.sanity-check' From dbbaf6a7ef2065ae26772defbcf1c2b930516e74 Mon Sep 17 00:00:00 2001 From: edsu7 <22638361+edsu7@users.noreply.github.com> Date: Wed, 14 Jun 2023 14:37:37 -0400 Subject: [PATCH 2/5] added check for existing normal sample --- sanity-check/main.py | 26 ++++++++++++++++--- .../tests/input/bad_tumour_example.tsv | 2 ++ .../input/comparison_good_tumour_example.tsv | 2 ++ .../tests/input/good_tumour_example.tsv | 2 ++ .../tests/local_bad_tumour_example.json | 6 +++++ .../tests/local_good_tumour_example.json | 6 +++++ 6 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 sanity-check/tests/input/bad_tumour_example.tsv create mode 100644 sanity-check/tests/input/comparison_good_tumour_example.tsv create mode 100644 sanity-check/tests/input/good_tumour_example.tsv create mode 100644 sanity-check/tests/local_bad_tumour_example.json create mode 100644 sanity-check/tests/local_good_tumour_example.json diff --git a/sanity-check/main.py b/sanity-check/main.py index b5f3fbe..21e5a26 100755 --- a/sanity-check/main.py +++ b/sanity-check/main.py @@ -58,7 +58,7 @@ def main(): args.clinical_url, args.api_token ) - + final_metadata=compile_metadata( metadata, clinical_metadata @@ -67,6 +67,7 @@ def main(): final_metadata, args.submission_song_url ) + if not args.force: check_analysis_exists( final_metadata, @@ -74,7 +75,7 @@ def main(): ) update_tsv(final_metadata,"updated_"+args.experiment_info_tsv) - + def load_tsv(experiment_info_tsv): metadata_dict = {} @@ -145,12 +146,31 @@ def get_clinical(metadata,clinical_url,api_token): sample_ind=[ele for ele,sample in enumerate(response.json()['specimens'][specimen_ind[0]]['samples']) if sample['sampleId']==return_metadata['sample_id']] if len(sample_ind)!=1: - sys.exit("ID Mismatch detected. Sample_id:'%s'/'%s' was not found within Specimen:'%s'/'%s' 's samples" % (metadata['submitter_sample_id'],return_metadata['sample_id'],metadata['submitter_specimen_id'],return_metadata['specimen_id'],)) + sys.exit("ID Mismatch detected. Sample_id:'%s'/'%s' was not found within Specimen:'%s'/'%s' 's samples" % (metadata['submitter_sample_id'],return_metadata['sample_id'],metadata['submitter_specimen_id'],return_metadata['specimen_id'])) return_metadata['sample_type']=response.json()['specimens'][specimen_ind[0]]['samples'][sample_ind[0]]['sampleType'] return_metadata['submitter_sample_id']=response.json()['specimens'][specimen_ind[0]]['samples'][sample_ind[0]]['submitterId'] + if return_metadata['tumour_normal_designation']=="Tumour" and metadata.get("submitter_matched_normal_sample_id"): + check_tumour_sample_exists(metadata,response.json()) return return_metadata +def check_tumour_sample_exists(metadata,clinical_metadata): + submitter_id=metadata['submitter_matched_normal_sample_id'] + + return_id=None + tumourNormalDesignation=None + + for specimen in clinical_metadata['specimens']: + for samples in specimen['samples']: + if samples['submitterId']==submitter_id: + return_id=samples['submitterId'] + tumourNormalDesignation=specimen["tumourNormalDesignation"] + if return_id==None: + sys.exit("'submitter_matched_normal_sample_id':%s was not found in study. Please verify '%s' has been registered." % (submitter_id,submitter_id)) + if tumourNormalDesignation=="Tumour": + sys.exit("'submitter_matched_normal_sample_id':%s detected as tumour instead of normal. Please verify correct sample." % (submitter_id)) + + def compile_metadata(metadata,clinical_metadata): ###Over-write metadata with clinical_metadata diff --git a/sanity-check/tests/input/bad_tumour_example.tsv b/sanity-check/tests/input/bad_tumour_example.tsv new file mode 100644 index 0000000..342165c --- /dev/null +++ b/sanity-check/tests/input/bad_tumour_example.tsv @@ -0,0 +1,2 @@ +type program_id submitter_sequencing_experiment_id submitter_donor_id submitter_specimen_id submitter_sample_id submitter_matched_normal_sample_id sequencing_center platform platform_model experimental_strategy sequencing_date read_group_count +sequencing_experiment TEST-CA EXP-9 TEST_SUBMITTER_DONOR_ID_fpkyjwkleu TEST_SUBMITTER_SPECIMEN_ID_fpkyjwkleuT1 TEST_SUBMITTER_SAMPLE_ID_fpkyjwkleuT1 TEST_SUBMITTER_SAMPLE_ID_fpkyjwkleuN QCMG ILLUMINA Illumina HiSeq 2000 WGS 4 \ No newline at end of file diff --git a/sanity-check/tests/input/comparison_good_tumour_example.tsv b/sanity-check/tests/input/comparison_good_tumour_example.tsv new file mode 100644 index 0000000..e4643fc --- /dev/null +++ b/sanity-check/tests/input/comparison_good_tumour_example.tsv @@ -0,0 +1,2 @@ +type program_id submitter_sequencing_experiment_id submitter_donor_id submitter_specimen_id submitter_sample_id submitter_matched_normal_sample_id sequencing_center platform platform_model experimental_strategy sequencing_date read_group_count gender specimen_tissue_source tumour_normal_designation specimen_type sample_type +sequencing_experiment TEST-CA EXP-9 TEST_SUBMITTER_DONOR_ID_fpkyjwkleu TEST_SUBMITTER_SPECIMEN_ID_fpkyjwkleuT1 TEST_SUBMITTER_SAMPLE_ID_fpkyjwkleuT1 TEST_SUBMITTER_SAMPLE_ID_fpkyjwkleuN1 QCMG ILLUMINA Illumina HiSeq 2000 WGS 4 Female Urine Tumour Metastatic tumour - metastasis to distant location Total RNA diff --git a/sanity-check/tests/input/good_tumour_example.tsv b/sanity-check/tests/input/good_tumour_example.tsv new file mode 100644 index 0000000..cc809e8 --- /dev/null +++ b/sanity-check/tests/input/good_tumour_example.tsv @@ -0,0 +1,2 @@ +type program_id submitter_sequencing_experiment_id submitter_donor_id submitter_specimen_id submitter_sample_id submitter_matched_normal_sample_id sequencing_center platform platform_model experimental_strategy sequencing_date read_group_count +sequencing_experiment TEST-CA EXP-9 TEST_SUBMITTER_DONOR_ID_fpkyjwkleu TEST_SUBMITTER_SPECIMEN_ID_fpkyjwkleuT1 TEST_SUBMITTER_SAMPLE_ID_fpkyjwkleuT1 TEST_SUBMITTER_SAMPLE_ID_fpkyjwkleuN1 QCMG ILLUMINA Illumina HiSeq 2000 WGS 4 \ No newline at end of file diff --git a/sanity-check/tests/local_bad_tumour_example.json b/sanity-check/tests/local_bad_tumour_example.json new file mode 100644 index 0000000..61a3f24 --- /dev/null +++ b/sanity-check/tests/local_bad_tumour_example.json @@ -0,0 +1,6 @@ +{ + "experiment_info_tsv": "input/bad_tumour_example.tsv", + "song_url": "https://submission-song.rdpc-qa.cancercollaboratory.org", + "clinical_url": "https://clinical.qa.argo.cancercollaboratory.org", + "expected_output": "input/comparison_good_example.tsv" +} diff --git a/sanity-check/tests/local_good_tumour_example.json b/sanity-check/tests/local_good_tumour_example.json new file mode 100644 index 0000000..f95725b --- /dev/null +++ b/sanity-check/tests/local_good_tumour_example.json @@ -0,0 +1,6 @@ +{ + "experiment_info_tsv": "input/good_tumour_example.tsv", + "song_url": "https://submission-song.rdpc-qa.cancercollaboratory.org", + "clinical_url": "https://clinical.qa.argo.cancercollaboratory.org", + "expected_output": "input/comparison_good_tumour_example.tsv" +} From d6889c230f936e392b0f2115278f94abf14e7727 Mon Sep 17 00:00:00 2001 From: edsu7 <22638361+edsu7@users.noreply.github.com> Date: Fri, 16 Jun 2023 10:20:06 -0400 Subject: [PATCH 3/5] add check to only allow `null` for `RNA-Seq` and `Targeted-Seq` --- sanity-check/main.py | 12 ++++++++++-- ...ad_tumour_example.tsv => bad1_tumour_example.tsv} | 0 sanity-check/tests/input/bad2_tumour_example.tsv | 2 ++ .../tests/input/comparison_good1_tumour_example.tsv | 2 ++ .../tests/input/comparison_good2_tumour_example.tsv | 2 ++ ...d_tumour_example.tsv => good1_tumour_example.tsv} | 0 sanity-check/tests/input/good2_tumour_example.tsv | 2 ++ ...ple.json => local_bad_tumour_badRef_example.json} | 2 +- ...le.json => local_bad_tumour_nullWGS_example.json} | 4 ++-- sanity-check/tests/local_good_tumour_ts_example.json | 6 ++++++ .../tests/local_good_tumour_wgs_example.json | 6 ++++++ 11 files changed, 33 insertions(+), 5 deletions(-) rename sanity-check/tests/input/{bad_tumour_example.tsv => bad1_tumour_example.tsv} (100%) create mode 100644 sanity-check/tests/input/bad2_tumour_example.tsv create mode 100644 sanity-check/tests/input/comparison_good1_tumour_example.tsv create mode 100644 sanity-check/tests/input/comparison_good2_tumour_example.tsv rename sanity-check/tests/input/{good_tumour_example.tsv => good1_tumour_example.tsv} (100%) create mode 100644 sanity-check/tests/input/good2_tumour_example.tsv rename sanity-check/tests/{local_bad_tumour_example.json => local_bad_tumour_badRef_example.json} (77%) rename sanity-check/tests/{local_good_tumour_example.json => local_bad_tumour_nullWGS_example.json} (54%) create mode 100644 sanity-check/tests/local_good_tumour_ts_example.json create mode 100644 sanity-check/tests/local_good_tumour_wgs_example.json diff --git a/sanity-check/main.py b/sanity-check/main.py index 21e5a26..eea6113 100755 --- a/sanity-check/main.py +++ b/sanity-check/main.py @@ -149,11 +149,19 @@ def get_clinical(metadata,clinical_url,api_token): sys.exit("ID Mismatch detected. Sample_id:'%s'/'%s' was not found within Specimen:'%s'/'%s' 's samples" % (metadata['submitter_sample_id'],return_metadata['sample_id'],metadata['submitter_specimen_id'],return_metadata['specimen_id'])) return_metadata['sample_type']=response.json()['specimens'][specimen_ind[0]]['samples'][sample_ind[0]]['sampleType'] return_metadata['submitter_sample_id']=response.json()['specimens'][specimen_ind[0]]['samples'][sample_ind[0]]['submitterId'] - if return_metadata['tumour_normal_designation']=="Tumour" and metadata.get("submitter_matched_normal_sample_id"): - check_tumour_sample_exists(metadata,response.json()) + + if return_metadata['tumour_normal_designation']=="Tumour": + #WGS, WXS, RNA-Seq, Bisulfite-Seq, ChIP-Seq, Targeted-Seq + if metadata.get("submitter_matched_normal_sample_id"): + check_tumour_sample_exists(metadata,response.json()) + else: + if metadata.get("experimental_strategy")!="RNA-Seq" and metadata.get("experimental_strategy")!= "Targeted-Seq": + sys.exit("Null entry for `submitter_matched_normal_sample_id` detected. For tumour `experiment_strategy` type %s ,this field is required and must reference a registered normal sample." % (metadata.get("experimental_strategy"))) return return_metadata + + def check_tumour_sample_exists(metadata,clinical_metadata): submitter_id=metadata['submitter_matched_normal_sample_id'] diff --git a/sanity-check/tests/input/bad_tumour_example.tsv b/sanity-check/tests/input/bad1_tumour_example.tsv similarity index 100% rename from sanity-check/tests/input/bad_tumour_example.tsv rename to sanity-check/tests/input/bad1_tumour_example.tsv diff --git a/sanity-check/tests/input/bad2_tumour_example.tsv b/sanity-check/tests/input/bad2_tumour_example.tsv new file mode 100644 index 0000000..834f45a --- /dev/null +++ b/sanity-check/tests/input/bad2_tumour_example.tsv @@ -0,0 +1,2 @@ +type program_id submitter_sequencing_experiment_id submitter_donor_id submitter_specimen_id submitter_sample_id submitter_matched_normal_sample_id sequencing_center platform platform_model experimental_strategy sequencing_date read_group_count +sequencing_experiment TEST-CA EXP-9 TEST_SUBMITTER_DONOR_ID_fpkyjwkleu TEST_SUBMITTER_SPECIMEN_ID_fpkyjwkleuT1 TEST_SUBMITTER_SAMPLE_ID_fpkyjwkleuT1 QCMG ILLUMINA Illumina HiSeq 2000 WGS 4 diff --git a/sanity-check/tests/input/comparison_good1_tumour_example.tsv b/sanity-check/tests/input/comparison_good1_tumour_example.tsv new file mode 100644 index 0000000..e4643fc --- /dev/null +++ b/sanity-check/tests/input/comparison_good1_tumour_example.tsv @@ -0,0 +1,2 @@ +type program_id submitter_sequencing_experiment_id submitter_donor_id submitter_specimen_id submitter_sample_id submitter_matched_normal_sample_id sequencing_center platform platform_model experimental_strategy sequencing_date read_group_count gender specimen_tissue_source tumour_normal_designation specimen_type sample_type +sequencing_experiment TEST-CA EXP-9 TEST_SUBMITTER_DONOR_ID_fpkyjwkleu TEST_SUBMITTER_SPECIMEN_ID_fpkyjwkleuT1 TEST_SUBMITTER_SAMPLE_ID_fpkyjwkleuT1 TEST_SUBMITTER_SAMPLE_ID_fpkyjwkleuN1 QCMG ILLUMINA Illumina HiSeq 2000 WGS 4 Female Urine Tumour Metastatic tumour - metastasis to distant location Total RNA diff --git a/sanity-check/tests/input/comparison_good2_tumour_example.tsv b/sanity-check/tests/input/comparison_good2_tumour_example.tsv new file mode 100644 index 0000000..5cf6f36 --- /dev/null +++ b/sanity-check/tests/input/comparison_good2_tumour_example.tsv @@ -0,0 +1,2 @@ +type program_id submitter_sequencing_experiment_id submitter_donor_id submitter_specimen_id submitter_sample_id submitter_matched_normal_sample_id sequencing_center platform platform_model experimental_strategy sequencing_date read_group_count gender specimen_tissue_source tumour_normal_designation specimen_type sample_type +sequencing_experiment TEST-CA EXP-9 TEST_SUBMITTER_DONOR_ID_fpkyjwkleu TEST_SUBMITTER_SPECIMEN_ID_fpkyjwkleuT1 TEST_SUBMITTER_SAMPLE_ID_fpkyjwkleuT1 QCMG ILLUMINA Illumina HiSeq 2000 Targeted-Seq 4 Female Urine Tumour Metastatic tumour - metastasis to distant location Total RNA diff --git a/sanity-check/tests/input/good_tumour_example.tsv b/sanity-check/tests/input/good1_tumour_example.tsv similarity index 100% rename from sanity-check/tests/input/good_tumour_example.tsv rename to sanity-check/tests/input/good1_tumour_example.tsv diff --git a/sanity-check/tests/input/good2_tumour_example.tsv b/sanity-check/tests/input/good2_tumour_example.tsv new file mode 100644 index 0000000..6e23ed7 --- /dev/null +++ b/sanity-check/tests/input/good2_tumour_example.tsv @@ -0,0 +1,2 @@ +type program_id submitter_sequencing_experiment_id submitter_donor_id submitter_specimen_id submitter_sample_id submitter_matched_normal_sample_id sequencing_center platform platform_model experimental_strategy sequencing_date read_group_count +sequencing_experiment TEST-CA EXP-9 TEST_SUBMITTER_DONOR_ID_fpkyjwkleu TEST_SUBMITTER_SPECIMEN_ID_fpkyjwkleuT1 TEST_SUBMITTER_SAMPLE_ID_fpkyjwkleuT1 QCMG ILLUMINA Illumina HiSeq 2000 Targeted-Seq 4 diff --git a/sanity-check/tests/local_bad_tumour_example.json b/sanity-check/tests/local_bad_tumour_badRef_example.json similarity index 77% rename from sanity-check/tests/local_bad_tumour_example.json rename to sanity-check/tests/local_bad_tumour_badRef_example.json index 61a3f24..e3fd86a 100644 --- a/sanity-check/tests/local_bad_tumour_example.json +++ b/sanity-check/tests/local_bad_tumour_badRef_example.json @@ -1,5 +1,5 @@ { - "experiment_info_tsv": "input/bad_tumour_example.tsv", + "experiment_info_tsv": "input/bad1_tumour_example.tsv", "song_url": "https://submission-song.rdpc-qa.cancercollaboratory.org", "clinical_url": "https://clinical.qa.argo.cancercollaboratory.org", "expected_output": "input/comparison_good_example.tsv" diff --git a/sanity-check/tests/local_good_tumour_example.json b/sanity-check/tests/local_bad_tumour_nullWGS_example.json similarity index 54% rename from sanity-check/tests/local_good_tumour_example.json rename to sanity-check/tests/local_bad_tumour_nullWGS_example.json index f95725b..e3ebd08 100644 --- a/sanity-check/tests/local_good_tumour_example.json +++ b/sanity-check/tests/local_bad_tumour_nullWGS_example.json @@ -1,6 +1,6 @@ { - "experiment_info_tsv": "input/good_tumour_example.tsv", + "experiment_info_tsv": "input/bad2_tumour_example.tsv", "song_url": "https://submission-song.rdpc-qa.cancercollaboratory.org", "clinical_url": "https://clinical.qa.argo.cancercollaboratory.org", - "expected_output": "input/comparison_good_tumour_example.tsv" + "expected_output": "input/comparison_good_example.tsv" } diff --git a/sanity-check/tests/local_good_tumour_ts_example.json b/sanity-check/tests/local_good_tumour_ts_example.json new file mode 100644 index 0000000..5ccb5e7 --- /dev/null +++ b/sanity-check/tests/local_good_tumour_ts_example.json @@ -0,0 +1,6 @@ +{ + "experiment_info_tsv": "input/good2_tumour_example.tsv", + "song_url": "https://submission-song.rdpc-qa.cancercollaboratory.org", + "clinical_url": "https://clinical.qa.argo.cancercollaboratory.org", + "expected_output": "input/comparison_good2_tumour_example.tsv" +} diff --git a/sanity-check/tests/local_good_tumour_wgs_example.json b/sanity-check/tests/local_good_tumour_wgs_example.json new file mode 100644 index 0000000..928f8b5 --- /dev/null +++ b/sanity-check/tests/local_good_tumour_wgs_example.json @@ -0,0 +1,6 @@ +{ + "experiment_info_tsv": "input/good1_tumour_example.tsv", + "song_url": "https://submission-song.rdpc-qa.cancercollaboratory.org", + "clinical_url": "https://clinical.qa.argo.cancercollaboratory.org", + "expected_output": "input/comparison_good1_tumour_example.tsv" +} From e9f83f3ef8c167a9c4a688070eff6581b89f8a51 Mon Sep 17 00:00:00 2001 From: edsu7 <22638361+edsu7@users.noreply.github.com> Date: Fri, 23 Jun 2023 16:36:36 -0400 Subject: [PATCH 4/5] rename function to `check_normal_sample_exists` --- sanity-check/main.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sanity-check/main.py b/sanity-check/main.py index eea6113..881cf0c 100755 --- a/sanity-check/main.py +++ b/sanity-check/main.py @@ -152,17 +152,17 @@ def get_clinical(metadata,clinical_url,api_token): if return_metadata['tumour_normal_designation']=="Tumour": #WGS, WXS, RNA-Seq, Bisulfite-Seq, ChIP-Seq, Targeted-Seq - if metadata.get("submitter_matched_normal_sample_id"): - check_tumour_sample_exists(metadata,response.json()) - else: - if metadata.get("experimental_strategy")!="RNA-Seq" and metadata.get("experimental_strategy")!= "Targeted-Seq": + if metadata.get("experimental_strategy") in ['WGS', 'WXS']: + if metadata.get("submitter_matched_normal_sample_id"): + check_normal_sample_exists(metadata,response.json()) + else: sys.exit("Null entry for `submitter_matched_normal_sample_id` detected. For tumour `experiment_strategy` type %s ,this field is required and must reference a registered normal sample." % (metadata.get("experimental_strategy"))) return return_metadata -def check_tumour_sample_exists(metadata,clinical_metadata): +def check_normal_sample_exists(metadata,clinical_metadata): submitter_id=metadata['submitter_matched_normal_sample_id'] return_id=None From c52a2d4b6b9c88b650b8ff2f6603362ec760d781 Mon Sep 17 00:00:00 2001 From: edsu7 <22638361+edsu7@users.noreply.github.com> Date: Mon, 26 Jun 2023 13:13:27 -0400 Subject: [PATCH 5/5] updated main.py --- sanity-check/main.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sanity-check/main.py b/sanity-check/main.py index 881cf0c..5e58ada 100755 --- a/sanity-check/main.py +++ b/sanity-check/main.py @@ -152,10 +152,10 @@ def get_clinical(metadata,clinical_url,api_token): if return_metadata['tumour_normal_designation']=="Tumour": #WGS, WXS, RNA-Seq, Bisulfite-Seq, ChIP-Seq, Targeted-Seq - if metadata.get("experimental_strategy") in ['WGS', 'WXS']: - if metadata.get("submitter_matched_normal_sample_id"): - check_normal_sample_exists(metadata,response.json()) - else: + if metadata.get("submitter_matched_normal_sample_id"): + check_normal_sample_exists(metadata,response.json()) + else: + if metadata.get("experimental_strategy") in ['WGS', 'WXS']: sys.exit("Null entry for `submitter_matched_normal_sample_id` detected. For tumour `experiment_strategy` type %s ,this field is required and must reference a registered normal sample." % (metadata.get("experimental_strategy"))) return return_metadata