diff --git a/sanity-check/main.nf b/sanity-check/main.nf index 3709e40..7d36390 100755 --- a/sanity-check/main.nf +++ b/sanity-check/main.nf @@ -24,7 +24,7 @@ /* this block is auto-generated based on info from pkg.json where */ /* changes can be made if needed, do NOT modify this block manually */ nextflow.enable.dsl = 2 -version = '0.1.1' +version = '0.1.2' container = [ 'ghcr.io': 'ghcr.io/icgc-argo/argo-data-submission.sanity-check' diff --git a/sanity-check/main.py b/sanity-check/main.py index b5f3fbe..5e58ada 100755 --- a/sanity-check/main.py +++ b/sanity-check/main.py @@ -58,7 +58,7 @@ def main(): args.clinical_url, args.api_token ) - + final_metadata=compile_metadata( metadata, clinical_metadata @@ -67,6 +67,7 @@ def main(): final_metadata, args.submission_song_url ) + if not args.force: check_analysis_exists( final_metadata, @@ -74,7 +75,7 @@ def main(): ) update_tsv(final_metadata,"updated_"+args.experiment_info_tsv) - + def load_tsv(experiment_info_tsv): metadata_dict = {} @@ -145,12 +146,39 @@ def get_clinical(metadata,clinical_url,api_token): sample_ind=[ele for ele,sample in enumerate(response.json()['specimens'][specimen_ind[0]]['samples']) if sample['sampleId']==return_metadata['sample_id']] if len(sample_ind)!=1: - sys.exit("ID Mismatch detected. Sample_id:'%s'/'%s' was not found within Specimen:'%s'/'%s' 's samples" % (metadata['submitter_sample_id'],return_metadata['sample_id'],metadata['submitter_specimen_id'],return_metadata['specimen_id'],)) + sys.exit("ID Mismatch detected. 
def check_normal_sample_exists(metadata, clinical_metadata):
    """Verify that the matched normal sample referenced by a tumour is valid.

    Looks up ``metadata['submitter_matched_normal_sample_id']`` among the
    samples of every specimen in ``clinical_metadata['specimens']`` and
    terminates the program with an explanatory message when the reference
    cannot be honoured.

    Args:
        metadata: Mapping holding the submitted experiment fields; must
            contain the key ``submitter_matched_normal_sample_id``.
        clinical_metadata: Mapping with a ``specimens`` list; each specimen
            carries ``tumourNormalDesignation`` and a ``samples`` list whose
            entries carry ``submitterId``.

    Returns:
        None when the referenced sample exists and its specimen is not
        designated ``"Tumour"``.

    Raises:
        SystemExit: If no sample with the given submitter ID is present, or
            if the specimen owning it is designated ``"Tumour"``.
    """
    submitter_id = metadata['submitter_matched_normal_sample_id']

    # Record the designation of every specimen that owns a matching sample.
    # An explicit list of matches replaces the previous "echo the ID back and
    # compare with None" bookkeeping. Missing or null `specimens`/`samples`
    # entries are treated as empty so the user gets the "not found" message
    # instead of a bare KeyError/TypeError.
    matched_designations = []
    for specimen in clinical_metadata.get('specimens') or []:
        for sample in specimen.get('samples') or []:
            if sample.get('submitterId') == submitter_id:
                matched_designations.append(specimen["tumourNormalDesignation"])

    if not matched_designations:
        sys.exit("'submitter_matched_normal_sample_id':%s was not found in study. Please verify '%s' has been registered." % (submitter_id, submitter_id))

    # If the ID matches more than once, the last occurrence decides, which is
    # what the original loop did by overwriting on each match.
    if matched_designations[-1] == "Tumour":
        sys.exit("'submitter_matched_normal_sample_id':%s detected as tumour instead of normal. Please verify correct sample." % (submitter_id,))
submitter_donor_id submitter_specimen_id submitter_sample_id submitter_matched_normal_sample_id sequencing_center platform platform_model experimental_strategy sequencing_date read_group_count +sequencing_experiment TEST-CA EXP-9 TEST_SUBMITTER_DONOR_ID_fpkyjwkleu TEST_SUBMITTER_SPECIMEN_ID_fpkyjwkleuT1 TEST_SUBMITTER_SAMPLE_ID_fpkyjwkleuT1 QCMG ILLUMINA Illumina HiSeq 2000 WGS 4 diff --git a/sanity-check/tests/input/comparison_good1_tumour_example.tsv b/sanity-check/tests/input/comparison_good1_tumour_example.tsv new file mode 100644 index 0000000..e4643fc --- /dev/null +++ b/sanity-check/tests/input/comparison_good1_tumour_example.tsv @@ -0,0 +1,2 @@ +type program_id submitter_sequencing_experiment_id submitter_donor_id submitter_specimen_id submitter_sample_id submitter_matched_normal_sample_id sequencing_center platform platform_model experimental_strategy sequencing_date read_group_count gender specimen_tissue_source tumour_normal_designation specimen_type sample_type +sequencing_experiment TEST-CA EXP-9 TEST_SUBMITTER_DONOR_ID_fpkyjwkleu TEST_SUBMITTER_SPECIMEN_ID_fpkyjwkleuT1 TEST_SUBMITTER_SAMPLE_ID_fpkyjwkleuT1 TEST_SUBMITTER_SAMPLE_ID_fpkyjwkleuN1 QCMG ILLUMINA Illumina HiSeq 2000 WGS 4 Female Urine Tumour Metastatic tumour - metastasis to distant location Total RNA diff --git a/sanity-check/tests/input/comparison_good2_tumour_example.tsv b/sanity-check/tests/input/comparison_good2_tumour_example.tsv new file mode 100644 index 0000000..5cf6f36 --- /dev/null +++ b/sanity-check/tests/input/comparison_good2_tumour_example.tsv @@ -0,0 +1,2 @@ +type program_id submitter_sequencing_experiment_id submitter_donor_id submitter_specimen_id submitter_sample_id submitter_matched_normal_sample_id sequencing_center platform platform_model experimental_strategy sequencing_date read_group_count gender specimen_tissue_source tumour_normal_designation specimen_type sample_type +sequencing_experiment TEST-CA EXP-9 TEST_SUBMITTER_DONOR_ID_fpkyjwkleu 
TEST_SUBMITTER_SPECIMEN_ID_fpkyjwkleuT1 TEST_SUBMITTER_SAMPLE_ID_fpkyjwkleuT1 QCMG ILLUMINA Illumina HiSeq 2000 Targeted-Seq 4 Female Urine Tumour Metastatic tumour - metastasis to distant location Total RNA diff --git a/sanity-check/tests/input/comparison_good_tumour_example.tsv b/sanity-check/tests/input/comparison_good_tumour_example.tsv new file mode 100644 index 0000000..e4643fc --- /dev/null +++ b/sanity-check/tests/input/comparison_good_tumour_example.tsv @@ -0,0 +1,2 @@ +type program_id submitter_sequencing_experiment_id submitter_donor_id submitter_specimen_id submitter_sample_id submitter_matched_normal_sample_id sequencing_center platform platform_model experimental_strategy sequencing_date read_group_count gender specimen_tissue_source tumour_normal_designation specimen_type sample_type +sequencing_experiment TEST-CA EXP-9 TEST_SUBMITTER_DONOR_ID_fpkyjwkleu TEST_SUBMITTER_SPECIMEN_ID_fpkyjwkleuT1 TEST_SUBMITTER_SAMPLE_ID_fpkyjwkleuT1 TEST_SUBMITTER_SAMPLE_ID_fpkyjwkleuN1 QCMG ILLUMINA Illumina HiSeq 2000 WGS 4 Female Urine Tumour Metastatic tumour - metastasis to distant location Total RNA diff --git a/sanity-check/tests/input/good1_tumour_example.tsv b/sanity-check/tests/input/good1_tumour_example.tsv new file mode 100644 index 0000000..cc809e8 --- /dev/null +++ b/sanity-check/tests/input/good1_tumour_example.tsv @@ -0,0 +1,2 @@ +type program_id submitter_sequencing_experiment_id submitter_donor_id submitter_specimen_id submitter_sample_id submitter_matched_normal_sample_id sequencing_center platform platform_model experimental_strategy sequencing_date read_group_count +sequencing_experiment TEST-CA EXP-9 TEST_SUBMITTER_DONOR_ID_fpkyjwkleu TEST_SUBMITTER_SPECIMEN_ID_fpkyjwkleuT1 TEST_SUBMITTER_SAMPLE_ID_fpkyjwkleuT1 TEST_SUBMITTER_SAMPLE_ID_fpkyjwkleuN1 QCMG ILLUMINA Illumina HiSeq 2000 WGS 4 \ No newline at end of file diff --git a/sanity-check/tests/input/good2_tumour_example.tsv b/sanity-check/tests/input/good2_tumour_example.tsv new file mode 
100644 index 0000000..6e23ed7 --- /dev/null +++ b/sanity-check/tests/input/good2_tumour_example.tsv @@ -0,0 +1,2 @@ +type program_id submitter_sequencing_experiment_id submitter_donor_id submitter_specimen_id submitter_sample_id submitter_matched_normal_sample_id sequencing_center platform platform_model experimental_strategy sequencing_date read_group_count +sequencing_experiment TEST-CA EXP-9 TEST_SUBMITTER_DONOR_ID_fpkyjwkleu TEST_SUBMITTER_SPECIMEN_ID_fpkyjwkleuT1 TEST_SUBMITTER_SAMPLE_ID_fpkyjwkleuT1 QCMG ILLUMINA Illumina HiSeq 2000 Targeted-Seq 4 diff --git a/sanity-check/tests/local_bad_tumour_badRef_example.json b/sanity-check/tests/local_bad_tumour_badRef_example.json new file mode 100644 index 0000000..e3fd86a --- /dev/null +++ b/sanity-check/tests/local_bad_tumour_badRef_example.json @@ -0,0 +1,6 @@ +{ + "experiment_info_tsv": "input/bad1_tumour_example.tsv", + "song_url": "https://submission-song.rdpc-qa.cancercollaboratory.org", + "clinical_url": "https://clinical.qa.argo.cancercollaboratory.org", + "expected_output": "input/comparison_good_example.tsv" +} diff --git a/sanity-check/tests/local_bad_tumour_nullWGS_example.json b/sanity-check/tests/local_bad_tumour_nullWGS_example.json new file mode 100644 index 0000000..e3ebd08 --- /dev/null +++ b/sanity-check/tests/local_bad_tumour_nullWGS_example.json @@ -0,0 +1,6 @@ +{ + "experiment_info_tsv": "input/bad2_tumour_example.tsv", + "song_url": "https://submission-song.rdpc-qa.cancercollaboratory.org", + "clinical_url": "https://clinical.qa.argo.cancercollaboratory.org", + "expected_output": "input/comparison_good_example.tsv" +} diff --git a/sanity-check/tests/local_good_tumour_ts_example.json b/sanity-check/tests/local_good_tumour_ts_example.json new file mode 100644 index 0000000..5ccb5e7 --- /dev/null +++ b/sanity-check/tests/local_good_tumour_ts_example.json @@ -0,0 +1,6 @@ +{ + "experiment_info_tsv": "input/good2_tumour_example.tsv", + "song_url": 
"https://submission-song.rdpc-qa.cancercollaboratory.org", + "clinical_url": "https://clinical.qa.argo.cancercollaboratory.org", + "expected_output": "input/comparison_good2_tumour_example.tsv" +} diff --git a/sanity-check/tests/local_good_tumour_wgs_example.json b/sanity-check/tests/local_good_tumour_wgs_example.json new file mode 100644 index 0000000..928f8b5 --- /dev/null +++ b/sanity-check/tests/local_good_tumour_wgs_example.json @@ -0,0 +1,6 @@ +{ + "experiment_info_tsv": "input/good1_tumour_example.tsv", + "song_url": "https://submission-song.rdpc-qa.cancercollaboratory.org", + "clinical_url": "https://clinical.qa.argo.cancercollaboratory.org", + "expected_output": "input/comparison_good1_tumour_example.tsv" +}