From ec014f7a508c5cb662acb109c342061177065437 Mon Sep 17 00:00:00 2001 From: RSWilson1 Date: Mon, 29 Jul 2024 14:24:42 +0100 Subject: [PATCH 1/6] Remove unknown contigs from final BED file. --- gene_annotation2bed.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/gene_annotation2bed.py b/gene_annotation2bed.py index a04d90c..ec5894d 100644 --- a/gene_annotation2bed.py +++ b/gene_annotation2bed.py @@ -848,6 +848,15 @@ def write_bed(annotation_df: pd.DataFrame, ) # Merge overlapping entries collapsed_df = merge_overlapping(joint_bed_df) + # removing unknown contigs and raise in terminal + filtered_collapsed_df = collapsed_df[~collapsed_df["chromosome"].str.startswith('Unknown')] + + # Print all unknown contigs + print("Unknown contigs in the BED file:") + unknown_contigs = collapsed_df[collapsed_df["chromosome"].str.startswith('Unknown')] + for contig in unknown_contigs["chromosome"].unique(): + print(contig) + print(f"Total unknown contig rows: {len(unknown_contigs)}") # Write the collapsed data to an output file output_file_name_maf = ( f"output_{args.genome_build}_{args.output_file_suffix}.maf" @@ -855,10 +864,10 @@ def write_bed(annotation_df: pd.DataFrame, output_file_name_bed = ( f"output_{args.genome_build}_{args.output_file_suffix}.bed" ) - collapsed_df.to_csv(output_file_name_maf, sep="\t", - header=True, index=False) - collapsed_df.to_csv(output_file_name_bed, sep="\t", - header=False, index=False) + filtered_collapsed_df.to_csv(output_file_name_maf, sep="\t", + header=True, index=False) + filtered_collapsed_df.to_csv(output_file_name_bed, sep="\t", + header=False, index=False) def main(): From a70a23e12c9e6063d68be5dbc0468a113ff9fbb9 Mon Sep 17 00:00:00 2001 From: RSWilson1 Date: Mon, 29 Jul 2024 14:25:05 +0100 Subject: [PATCH 2/6] Fix file name for creating IGV report --- scripts/igv_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/igv_report.py b/scripts/igv_report.py index 
9f1c0fd..a2ef678 100644 --- a/scripts/igv_report.py +++ b/scripts/igv_report.py @@ -49,7 +49,7 @@ def create_igv_report(bed_file: str, maf_file: str, { "name": 'BED', "type": '', - "url": f'{bed_file}.gz', + "url": f'{bed_file}.sorted.gz', "indexURL": f'{bed_file}.gz.tbi' }, ] From ed12bef58a10982ea60d0bce6dcdebb9cd306301 Mon Sep 17 00:00:00 2001 From: RSWilson1 Date: Thu, 1 Aug 2024 14:37:11 +0100 Subject: [PATCH 3/6] Updated yaml for getting python version to run with GitHub actions --- .github/workflows/pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 182f973..11cbb92 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -9,7 +9,7 @@ jobs: - name: Set up Python 3.12.2 uses: actions/setup-python@v1 with: - python-version: 3.12.2 + python-version: "3.12.2" - name: Install dependencies run: | python -m pip install --upgrade pip From e66de17f8c716b3d33c13c75c7e561f3839e5090 Mon Sep 17 00:00:00 2001 From: RSWilson1 Date: Thu, 1 Aug 2024 14:43:52 +0100 Subject: [PATCH 4/6] allow prereleases to fix not finding python version. 
--- .github/workflows/pytest.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 11cbb92..6da41dd 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -10,6 +10,7 @@ jobs: uses: actions/setup-python@v1 with: python-version: "3.12.2" + allow-prereleases: true - name: Install dependencies run: | python -m pip install --upgrade pip From 038d1c801a91f53c01ee342968570c184d91fcd1 Mon Sep 17 00:00:00 2001 From: RSWilson1 Date: Thu, 1 Aug 2024 14:44:42 +0100 Subject: [PATCH 5/6] Use less specific python version --- .github/workflows/pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 6da41dd..5d724d7 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -9,7 +9,7 @@ jobs: - name: Set up Python 3.12.2 uses: actions/setup-python@v1 with: - python-version: "3.12.2" + python-version: "3.12" allow-prereleases: true - name: Install dependencies run: | From b99a3adda12da43fe1a9d06ae7f04309874f03f7 Mon Sep 17 00:00:00 2001 From: RSWilson1 Date: Fri, 2 Aug 2024 14:22:41 +0100 Subject: [PATCH 6/6] Updated print statement to be clear and show the associated gene symbol as well. 
--- gene_annotation2bed.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/gene_annotation2bed.py b/gene_annotation2bed.py index ec5894d..ecbe9a1 100644 --- a/gene_annotation2bed.py +++ b/gene_annotation2bed.py @@ -849,13 +849,16 @@ def write_bed(annotation_df: pd.DataFrame, # Merge overlapping entries collapsed_df = merge_overlapping(joint_bed_df) # removing unknown contigs and raise in terminal + print(collapsed_df.head()) + print(collapsed_df.tail()) filtered_collapsed_df = collapsed_df[~collapsed_df["chromosome"].str.startswith('Unknown')] # Print all unknown contigs print("Unknown contigs in the BED file:") unknown_contigs = collapsed_df[collapsed_df["chromosome"].str.startswith('Unknown')] - for contig in unknown_contigs["chromosome"].unique(): - print(contig) + print(f"These rows will not be present in the final bed file due to unknown contigs \n") + for _, row in unknown_contigs.iterrows(): + print(f"{row['chromosome']} - {row['gene']}") print(f"Total unknown contig rows: {len(unknown_contigs)}") # Write the collapsed data to an output file output_file_name_maf = (