Skip to content

Commit

Permalink
Merge pull request #11 from eastgenomics/removing-unknown-contigs
Browse files: browse the repository at this point in the history
Removing unmapped contigs
  • Loading branch information
jethror1 authored Aug 2, 2024
2 parents 8d77f1f + b99a3ad commit 1866644
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 6 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/pytest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ jobs:
- name: Set up Python 3.12.2
uses: actions/setup-python@v1
with:
python-version: 3.12.2
python-version: "3.12"
allow-prereleases: true
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
20 changes: 16 additions & 4 deletions gene_annotation2bed.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,17 +848,29 @@ def write_bed(annotation_df: pd.DataFrame,
)
# Merge overlapping entries
collapsed_df = merge_overlapping(joint_bed_df)
# Remove rows on unknown contigs and report them in the terminal
print(collapsed_df.head())
print(collapsed_df.tail())
filtered_collapsed_df = collapsed_df[~collapsed_df["chromosome"].str.startswith('Unknown')]

# Print all unknown contigs
print("Unknown contigs in the BED file:")
unknown_contigs = collapsed_df[collapsed_df["chromosome"].str.startswith('Unknown')]
print(f"These rows will not be present in the final bed file due to unknown contigs \n")
for _, row in unknown_contigs.iterrows():
print(f"{row['chromosome']} - {row['gene']}")
print(f"Total unknown contig rows: {len(unknown_contigs)}")
# Write the collapsed data to an output file
output_file_name_maf = (
f"output_{args.genome_build}_{args.output_file_suffix}.maf"
)
output_file_name_bed = (
f"output_{args.genome_build}_{args.output_file_suffix}.bed"
)
collapsed_df.to_csv(output_file_name_maf, sep="\t",
header=True, index=False)
collapsed_df.to_csv(output_file_name_bed, sep="\t",
header=False, index=False)
filtered_collapsed_df.to_csv(output_file_name_maf, sep="\t",
header=True, index=False)
filtered_collapsed_df.to_csv(output_file_name_bed, sep="\t",
header=False, index=False)


def main():
Expand Down
2 changes: 1 addition & 1 deletion scripts/igv_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def create_igv_report(bed_file: str, maf_file: str,
{
"name": 'BED',
"type": '',
"url": f'{bed_file}.gz',
"url": f'{bed_file}.sorted.gz',
"indexURL": f'{bed_file}.gz.tbi'
},
]
Expand Down

0 comments on commit 1866644

Please sign in to comment.