Skip to content

Commit

Permalink
Merge pull request #332 from susannasiebert/frameshift_error
Browse files Browse the repository at this point in the history
Fix bug with handling certain frameshift variants
  • Loading branch information
susannasiebert authored Aug 30, 2017
2 parents 6480a00 + ad930b0 commit 57574f0
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 1 deletion.
5 changes: 4 additions & 1 deletion pvacseq/lib/generate_fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,10 @@ def main(args_input = sys.argv[1:]):
variant_type = line['variant_type']
full_wildtype_sequence = line['wildtype_amino_acid_sequence']
if variant_type == 'FS':
position = int(line['protein_position'].split('-', 1)[0]) - 1
if line['amino_acid_change'] is not None and line['amino_acid_change'].split('/')[0] == '-':
position = int(line['protein_position'].split('-', 1)[0])
else:
position = int(line['protein_position'].split('-', 1)[0]) - 1
elif variant_type == 'missense' or variant_type == 'inframe_ins':
wildtype_amino_acid, mutant_amino_acid = line['amino_acid_change'].split('/')
if wildtype_amino_acid == '-':
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
chromosome_name start stop reference variant gene_name transcript_name amino_acid_change ensembl_gene_id wildtype_amino_acid_sequence downstream_amino_acid_sequence variant_type protein_position transcript_expression gene_expression normal_depth normal_vaf tdna_depth tdna_vaf trna_depth trna_vaf index
12 62381609 62381609 G GT USP15 ENST00000280377 -/X ENSG00000135655 MAEGGAADLDTQRSDIATLLKTSLRKGDTWYLVDSRWFKQWKKYVGFDSWDKYQMGDQNVYPGPIDNSGLLKDGDAQSLKEHLIDELDYILLPTEGWNKLVSWYTLMEGQEPIARKVVEQGMFVKHCKVEVYLTELKLCENGNMNNVVTRRFSKADTIDTIEKEIRKIFSIPDEKETRLWNKYMSNTFEPLNKPDSTIQDAGLYQGQVLVIEQKNEDGTWPRGPSTPKSPGASNFSTLPKISPSSLSNNYNNMNNRNVKNSNYCLPSYTAYKNYDYSEPGRNNEQPGLCGLSNLGNTCFMNSAIQCLSNTPPLTEYFLNDKYQEELNFDNPLGMRGEIAKSYAELIKQMWSGKFSYVTPRAFKTQVGRFAPQFSGYQQQDCQELLAFLLDGLHEDLNRIRKKPYIQLKDADGRPDKVVAEEAWENHLKRNDSIIVDIFHGLFKSTLVCPECAKISVTFDPFCYLTLPLPMKKERTLEVYLVRMDPLTKPMQYKVVVPKIGNILDLCTALSALSGIPADKMIVTDIYNHRFHRIFAMDENLSSIMERDDIYVFEININRTEDTEHVIIPVCLREKFRHSSYTHHTGSSLFGQPFLMAVPRNNTEDKLYNLLLLRMCRYVKISTETEETEGSLHCCKDQNINGNGPNGIHEEGSPSEMETDEPDDESSQDQELPSENENSQSEDSVGGDNDSENGLCTEDTCKGQLTGHKKRLFTFQFNNLGNTDINYIKDDTRHIRFDDRQLRLDERSFLALDWDPDLKKRYFDENAAEDFEKHESVEYKPPKKPFVKLKDCIELFTTKEKLGAEDPWYCPNCKEHQQATKKLDLWSLPPVLVVHLKRFSYSRYMRDKLDTLVDFPINDLDMSEFLINPNAGPCRYNLIAVSNHYGGMGGGHYTAFAKNKDDGKWYYFDDSSVSTASEDQIVSKAAYVLFYQRQDTFSGTGFFPLDRETKGASAATGIPLESDEDSNDNDNDIENENCMHTN YQANVVWKV FS 345-346 NA NA NA NA NA NA NA NA USP15_ENST00000280377_1.FS.345-346
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
>1
GEIAKSYAELIKQMWSGKFS
>2
GEIAKSYAELYQANVVWKV
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
1:
- WT.USP15_ENST00000280377_1.FS.345-346
2:
- MT.USP15_ENST00000280377_1.FS.345-346
19 changes: 19 additions & 0 deletions tests/test_generate_fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,25 @@ def test_input_file_with_frameshift_variant_range_generates_expected_file(self):
expected_key_output_file = os.path.join(self.test_data_dir, 'output_frameshift_variant_range.key')
self.assertTrue(cmp(generate_fasta_key_output_file.name, expected_key_output_file))

def test_input_file_with_frameshift_variant_position_shift_generates_expected_file(self):
    # End-to-end check: run the executable under test on the
    # 'input_frameshift_variant_position_shift.tsv' fixture and compare both
    # files it writes (FASTA and key) against the stored expected outputs.
    input_tsv = os.path.join(
        self.test_data_dir,
        'input_frameshift_variant_position_shift.tsv',
    )

    # Scratch files that receive the FASTA and key output; only their
    # names are handed to the child process.
    fasta_tmp = tempfile.NamedTemporaryFile()
    key_tmp = tempfile.NamedTemporaryFile()

    command = [
        self.python,
        self.executable,
        input_tsv,
        self.peptide_sequence_length,
        self.epitope_length,
        fasta_tmp.name,
        key_tmp.name,
    ]
    # A falsy return value from the call is required for the test to go on.
    exit_status = call(command, shell=False)
    self.assertFalse(exit_status)

    # Each produced file is paired with the fixture it must match.
    comparisons = (
        (fasta_tmp, 'output_frameshift_variant_position_shift.fasta'),
        (key_tmp, 'output_frameshift_variant_position_shift.key'),
    )
    for produced, expected_name in comparisons:
        expected_path = os.path.join(self.test_data_dir, expected_name)
        self.assertTrue(cmp(produced.name, expected_path))

def test_input_file_with_sequence_containing_asterisk(self):
generate_fasta_input_file = os.path.join(self.test_data_dir, 'input_asterisk_sequence.tsv')
generate_fasta_output_file = tempfile.NamedTemporaryFile()
Expand Down

0 comments on commit 57574f0

Please sign in to comment.