Skip to content

Commit

Permalink
Merge pull request #242 from susannasiebert/fusions
Browse files Browse the repository at this point in the history
Integrate fusion processing into the pipeline
  • Loading branch information
susannasiebert authored Jan 6, 2017
2 parents 1b4961a + 49ea946 commit cbf0a94
Show file tree
Hide file tree
Showing 14 changed files with 677 additions and 17 deletions.
12 changes: 11 additions & 1 deletion pvacseq/lib/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ def define_parser():

parser.add_argument(
"input_file",
help="A VEP-annotated single-sample VCF containing transcript, Wildtype protein sequence, and Downstream protein sequence information"
help="The variant input file to process. This can either be a VEP-annotated single-sample VCF "
+ "containing transcript, Wildtype protein sequence, and Downstream protein sequence information, "
+ "or a INTEGRATE-Neo bedpe file with fusions."
)
parser.add_argument(
"sample_name",
Expand Down Expand Up @@ -183,6 +185,13 @@ def main(args_input = sys.argv[1:]):
parser = define_parser()
args = parser.parse_args(args_input)

# Infer the input format from the file extension. The result is handed to the
# pipeline through the 'input_file_type' entry of shared_arguments.
if args.input_file.endswith('.vcf'):
    input_file_type = 'vcf'
elif args.input_file.endswith('.bedpe'):
    input_file_type = 'bedpe'
else:
    # The path lives on the parsed namespace (args.input_file). The message was
    # previously formatted with a bare `input_file`, which is not assigned in
    # the visible code and would raise NameError instead of exiting cleanly.
    sys.exit("Unknown input file type for file (%s). Input file must be either a VCF (.vcf) or a bedpe (.bedpe) file." % args.input_file)

PredictionClass.check_alleles_valid(args.allele)

if "." in args.sample_name:
Expand Down Expand Up @@ -223,6 +232,7 @@ def main(args_input = sys.argv[1:]):

shared_arguments = {
'input_file' : args.input_file,
'input_file_type' : input_file_type,
'sample_name' : args.sample_name,
'top_result_per_mutation' : args.top_result_per_mutation,
'top_score_metric' : args.top_score_metric,
Expand Down
49 changes: 42 additions & 7 deletions pvacseq/lib/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def status_message(msg):
class Pipeline(metaclass=ABCMeta):
def __init__(self, **kwargs):
self.input_file = kwargs['input_file']
self.input_file_type = kwargs['input_file_type']
self.sample_name = kwargs['sample_name']
self.alleles = kwargs['alleles']
self.prediction_algorithms = kwargs['prediction_algorithms']
Expand Down Expand Up @@ -59,8 +60,39 @@ def tsv_file_path(self):
tsv_file = self.sample_name + '.tsv'
return os.path.join(self.output_dir, tsv_file)

def converter(self, params):
    """Build the input-file converter that matches self.input_file_type.

    The class name registered for the input type is resolved as an attribute
    of this module and instantiated with ``params`` as keyword arguments.

    Raises:
        KeyError: if self.input_file_type is neither 'vcf' nor 'bedpe'.
        AttributeError: if this module has no attribute with the mapped name.
    """
    class_name = {
        'vcf': 'VcfConverter',
        'bedpe': 'IntegrateConverter',
    }[self.input_file_type]
    this_module = sys.modules[__name__]
    return getattr(this_module, class_name)(**params)

def fasta_generator(self, params):
    """Build the FASTA generator that matches self.input_file_type.

    The class name registered for the input type is resolved as an attribute
    of this module and instantiated with ``params`` as keyword arguments.

    Raises:
        KeyError: if self.input_file_type is neither 'vcf' nor 'bedpe'.
        AttributeError: if this module has no attribute with the mapped name.
    """
    class_name = {
        'vcf': 'FastaGenerator',
        'bedpe': 'FusionFastaGenerator',
    }[self.input_file_type]
    this_module = sys.modules[__name__]
    return getattr(this_module, class_name)(**params)

def output_parser(self, params):
    """Build the prediction-output parser that matches self.input_file_type.

    The class name registered for the input type is resolved as an attribute
    of this module and instantiated with ``params`` as keyword arguments.

    Raises:
        KeyError: if self.input_file_type is neither 'vcf' nor 'bedpe'.
        AttributeError: if this module has no attribute with the mapped name.
    """
    class_name = {
        'vcf': 'DefaultOutputParser',
        'bedpe': 'FusionOutputParser',
    }[self.input_file_type]
    this_module = sys.modules[__name__]
    return getattr(this_module, class_name)(**params)

def tsv_file_path(self):
    """Return the path of the sample's TSV file inside the output directory.

    The file name is self.sample_name with a '.tsv' suffix appended.
    """
    # NOTE(review): an identical tsv_file_path definition already appears just
    # before converter() in this class; this later copy looks redundant.
    # Confirm and remove one of the two.
    file_name = self.sample_name + '.tsv'
    directory = self.output_dir
    return os.path.join(directory, file_name)

def convert_vcf(self):
status_message("Converting VCF to TSV")
status_message("Converting .%s to TSV" % self.input_file_type)
if os.path.exists(self.tsv_file_path()):
status_message("TSV file already exists. Skipping.")
return
Expand All @@ -84,7 +116,7 @@ def convert_vcf(self):
else:
convert_params[attribute] = None

converter = VcfConverter(**convert_params)
converter = self.converter(convert_params)
converter.execute()
print("Completed")

Expand Down Expand Up @@ -246,7 +278,10 @@ def execute(self):

total_row_count = self.tsv_entry_count()
if total_row_count == 0:
sys.exit("The TSV file is empty. Please check that the input VCF contains missense, inframe indel, or frameshift mutations.")
if self.input_file_type == 'vcf':
sys.exit("The TSV file is empty. Please check that the input VCF contains missense, inframe indel, or frameshift mutations.")
elif self.input_file_type == 'bedpe':
sys.exit("The TSV file is empty. Please check that the input bedpe file contains fusion entries.")
chunks = self.split_tsv_file(total_row_count)

self.generate_fasta(chunks)
Expand Down Expand Up @@ -324,7 +359,7 @@ def generate_fasta(self, chunks):
'output_key_file' : split_fasta_key_file_path,
'downstream_sequence_length': self.downstream_sequence_length,
}
fasta_generator = FastaGenerator(**generate_fasta_params)
fasta_generator = self.fasta_generator(generate_fasta_params)
fasta_generator.execute()
status_message("Completed")

Expand Down Expand Up @@ -387,7 +422,7 @@ def call_iedb_and_parse_outputs(self, chunks):
'top_score_metric' : self.top_score_metric,
'top_result_per_mutation': self.top_result_per_mutation
}
parser = DefaultOutputParser(**params)
parser = self.output_parser(params)
parser.execute()
status_message("Completed")
split_parsed_output_files.append(split_parsed_file_path)
Expand Down Expand Up @@ -418,7 +453,7 @@ def generate_fasta(self, chunks):
'output_key_file' : split_fasta_key_file_path,
'downstream_sequence_length': self.downstream_sequence_length,
}
fasta_generator = FastaGenerator(**generate_fasta_params)
fasta_generator = self.fasta_generator(generate_fasta_params)
fasta_generator.execute()
status_message("Completed")

Expand Down Expand Up @@ -475,7 +510,7 @@ def call_iedb_and_parse_outputs(self, chunks):
'top_score_metric' : self.top_score_metric,
'top_result_per_mutation': self.top_result_per_mutation
}
parser = DefaultOutputParser(**params)
parser = self.output_parser(params)
parser.execute()
status_message("Completed")
split_parsed_output_files.append(split_parsed_file_path)
Expand Down
Loading

0 comments on commit cbf0a94

Please sign in to comment.