diff --git a/docs/index.rst b/docs/index.rst
index 19f7cc3cd..dc1e8877a 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -69,6 +69,7 @@ Project Info
     step/ngs_data_qc
     step/ngs_mapping
     step/somatic_gene_fusion_calling
+    step/somatic_neoepitope_prediction
     step/somatic_purity_ploidy_estimate
     step/somatic_targeted_seq_cnv_calling
     step/somatic_variant_annotation
diff --git a/snappy_pipeline/apps/snappy_snake.py b/snappy_pipeline/apps/snappy_snake.py
index f5f8bf3c2..74bd46e9d 100644
--- a/snappy_pipeline/apps/snappy_snake.py
+++ b/snappy_pipeline/apps/snappy_snake.py
@@ -35,6 +35,7 @@
     somatic_gene_fusion_calling,
     somatic_hla_loh_calling,
     somatic_msi_calling,
+    somatic_neoepitope_prediction,
     somatic_purity_ploidy_estimate,
     somatic_targeted_seq_cnv_calling,
     somatic_variant_annotation,
@@ -86,6 +87,7 @@
     "somatic_gene_fusion_calling": somatic_gene_fusion_calling,
     "somatic_hla_loh_calling": somatic_hla_loh_calling,
     "somatic_msi_calling": somatic_msi_calling,
+    "somatic_neoepitope_prediction": somatic_neoepitope_prediction,
     "somatic_purity_ploidy_estimate": somatic_purity_ploidy_estimate,
     "somatic_targeted_seq_cnv_calling": somatic_targeted_seq_cnv_calling,
     "somatic_variant_annotation": somatic_variant_annotation,
diff --git a/snappy_pipeline/models/annotation.py b/snappy_pipeline/models/annotation.py
index 468f0bf96..22d7d1a9c 100644
--- a/snappy_pipeline/models/annotation.py
+++ b/snappy_pipeline/models/annotation.py
@@ -35,4 +35,7 @@ class Vep(SnappyModel):
     ]
     num_threads: int = 8
     buffer_size: int = 1000
+    plugins: list[str] = []
+    """To use this option in VEP, you should download the plugin repository from the link https://github.com/Ensembl/VEP_plugins"""
+    plugins_dir: str = ""
     output_options: list[str] = ["everything"]
diff --git a/snappy_pipeline/workflow_model.py b/snappy_pipeline/workflow_model.py
index aeda7682e..1b1a7fefd 100644
--- a/snappy_pipeline/workflow_model.py
+++ b/snappy_pipeline/workflow_model.py
@@ -36,6 +36,9 @@
 from snappy_pipeline.workflows.somatic_variant_filtration.model import SomaticVariantFiltration
 from snappy_pipeline.workflows.somatic_variant_signatures.model import SomaticVariantSignatures
 from snappy_pipeline.workflows.somatic_wgs_cnv_calling.model import SomaticWgsCnvCalling
+from snappy_pipeline.workflows.somatic_neoepitope_prediction.model import (
+    SomaticNeoepitopePrediction,
+)
 from snappy_pipeline.workflows.somatic_wgs_sv_calling.model import SomaticWgsSvCalling
 from snappy_pipeline.workflows.sv_calling_targeted.model import SvCallingTargeted
 from snappy_pipeline.workflows.sv_calling_wgs.model import SvCallingWgs
@@ -113,6 +116,7 @@ class StepConfig(TypedDict, total=False):
     somatic_gene_fusion_calling: SomaticGeneFusionCalling
     somatic_hla_loh_calling: SomaticHlaLohCalling
     somatic_msi_calling: SomaticMsiCalling
+    somatic_neoepitope_prediction: SomaticNeoepitopePrediction
     somatic_purity_ploidy_estimate: SomaticPurityPloidyEstimate
     somatic_targeted_seq_cnv_calling: SomaticTargetedSeqCnvCalling
     somatic_variant_annotation: SomaticVariantAnnotation
diff --git a/snappy_pipeline/workflows/hla_typing/Snakefile b/snappy_pipeline/workflows/hla_typing/Snakefile
index 0f627c46d..3dc829c5a 100644
--- a/snappy_pipeline/workflows/hla_typing/Snakefile
+++ b/snappy_pipeline/workflows/hla_typing/Snakefile
@@ -67,51 +67,53 @@ rule hla_typing_link_out_run:
 
 # OptiType --------------------------------------------------------------------
 
-
-rule hla_typing_optitype_run:
-    input:
-        **wf.get_input_files("optitype", "run"),
-    output:
-        **wf.get_output_files("optitype", "run"),
-    threads: wf.get_resource("optitype", "run", "threads")
-    resources:
-        time=wf.get_resource("optitype", "run", "time"),
-        memory=wf.get_resource("optitype", "run", "memory"),
-        partition=wf.get_resource("optitype", "run", "partition"),
-        tmpdir=wf.get_resource("optitype", "run", "tmpdir"),
-    log:
-        **wf.get_log_file("optitype", "run"),
-    params:
-        args=wf.substep_dispatch("optitype", "get_args", "run"),
-    wrapper:
-        wf.wrapper_path("optitype")
+if "optitype" in wf.w_config.step_config["hla_typing"].tools:
+
+    rule hla_typing_optitype_run:
+        input:
+            **wf.get_input_files("optitype", "run"),
+        output:
+            **wf.get_output_files("optitype", "run"),
+        threads: wf.get_resource("optitype", "run", "threads")
+        resources:
+            time=wf.get_resource("optitype", "run", "time"),
+            memory=wf.get_resource("optitype", "run", "memory"),
+            partition=wf.get_resource("optitype", "run", "partition"),
+            tmpdir=wf.get_resource("optitype", "run", "tmpdir"),
+        log:
+            **wf.get_log_file("optitype", "run"),
+        params:
+            args=wf.substep_dispatch("optitype", "get_args", "run"),
+        wrapper:
+            wf.wrapper_path("optitype")
 
 
 # arcasHLA --------------------------------------------------------------------
 
 
 # NB: reference is updated in the installed package
-rule hla_typing_arcashla_prepare_reference:
-    output:
-        touch("work/arcashla.prepare_reference/out/.done"),
-    log:
-        "work/arcashla.prepare_reference/log/arcashla.prepare_reference.log",
-    wrapper:
-        wf.wrapper_path("arcashla/prepare_reference")
-
-
-rule hla_typing_arcashla_run:
-    input:
-        unpack(wf.get_input_files("arcashla", "run")),
-    output:
-        **wf.get_output_files("arcashla", "run"),
-    threads: wf.get_resource("arcashla", "run", "threads")
-    resources:
-        time=wf.get_resource("arcashla", "run", "time"),
-        memory=wf.get_resource("arcashla", "run", "memory"),
-        partition=wf.get_resource("arcashla", "run", "partition"),
-        tmpdir=wf.get_resource("arcashla", "run", "tmpdir"),
-    log:
-        wf.get_log_file("arcashla", "run"),
-    wrapper:
-        wf.wrapper_path("arcashla/run")
+if "arcashla" in wf.w_config.step_config["hla_typing"].tools:
+
+    rule hla_typing_arcashla_prepare_reference:
+        output:
+            touch("work/arcashla.prepare_reference/out/.done"),
+        log:
+            "work/arcashla.prepare_reference/log/arcashla.prepare_reference.log",
+        wrapper:
+            wf.wrapper_path("arcashla/prepare_reference")
+
+    rule hla_typing_arcashla_run:
+        input:
+            unpack(wf.get_input_files("arcashla", "run")),
+        output:
+            **wf.get_output_files("arcashla", "run"),
+        threads: wf.get_resource("arcashla", "run", "threads")
+        resources:
+            time=wf.get_resource("arcashla", "run", "time"),
+            memory=wf.get_resource("arcashla", "run", "memory"),
+            partition=wf.get_resource("arcashla", "run", "partition"),
+            tmpdir=wf.get_resource("arcashla", "run", "tmpdir"),
+        log:
+            wf.get_log_file("arcashla", "run"),
+        wrapper:
+            wf.wrapper_path("arcashla/run")
diff --git a/snappy_pipeline/workflows/somatic_neoepitope_prediction/Snakefile b/snappy_pipeline/workflows/somatic_neoepitope_prediction/Snakefile
new file mode 100644
index 000000000..7b18f7238
--- /dev/null
+++ b/snappy_pipeline/workflows/somatic_neoepitope_prediction/Snakefile
@@ -0,0 +1,99 @@
+# -*- coding: utf-8 -*-
+"""CUBI Pipeline somatic neoepitope prediction step Snakefile"""
+
+import os
+
+from snappy_pipeline import expand_ref
+from snappy_pipeline.workflows.somatic_neoepitope_prediction import (
+    SomaticNeoepitopePredictionWorkflow,
+)
+
+__author__ = "Pham Gia Cuong"
+
+
+# Configuration ===============================================================
+
+
+configfile: "config.yaml"
+
+
+# Expand "$ref" JSON pointers in configuration (also works for YAML)
+config, lookup_paths, config_paths = expand_ref("config.yaml", config)
+
+# WorkflowImpl Object Setup ===================================================
+wf = SomaticNeoepitopePredictionWorkflow(workflow, config, lookup_paths, config_paths, os.getcwd())
+
+
+localrules:
+    # Linking files from work/ to output/ should be done locally
+    somatic_neoepitope_preparation_link_out_run,
+
+
+rule all:
+    input:
+        wf.get_result_files(),
+
+
+# Generic linking out ---------------------------------------------------------
+
+
+rule somatic_neoepitope_preparation_link_out_run:
+    input:
+        wf.get_input_files("link_out", "run"),
+    output:
+        wf.get_output_files("link_out", "run"),
+    run:
+        shell(wf.get_shell_cmd("link_out", "run", wildcards))
+
+
+rule somatic_neoepitope_prediction_pvactools_install:
+    output:
+        **wf.get_output_files("pvacseq", "install"),
+    params:
+        container="docker://griffithlab/pvactools",
+    resources:
+        time=wf.get_resource("pvacseq", "install", "time"),
+        memory=wf.get_resource("pvacseq", "install", "memory"),
+        partition=wf.get_resource("pvacseq", "install", "partition"),
+    log:
+        **wf.get_log_file("pvacseq", "install"),
+    wrapper:
+        wf.wrapper_path("singularity")
+
+
+rule somatic_neoepitope_preparation:
+    input:
+        unpack(wf.get_input_files("pvacseq", "prepare")),
+    output:
+        **wf.get_output_files("pvacseq", "prepare"),
+    log:
+        **wf.get_log_file("pvacseq", "prepare"),
+    threads: wf.get_resource("pvacseq", "prepare", "threads")
+    resources:
+        time=wf.get_resource("pvacseq", "prepare", "time"),
+        memory=wf.get_resource("pvacseq", "prepare", "memory"),
+        partition=wf.get_resource("pvacseq", "prepare", "partition"),
+        tmpdir=wf.get_resource("pvacseq", "prepare", "tmpdir"),
+    params:
+        **{"args": wf.get_params("pvacseq", "prepare")},
+    wrapper:
+        wf.wrapper_path("pvactools/combining")
+
+
+rule somatic_neoepitope_prediction:
+    input:
+        unpack(wf.get_input_files("pvacseq", "predict")),
+    output:
+        **wf.get_output_files("pvacseq", "predict"),
+    log:
+        **wf.get_log_file("pvacseq", "predict"),
+    threads: wf.get_resource("pvacseq", "predict", "threads")
+    resources:
+        time=wf.get_resource("pvacseq", "predict", "time"),
+        memory=wf.get_resource("pvacseq", "predict", "memory"),
+        partition=wf.get_resource("pvacseq", "predict", "partition"),
+        tmpdir=wf.get_resource("pvacseq", "predict", "tmpdir"),
+    params:
+        **{"args": wf.get_params("pvacseq", "predict")},
+    wrapper:
+        wf.wrapper_path("pvactools/pvacseq")
diff --git a/snappy_pipeline/workflows/somatic_neoepitope_prediction/__init__.py b/snappy_pipeline/workflows/somatic_neoepitope_prediction/__init__.py
new file mode 100644
index 000000000..9721a32f1
--- /dev/null
+++ b/snappy_pipeline/workflows/somatic_neoepitope_prediction/__init__.py
@@ -0,0 +1,468 @@
+# -*- coding: utf-8 -*-
+"""Implementation of the ``somatic_neoepitope_prediction`` step
+
+The somatic_neoepitope_prediction step allows for the prediction of neoepitopes from somatic
+(small) variant calling results and a transcript database such as ENSEMBL.  Further, the step
+allows for the binding prediction to a given set of HLA alleles.
+
+.. note::
+
+    Status: not implemented yet
+
+==========
+Step Input
+==========
+
+.. note:: TODO
+
+===========
+Step Output
+===========
+
+.. note:: TODO
+
+=====================
+Default Configuration
+=====================
+
+The default configuration is as follows.
+
+.. include:: DEFAULT_CONFIG_somatic_neoepitope_prediction.rst
+
+"""
+
+from collections import OrderedDict
+import sys
+
+from biomedsheets.shortcuts import CancerCaseSheet, CancerCaseSheetOptions, is_not_background
+from snakemake.io import expand
+
+from snappy_pipeline.utils import dictify, listify
+from snappy_pipeline.workflows.abstract import BaseStep, BaseStepPart, LinkOutStepPart
+from snappy_pipeline.workflows.ngs_mapping import NgsMappingWorkflow, ResourceUsage
+from snappy_pipeline.workflows.somatic_variant_annotation import SomaticVariantAnnotationWorkflow
+from snappy_pipeline.workflows.somatic_variant_calling import (
+    SOMATIC_VARIANT_CALLERS_MATCHED,
+    SomaticVariantCallingWorkflow,
+)
+from snappy_pipeline.workflows.hla_typing import HlaTypingWorkflow
+from .model import SomaticNeoepitopePrediction as SomaticNeoepitopePredictionConfigModel
+
+
+__author__ = "Pham Gia Cuong"
+__email__ = "pham.gia-cuong@bih-charite.de"
+
+#: Extensions of files to create as main payload
+PREPARE_EXT_VALUES = (".vcf.gz", ".vcf.gz.tbi", ".vcf.gz.md5", ".vcf.gz.tbi.md5")
+PREDICT_EXT_VALUES = (
+    ".all_epitopes.tsv",
+    ".filtered.tsv",
+    ".all_epitopes.tsv.md5",
+    ".filtered.tsv.md5",
+)
+#: Default configuration for the somatic_gene_fusion_calling step
+DEFAULT_CONFIG = SomaticNeoepitopePredictionConfigModel.default_config_yaml_string()
+
+
+class PvacSeqStepPart(BaseStepPart):
+    """
+    Preparation VCF file for pvactool
+    """
+
+    #: Step name
+    name = "pvacseq"
+
+    #: Actions
+    actions = ("install", "prepare", "predict")
+
+    #: Resources
+    resource_usage = {
+        "install": ResourceUsage(
+            threads=2,
+            time="03:00:00",
+            memory="6G",
+        ),
+        "prepare": ResourceUsage(
+            threads=1,
+            time="01:00:00",
+            memory="6G",
+        ),
+        "predict": ResourceUsage(
+            threads=4,
+            time="4:00:00",
+            memory="30G",
+        ),
+    }
+
+    def __init__(self, parent):
+        super().__init__(parent)
+        # Build shortcut from cancer bio sample name to matched cancer sample
+        self.tumor_ngs_library_to_sample_pair = OrderedDict()
+        for sheet in self.parent.shortcut_sheets:
+            self.tumor_ngs_library_to_sample_pair.update(
+                sheet.all_sample_pairs_by_tumor_dna_ngs_library
+            )
+        # Build mapping from donor name to donor.
+        self.donors = OrderedDict()
+        for sheet in self.parent.shortcut_sheets:
+            for donor in sheet.donors:
+                self.donors[donor.name] = donor
+
+    def get_input_files(self, action):
+        self._validate_action(action)
+        # if action == "install":
+        #     return {"container": "work/containers/out/pvactools.simg"}
+        if action == "prepare":
+            return self._get_input_files_prepare
+        if action == "predict":
+            return self._get_input_files_predict
+
+    @dictify
+    def _get_input_files_predict(self, wildcards):
+        yield "container", "work/containers/out/pvactools.simg"
+        prepare_tpl = (
+            "work/prepare/{mapper}.{var_caller}.{anno_caller}.{mode}.{tumor_library}/out/"
+            "{mapper}.{var_caller}.{anno_caller}.{mode}.{tumor_library}.vcf.gz"
+        )
+        yield "combine_vcf", prepare_tpl
+        hla_typing = self.parent.sub_workflows["hla_typing"]
+        hla_tpl = "output/optitype.{ngs_library}/out/optitype.{ngs_library}.txt"
+        for sheet in filter(is_not_background, self.parent.shortcut_sheets):
+            for sample_pair in sheet.all_sample_pairs:
+                if sample_pair.tumor_sample.dna_ngs_library.name == wildcards.tumor_library:
+                    yield (
+                        "hla_normal_dna",
+                        hla_typing(
+                            (hla_tpl).format(
+                                ngs_library=sample_pair.normal_sample.dna_ngs_library.name,
+                            )
+                        ),
+                    )
+                    yield (
+                        "hla_tumor_dna",
+                        hla_typing(
+                            (hla_tpl).format(
+                                ngs_library=sample_pair.tumor_sample.dna_ngs_library.name,
+                            )
+                        ),
+                    )
+                    yield (
+                        "hla_tumor_rna",
+                        hla_typing(
+                            (hla_tpl).format(
+                                ngs_library=sample_pair.tumor_sample.rna_ngs_library.name,
+                            )
+                        ),
+                    )
+
+    @dictify
+    def _get_input_files_prepare(self, wildcards):
+        """Return path to somatic variant annotated file"""
+        # It only works with vep now.
+        # Validate action
+        # self._validate_action(action)
+        # yield "container","work/containers/out/pvactools.simg"
+        tpl = (
+            "output/{mapper}.{var_caller}.{anno_caller}.{tumor_library}/out/"
+            "{mapper}.{var_caller}.{anno_caller}.{tumor_library}"
+        )
+        if self.config["preparation"]["full_vep_annotation"]:
+            vep_key_ext = {"vcf": ".full.vcf.gz", "vcf_tbi": ".full.vcf.gz.tbi"}
+        else:
+            vep_key_ext = {"vcf": ".vcf.gz", "vcf_tbi": ".vcf.gz.tbi"}
+        variant_annotation = self.parent.sub_workflows["somatic_variant_annotation"]
+        if self.config["preparation"]["format"] == "snappy_custom":
+            for key, ext in vep_key_ext.items():
+                yield key, variant_annotation(tpl + ext)
+
+        ngs_mapping = self.parent.sub_workflows["ngs_mapping"]
+        star_key_ext = {"expression": ".GeneCounts.tab", "bam": ".bam"}
+        for sheet in filter(is_not_background, self.parent.shortcut_sheets):
+            for sample_pair in sheet.all_sample_pairs:
+                if sample_pair.tumor_sample.dna_ngs_library.name == wildcards.tumor_library:
+                    rna_tpl = "output/{mapper}.{rna_library}/out/{mapper}.{rna_library}"
+                    for key, ext in star_key_ext.items():
+                        yield (
+                            key,
+                            ngs_mapping(
+                                (rna_tpl + ext).format(
+                                    mapper=self.w_config.step_config["ngs_mapping"].tools.rna[0],
+                                    rna_library=sample_pair.tumor_sample.rna_ngs_library.name,
+                                )
+                            ),
+                        )
+
+    @dictify
+    def get_output_files(self, action):
+        """Return output files"""
+        # Validate action
+        self._validate_action(action)
+        if action == "install":
+            yield "container", "work/containers/out/pvactools.simg"
+        if action == "prepare":
+            path_prepare = self._get_output_files_prepare()
+            yield from path_prepare.items()
+        if action == "predict":
+            path_predict = self._get_output_files_predict()
+            yield from path_predict.items()
+
+    @dictify
+    def _get_output_files_prepare(self):
+        if self.config["preparation"]["mode"] == "gene":
+            prefix = (
+                "work/prepare/{mapper}.{var_caller}.{anno_caller}.GX.{tumor_library}/out/"
+                "{mapper}.{var_caller}.{anno_caller}.GX.{tumor_library}"
+            )
+        elif self.config["preparation"]["mode"] == "transcript":
+            prefix = (
+                "work/prepare/{mapper}.{var_caller}.{anno_caller}.TX.{tumor_library}/out/"
+                "{mapper}.{var_caller}.{anno_caller}.TX.{tumor_library}"
+            )
+        key_ext = {"vcf": ".vcf.gz", "vcf_tbi": ".vcf.gz.tbi"}
+        for key, ext in key_ext.items():
+            yield key, prefix + ext
+            yield key + "_md5", prefix + ext + ".md5"
+
+    @dictify
+    def _get_output_files_predict(self):
+        key_ext = {"all_epitopes": ".all_epitopes.tsv", "filtered_epitopes": ".filtered.tsv"}
+        prefix = "work/predict/{mapper}.{var_caller}.{anno_caller}.{mode}.{tumor_library}.epitopes/out/MHC_Class_I/{tumor_library}"
+        for key, ext in key_ext.items():
+            yield key, prefix + ext
+            yield key + "_md5", prefix + ext + ".md5"
+
+    @dictify
+    def _get_log_file(self, action):
+        """Return mapping of log files."""
+        # Validate action
+        self._validate_action(action)
+        if action == "install":
+            key_ext = (
+                ("log", ".log"),
+                ("conda_info", ".conda_info.txt"),
+                ("conda_list", ".conda_list.txt"),
+            )
+            for key, ext in key_ext:
+                yield key, "work/containers/log/pvactool_install" + ext
+
+        if action == "prepare":
+            if self.config["preparation"]["mode"] == "gene":
+                prefix = (
+                    "work/prepare/{mapper}.{var_caller}.{anno_caller}.GX.{tumor_library}/log/"
+                    "{mapper}.{var_caller}.{anno_caller}.GX.{tumor_library}"
+                )
+            elif self.config["preparation"]["mode"] == "transcript":
+                prefix = (
+                    "work/preprare/{mapper}.{var_caller}.{anno_caller}.TX.{tumor_library}/log/"
+                    "{mapper}.{var_caller}.{anno_caller}.TX.{tumor_library}"
+                )
+            key_ext = (
+                ("log", ".log"),
+                ("conda_info", ".conda_info.txt"),
+                ("conda_list", ".conda_list.txt"),
+            )
+            for key, ext in key_ext:
+                yield key, prefix + ext
+
+        if action == "predict":
+            prefix = (
+                "work/predict/{mapper}.{var_caller}.{anno_caller}.{mode}.{tumor_library}.epitopes/"
+                "log/{tumor_library}"
+            )
+            key_ext = (
+                ("log", ".log"),
+                ("conda_info", ".conda_info.txt"),
+                ("conda_list", ".conda_list.txt"),
+            )
+            for key, ext in key_ext:
+                yield key, prefix + ext
+
+    def get_params(self, action):
+        self._validate_action(action)
+        if action == "prepare":
+            return getattr(self, "_get_params_run_prepare")
+        if action == "predict":
+            return getattr(self, "_get_params_run_predict")
+
+    def _get_params_run_prepare(self, wildcards):
+        d = {
+            "max_depth": self.config["preparation"]["max_depth"],
+            "format": self.config["preparation"]["format"],
+            "expression_column": self.config["preparation"]["expression_column"],
+            "id_column": self.config["preparation"]["id_column"],
+            "ignore_ensembl_id_version": self.config["preparation"]["ignore_ensembl_id_version"],
+            "mode": self.config["preparation"]["mode"],
+        }
+        return d
+
+    def _get_params_run_predict(self, wildcards):
+        d = {
+            "epitope_length": ",".join(
+                map(str, self.config["prediction"]["CLASS_I_EPITOPE_LENGTH"])
+            ),
+            "algorithms": " ".join(self.config["prediction"]["algorithms"]),
+            "BINDING_THRESHOLD": self.config["prediction"]["BINDING_THRESHOLD"],
+            "percentile_threshold": self.config["prediction"]["percentile_threshold"],
+            "allele_specific_binding_thresholds": self.config["prediction"][
+                "allele_specific_binding_thresholds"
+            ],
+            "aggregate_inclusion_binding_threshold": self.config["prediction"][
+                "aggregate_inclusion_binding_threshold"
+            ],
+            "netmhc_stab": self.config["prediction"]["netmhc_stab"],
+            "NET_CHOP_THRESHOLD": self.config["prediction"]["NET_CHOP_THRESHOLD"],
+            "PROBLEMATIC_AMINO_ACIDS": self.config["prediction"]["PROBLEMATIC_AMINO_ACIDS"],
+            # 'run_reference_proteome_similarity': self.config["prediction"]["run_reference_proteome_similarity"],
+            "FAST_SIZE": self.config["prediction"]["FAST_SIZE"],
+            "exclude_NAs": self.config["prediction"]["exclude_NAs"],
+            "NORMAL_COV": self.config["prediction"]["NORMAL_COV"],
+            "TDNA_COV": self.config["prediction"]["TDNA_COV"],
+            "TRNA_COV": self.config["prediction"]["TRNA_COV"],
+            "NORMAL_VAF": self.config["prediction"]["NORMAL_VAF"],
+            "maximum_transcript_support_level": self.config["prediction"][
+                "maximum_transcript_support_level"
+            ],
+            "pass_only": self.config["prediction"]["pass_only"],
+            "tumor_purity": self.config["prediction"]["tumor_purity"],
+        }
+        for sheet in filter(is_not_background, self.parent.shortcut_sheets):
+            for sample_pair in sheet.all_sample_pairs:
+                if sample_pair.tumor_sample.dna_ngs_library.name == wildcards.tumor_library:
+                    d["normal_sample"] = sample_pair.normal_sample.dna_ngs_library.name
+        return d
+
+
+class SomaticNeoepitopePredictionWorkflow(BaseStep):
+    """Perform neoepitope prediction workflow"""
+
+    name = "somatic_neoepitope_prediction"
+    sheet_shortcut_class = CancerCaseSheet
+    sheet_shortcut_kwargs = {
+        "options": CancerCaseSheetOptions(allow_missing_normal=True, allow_missing_tumor=True)
+    }
+
+    @classmethod
+    def default_config_yaml(cls):
+        """Return default config YAML, to be overwritten by project-specific one."""
+        return DEFAULT_CONFIG
+
+    def __init__(self, workflow, config, config_lookup_paths, config_paths, workdir):
+        super().__init__(
+            workflow,
+            config,
+            config_lookup_paths,
+            config_paths,
+            workdir,
+            config_model_class=SomaticNeoepitopePredictionConfigModel,
+            previous_steps=(
+                SomaticVariantCallingWorkflow,
+                SomaticVariantAnnotationWorkflow,
+                NgsMappingWorkflow,
+                HlaTypingWorkflow,
+            ),
+        )
+        # Register sub step classes so the sub steps are available
+        config = self.config
+        self.register_sub_step_classes((PvacSeqStepPart, LinkOutStepPart))
+        self.register_sub_workflow(
+            "somatic_variant_annotation",
+            config["path_somatic_variant_annotation"],
+        )
+        self.register_sub_workflow(
+            "ngs_mapping",
+            config["path_rna_ngs_mapping"],
+        )
+        self.register_sub_workflow(
+            "hla_typing",
+            config["path_hla_typing"],
+        )
+        # Copy over "tools" setting from somatic_variant_calling/ngs_mapping if not set here
+        if not config.tools_ngs_mapping:
+            config.tools_ngs_mapping = self.w_config["step_config"]["ngs_mapping"].tools.dna
+        if not config.tools_somatic_variant_calling:
+            config.tools_somatic_variant_calling = self.w_config["step_config"][
+                "somatic_variant_calling"
+            ].tools
+        if not config.tools_somatic_variant_annotation:
+            config.tools_somatic_variant_annotation = ["vep"]
+        if not config.tools_rna_mapping:
+            config.tools_rna_mapping = self.w_config["step_config"]["ngs_mapping"].tools.rna
+        if not config.preparation.path_features:
+            config.preparation.path_features = self.w_config["static_data_config"].features.path
+
+    @listify
+    def get_result_files(self):
+        cfg_mode = self.config["preparation"]["mode"]
+        callers = set(self.config["tools_somatic_variant_calling"])
+        anno_callers = set(self.config["tools_somatic_variant_annotation"])
+        predict_tpl = "output/predict/{mapper}.{var_caller}.{anno_caller}.{mode}.{tumor_library.name}.epitopes/"
+        yield from self._yield_result_files_matched(
+            predict_tpl + "out/MHC_Class_I/{tumor_library.name}" + "{ext}",
+            mapper=self.config["tools_ngs_mapping"],
+            var_caller=callers & set(SOMATIC_VARIANT_CALLERS_MATCHED),
+            anno_caller=anno_callers,
+            mode="GX" if cfg_mode == "gene" else "TX",
+            ext=PREDICT_EXT_VALUES,
+        )
+
+        yield from self._yield_result_files_matched(
+            predict_tpl + "log/{tumor_library.name}" + "{ext}",
+            mode="GX" if cfg_mode == "gene" else "TX",
+            mapper=self.config["tools_ngs_mapping"],
+            var_caller=callers & set(SOMATIC_VARIANT_CALLERS_MATCHED),
+            anno_caller=anno_callers,
+            ext=(
+                ".log",
+                ".log.md5",
+                ".conda_info.txt",
+                ".conda_info.txt.md5",
+                ".conda_list.txt",
+                ".conda_list.txt.md5",
+            ),
+        )
+
+    def _yield_result_files_matched(self, tpl, **kwargs):
+        """Build output paths from path template and extension list.
+
+        This function returns the results from the matched somatic variant callers such as
+        Mutect.
+        """
+        for sheet in filter(is_not_background, self.shortcut_sheets):
+            for sample_pair in sheet.all_sample_pairs:
+                if (
+                    not sample_pair.tumor_sample.dna_ngs_library
+                    or not sample_pair.normal_sample.dna_ngs_library
+                    or not sample_pair.tumor_sample.rna_ngs_library
+                ):
+                    msg = (
+                        "INFO: sample pair for cancer bio sample {} has is missing primary"
+                        "normal or primary cancer NGS library"
+                    )
+                    print(msg.format(sample_pair.tumor_sample.name), file=sys.stderr)
+                    continue
+                yield from expand(
+                    tpl,
+                    tumor_library=[sample_pair.tumor_sample.dna_ngs_library],
+                    **kwargs,
+                )
+
+    def check_config(self):
+        """Check that path_somatic_variant_annotation, preparation/mode and preparation/format are present in the configuration"""
+        self.ensure_w_config(
+            (
+                "step_config",
+                "somatic_neoepitope_prediction",
+                "path_somatic_variant_annotation",
+            ),
+            "Path to variant (directory of vcf files) not configured but required for somatic neoepitope prediction",
+        )
+
+        self.ensure_w_config(
+            ("step_config", "somatic_neoepitope_prediction", "preparation", "mode"),
+            "The mode is required for adding gene expression data to somatic variant annotated file",
+        )
+
+        self.ensure_w_config(
+            ("step_config", "somatic_neoepitope_prediction", "preparation", "format"),
+            "Format is required to determine which tool generated the TPM values. ",
+        )
diff --git a/snappy_pipeline/workflows/somatic_neoepitope_prediction/model.py b/snappy_pipeline/workflows/somatic_neoepitope_prediction/model.py
new file mode 100644
index 000000000..a17452d03
--- /dev/null
+++ b/snappy_pipeline/workflows/somatic_neoepitope_prediction/model.py
@@ -0,0 +1,122 @@
+import enum
+from typing import Annotated
+
+from pydantic import Field
+
+from snappy_pipeline.models import EnumField, SnappyModel, SnappyStepModel
+
+
+class Format(enum.StrEnum):
+    stringtie = "stringtie"
+    snappy_custom = "snappy_custom"
+    cufflinks = "cufflinks"
+    kallisto = "kallisto"
+    custom = "custom"
+
+
+class Preparation(SnappyModel):
+    format: Annotated[Format, EnumField(Format)] = Format.snappy_custom
+    "The file format of the expression file to process. (stringtie, kallisto, cufflinks, snappy_custom, custom)"
+    "Use `custom` to process file formats not explicitly supported."
+    "The `custom` option requires the use of the --id-column and --expression-column arguments."
+    path_features: str = ""
+    """Gencode file path, required for star and snappy format"""
+    mode: Annotated[str, Field(examples=["gene", "transcript"])] = "gene"
+    """Determine whether the expression file contains gene or transcript TPM values."""
+    full_vep_annotation: bool = True
+    """The somatic_variant_annotation has been done in fully annotation mode"""
+    id_column: str = ""
+    """Gene/Transcript id column name. Need when using the `custom` format."""
+    expression_column: str = ""
+    """Expression column name. Need when using the `custom` format."""
+    ignore_ensembl_id_version: bool = True
+    """Ignore the ensemble id version"""
+    max_depth: int = 4000
+    "max_depth option of bcftools mpileup"
+
+
+class Algorithms(enum.StrEnum):
+    BigMHC_EL = "BigMHC_EL"
+    BigMHC_IM = "BigMHC_IM"
+    DeepImmuno = "DeepImmuno"
+    MHCflurry = "MHCflurry"
+    MHCflurryEL = "MHCflurryEL"
+    MHCnuggetsI = "MHCnuggetsI"
+    MHCnuggetsII = "MHCnuggetsII"
+    NNalign = "NNalign"
+    NetMHC = "NetMHC"
+    NetMHCIIpan = "NetMHCIIpan"
+    NetMHCIIpanEL = "NetMHCIIpanEL"
+    NetMHCcons = "NetMHCcons"
+    NetMHCpan = "NetMHCpan"
+    NetMHCpanEL = "NetMHCpanEL"
+    PickPocket = "PickPocket"
+    SMM = "SMM"
+    SMMPMBEC = "SMMPMBEC"
+    SMMalign = "SMMalign"
+    all_class_i = "all_class_i"
+
+
+class Prediction(SnappyModel):
+    CLASS_I_EPITOPE_LENGTH: list[int] = [8, 9, 10, 11]
+    "Length of MHC Class I subpeptides (neoepitopes) to predict."
+    "Multiple epitope lengths can be specified using a comma-separated list."
+    "Typical epitope lengths vary between 8-15."
+    algorithms: Annotated[list[Algorithms], EnumField(Algorithms, [])]
+    "The epitope prediction algorithms to use."
+    "For running with all prediction assign to ['all_class_i']"
+    # Following parameter please follow the documentation of pvactools to know its function
+    BINDING_THRESHOLD: int = 500
+    percentile_threshold: float | None = Field(default=None)
+    allele_specific_binding_thresholds: bool = False
+    aggregate_inclusion_binding_threshold: int = 5000
+    netmhc_stab: bool = False
+    NET_CHOP_THRESHOLD: float = 0.5
+    PROBLEMATIC_AMINO_ACIDS: list[str] | None = Field(default=None)
+    # top-score-metric
+    # net-chop-method
+    # net-chop-threshold
+
+    # E.g amino_acid:position - G:2 would check for a Glycine at
+    # the second position of the epitope
+    # run_reference_proteome_similarity: bool = False
+    # blastp_path
+    # blastp-db
+    # peptide-fasta
+    FAST_SIZE: int = 200
+    exclude_NAs: bool = False
+    NORMAL_COV: int = 5
+    TDNA_COV: int = 10
+    TRNA_COV: int = 10
+    NORMAL_VAF: float = 0.02
+    maximum_transcript_support_level: int = Field(gt=0, lt=6, default=1)
+    # allele-specific-anchors
+    # anchor_contribution_threshold: float=0.8
+    pass_only: bool = False
+    tumor_purity: float | None = Field(default=None)
+
+
+class SomaticNeoepitopePrediction(SnappyStepModel):
+    path_somatic_variant_annotation: Annotated[
+        str, Field(examples=["../somatic_variant_annotation"])
+    ]
+    path_container: Annotated[
+        str, Field(examples=["../somatic_neoepitope_prediction/work/containers/out/pvactools.simg"])
+    ]
+    """
+    Running somatic neoepitope prediction with pvactools
+    is required,with the container
+    """
+    path_hla_typing: Annotated[str, Field(examples=["../hla_typing"])]
+    path_rna_ngs_mapping: Annotated[str, Field(examples=["../ngs_mapping"])]
+    tools_somatic_variant_annotation: list[str] = ["vep"]
+    tools_ngs_mapping: list[str] = []
+    tools_somatic_variant_calling: list[str] = []
+    tools_rna_mapping: list[str] = []
+    """Deafult to those configured for ngs_mapping"""
+    tools_ngs_mapping: list[str] = []
+    """Deafult to those configured for ngs_mapping"""
+    tools_somatic_variant_calling: list[str] = []
+    """Deafult to those configured for somatic_variant_calling"""
+    preparation: Preparation | None = None
+    prediction: Prediction | None = None
diff --git a/snappy_wrappers/wrappers/pvactools/combining/comb_rna.py b/snappy_wrappers/wrappers/pvactools/combining/comb_rna.py
new file mode 100644
index 000000000..8b3658c14
--- /dev/null
+++ b/snappy_wrappers/wrappers/pvactools/combining/comb_rna.py
@@ -0,0 +1,495 @@
+import argparse
+import logging
+import re
+import sys
+
+import pandas as pd
+import pyranges as pr
+from vcfpy import OrderedDict, Reader, Writer
+
+
+########################## Define tool parameters ##########################
+def define_parser():
+    parser = argparse.ArgumentParser(
+        "comb_rna.py",
+        description="A tool that will add the data from several expression tools' output files"
+        + "and allelic depths from mRNA sequencing data for snvs"
+        + "to the VCF INFO column. Supported tools are StringTie, Kallisto, Star, snappy_custom"
+        + "and Cufflinks. There also is a ``custom`` option to annotate with data "
+        + "from any tab-delimited file.",
+    )
+
+    parser.add_argument("--input-vcf", help="A VEP-annotated VCF file")
+    parser.add_argument("--expression-file", help="A expression file")
+    parser.add_argument("--genecode", help="A genecode file for calculate TPM from star gene count")
+    parser.add_argument(
+        "--rna-vcf",
+        help="A VCF file of RNA-seq data",
+    )
+    parser.add_argument(
+        "--format",
+        choices=["kallisto", "stringtie", "cufflinks", "star", "snappy_custom", "custom"],
+        help="The file format of the expression file to process. "
+        + "Use `custom` to process file formats not explicitly supported. "
+        + "The `custom` option requires the use of the --id-column and --expression-column arguments.",
+    )
+    parser.add_argument(
+        "--mode",
+        choices=["gene", "transcript"],
+        help="The type of expression data in the expression_file",
+    )
+    parser.add_argument(
+        "-i",
+        "--id-column",
+        help="The column header in the expression_file for the column containing gene/transcript ids. Required when using the `custom` format.",
+    )
+    parser.add_argument(
+        "-e",
+        "--expression-column",
+        help="The column header in the expression_file for the column containing expression data. Required when using the `custom` and `star` format.",
+    )
+    parser.add_argument(
+        "-s",
+        "--sample-name",
+        help="If the input_vcf contains multiple samples, the name of the sample to annotate.",
+    )
+    parser.add_argument(
+        "-o",
+        "--output-vcf",
+        help="Path to write the output VCF file. If not provided, the output VCF file will be "
+        + "written next to the input VCF file with a .tx.vcf or .gx.vcf file ending.",
+    )
+    parser.add_argument(
+        "--ignore-ensembl-id-version",
+        help='Assumes that the final period and number denotes the Ensembl ID version and ignores it (i.e. for "ENST00001234.3" - ignores the ".3").',
+        action="store_true",
+    )
+
+    return parser
+
+
+########################## Arguments check ##########################
+def args_check(args):
+    if args.format == "custom":
+        if args.id_column is None:
+            raise Exception(
+                "--id-column is not set. This is required when using the `custom` format."
+            )
+        if args.expression_column is None:
+            raise Exception(
+                "--expression-column is not set. This is required when using the `custom` format."
+            )
+    if args.format == "star":
+        if args.genecode is None:
+            raise Exception("--genecode is not set. This is required when using the `star` format")
+
+
+########################## Get expected column names ##########################
+def get_expected_column_names(format, mode, id_column, exp_column):
+    match (format, mode):
+        case ("star", _):
+            try:
+                exp = int(exp_column)
+                if 1 < exp and exp <= 4:
+                    return [0, exp - 1]
+                else:
+                    raise Exception("Expression column is invalid")
+            except ValueError:
+                raise ValueError("Expression column in star should be a number")
+        case ("snappy_custom", _):
+            return [0, 1]
+        case ("kallisto", "gene"):
+            return ["gene", "abundance"]
+        case ("kallisto", "transcript"):
+            return ["target_id", "tpm"]
+        case ("cufflinks", _):
+            return ["tracking_id", "FPKM"]
+        case ("stringtie", "gene"):
+            return ["Gene ID", "TPM"]
+        case ("stringtie", "transcript"):
+            return ["transcript_id", "TPM"]
+        case ("custom", _):
+            return [id_column, exp_column]
+        case _:
+            raise Exception("We don't support {format} with mode as {mode} yet.")
+
+
+########################## Genecode for star parser ##########################
+def count_to_TPM(genecode_path, exp_df, no_version=False):
+    gc = pr.read_gtf(genecode_path, as_df=True)
+    if no_version:
+        gc.loc[:, "gene_id"] = gc.loc[:, "gene_id"].apply(lambda x: re.sub(r"\.[0-9]+$", "", x))
+    gc = gc[gc["gene_id"].isin(exp_df.iloc[:, 0])]
+    gc = gc[(gc.Feature == "gene")]
+    exon = gc[["Chromosome", "Start", "End", "gene_id", "gene_name"]]
+    # Convert columns to proper types.
+    exon.loc[:, "Start"] = exon.loc[:, "Start"].astype(int)
+    exon.loc[:, "End"] = exon.loc[:, "End"].astype(int)
+    # Sort in place.
+    exon = exon.sort_values(by=["Chromosome", "Start", "End"])
+    # Group the rows by the Ensembl gene identifier.
+    groups = exon.groupby("gene_id")
+    lengths = groups.apply(count_bp)
+    # Create a new DataFrame with gene lengths and EnsemblID.
+    gene_id = lengths.index
+    if len(gene_id) != len(exp_df):
+        raise Exception("Duplicated gene id in expression file")
+    ldf = pd.DataFrame({"length": lengths.values, "gene_id": gene_id}).sort_values(by="gene_id")
+    # Calculate TPM
+    tpm = calculate_TPM(exp_df, ldf)
+    tpm_dict = {k: v for k, v in zip(exp_df.iloc[:, 0], tpm)}
+    return tpm_dict
+
+
+def calculate_TPM(exp_df, ldf):
+    "TPM = (reads_transcript / transcript_length_kb) / scaled_total_mapped_reads"
+    transcript_length_kb = ldf["length"] / 1000
+    RPK = exp_df.iloc[:, 1] / transcript_length_kb
+    scale_factor_permil = sum(RPK) / (10**6)
+    return RPK / scale_factor_permil
+
+
+def count_bp(df):
+    """Given a DataFrame with the exon coordinates from Gencode for a single
+    gene, return the total number of coding bases in that gene.
+    Example:
+        >>> import numpy as np
+        >>> n = 3
+        >>> r = lambda x: np.random.sample(x) * 10
+        >>> d = pd.DataFrame([np.sort([a,b]) for a,b in zip(r(n), r(n))], columns=['start','end']).astype(int)
+        >>> d
+           start  end
+        0      6    9
+        1      3    4
+        2      4    9
+        >>> count_bp(d)
+        7
+    Here is a visual representation of the 3 exons and the way they are added:
+          123456789  Length
+        0      ----       4
+        1   --            2
+        2    ------       6
+            =======       7
+    """
+    start = df.Start.min()
+    end = df.End.max()
+    bp = [False] * (end - start + 1)
+    for i in range(df.shape[0]):
+        s = df.iloc[i]["Start"] - start
+        e = df.iloc[i]["End"] - start + 1
+        bp[s:e] = [True] * (e - s)
+    return sum(bp)
+
+
+########################## Expression file parser ##########################
+def colnames_check(names, exp_names):
+    return set(names).issubset(set(exp_names))
+
+
+def expression_file_parser(
+    path, format, mode, id_column, exp_column, no_version=False, genecode=""
+):
+    col_names = get_expected_column_names(format, mode, id_column, exp_column)
+    if isinstance(col_names[0], int):
+        exp_df = pd.read_csv(path, skiprows=4, sep="\t", header=None)
+        exp_df = exp_df.iloc[:, col_names]
+        exp_df = exp_df.sort_values(by=0)
+        if no_version:
+            exp_df.iloc[:, 0] = exp_df.iloc[:, 0].apply(lambda x: re.sub(r"\.[0-9]+$", "", x))
+        tpm_dict = count_to_TPM(genecode, exp_df, no_version)
+    else:
+        if format == "stringtie" and mode == "transcript":
+            exp_df = pr.read_gtf(path, as_df=True)
+            exp_df = exp_df[exp_df["feature"] == "transcript"]
+        else:
+            exp_df = pd.read_csv(path, sep="\t", header=0)
+
+        if colnames_check(col_names, exp_df.columns):
+            exp_df = exp_df.loc[:, col_names]
+        else:
+            raise Exception(
+                "Gene id column or expression column is not presense in expression file"
+            )
+
+        if no_version:
+            exp_df.iloc[:, 0] = exp_df.iloc[:, 0].apply(lambda x: re.sub(r"\.[0-9]+$", "", x))
+        tpm_dict = {k: v for k, v in zip(exp_df.iloc[:, 0], exp_df.iloc[:, 1])}
+    return tpm_dict
+
+
+########################## Dna Vcf parser ##########################
+def dna_vcf_parser(path, sample_name):
+    vcf_reader = Reader.from_path(path)
+    is_multi_sample = len(vcf_reader.header.samples.names) > 1
+    if is_multi_sample and sample_name is None:
+        vcf_reader.close()
+        raise Exception(
+            "ERROR: VCF {} contains more than one sample. Please use the -s option to specify which sample to annotate.".format(
+                path
+            )
+        )
+    elif is_multi_sample and sample_name not in vcf_reader.header.samples.names:
+        vcf_reader.close()
+        raise Exception(
+            "ERROR: VCF {} does not contain a sample column for sample {}.".format(
+                path, sample_name
+            )
+        )
+    if "CSQ" not in vcf_reader.header.info_ids():
+        vcf_reader.close()
+        raise Exception(
+            "ERROR: VCF {} is not VEP-annotated. Please annotate the VCF with VEP before running this tool.".format(
+                path
+            )
+        )
+    return vcf_reader, is_multi_sample
+
+
+########################## Rna Vcf parser ##########################
+def rna_vcf_parser(path):
+    rna_vcf_reader = Reader.from_path(path)
+    return rna_vcf_reader
+
+
+########################## Add expression values functions ##########################
+def to_array(dictionary):
+    array = []
+    for key, value in dictionary.items():
+        array.append("{}|{}".format(key, value))
+    return sorted(array)
+
+
+def add_Expression_to_vcf(
+    entry,
+    is_multi_sample,
+    sample_name,
+    exp_dict,
+    genes,
+    tag,
+    no_version,
+    missing_expressions_count,
+    entry_count,
+):
+    expressions = {}
+    for gene in genes:
+        entry_count += 1
+        if no_version:
+            gene = re.sub(r"\.[0-9]+$", "", gene)
+        if gene in exp_dict:
+            expressions[gene] = exp_dict[gene]
+        else:
+            missing_expressions_count += 1
+    if is_multi_sample:
+        entry.FORMAT += [tag]
+        entry.call_for_sample[sample_name].data[tag] = to_array(expressions)
+    else:
+        entry.add_format(tag, to_array(expressions))
+    return (entry, missing_expressions_count, entry_count)
+
+
+def add_AD_to_vcf(entry, is_multi_sample, sample_name, rna_vcf):
+    RAD_temp = []
+    ROT = []
+    try:
+        rna_records = rna_vcf.fetch(entry.CHROM, entry.affected_start, entry.affected_end)
+        rna_record = next(rna_records)
+    except (StopIteration, ValueError):
+        # if there is no snp. RAD and RoT will be set to 0
+        if is_multi_sample:
+            entry.FORMAT += ["RAD"]
+            entry.call_for_sample[sample_name].data["RAD"] = 0
+            entry.FORMAT += ["ROT"]
+            entry.call_for_sample[sample_name].data["ROT"] = 0
+        else:
+            entry.add_format("RAD", 0)
+            entry.add_format("ROT", 0)
+    else:
+        rna_alt = [alt.value for alt in rna_record.ALT]
+        rna_AD = [c.data.get("AD") for c in rna_record.calls][0]
+        ROT_temp = sum(rna_AD) - rna_AD[0]
+        if rna_alt:
+            rna_AD = [c.data.get("AD") for c in rna_record.calls][0]
+            for dna_alt in entry.ALT:
+                if dna_alt.value in rna_alt:
+                    index = rna_record.ALT.index(dna_alt)
+                    RAD_temp.append(rna_AD[0])  # AD of REF
+                    RAD_temp.append(rna_AD[index + 1])  # AD of ALT
+                    ROT_temp = ROT_temp - rna_AD[index + 1]
+        ROT.append(ROT_temp)
+        if is_multi_sample:
+            entry.FORMAT += ["RAD"]
+            entry.call_for_sample[sample_name].data["RAD"] = RAD_temp
+            entry.FORMAT += ["ROT"]
+            entry.call_for_sample[sample_name].data["ROT"] = ROT_temp
+        else:
+            entry.add_format("RAD", RAD_temp)
+            entry.add_format("ROT", ROT_temp)
+    return entry
+
+
+########################## Add fields to header ##########################
+def create_vcf_writer(path, vcf_reader, mode, rna_vcf, output_vcf):
+    (head, _, tail) = path.rpartition(".vcf")
+    new_header = vcf_reader.header.copy()
+    if mode == "gene":
+        new_header.add_format_line(
+            OrderedDict(
+                [
+                    ("ID", "GX"),
+                    ("Number", "."),
+                    ("Type", "String"),
+                    ("Description", "Gene Expressions"),
+                ]
+            )
+        )
+        output_file = ("").join([head, ".gx.vcf", tail])
+    elif mode == "transcript":
+        new_header.add_format_line(
+            OrderedDict(
+                [
+                    ("ID", "TX"),
+                    ("Number", "."),
+                    ("Type", "String"),
+                    ("Description", "Transcript Expressions"),
+                ]
+            )
+        )
+        output_file = ("").join([head, ".tx.vcf", tail])
+    if rna_vcf:
+        new_header.add_format_line(
+            OrderedDict(
+                [
+                    ("ID", "RAD"),
+                    ("Number", "A"),
+                    ("Type", "Integer"),
+                    (
+                        "Description",
+                        "Allelic depths from mRNA sequencing data of the same sample",
+                    ),
+                ]
+            )
+        )
+        new_header.add_format_line(
+            OrderedDict(
+                [
+                    ("ID", "ROT"),
+                    ("Number", "1"),
+                    ("Type", "Integer"),
+                    (
+                        "Description",
+                        "Sum of allelic depths of other alternative allele from mRNA sequencing data of the same sample",
+                    ),
+                ]
+            )
+        )
+        (head, _, tail) = output_file.rpartition(".vcf")
+        output_file = ("").join([head, ".pileup.vcf", tail])
+    if output_vcf:
+        output_file = output_vcf
+    return Writer.from_path(output_file, new_header)
+
+
+########################## Generate new vcf file ##########################
+def adding_extra_information(args):
+    dna_vcf, is_multi_sample = dna_vcf_parser(args.input_vcf, args.sample_name)
+    if args.rna_vcf:
+        rna_vcf = rna_vcf_parser(args.rna_vcf)
+    # CONTIGS = [x.id for x in dna_vcf.header.get_lines("contig")]
+    format_pattern = re.compile("Format: (.*)")
+    csq_format = (
+        format_pattern.search(dna_vcf.header.get_info_field_info("CSQ").description)
+        .group(1)
+        .split("|")
+    )
+    if args.ignore_ensembl_id_version:
+        no_version = True
+
+    vcf_writer = create_vcf_writer(
+        args.input_vcf, dna_vcf, args.mode, args.rna_vcf, args.output_vcf
+    )
+    exp_dict = expression_file_parser(
+        args.expression_file,
+        args.format,
+        args.mode,
+        args.id_column,
+        args.expression_column,
+        no_version,
+        args.genecode,
+    )
+
+    missing_expressions_count = 0
+    entry_count = 0
+
+    for entry in dna_vcf:
+        # Add expression data
+        transcript_ids = set()
+        genes = set()
+        if "CSQ" not in entry.INFO:
+            logging.warning(
+                "Variant is missing VEP annotation. INFO column doesn't contain CSQ field for variant {}".format(
+                    entry
+                )
+            )
+            vcf_writer.write_record(entry)
+            continue
+        for transcript in entry.INFO["CSQ"]:
+            for key, value in zip(csq_format, transcript.split("|")):
+                if key == "Feature" and value != "" and not value.startswith("ENSR"):
+                    transcript_ids.add(value)
+                if key == "Gene" and value != "":
+                    genes.add(value)
+
+        if args.mode == "gene":
+            genes = list(genes)
+            if len(genes) > 0:
+                (entry, missing_expressions_count, entry_count) = add_Expression_to_vcf(
+                    entry,
+                    is_multi_sample,
+                    args.sample_name,
+                    exp_dict,
+                    genes,
+                    "GX",
+                    no_version,
+                    missing_expressions_count,
+                    entry_count,
+                )
+        elif args.mode == "transcript":
+            transcript_ids = list(transcript_ids)
+            if len(transcript_ids) > 0:
+                (entry, missing_expressions_count, entry_count) = add_Expression_to_vcf(
+                    entry,
+                    is_multi_sample,
+                    args.sample_name,
+                    exp_dict,
+                    transcript_ids,
+                    "TX",
+                    no_version,
+                    missing_expressions_count,
+                    entry_count,
+                )
+        # Add RAD and ROT
+        if rna_vcf is not None:
+            if entry.is_snv():
+                entry = add_AD_to_vcf(entry, is_multi_sample, args.sample_name, rna_vcf)
+        vcf_writer.write_record(entry)
+
+    dna_vcf.close()
+    vcf_writer.close()
+
+    if missing_expressions_count > 0:
+        logging.warning(
+            "{} of {} {}s did not have an expression entry for their {} id.".format(
+                missing_expressions_count, entry_count, args.mode, args.mode
+            )
+        )
+
+
+########################## MAIN ##########################
+def main(args_input=sys.argv[1:]):
+    parser = define_parser()
+    args = parser.parse_args(args_input)
+    adding_extra_information(args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/snappy_wrappers/wrappers/pvactools/combining/environment.yaml b/snappy_wrappers/wrappers/pvactools/combining/environment.yaml
new file mode 100644
index 000000000..47a97a8e3
--- /dev/null
+++ b/snappy_wrappers/wrappers/pvactools/combining/environment.yaml
@@ -0,0 +1,10 @@
+channels:
+    - conda-forge
+    - bioconda
+dependencies:
+    - vcfpy>=0.12.3
+    - numpy>=1.25.2, <1.26
+    - pyranges>=0.0.129, <0.1
+    - pandas>=2.1.1, <2.2
+    - python>=3.10.8, <3.11
+    - bcftools>=1.17, <1.18
diff --git a/snappy_wrappers/wrappers/pvactools/combining/wrapper.py b/snappy_wrappers/wrappers/pvactools/combining/wrapper.py
new file mode 100644
index 000000000..f5a48fdea
--- /dev/null
+++ b/snappy_wrappers/wrappers/pvactools/combining/wrapper.py
@@ -0,0 +1,109 @@
+# -*- coding: utf-8 -*-
+"""
+Wrapper for preparation the vcf file for somatic neoepitope prediction
+"""
+
+import os
+
+from snakemake.shell import shell
+
+__author__ = "Pham Gia Cuong"
+__email__ = "pham.gia-cuong@bih-charite.de"
+
+step = snakemake.config["pipeline_step"]["name"]
+config = snakemake.config["step_config"][step]
+
+exp_format = config["preparation"]["format"]
+preparation_vcf = os.path.join(
+    os.path.dirname(__file__),
+    "comb_rna.py",
+)
+ensemble_id = (
+    "--ignore-ensembl-id-version" if snakemake.params.args["ignore_ensembl_id_version"] else ""
+)
+max_depth = snakemake.params.args["max_depth"]
+format = snakemake.params.args["format"]
+mode = snakemake.params.args["mode"]
+expression_column = (
+    f"--expression-file {snakemake.params.args['expression_column']}" if format == "custom" else ""
+)
+
+id_column = f"--expression-file {snakemake.params.args['id_column']}" if format == "custom" else ""
+
+bam_file = snakemake.input.bam
+gencount_file = snakemake.input.expression
+
+rna_vcf = (
+    f"--rna-vcf $TMPDIR/{snakemake.wildcards.tumor_library}.tmp.vcf.gz "
+    if format == "snappy_custom"
+    else ""
+)
+expression_file = (
+    f"--expression-file {gencount_file} "
+    if format == "snappy_custom"
+    else config["preparation"]["expression_file"]
+)
+
+shell(
+    r"""
+# -----------------------------------------------------------------------------
+# Redirect stderr to log file by default and enable printing executed commands
+# Also pipe stderr to log file
+if [[ -n "{snakemake.log.log}" ]]; then
+    if [[ "$(set +e; tty; set -e)" != "" ]]; then
+        rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log})
+        exec 2> >(tee -a "{snakemake.log.log}" >&2)
+    else
+        rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log})
+        echo "No tty, logging disabled" >"{snakemake.log.log}"
+    fi
+fi
+# -----------------------------------------------------------------------------
+# Create auto-cleaned temporary directory
+export TMPDIR=$(mktemp -d)
+trap "rm -rf $TMPDIR" EXIT
+
+set -x
+# -----------------------------------------------------------------------------
+
+# Write out information about conda installation
+conda list > {snakemake.log.conda_list}
+conda info > {snakemake.log.conda_info}
+
+if [[ {format}=="snappy_custom" ]]; then
+    # Getting region of SNVs only
+    bcftools filter -i 'TYPE="snp"' {snakemake.input.vcf} | bcftools query -f '%CHROM\t%POS0\t%END\n' > $TMPDIR/{snakemake.wildcards.tumor_library}.tmp.bed
+
+    # Getting snvs from RNA sequencing data
+    bcftools mpileup -Ov \
+        --annotate FORMAT/AD,FORMAT/DP \
+        -R $TMPDIR/{snakemake.wildcards.tumor_library}.tmp.bed \
+        -f {snakemake.config[static_data_config][reference][path]} \
+        --per-sample-mF \
+        --max-depth {max_depth} \
+        --redo-BAQ -Oz \
+        -o $TMPDIR/{snakemake.wildcards.tumor_library}.tmp.vcf.gz \
+        {bam_file}
+    tabix -f $TMPDIR/{snakemake.wildcards.tumor_library}.tmp.vcf.gz
+fi
+
+#Need to add option for RNA vcf as well
+python3 {preparation_vcf} \
+    --genecode {snakemake.config[static_data_config][features][path]} \
+    --input-vcf {snakemake.input.vcf} --format {format} \
+    --mode {mode} \
+    {expression_column} \
+    {id_column} \
+    -s {snakemake.wildcards.tumor_library} \
+    -o /dev/stdout \
+    {expression_file}\
+    {rna_vcf}\
+    "{ensemble_id}" \
+| bgzip -c > {snakemake.output.vcf}
+tabix -f {snakemake.output.vcf}
+
+pushd $(dirname {snakemake.output.vcf})
+md5sum $(basename {snakemake.output.vcf}) >$(basename {snakemake.output.vcf}).md5
+md5sum $(basename {snakemake.output.vcf_tbi}) >$(basename {snakemake.output.vcf_tbi}).md5
+"""
+)
diff --git a/snappy_wrappers/wrappers/pvactools/environment.yaml b/snappy_wrappers/wrappers/pvactools/environment.yaml
new file mode 100644
index 000000000..e69de29bb
diff --git a/snappy_wrappers/wrappers/pvactools/pvacseq/environment.yaml b/snappy_wrappers/wrappers/pvactools/pvacseq/environment.yaml
new file mode 100644
index 000000000..7ef7fd65f
--- /dev/null
+++ b/snappy_wrappers/wrappers/pvactools/pvacseq/environment.yaml
@@ -0,0 +1,15 @@
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+  - default
+dependencies:
+  - python==3.6.15
+  # - pip==21.3.1
+  # - pip:
+  #     - pvactools==3.1.3
+  #     - mhcflurry==2.0.1
+  #     - mhcnuggets==2.3.3
+  #     - git+https://github.com/griffithlab/bigmhc.git#egg=bigmhc
+  #     - git+https://github.com/griffithlab/deepimmuno.git#egg=deepimmuno
+  #     - tensorflow==1.5.0
\ No newline at end of file
diff --git a/snappy_wrappers/wrappers/pvactools/pvacseq/wrapper.py b/snappy_wrappers/wrappers/pvactools/pvacseq/wrapper.py
new file mode 100644
index 000000000..ec4245fd1
--- /dev/null
+++ b/snappy_wrappers/wrappers/pvactools/pvacseq/wrapper.py
@@ -0,0 +1,159 @@
+import os
+
+from snakemake import shell
+
+__author__ = "Pham Gia Cuong"
+__email__ = "pham.gia-cuong@bih-charite.de"
+
+step = snakemake.config["pipeline_step"]["name"]
+config = snakemake.config["step_config"][step]
+
+algorithms = snakemake.params.args["algorithms"]
+epitope_length = snakemake.params.args["epitope_length"]
+normal_smaple = snakemake.params.args["normal_sample"]
+BINDING_THRESHOLD = f"-b {snakemake.params.args["BINDING_THRESHOLD"]} "
+percentile_threshold = (
+    f"--percentile-threshold {snakemake.params.args["percentile_threshold"]} "
+    if snakemake.params.args["percentile_threshold"] is not None
+    else ""
+)
+
+
+allele_specific_binding_thresholds = (
+    "--allele-specific-binding-thresholds "
+    if snakemake.params.args["allele_specific_binding_thresholds"]
+    else ""
+)
+
+aggregate_inclusion_binding_threshold = (
+    f"--aggregate-inclusion-binding-threshold {snakemake.params.args["aggregate_inclusion_binding_threshold"]} ",
+)
+
+netmhc_stab = "--netmhc-stab " if snakemake.params.args["netmhc_stab"] else ""
+
+NET_CHOP_THRESHOLD = (f"--net-chop-threshold {snakemake.params.args["NET_CHOP_THRESHOLD"]} ",)
+
+PROBLEMATIC_AMINO_ACIDS = (
+    f"--problematic-amino-acids {snakemake.params.args["PROBLEMATIC_AMINO_ACIDS"]} ",
+)
+
+# if {snakemake.params.args["run_reference_proteome_similarity"]}:
+#     run_reference_proteome_similarity= f"--run-reference-proteome-similarity",
+
+FAST_SIZE = (f"-s {snakemake.params.args["FAST_SIZE"]} ",)
+
+exclude_NAs = "--exclude-NAs " if snakemake.params.args["exclude_NAs"] else ""
+
+NORMAL_COV = (f"--normal-cov {snakemake.params.args["NORMAL_COV"]} ",)
+TDNA_COV = (f"--tdna-cov {snakemake.params.args["TDNA_COV"] }",)
+TRNA_COV = (f"--trna-cov {snakemake.params.args["TRNA_COV"]} ",)
+NORMAL_VAF = (f"--normal-vaf {snakemake.params.args["NORMAL_VAF"]} ",)
+
+maximum_transcript_support_level = (
+    f"--maximum-transcript-support-level {snakemake.params.args["maximum_transcript_support_level"]} ",
+)
+
+pass_only = "--pass-only " if snakemake.params.args["pass_only"] else ""
+
+tumor_purity = (
+    f"--tumor-purity {snakemake.params.args["tumor_purity"]} "
+    if snakemake.params.args["tumor_purity"] is not None
+    else ""
+)
+
+op_dir = "/".join(snakemake.output.all_epitopes.split("/")[:-2])
+
+files_to_bind = {
+    "combine_vcf": snakemake.input.combine_vcf,
+    "op_dir": op_dir,
+}
+
+# TODO: Put the following in a function (decide where...)
+# Replace with full absolute paths
+files_to_bind = {k: os.path.realpath(v) for k, v in files_to_bind.items()}
+# Directories that mut be bound
+dirs_to_bind = {k: os.path.dirname(v) for k, v in files_to_bind.items()}
+# List of unique directories to bind: on cluster: <directory> -> from container: /bindings/d<i>)
+bound_dirs = {e[1]: e[0] for e in enumerate(list(set(dirs_to_bind.values())))}
+# Binding command
+bindings = " ".join(["-B {}:/bindings/d{}".format(k, v) for k, v in bound_dirs.items()])
+bound_files = {
+    k: "/bindings/d{}/{}".format(bound_dirs[dirs_to_bind[k]], os.path.basename(v))
+    for k, v in files_to_bind.items()
+}
+
+shell.executable("/bin/bash")
+
+shell(
+    r"""
+# -----------------------------------------------------------------------------
+# Redirect stderr to log file by default and enable printing executed commands
+# Also pipe stderr to log file
+if [[ -n "{snakemake.log.log}" ]]; then
+    if [[ "$(set +e; tty; set -e)" != "" ]]; then
+        rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log})
+        exec 2> >(tee -a "{snakemake.log.log}" >&2)
+    else
+        rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log})
+        echo "No tty, logging disabled" >"{snakemake.log.log}"
+    fi
+fi
+# Write out information about conda installation.
+conda list >{snakemake.log.conda_list}
+conda info >{snakemake.log.conda_info}
+md5sum {snakemake.log.conda_list} >{snakemake.log.conda_list_md5}
+md5sum {snakemake.log.conda_info} >{snakemake.log.conda_info_md5}
+
+# Setup auto-cleaned tmpdir
+export TMPDIR=$(mktemp -d)
+trap "rm -rf $TMPDIR" EXIT
+
+# Compute md5 checksum
+md5() {{
+    fn=$1
+    d=$(dirname $fn)
+    f=$(basename $fn)
+    pushd $d 1> /dev/null 2>&1
+    checksum=$(md5sum $f)
+    popd 1> /dev/null 2>&1
+    echo "$checksum"
+}}
+
+set -x
+# -----------------------------------------------------------------------------
+
+# Write out information about conda installation
+conda list > {snakemake.log.conda_list}
+conda info > {snakemake.log.conda_info}
+
+hla_types=$(cat {snakemake.input.hla_normal_dna} {snakemake.input.hla_tumor_dna} {snakemake.input.hla_tumor_rna} | sed 's/^/HLA-/' | sort | uniq | tr '\n' ',' | sed 's/,$//')
+cmd="pvacseq run --normal-sample-name {normal_smaple} \
+    -e1 {epitope_length} \
+    --iedb-install-directory /opt/iedb \
+    -t {snakemake.threads} \
+    {bound_files[combine_vcf]} \
+    {snakemake.wildcards.tumor_library} \
+    $hla_types \
+    {algorithms} \
+    {bound_files[op_dir]} \
+    {BINDING_THRESHOLD}\
+    {percentile_threshold}\
+    {allele_specific_binding_thresholds}\
+    {aggregate_inclusion_binding_threshold}\
+    {NET_CHOP_THRESHOLD}\
+    {PROBLEMATIC_AMINO_ACIDS}\
+    {FAST_SIZE}\
+    {NORMAL_COV}\
+    {TDNA_COV}\
+    {TRNA_COV}\
+    {NORMAL_VAF}\
+    {exclude_NAs}\
+    {maximum_transcript_support_level}"
+echo 'TMPDIR=/bindings/d2' > $TMPDIR/{snakemake.wildcards.tumor_library}.sh
+echo $cmd >> $TMPDIR/{snakemake.wildcards.tumor_library}.sh
+apptainer exec --home $PWD -B $TMPDIR:/bindings/d2 {bindings} {config[path_container]} bash /bindings/d2/{snakemake.wildcards.tumor_library}.sh
+
+md5 {snakemake.output.all_epitopes} > {snakemake.output.all_epitopes_md5}
+md5 {snakemake.output.filtered_epitopes} > {snakemake.output.filtered_epitopes_md5}
+"""
+)
diff --git a/snappy_wrappers/wrappers/stringtie/environment.yaml b/snappy_wrappers/wrappers/stringtie/environment.yaml
new file mode 100644
index 000000000..b79f83c81
--- /dev/null
+++ b/snappy_wrappers/wrappers/stringtie/environment.yaml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - stringtie==2.2.1
\ No newline at end of file
diff --git a/snappy_wrappers/wrappers/stringtie/wrapper.py b/snappy_wrappers/wrappers/stringtie/wrapper.py
new file mode 100644
index 000000000..0ded26e3d
--- /dev/null
+++ b/snappy_wrappers/wrappers/stringtie/wrapper.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+"""
+Wrapper for quantify gene and transcript expression
+"""
+
+from snakemake.shell import shell
+
+__author__ = "Pham Gia Cuong"
+__email__ = "pham.gia-cuong@bih-charite.de"
+
+step = snakemake.config["pipeline_step"]["name"]
+config = snakemake.config["step_config"][step]
+
+shell(
+    r"""
+# -----------------------------------------------------------------------------
+# Redirect stderr to log file by default and enable printing executed commands
+exec 2> >(tee -a "{snakemake.log.log}")
+set -x
+# -----------------------------------------------------------------------------
+
+# Write out information about conda installation
+conda list > {snakemake.log.conda_list}
+conda info > {snakemake.log.conda_info}
+
+
+stringtie \
+    -G {snakemake.config[static_data_config][features][path]} \
+    -p 4 \
+    -v \
+    -o {snakemake.output.expression} \
+    {snakemake.input.rna_bam}
+
+pushd $(dirname {snakemake.output.expression})
+md5sum $(basename {snakemake.output.expression}) > $(basename {snakemake.output.expression_md5})
+"""
+)
+
+
+# Compute MD5 sums of logs
+shell(
+    r"""
+md5sum {snakemake.log.log} > {snakemake.log.log_md5}
+md5sum {snakemake.log.conda_list} > {snakemake.log.conda_list_md5}
+md5sum {snakemake.log.conda_info} > {snakemake.log.conda_info_md5}
+"""
+)
diff --git a/snappy_wrappers/wrappers/vep/run/wrapper.py b/snappy_wrappers/wrappers/vep/run/wrapper.py
index 84da509d6..46c4b96a6 100644
--- a/snappy_wrappers/wrappers/vep/run/wrapper.py
+++ b/snappy_wrappers/wrappers/vep/run/wrapper.py
@@ -11,6 +11,15 @@
 vep_config = snakemake.config["step_config"][current_step]["vep"]
 pick_order = ",".join(vep_config["pick_order"])
 script_output_options = " ".join(["--" + x for x in vep_config["output_options"]])
+if vep_config["plugins"]:
+    plugins = " ".join(["--plugin " + x for x in vep_config["plugins"]])
+    if not vep_config["plugins_dir"]:
+        raise Exception("Please provide plugins directory if you want to use plugins")
+    else:
+        plugins_dir = "--dir_plugins " + vep_config["plugins_dir"]
+else:
+    plugins = ""
+    plugins_dir = ""
 
 full = snakemake.output.full if "full" in snakemake.output.keys() else ""
 
@@ -43,6 +52,8 @@
             echo --dir_cache {vep_config[cache_dir]}
         fi) \
         {script_output_options} \
+        {plugins} \
+        {plugins_dir} \
         --{vep_config[tx_flag]} \
         --fasta {snakemake.config[static_data_config][reference][path]} \
         --input_file {snakemake.input.vcf} --format vcf \
@@ -63,6 +74,8 @@
         echo --dir_cache {vep_config[cache_dir]}
     fi) \
     {script_output_options} \
+    {plugins} \
+    {plugins_dir} \
     --pick --pick_order {pick_order} \
     --{vep_config[tx_flag]} \
     --fasta {snakemake.config[static_data_config][reference][path]} \
diff --git a/tests/snappy_pipeline/workflows/test_workflows_somatic_neoepitope_prediction.py b/tests/snappy_pipeline/workflows/test_workflows_somatic_neoepitope_prediction.py
new file mode 100644
index 000000000..9eb3cc435
--- /dev/null
+++ b/tests/snappy_pipeline/workflows/test_workflows_somatic_neoepitope_prediction.py
@@ -0,0 +1,322 @@
+# -*- coding: utf-8 -*-
+"""Tests for the somatic_variant_calling workflow module code"""
+
+import textwrap
+
+import pytest
+import ruamel.yaml as ruamel_yaml
+from snakemake.io import Wildcards
+
+from snappy_pipeline.workflows.somatic_neoepitope_prediction import (
+    SomaticNeoepitopePredictionWorkflow,
+)
+
+from .common import get_expected_log_files_dict, get_expected_output_vcf_files_dict
+from .conftest import patch_module_fs
+
+
+# Test tumor mutational burden calculation with vcf file from somatic variant calling step
+@pytest.fixture(scope="module")  # otherwise: performance issues
+def minimal_config():
+    """Return YAML parsing result for configuration"""
+    yaml = ruamel_yaml.YAML()
+    return yaml.load(
+        textwrap.dedent(
+            r"""
+        static_data_config:
+          reference:
+            path: /path/to/ref.fa
+          cosmic:
+            path: /path/to/cosmic.vcf.gz
+          dbsnp:
+            path: /path/to/dbsnp.vcf.gz
+          features:
+            path: /path/to/genecode.gtf
+
+        step_config:
+          ngs_mapping:
+            tools:
+              dna: ['bwa']
+              rna: ['star']
+            star:
+              path_index: /path/to/star/index
+            bwa:
+              path_index: /path/to/bwa/index.fa
+
+          somatic_variant_calling:
+            tools:
+            - mutect2
+            - scalpel
+            mutect2: {}
+            scalpel:
+              path_target_regions: /path/to/target/regions.bed
+
+          hla_typing:
+            path_link_in: ''  # OPTIONAL Override data set configuration search paths for FASTQ files
+            tools: [optitype]   # REQUIRED - available: 'optitype' and 'arcashla'
+            optitype:
+                max_reads: 5000
+                num_mapping_threads: 4
+
+          somatic_variant_annotation:
+            path_somatic_variant_calling: ../somatic_variant_calling
+            tools: ["vep"]
+            vep:
+                cache_dir: /path/to/dir/cache
+                species: homo_sapiens_merged
+                assembly: GRCh38
+                cache_version: '102'
+                tx_flag: gencode_basic  
+                num_threads: 8
+                buffer_size: 500
+                output_options:
+                - everything
+          somatic_neoepitope_prediction:
+            path_somatic_variant_annotation: ../somatic_variant_annotation  # REQUIRED
+            path_container: ../somatic_neoepitope_prediction/work/containers/out/pvactools.simg
+            path_rna_ngs_mapping: ../ngs_mapping
+            path_hla_typing: ../hla_typing
+            tools_somatic_variant_annotation: [vep]
+            preparation:
+                format: snappy_custom    # REQUIRED - The file format of the expression file to process. (stringtie, kallisto, cufflinks, snappy_custom, custom)
+                mode: gene   # REQUIRED - Determine whether the expression file contains gene or transcript TPM values.
+            prediction:
+                algorithms: ['MHCflurry','MHCnuggetsI']
+                
+        data_sets:
+          first_batch:
+            file: sheet.tsv
+            search_patterns:
+            - {'left': '*/*/*_R1.fastq.gz', 'right': '*/*/*_R2.fastq.gz'}
+            search_paths: ['/path']
+            type: matched_cancer
+            naming_scheme: only_secondary_id
+        """
+        ).lstrip()
+    )
+
+
+@pytest.fixture
+def somatic_neoepitope_prediction_workflow(
+    dummy_workflow,
+    minimal_config,
+    config_lookup_paths,
+    work_dir,
+    config_paths,
+    cancer_sheet_fake_fs,
+    aligner_indices_fake_fs,
+    mocker,
+):
+    # Patch out file-system related things in abstract (the crawling link in step is defined there)
+    patch_module_fs("snappy_pipeline.workflows.abstract", cancer_sheet_fake_fs, mocker)
+    patch_module_fs("snappy_pipeline.workflows.ngs_mapping.model", aligner_indices_fake_fs, mocker)
+    # Update the "globals" attribute of the mock workflow (snakemake.workflow.Workflow) so we
+    # can obtain paths from the function as if we really had a NGSMappingPipelineStep there
+
+    dummy_workflow.globals = {
+        "ngs_mapping": lambda x: "NGS_MAPPING/" + x,
+        "somatic_variant_annotation": lambda x: "SOMATIC_VARIANT_ANNOTATION/" + x,
+        "hla_typing": lambda x: "HLA_TYPING/" + x,
+    }
+    # Construct the workflow object
+    return SomaticNeoepitopePredictionWorkflow(
+        dummy_workflow,
+        minimal_config,
+        config_lookup_paths,
+        config_paths,
+        work_dir,
+    )
+
+
+def test_somatic_neoepitope_preparation_step_part_get_input_files(
+    somatic_neoepitope_prediction_workflow,
+):
+    wildcards = Wildcards(
+        fromdict={
+            "mapper": "bwa",
+            "var_caller": "mutect2",
+            "anno_caller": "vep",
+            "tumor_library": "P001-T1-DNA1-WGS1",
+        }
+    )
+    # Define expected
+    vcf_base_out = (
+        "SOMATIC_VARIANT_ANNOTATION/output/{mapper}.{var_caller}.{anno_caller}.{tumor_library}/out/"
+        "{mapper}.{var_caller}.{anno_caller}.{tumor_library}"
+    )
+    # vcf_base_out = (
+    #     "SOMATIC_VARIANT_ANNOTATION/output/bwa.mutect2.vep.P001-T1-DNA1-WES1/out/"
+    #     "{mapper}.{var_caller}.{anno_caller}.{tumor_library}"
+    # )
+    ngs_mapping_base_out = "NGS_MAPPING/output/star.P001-T1-RNA1-mRNA_seq1/out/"
+    expected = {
+        "vcf": vcf_base_out + ".full.vcf.gz",
+        "vcf_tbi": vcf_base_out + ".full.vcf.gz.tbi",
+        "expression": ngs_mapping_base_out + "star.P001-T1-RNA1-mRNA_seq1.GeneCounts.tab",
+        "bam": ngs_mapping_base_out + "star.P001-T1-RNA1-mRNA_seq1.bam",
+    }
+
+    # Get actual
+    actual = somatic_neoepitope_prediction_workflow.get_input_files("pvacseq", "prepare")(wildcards)
+    assert actual == expected
+
+
+def test_somatic_neoepitope_prediction_step_part_get_input_files(
+    somatic_neoepitope_prediction_workflow,
+):
+    # Define expected
+    wildcards = Wildcards(
+        fromdict={
+            "mapper": "bwa",
+            "var_caller": "mutect2",
+            "anno_caller": "vep",
+            "tumor_library": "P001-T1-DNA1-WGS1",
+        }
+    )
+    vcf_base_out = (
+        "work/prepare/{mapper}.{var_caller}.{anno_caller}.{mode}.{tumor_library}/out/"
+        "{mapper}.{var_caller}.{anno_caller}.{mode}.{tumor_library}"
+    )
+    hla_typing_base_out = "HLA_TYPING/output/"
+    expected = {
+        "container": "work/containers/out/pvactools.simg",
+        "combine_vcf": vcf_base_out + ".vcf.gz",
+        "hla_normal_dna": hla_typing_base_out
+        + "optitype.P001-N1-DNA1-WGS1/out/optitype.P001-N1-DNA1-WGS1.txt",
+        "hla_tumor_dna": hla_typing_base_out
+        + "optitype.P001-T1-DNA1-WGS1/out/optitype.P001-T1-DNA1-WGS1.txt",
+        "hla_tumor_rna": hla_typing_base_out
+        + "optitype.P001-T1-RNA1-mRNA_seq1/out/optitype.P001-T1-RNA1-mRNA_seq1.txt",
+    }
+
+    # Get actual
+    actual = somatic_neoepitope_prediction_workflow.get_input_files("pvacseq", "predict")(wildcards)
+    assert actual == expected
+
+
+def test_somatic_neoepitope_preparation_step_part_get_output_files(
+    somatic_neoepitope_prediction_workflow,
+):
+    base_out = (
+        "work/prepare/{mapper}.{var_caller}.{anno_caller}.GX.{tumor_library}/out/"
+        "{mapper}.{var_caller}.{anno_caller}.GX.{tumor_library}"
+    )
+    expected = get_expected_output_vcf_files_dict(base_out)
+    actual = somatic_neoepitope_prediction_workflow.get_output_files("pvacseq", "prepare")
+    assert actual == expected
+
+
+def test_somatic_neoepitope_preparation_step_part_get_log_files(
+    somatic_neoepitope_prediction_workflow,
+):
+    base_out = (
+        "work/prepare/{mapper}.{var_caller}.{anno_caller}.GX.{tumor_library}/log/"
+        "{mapper}.{var_caller}.{anno_caller}.GX.{tumor_library}"
+    )
+    expected = get_expected_log_files_dict(base_out=base_out)
+    actual = somatic_neoepitope_prediction_workflow.get_log_file("pvacseq", "prepare")
+    assert actual == expected
+
+
+def test_somatic_neoepitope_preparation_step_part_get_resource(
+    somatic_neoepitope_prediction_workflow,
+):
+    # Define expected
+    expected_dict = {"threads": 1, "time": "01:00:00", "memory": "6G"}
+    # Evaluate
+    for resource, expected in expected_dict.items():
+        msg_error = f"Assertion error for resource '{resource}'."
+        actual = somatic_neoepitope_prediction_workflow.get_resource(
+            "pvacseq", "prepare", resource
+        )()
+        assert actual == expected, msg_error
+
+
+def test_somatic_neoepitope_prediction_step_part_get_output_files(
+    somatic_neoepitope_prediction_workflow,
+):
+    base_out = "work/predict/{mapper}.{var_caller}.{anno_caller}.{mode}.{tumor_library}.epitopes/out/MHC_Class_I/{tumor_library}"
+    expected = {
+        "all_epitopes": base_out + ".all_epitopes.tsv",
+        "all_epitopes_md5": base_out + ".all_epitopes.tsv.md5",
+        "filtered_epitopes": base_out + ".filtered.tsv",
+        "filtered_epitopes_md5": base_out + ".filtered.tsv.md5",
+    }
+    actual = somatic_neoepitope_prediction_workflow.get_output_files("pvacseq", "predict")
+    assert actual == expected
+
+
+def test_somatic_neoepitope_prediction_step_part_get_log_files(
+    somatic_neoepitope_prediction_workflow,
+):
+    base_out = (
+        "work/predict/{mapper}.{var_caller}.{anno_caller}.{mode}.{tumor_library}.epitopes/"
+        "log/{tumor_library}"
+    )
+    expected = get_expected_log_files_dict(base_out=base_out)
+    actual = somatic_neoepitope_prediction_workflow.get_log_file("pvacseq", "predict")
+    assert actual == expected
+
+
+def test_somatic_neoepitope_prediction_step_part_get_resource(
+    somatic_neoepitope_prediction_workflow,
+):
+    # Define expected
+    expected_dict = {"threads": 4, "time": "4:00:00", "memory": "30G"}
+    # Evaluate
+    for resource, expected in expected_dict.items():
+        msg_error = f"Assertion error for resource '{resource}'."
+        actual = somatic_neoepitope_prediction_workflow.get_resource(
+            "pvacseq", "predict", resource
+        )()
+        assert actual == expected, msg_error
+
+
+def test_somatic_neoepitope_prediction_workflow(somatic_neoepitope_prediction_workflow):
+    """Test simple functionality of the workflow"""
+    # Check created sub steps
+    expected = ["link_out", "pvacseq"]
+    actual = list(sorted(somatic_neoepitope_prediction_workflow.sub_steps.keys()))
+    assert actual == expected
+
+    # Check result file construction
+    tpl = (
+        "output/predict/{mapper}.{var_caller}.{anno_caller}.{mode}.P00{i}-T{t}-DNA1-WGS1.epitopes/"
+        "out/MHC_Class_I/P00{i}-T{t}-DNA1-WGS1{ext}"
+    )
+    log_tpl = (
+        "output/predict/{mapper}.{var_caller}.{anno_caller}.{mode}.P00{i}-T{t}-DNA1-WGS1.epitopes/"
+        "log/P00{i}-T{t}-DNA1-WGS1{ext}"
+    )
+    expected = [
+        log_tpl.format(
+            mapper="bwa", var_caller=var_caller, anno_caller="vep", i=i, t=t, ext=ext, mode="GX"
+        )
+        for (i, t) in ((1, 1), (2, 2))
+        for var_caller in ("mutect2", "scalpel")
+        for ext in (
+            ".log",
+            ".log.md5",
+            ".conda_info.txt",
+            ".conda_info.txt.md5",
+            ".conda_list.txt",
+            ".conda_list.txt.md5",
+        )
+    ]
+
+    expected += [
+        tpl.format(
+            mapper="bwa", var_caller=var_caller, anno_caller="vep", i=i, t=t, ext=ext, mode="GX"
+        )
+        for (i, t) in ((1, 1), (2, 2))
+        for var_caller in ("mutect2", "scalpel")
+        for ext in (
+            ".all_epitopes.tsv",
+            ".filtered.tsv",
+            ".all_epitopes.tsv.md5",
+            ".filtered.tsv.md5",
+        )
+    ]
+    expected = list(sorted(expected))
+    actual = list(sorted(somatic_neoepitope_prediction_workflow.get_result_files()))
+    assert expected == actual