diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 503d4a5e..364e02a4 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -8,33 +8,42 @@ on: jobs: + Formatting: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Formatting + uses: github/super-linter@v4 + env: + VALIDATE_ALL_CODEBASE: false + DEFAULT_BRANCH: master + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + VALIDATE_SNAKEMAKE_SNAKEFMT: true + + Linting: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - name: Lint workflow - uses: snakemake/snakemake-github-action@v1.17.0 + uses: snakemake/snakemake-github-action@v1.22.0 with: directory: . snakefile: workflow/Snakefile args: "--lint" - stagein: | - export TMPDIR=/tmp Testing: runs-on: ubuntu-latest - needs: Linting + needs: + - Linting + - Formatting steps: - - uses: actions/checkout@v1 - - name: Checkout submodules - uses: textbook/git-checkout-submodule-action@2.0.0 + - uses: actions/checkout@v2 - name: Test workflow (local FASTQs) - uses: snakemake/snakemake-github-action@v1.17.0 + uses: snakemake/snakemake-github-action@v1.22.0 with: directory: .test snakefile: workflow/Snakefile args: "--use-conda --show-failed-logs -j 10 --conda-cleanup-pkgs cache --conda-frontend mamba" - stagein: | - export TMPDIR=/tmp diff --git a/.gitignore b/.gitignore index ee6ed52e..02a9591f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,23 +1,21 @@ * -!scripts -!scripts/* -!scripts/common -!scripts/common/* -scripts/.snakemake* -!Snakefile -!config.yaml -!samples.tsv -!resources -!resources/* -!envs -!envs/* -!environment.yaml +!.github/workflows/* +!.test +!.test/* +!.test/config/* +!.test/data/* +!config +!config/* +!config/HLA_Data/* +!workflow +!workflow/* +!workflow/report/* +!workflow/envs/* +!workflow/schemas/* +!workflow/scripts/* +!workflow/rules/* +!workflow/rules/annotation/* +!workflow/resources/* +!.gitignore !LICENSE !README.md -!rules -!rules/* -!.gitignore -!.editorconfig -!.gitattributes -!.test -!.test/data diff --git a/.test/config/config.yaml b/.test/config/config.yaml index d10fc559..7eb854e1 100644 --- a/.test/config/config.yaml +++ b/.test/config/config.yaml @@ -1,84 +1,10 @@ samples: "config/samples.tsv" units: "config/units.tsv" -# boolean if read trimming should be skipped -trimming: - activate: false -remove_duplicates: +neoantigen_prediction: activate: true -calling: - freebayes: - activate: false - # See https://varlociraptor.github.io/docs/calling/#generic-variant-calling - scenario: config/scenario.yaml - filter: - # Filter candidate variants (this filter helps to keep the number of evaluated candidates small). - # It should ideally generate a superset of all other filters defined below. - # Annotation of candidate variants tries to be as fast as possible, only using VEP - # default parameters. - candidates: "" - # Add any number of named filters here. They will be applied independenty, - # and can be referred in FDR control below to generate calls for different events. - # In particular, you can also filter by ID or dbsnp annotations here. 
- # See http://snpeff.sourceforge.net/SnpSift.html#filter - filtername: "ANN['IMPACT'] != 'MODIFIER'" - fdr-control: - threshold: 0.05 - events: - complete: - varlociraptor: - - "somatic" - - "germline" - somatic: - varlociraptor: - - "somatic" - germline: - varlociraptor: - - "germline" - -fusion: - arriba: - activate: false - blacklist: - "arriba_blacklist" - params: - "-T -P" - -tmb: - activate: false - coding_genome_size: 3e7 - # Name of the tumor sample in the scenario.yaml. - tumor_sample: tumor - somatic_events: - - somatic - - -epitope_prediction: - activate: false - - -affinity: - netMHCpan: - activate: false - params: "-BA -l 9 -s -xls" - location: "../netMHCpan-4.0" - netMHCIIpan: - activate: false - params: "-length 15 -s -xls" - location: "../netMHCIIpan-4.0" - - -HLAtyping: - # activate to use razers3 to pre-filter reads before using optitype - optitype_prefiltering: - activate: false - optitype_data: "config/HLA_Data/hla_reference_dna.fasta" - # activate to predict MHC-I and MHC-II alleles with HLA-LA - HLA_LA: - activate: false - ref: # Number of chromosomes to consider for calling. @@ -87,49 +13,42 @@ ref: # Ensembl species name species: homo_sapiens # Ensembl release - release: 100 + release: 108 # Genome build build: GRCh38 -annotations: - vep: - params: "--everything" - plugins: - # Add any plugin from https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html - # Plugin args can be passed as well, e.g. "LoFtool,path/to/custom/scores.txt". - - LoFtool - params: - cutadapt: "" - bwa: - "-M" - picard: - MarkDuplicates: - "VALIDATION_STRINGENCY=LENIENT" - gatk: - BaseRecalibrator: "--tmp-dir tmp" - applyBQSR: "" - strelka: - config: - "--exome" - run: - "--mode local" - razers3: - "-i 95 -m 1 -dr 0" - optitype: - "" microphaser: - window_len: - 33 - peptide_len: - netMHCpan: - 9 - netMHCIIpan: - 15 - kallisto: - "-b 100" - star: >- - --outSAMmapqUnique 60 --outSAMtype BAM Unsorted --chimSegmentMin 10 --chimOutType WithinBAM SoftClip - --chimJunctionOverhangMin 10 --chimScoreMin 1 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 - --chimScoreSeparation 1 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentReadGapMax 3 + events: + tumor: "tumor_only" + normal: "normal_only" + net_mhc_pan: + activate: true + peptide_len: 9 + extra: "" + # Please download netMHCpan manually from: + # https://services.healthtech.dtu.dk/service.php?NetMHCpan-4.1 + # To make the `netMHCpan` script work, you need to fix its first line + # in addition to the other edits described for a complete install. To use + # the conda-provided tcsh installation, it needs to read (without quotes): + # "#!/usr/bin/env tcsh" + location: "../netMHCpan-4.1" + net_mhc_two_pan: + activate: false + peptide_len: 15 + extra: "" + # Please download netMHCIIpan manually from: + # https://services.healthtech.dtu.dk/service.php?NetMHCIIpan-4.1 + # To make the `netMHCIIpan` script work, you need to fix its first line + # in addition to the other edits described for a complete install.
To use + # the conda-provided tcsh installation, it needs to read (without quotes): + # "#!/usr/bin/env tcsh" + location: "../netMHCIIpan-4.1" + neo_fox: + activate: false + # This should be at least as long as the desired net_mhc_two_pan peptide length + peptide_len: 15 + # update the version number to get a newer release of the reference set of HLA Alleles + hla_alleles: "https://raw.githubusercontent.com/ANHIG/IMGTHLA/Latest/allelelist/Allelelist.3480.txt" + extra: "" diff --git a/.test/config/groups.tsv b/.test/config/groups.tsv new file mode 100644 index 00000000..c0a77bbd --- /dev/null +++ b/.test/config/groups.tsv @@ -0,0 +1,2 @@ +group tumorType +A LUSC diff --git a/.test/config/samples.tsv b/.test/config/samples.tsv index 044002d7..2aca82dc 100644 --- a/.test/config/samples.tsv +++ b/.test/config/samples.tsv @@ -1,3 +1,3 @@ -sample type matched_normal purity platform -A_normal normal ILLUMINA -A_tumor tumor A_normal 1 ILLUMINA +sample_name group alias purity platform +A_normal A normal 1 ILLUMINA +A_tumor A tumor .99 ILLUMINA diff --git a/.test/config/units.tsv b/.test/config/units.tsv index cdb8af8e..4984edd8 100644 --- a/.test/config/units.tsv +++ b/.test/config/units.tsv @@ -1,3 +1,3 @@ -sample sequencing_type unit fq1 fq2 sra adapters +sample_name sequencing_type unit_name fq1 fq2 sra adapters A_normal DNA lane1 data/reads/A_normal.1.fastq.gz data/reads/A_normal.2.fastq.gz A_tumor DNA lane1 data/reads/A_tumor.1.fastq.gz data/reads/A_tumor.2.fastq.gz diff --git a/config/config.yaml b/config/config.yaml index f1e6818a..e00ae4d9 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,73 +1,11 @@ samples: "config/samples.tsv" units: "config/units.tsv" +groups: "config/groups.tsv" -# boolean if read trimming should be skipped -trimming: - activate: false -remove_duplicates: +neoantigen_prediction: activate: true -calling: - freebayes: - activate: false - # See https://varlociraptor.github.io/docs/calling/#generic-variant-calling - scenario: config/scenario.yaml - fdr-control: - threshold: 0.05 - events: - complete: - varlociraptor: - - "somatic" - - "germline" - somatic: - varlociraptor: - - "somatic" - germline: - varlociraptor: - - "germline" - -fusion: - arriba: - activate: false - blacklist: - "arriba_blacklist" - params: - "-T -P" - -tmb: - activate: false - coding_genome_size: 3e7 - # Name of the tumor sample in the scenario.yaml. - tumor_sample: tumor - somatic_events: - - somatic - - -epitope_prediction: - activate: true - - -affinity: - netMHCpan: - activate: true - params: "-BA -l 9 -s -xls" - location: "../netMHCpan-4.0" - netMHCIIpan: - activate: false - params: "-length 15 -s -xls" - location: "../netMHCIIpan-4.0" - - -HLAtyping: - # activate to use razers3 to pre-filter reads before using optitype - optitype_prefiltering: - activate: false - optitype_data: "config/HLA_Data/hla_reference_dna.fasta" - # activate to predict MHC-I and MHC-II alleles with HLA-LA - HLA_LA: - activate: false - ref: # Number of chromosomes to consider for calling. @@ -76,49 +14,44 @@ ref: # Ensembl species name species: homo_sapiens # Ensembl release - release: 100 + release: 108 # Genome build build: GRCh38 -annotations: - vep: - params: "--everything" - plugins: - # Add any plugin from https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html - # Plugin args can be passed as well, e.g. "LoFtool,path/to/custom/scores.txt". 
- - LoFtool - params: - cutadapt: "" - bwa: - "-M" - picard: - MarkDuplicates: - "VALIDATION_STRINGENCY=LENIENT" - gatk: - BaseRecalibrator: "--tmp-dir tmp" - applyBQSR: "" - strelka: - config: - "--exome" - run: - "--mode local" - razers3: - "-i 95 -m 1 -dr 0" - optitype: - "" microphaser: - window_len: - 33 - peptide_len: - netMHCpan: - 9 - netMHCIIpan: - 15 - kallisto: - "-b 100" - star: >- - --outSAMmapqUnique 60 --outSAMtype BAM Unsorted --chimSegmentMin 10 --chimOutType WithinBAM SoftClip - --chimJunctionOverhangMin 10 --chimScoreMin 1 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 - --chimScoreSeparation 1 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentReadGapMax 3 + # window_len should be at least 3 times the longest peptide_len specified below + events: + normal: "normal_only" + tumor: "tumor_only" + net_mhc_pan: + activate: true + peptide_len: 9 + extra: "" + # Please download netMHCpan manually from: + # https://services.healthtech.dtu.dk/service.php?NetMHCpan-4.1 + # To make the `netMHCpan` script work, you need to fix its first line + # in addition to the other edits described for a complete install. To use + # the conda-provided tcsh installation, it needs to read (without quotes): + # "#!/usr/bin/env tcsh" + location: "../netMHCpan-4.1" + net_mhc_two_pan: + activate: false + peptide_len: 15 + extra: "" + # Please download netMHCIIpan manually from: + # https://services.healthtech.dtu.dk/service.php?NetMHCIIpan-4.1 + # To make the `netMHCIIpan` script work, you need to fix its first line + # in addition to the other edits described for a complete install. To use + # the conda-provided tcsh installation, it needs to read (without quotes): + # "#!/usr/bin/env tcsh" + location: "../netMHCIIpan-4.1" + neo_fox: + activate: false + # This should be at least as long as the desired net_mhc_two_pan peptide length + peptide_len: 15 + # update the version number to get a newer release of the reference set of HLA Alleles + hla_alleles: "https://raw.githubusercontent.com/ANHIG/IMGTHLA/Latest/allelelist/Allelelist.3480.txt" + extra: "" + diff --git a/config/groups.tsv b/config/groups.tsv new file mode 100644 index 00000000..11353453 --- /dev/null +++ b/config/groups.tsv @@ -0,0 +1,3 @@ +group tumorType +A LUSC +B LUAD diff --git a/config/samples.tsv b/config/samples.tsv index 6e291f15..249e0521 100644 --- a/config/samples.tsv +++ b/config/samples.tsv @@ -1,4 +1,5 @@ -sample type matched_normal purity platform -A_normal normal ILLUMINA -A_tumor tumor A_normal 1 ILLUMINA -B_tumor tumor A_normal 1 ILLUMINA +sample_name group alias purity platform +A_normal A normal 1 ILLUMINA +A_tumor A tumor .99 ILLUMINA +B_normal B normal 1 ILLUMINA +B_tumor B tumor .98 ILLUMINA diff --git a/config/units.tsv b/config/units.tsv index f8758dfb..c417a45c 100644 --- a/config/units.tsv +++ b/config/units.tsv @@ -1,5 +1,6 @@ -sample sequencing_type unit fq1 fq2 sra adapters +sample_name sequencing_type unit_name fq1 fq2 sra adapters A_normal DNA lane1 A_normal_1.fastq.gz A_normal_2.fastq.gz A_tumor DNA lane2 A_tumor_1.fastq.gz A_tumor_2.fastq.gz +B_normal DNA lane1 B_normal_1.fastq.gz B_normal_2.fastq.gz B_tumor DNA lane1 B_tumor_1.fastq.gz B_tumor_2.fastq.gz B_tumor RNA lane1 B_tumor_RNA_1.fastq.gz B_tumor_RNA_2.fastq.gz diff --git a/workflow/Snakefile b/workflow/Snakefile index 2b881b44..7282e12f 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -12,6 +12,23 @@ scattergather: calling=24, + +##### required envvars ##### + + +envvars: + # For NeoFox installation: + # The tarballs for both
netMHCpan and netMHCIIpan + # need to be downloaded manually after registering + # online and deposited at a filesystem location + # whose path must be provided via a shell environment + # variable when running snakemake. + # For downloading, see the "Downloads" tab at: + # https://services.healthtech.dtu.dk/service.php?NetMHCpan-4.1 + "NET_MHC_PAN_4_1_TARBALL", + # https://services.healthtech.dtu.dk/service.php?NetMHCIIpan-4.0 + "NET_MHC_TWO_PAN_4_0_TARBALL", + + ##### setup report ##### @@ -27,24 +44,14 @@ container: "docker://continuumio/miniconda3" include: "rules/common.smk" include: "rules/utils.smk" -include: "rules/trim.smk" include: "rules/ref.smk" -include: "rules/mapping.smk" -include: "rules/calling.smk" -include: "rules/candidate_calling.smk" -include: "rules/varlociraptor.smk" -include: "rules/annotation.smk" -include: "rules/filtering.smk" include: "rules/microphaser.smk" -include: "rules/HLAtyping.smk" -include: "rules/MHC_binding.smk" -include: "rules/RNA.smk" -include: "rules/tmb.smk" -include: "rules/vega.smk" +include: "rules/hla_typing.smk" +include: "rules/mhc_binding.smk" +include: "rules/annotate_neoantigens.smk" +include: "rules/datavzrd.smk" rule all: input: get_final_output(), - get_fusion_output(), - get_tmb_targets(), diff --git a/workflow/envs/bcftools.yaml b/workflow/envs/bcftools.yaml index 70e39da1..35bf7643 100644 --- a/workflow/envs/bcftools.yaml +++ b/workflow/envs/bcftools.yaml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - bcftools =1.10 + - bcftools =1.14 diff --git a/workflow/envs/datavzrd.yaml b/workflow/envs/datavzrd.yaml new file mode 100644 index 00000000..859add75 --- /dev/null +++ b/workflow/envs/datavzrd.yaml @@ -0,0 +1,4 @@ +channels: + - conda-forge +dependencies: + - datavzrd =2.2 diff --git a/workflow/envs/gawk.yaml b/workflow/envs/gawk.yaml new file mode 100644 index 00000000..fa09e590 --- /dev/null +++ b/workflow/envs/gawk.yaml @@ -0,0 +1,4 @@ +channels: + - conda-forge +dependencies: + - gawk =5.1 diff --git a/workflow/envs/hla_la.yaml b/workflow/envs/hla_la.yaml index 6c2487fb..6a24bc29 100644 --- a/workflow/envs/hla_la.yaml +++ b/workflow/envs/hla_la.yaml @@ -1,8 +1,5 @@ channels: - conda-forge - bioconda - - jafors dependencies: - - hla-la ==1.0.5 - - samtools ==1.10 - - boost-cpp ==1.73.0 + - hla-la ==1.0.3 diff --git a/workflow/envs/htslib.yaml b/workflow/envs/htslib.yaml new file mode 100644 index 00000000..b5d7959d --- /dev/null +++ b/workflow/envs/htslib.yaml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - htslib =1.14 diff --git a/workflow/envs/microphaser.yaml b/workflow/envs/microphaser.yaml index 84d2ad83..90698479 100644 --- a/workflow/envs/microphaser.yaml +++ b/workflow/envs/microphaser.yaml @@ -2,4 +2,4 @@ channels: - bioconda - conda-forge dependencies: - - microphaser =0.2 + - microphaser =0.7 diff --git a/workflow/envs/neo_fox_deps.post-deploy.sh b/workflow/envs/neo_fox_deps.post-deploy.sh new file mode 100755 index 00000000..c344af3e --- /dev/null +++ b/workflow/envs/neo_fox_deps.post-deploy.sh @@ -0,0 +1,157 @@ +#!/usr/bin/env bash +set -euo pipefail + +# set all the necessary conda paths and +# ensure they exist +CONDA_BIN="${CONDA_PREFIX}/bin/" +# this is needed for the allele list files of MixMHCpred, MixMHC2pred +# and PRIME, as NeoFox expects some of their files in that directory +mkdir -p ${CONDA_BIN}/lib/ +CONDA_MAN1="${CONDA_PREFIX}/share/man/man1/" +mkdir -p $CONDA_MAN1 +CONDA_INFO="${CONDA_PREFIX}/share/info/" +mkdir -p $CONDA_INFO
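+# NOTE (editor's usage sketch, not from the upstream docs): this script
+# consumes ${NET_MHC_PAN_4_1_TARBALL} and ${NET_MHC_TWO_PAN_4_0_TARBALL} below,
+# so both envvars declared in workflow/Snakefile must be exported before
+# snakemake triggers this post-deploy script; the archive names here are
+# hypothetical examples and may differ from what the DTU site ships:
+#   export NET_MHC_PAN_4_1_TARBALL=/path/to/netMHCpan-4.1.Linux.tar.gz
+#   export NET_MHC_TWO_PAN_4_0_TARBALL=/path/to/netMHCIIpan-4.0.Linux.tar.gz
+#   snakemake --use-conda -j 10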
+CONDA_LIB="${CONDA_PREFIX}/lib/" +mkdir -p ${CONDA_LIB} +CONDA_ETC="${CONDA_PREFIX}/etc/" +mkdir -p $CONDA_ETC + +# install MixMHCpred, following: +# https://github.com/GfellerLab/MixMHCpred/blob/v2.1/README +MIX_MHC_PRED_VERSION="2.1" +MIX_MHC_PRED_LIB_PATH="$CONDA_PREFIX/lib/mix_mhc_pred/" +wget -q https://github.com/GfellerLab/MixMHCpred/archive/refs/tags/v${MIX_MHC_PRED_VERSION}.tar.gz +tar xzf v${MIX_MHC_PRED_VERSION}.tar.gz +cd MixMHCpred-${MIX_MHC_PRED_VERSION} +g++ -O3 lib/MixMHCpred.cc -o lib/MixMHCpred.x +# TODO: when updating to v2.2, change this line to: +# MMP_PLACEHOLDER="/PATH_TO_MIXMHCPRED/lib" +MMP_PLACEHOLDER="YOUR PATH TO MixMHCpred/lib FOLDER" +grep "${MMP_PLACEHOLDER}" MixMHCpred +sed -i "s%${MMP_PLACEHOLDER}%${MIX_MHC_PRED_LIB_PATH}%" MixMHCpred +mv MixMHCpred ${CONDA_BIN} +# The allele_list.txt file needs to be in a `lib/` subdirectory of the `bin/` dir; this is hard-coded here: +# https://github.com/TRON-Bioinformatics/neofox/blob/629443b637fc41b1ab81f4f770e7a8a1c976d3f2/neofox/references/references.py#L123 +cp lib/allele_list.txt ${CONDA_BIN}/lib/ +mv lib $MIX_MHC_PRED_LIB_PATH +# TODO: when updating to v2.2, change this line to: +# mv MixMHCpred_license.pdf ${CONDA_INFO}/MixMHCpred_license.pdf +mv license.pdf ${CONDA_INFO}/MixMHCpred_license.pdf +MixMHCpred -i test/test.fa -o test/out.txt -a A0101,A2501,B0801,B1801 +diff <(sed '4d' test/out.txt) <(sed '4d' test/out_compare.txt) +cd .. +rm v${MIX_MHC_PRED_VERSION}.tar.gz +rm -r MixMHCpred-${MIX_MHC_PRED_VERSION} + +# install MixMHC2pred, mostly following (we use the default GitHub-created +# .tar.gz file instead of the hand-crafted .zip file for future-proofing): +# https://github.com/GfellerLab/MixMHC2pred/blob/v1.2/README.md +MIX_MHC_TWO_PRED_VERSION="1.2" +MIX_MHC_TWO_PRED_LIB_PATH="${CONDA_LIB}/mix_mhc_two_pred/" +wget -q https://github.com/GfellerLab/MixMHC2pred/archive/refs/tags/v${MIX_MHC_TWO_PRED_VERSION}.tar.gz +tar xzf v${MIX_MHC_TWO_PRED_VERSION}.tar.gz +cd MixMHC2pred-${MIX_MHC_TWO_PRED_VERSION} +mv -t ${CONDA_BIN} MixMHC2pred MixMHC2pred_unix +# The Alleles_list.txt file needs to be in the same directory as the binaries; this is hard-coded here: +# https://github.com/TRON-Bioinformatics/neofox/blob/629443b637fc41b1ab81f4f770e7a8a1c976d3f2/neofox/references/references.py#L114 +mv -t ${CONDA_BIN} Alleles_list.txt +mv rpep ${CONDA_ETC} +ln -s ${CONDA_ETC}/rpep ${CONDA_BIN}/rpep +mv LICENSE ${CONDA_INFO}/MixMHC2pred_unix_LICENSE +MixMHC2pred_unix -i test/testData.txt -o test/out.txt -a DRB1_11_01 DRB3_02_02 DPA1_01_03__DPB1_04_01 DQA1_05_05__DQB1_03_01 +diff test/out.txt test/out_compare.txt +cd ..
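+# NOTE (editor's comment): each install block above finishes by running the
+# tool on its bundled test data and diff-ing the result against the expected
+# output; since this script runs under `set -euo pipefail`, any mismatch
+# aborts the post-deploy step and thus fails the conda environment creation
+# early instead of leaving a half-working NeoFox setup behind.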
+rm v${MIX_MHC_TWO_PRED_VERSION}.tar.gz +rm -r MixMHC2pred-${MIX_MHC_TWO_PRED_VERSION} + +# install PRIME, mostly following (minor corrections): +# https://github.com/GfellerLab/PRIME/blob/v1.0/README +PRIME_VERSION="1.0" +PRIME_LIB_PATH="${CONDA_LIB}/prime/" +wget -q https://github.com/GfellerLab/PRIME/archive/refs/tags/v${PRIME_VERSION}.tar.gz +tar xzf v${PRIME_VERSION}.tar.gz +cd PRIME-${PRIME_VERSION} +PRIME_PLACEHOLDER="/app/PRIME/lib" +grep "${PRIME_PLACEHOLDER}" PRIME +sed -i "s%${PRIME_PLACEHOLDER}%${PRIME_LIB_PATH}%" PRIME +mv PRIME ${CONDA_BIN} +# The alleles.txt file needs to be in a `lib/` subdirectory of the `bin/` dir; this is hard-coded here: +# https://github.com/TRON-Bioinformatics/neofox/blob/629443b637fc41b1ab81f4f770e7a8a1c976d3f2/neofox/references/references.py#L132 +cp lib/alleles.txt ${CONDA_BIN}/lib/ +mv lib $PRIME_LIB_PATH +mv PRIME_license.pdf ${CONDA_INFO} +PRIME -i test/test.txt -o test/out.txt -a A0201,A0101 +diff <(sed '4d' test/out.txt) <(sed '4d' test/out_compare.txt) +cd .. +rm v${PRIME_VERSION}.tar.gz +rm -r PRIME-${PRIME_VERSION} + +# This is the non-portable version of the 1st line +# in both netMHCpan and netMHCIIpan, assuming a +# root install of tcsh. +TCSH_ROOT="#! /bin/tcsh -f" +# For the scripts to work with any tcsh in the +# $PATH, we need to change this to: +TCSH_PATH="#!/usr/bin/env tcsh" + +# install netMHCpan version 4.1 +# requires tcsh to have been installed via conda dependencies +NET_MHC_PAN_4_1_LIB="${CONDA_LIB}/netMHCpan_4_1/" +mkdir -p ${NET_MHC_PAN_4_1_LIB} +NET_MHC_PAN_4_1_ETC="${CONDA_ETC}/netMHCpan_4_1/" +mkdir -p ${NET_MHC_PAN_4_1_ETC} +tar xzf ${NET_MHC_PAN_4_1_TARBALL} +cd netMHCpan-4.1 +wget -q https://services.healthtech.dtu.dk/services/NetMHCpan-4.1/data.tar.gz +tar xzf data.tar.gz +rm data.tar.gz +grep "${TCSH_ROOT}" netMHCpan +sed -i "s%${TCSH_ROOT}%${TCSH_PATH}%" netMHCpan +grep -P "setenv\s+NMHOME" netMHCpan +sed -r -i "s%^setenv\s+NMHOME+.*$%setenv NMHOME ${NET_MHC_PAN_4_1_LIB}%" netMHCpan +mv -t ${CONDA_BIN} netMHCpan +mv -t ${CONDA_MAN1} netMHCpan.1 +mv -t ${CONDA_INFO} netMHCpan-4.1.readme +mv -t ${NET_MHC_PAN_4_1_LIB} Linux_x86_64 +mv -t ${NET_MHC_PAN_4_1_ETC} data +ln -s ${NET_MHC_PAN_4_1_ETC}/data ${NET_MHC_PAN_4_1_LIB}/data +cd test +netMHCpan -p test.pep -BA -xls -a HLA-A01:01,HLA-A02:01 -xlsfile my_NetMHCpan_out.xls +diff NetMHCpan_out.xls my_NetMHCpan_out.xls +cd ../..
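+# NOTE (editor's comment): after the two sed edits above, the installed
+# `netMHCpan` script starts roughly like this (illustrative, with the
+# variables expanded):
+#   #!/usr/bin/env tcsh
+#   setenv NMHOME ${CONDA_PREFIX}/lib/netMHCpan_4_1/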
+rm -r netMHCpan-4.1 + +# install netMHCIIpan version 4.0 +# requires tcsh to have been installed via conda dependencies +NET_MHC_TWO_PAN_4_0_LIB="${CONDA_LIB}/netMHCIIpan_4_0/" +mkdir -p ${NET_MHC_TWO_PAN_4_0_LIB} +NET_MHC_TWO_PAN_4_0_ETC="${CONDA_ETC}/netMHCIIpan_4_0/" +mkdir -p ${NET_MHC_TWO_PAN_4_0_ETC} +tar xzf ${NET_MHC_TWO_PAN_4_0_TARBALL} +cd netMHCIIpan-4.0 +wget -q https://services.healthtech.dtu.dk/services/NetMHCIIpan-4.0/data.tar.gz +tar xzf data.tar.gz +rm data.tar.gz +grep "${TCSH_ROOT}" netMHCIIpan +sed -i "s%${TCSH_ROOT}%${TCSH_PATH}%" netMHCIIpan +grep -P "setenv\s+NMHOME" netMHCIIpan +sed -r -i "s%^setenv\s+NMHOME+.*$%setenv NMHOME ${NET_MHC_TWO_PAN_4_0_LIB}%" netMHCIIpan +mv -t ${CONDA_BIN} netMHCIIpan +mv -t ${CONDA_MAN1} netMHCIIpan.1 +mv -t ${CONDA_INFO} netMHCIIpan-4.0.readme +mv -t ${NET_MHC_TWO_PAN_4_0_LIB} Linux_x86_64 NetMHCIIpan-4.0.pl +mv -t ${NET_MHC_TWO_PAN_4_0_ETC} data +ln -s ${NET_MHC_TWO_PAN_4_0_ETC}/data ${NET_MHC_TWO_PAN_4_0_LIB}/data +cd test +netMHCIIpan -f example.fsa -a DRB1_0101 > example.fsa.myout +diff example.fsa.out example.fsa.myout +netMHCIIpan -f example.pep -inptype 1 -a DRB1_0101 > example.pep.myout +diff example.pep.out example.pep.myout +netMHCIIpan -f example.fsa -a H-2-IAb -s -u > example.fsa.sorted.myout +diff example.fsa.sorted.out example.fsa.sorted.myout +netMHCIIpan -f example.fsa -hlaseq DRB10101.fsa > example.fsa_hlaseq.myout +diff example.fsa_hlaseq.out example.fsa_hlaseq.myout +netMHCIIpan -f example.fsa -hlaseqA alpha.dat -hlaseq beta.dat > example.fsa_hlaseq_A+B.myout +diff example.fsa_hlaseq_A+B.out example.fsa_hlaseq_A+B.myout +cd ../.. +rm -r netMHCIIpan-4.0 diff --git a/workflow/envs/neo_fox_deps.yaml b/workflow/envs/neo_fox_deps.yaml new file mode 100644 index 00000000..780af1d6 --- /dev/null +++ b/workflow/envs/neo_fox_deps.yaml @@ -0,0 +1,30 @@ +channels: + - conda-forge + - bioconda +dependencies: + # https://neofox.readthedocs.io/en/latest/02_installation.html#step-by-step-guide-without-docker + - r-base =3.6 + - python >=3.7, <=3.8 + # implicit unmentioned dependency of MixMHCpred and PRIME + - perl + # https://neofox.readthedocs.io/en/latest/02_installation.html#install-blastp + - blast =2 + # https://github.com/GfellerLab/MixMHCpred/blob/75374a7a0de214278c1cda00bb9dee4b2f475ec3/README#L64 + - cxx-compiler + # needed for netMHCpan and netMHCIIpan, as their executables are tcsh-scripts + - tcsh =6.24 + # just make sure this is available for the post-deploy.sh script + - sed =4.8 + # R packages mentioned at the end of this section: + # https://neofox.readthedocs.io/en/latest/02_installation.html#configuration-of-the-reference-folder + - r-lattice + - r-ggplot2 + - r-caret + - r-peptides + - r-doparallel + - r-gbm + - bioconductor-biostrings + # https://neofox.readthedocs.io/en/latest/02_installation.html#install-neofox + - pip + - pip: + - neofox==0.6.4 diff --git a/workflow/envs/pandas.yaml b/workflow/envs/pandas.yaml new file mode 100644 index 00000000..5ced20fa --- /dev/null +++ b/workflow/envs/pandas.yaml @@ -0,0 +1,4 @@ +channels: + - conda-forge +dependencies: + - pandas=1.4 \ No newline at end of file diff --git a/workflow/envs/rbt.yaml b/workflow/envs/rbt.yaml index 6c8be1fb..94ff5bae 100644 --- a/workflow/envs/rbt.yaml +++ b/workflow/envs/rbt.yaml @@ -1,6 +1,6 @@ channels: - - bioconda - conda-forge + - bioconda dependencies: - rust-bio-tools =0.19 - - bcftools =1.10 + - bcftools =1.14 diff --git a/workflow/envs/tcsh.yaml b/workflow/envs/tcsh.yaml new file mode 100644 index 00000000..9bc374fd --- /dev/null 
+++ b/workflow/envs/tcsh.yaml @@ -0,0 +1,4 @@ +channels: + - conda-forge +dependencies: + - tcsh =6.24 diff --git a/workflow/envs/varlociraptor.yaml b/workflow/envs/varlociraptor.yaml index a76d765a..270dc831 100644 --- a/workflow/envs/varlociraptor.yaml +++ b/workflow/envs/varlociraptor.yaml @@ -3,4 +3,4 @@ channels: - bioconda dependencies: - varlociraptor =2.3.0 - - bcftools =1.10 + - bcftools =1.14 diff --git a/workflow/report/HLA_Types.rst b/workflow/report/HLA_Types.rst deleted file mode 100644 index 999fe494..00000000 --- a/workflow/report/HLA_Types.rst +++ /dev/null @@ -1 +0,0 @@ -Typing of HLA profile. diff --git a/workflow/report/WES_results.rst b/workflow/report/WES_results.rst deleted file mode 100644 index 4a699bdf..00000000 --- a/workflow/report/WES_results.rst +++ /dev/null @@ -1 +0,0 @@ -"Results - neoantigen candidate table" diff --git a/workflow/report/hla_alleles.rst b/workflow/report/hla_alleles.rst new file mode 100644 index 00000000..a793a914 --- /dev/null +++ b/workflow/report/hla_alleles.rst @@ -0,0 +1 @@ +HLA allele profile as determined by HLA-LA. diff --git a/workflow/report/neoantigens.dna.rst b/workflow/report/neoantigens.dna.rst new file mode 100644 index 00000000..17ba537e --- /dev/null +++ b/workflow/report/neoantigens.dna.rst @@ -0,0 +1,60 @@ +Neoantigens and corresponding normal peptides as phased and determined by +microphaser, with elution ligand / binding affinity predictions by netMHC +(netMHCpan and netMHCIIpan) to the HLA alleles determined by HLA-LA. + +=================== +Column descriptions +=================== + +* **id**: Peptide ID assigned by microphaser. +* **pep_seq**: Sequence of the full peptide that was given to netMHC(II)pan. Amino acids that + are different between the normal and the tumor sample are highlighted in lower case. +* **pos_in_id_seq**: Position in pep_seq of the peptide that was used for prediction. + It seems like netMHCpan positions start at 0 and netMHCIIpan positions at 1. +* **alias**: Indicator of the type of sample (normal vs. some tumor sample). +* **num_binders**: Total number of peptide-HLA allele pairs from pep_seq that are considered binders, + either weak or strong. Cutoffs are applied to el_rank and are: + * netMHCpan 4.1: <0.5% (strong binder), <2.0% (weak binder) + * netMHCIIpan 4.1: <1.0% (strong binder), <5.0% (weak binder) +* **freq**: Allelic frequency of the peptide as predicted by microphaser. For a + credible allele frequency interval, see column freq_credible_interval. +* **depth**: Read depth at the peptide position. +* **num_var_sites**: Number of variant sites on the peptide's haplotype in that sample (alias). +* **num_var_in_pep**: Number of variant sites within the peptide sequence. +* **top_el_rank_allele**: HLA allele with the best eluted ligand prediction score percentile rank. +* **top_el_rank_bind_core**: Binding core of the peptide for the HLA allele with the best + elution ligand score percentile rank. +* **top_el_rank_el_rank**: Percentile rank of the elution ligand score. The rank of the predicted binding + score when compared to a set of random natural peptides. This measure is not + affected by inherent bias of certain molecules towards higher or lower mean + predicted affinities. It is the recommended value for determining likely + binders / neoantigens of interest.
Cutoffs recommended by netMHC(II)pan authors + are: + * netMHCpan 4.1: <0.5% (strong binder), <2.0% (weak binder) + * netMHCIIpan 4.1: <1.0% (strong binder), <5.0% (weak binder) +* **top_el_rank_el_score**: The raw eluted ligand prediction score. +* **ave_el_score**: Average across the eluted ligand prediction scores of all alleles for this + peptide in the particular sample (alias). +* **top_ba_rank_allele**: HLA allele with the best binding affinity prediction score percentile rank. +* **top_ba_rank_bind_core**: Binding core of the peptide for the HLA allele with the best + binding affinity score percentile rank. +* **top_ba_rank_ba_rank**: Percentile rank of the predicted binding affinity compared to a set of 100,000 + random natural peptides. This measure is not affected by inherent bias of certain + molecules towards higher or lower mean predicted affinities. +* **top_ba_rank_ba_score**: Predicted binding affinity in log-scale. +* **aa_changes**: List of amino acid (aa) changes. For normal samples, only germline changes are listed. For + tumor samples, only somatic tumor changes are listed, even though the germline + changes also affect the tumor peptide. +* **genomic_pos**: Genomic position of the nucleotide change. +* **nt_seq**: Nucleotide sequence underlying the peptide. Nucleotide changes underlying the + amino acid changes are highlighted as lower case letters. +* **gene_name**: Common gene name / gene symbol of the peptide's gene of origin. +* **gene_id**: Ensembl gene id of the peptide's gene of origin. +* **transcript**: List of Ensembl transcript ids in which the peptide occurs. +* **chrom**: Chromosome on which the peptide's gene of origin is located. +* **offset**: Chromosomal position of the peptide's gene of origin. +* **frame**: Open reading frame that the peptide originates from. 0 indicates the regular + reading frame, non-zero values indicate frame shifts. +* **strand**: Strand of the gene / transcript. +* **freq_credible_interval**: Credible interval for freq, the allelic frequency of the peptide as predicted + by microphaser. diff --git a/workflow/report/RNA_results.rst b/workflow/report/neoantigens.rna.rst similarity index 100% rename from workflow/report/RNA_results.rst rename to workflow/report/neoantigens.rna.rst diff --git a/workflow/report/neopeptides.rst b/workflow/report/neopeptides.rst new file mode 100644 index 00000000..1e6d6625 --- /dev/null +++ b/workflow/report/neopeptides.rst @@ -0,0 +1,9 @@ +Neopeptides and corresponding normal peptides as phased and determined by +microphaser, with various annotation scores gathered and provided by NeoFox, +using the HLA alleles determined by HLA-LA.
+ +=================== +Column descriptions +=================== + +TODO: transform spreadsheet into RST table diff --git a/workflow/resources/datavzrd/neo_fox-neoantigens-template.datavzrd.yaml b/workflow/resources/datavzrd/neo_fox-neoantigens-template.datavzrd.yaml new file mode 100644 index 00000000..270c77a7 --- /dev/null +++ b/workflow/resources/datavzrd/neo_fox-neoantigens-template.datavzrd.yaml @@ -0,0 +1,344 @@ +__definitions__: + - import pandas as pd + - from itertools import chain + - from copy import deepcopy +__variables__: + important_cols: ?set(params.neofox_important_cols["general"]) | set(params.neofox_important_cols[wildcards.mhc]) + cols: ?set(pd.read_csv(input.neopeptides, sep="\t").columns.values) + coldefs: + gene: + link-to-url: https://www.ensembl.org/Homo_sapiens/Gene/Summary?g={value} + mutation_mutatedXmer: + custom: | + function(value, row) { + return value.split("").map(function(a) { + if (a === a.toLowerCase()) { + return `${a}` + } else { + return a + } + }).join(""); + } + purity_adjusted_DNA_VAF: + plot: + ticks: + scale: linear + imputedGeneExpression: + plot: + ticks: + scale: linear + PRIME_best_rank: + plot: + heatmap: + scale: symlog + domain: + - 0.0 + - 100.0 + range: + - "#EC0000" + - white + PRIME_best_score: + plot: + ticks: + scale: linear + Best_rank_MHCI_9mer_score: + plot: + ticks: + scale: linear + aux-domain-columns: + - Best_rank_MHCI_9mer_score_WT + Best_rank_MHCI_9mer_score_WT: + plot: + ticks: + scale: linear + aux-domain-columns: + - Best_rank_MHCI_9mer_score + MixMHC2pred_best_rank: + plot: + heatmap: + scale: linear + domain: + - 0.0 + - 100.0 + range: + - "#EC0000" + - "#ffffff" + Best_rank_MHCII_score: + plot: + ticks: + scale: linear + aux-domain-columns: + - Best_rank_MHCII_score_WT + Best_rank_MHCII_score_WT: + plot: + ticks: + scale: linear + aux-domain-columns: + - Best_rank_MHCII_score + Recognition_Potential_MHCI_9mer: + plot: + ticks: + scale: linear + Improved_Binder_MHCI: + plot: + heatmap: + scale: ordinal + domain: + - 0 + - 1 + range: + - "#ffffff" + - "#2CA02C" + Selfsimilarity_MHCI_conserved_binder: + plot: + ticks: + scale: linear + mutation_position: + display-mode: detail + dnaVariantAlleleFrequency: + display-mode: detail + rnaVariantAlleleFrequency: + display-mode: hidden + rnaExpression: + display-mode: hidden + ADN_MHCI: + display-mode: detail + plot: + heatmap: + scale: ordinal + color-scheme: category20 + ADN_MHCII: + display-mode: detail + plot: + heatmap: + scale: ordinal + color-scheme: category20 + Amplitude_MHCII_rank: + display-mode: detail + plot: + heatmap: + scale: linear + domain: + - 0.0 + - 100.0 + range: + - "#EC0000" + - "#ffffff" + Amplitude_MHCI_affinity: + display-mode: detail + plot: + ticks: + scale: linear + Amplitude_MHCI_affinity_9mer: + display-mode: detail + plot: + ticks: + scale: linear + MixMHC2pred_best_allele: + plot: + heatmap: + scale: ordinal + color-scheme: category20 + aux-domain-columns: + - MixMHC2pred_best_allele + - Best_rank_MHCII_score_allele + - Best_rank_MHCII_score_allele_WT + - Best_affinity_MHCII_allele + - Best_affinity_MHCII_allele_WT + Best_rank_MHCII_score_allele: + display-mode: detail + plot: + heatmap: + scale: ordinal + color-scheme: category20 + aux-domain-columns: + - MixMHC2pred_best_allele + - Best_rank_MHCII_score_allele + - Best_rank_MHCII_score_allele_WT + - Best_affinity_MHCII_allele + - Best_affinity_MHCII_allele_WT + Best_rank_MHCII_score_allele_WT: + display-mode: detail + plot: + heatmap: + scale: ordinal + color-scheme: category20 + 
aux-domain-columns: + - MixMHC2pred_best_allele + - Best_rank_MHCII_score_allele + - Best_rank_MHCII_score_allele_WT + - Best_rank_MHCII_allele_WT + - Best_affinity_MHCII_allele + - Best_affinity_MHCII_allele_WT + Best_affinity_MHCII_allele: + display-mode: detail + plot: + heatmap: + scale: ordinal + color-scheme: category20 + aux-domain-columns: + - MixMHC2pred_best_allele + - Best_rank_MHCII_score_allele + - Best_rank_MHCII_score_allele_WT + - Best_rank_MHCII_allele_WT + - Best_affinity_MHCII_allele + - Best_affinity_MHCII_allele_WT + Best_affinity_MHCII_allele_WT: + display-mode: detail + plot: + heatmap: + scale: ordinal + color-scheme: category20 + aux-domain-columns: + - MixMHC2pred_best_allele + - Best_rank_MHCII_score_allele + - Best_rank_MHCII_score_allele_WT + - Best_rank_MHCII_allele_WT + - Best_affinity_MHCII_allele + - Best_affinity_MHCII_allele_WT + Best_affinity_MHCII_score: + display-mode: detail + plot: + ticks: + scale: linear + aux-domain-columns: + - Best_affinity_MHCII_score_WT + Best_affinity_MHCII_score_WT: + display-mode: detail + plot: + ticks: + scale: linear + aux-domain-columns: + - Best_affinity_MHCII_score + PRIME_best_allele: + plot: + heatmap: + scale: ordinal + color-scheme: category20 + aux-domain-columns: + - PRIME_best_allele + - Best_rank_MHCI_9mer_allele + - Best_rank_MHCI_9mer_allele_WT + - Best_affinity_MHCI_9mer_allele + - Best_affinity_MHCI_9mer_allele_WT + - Best_affinity_MHCI_allele + - Best_affinity_MHCI_allele_WT + Best_rank_MHCI_9mer_allele: + display-mode: detail + plot: + heatmap: + scale: ordinal + color-scheme: category20 + aux-domain-columns: + - PRIME_best_allele + - Best_rank_MHCI_9mer_allele + - Best_rank_MHCI_9mer_allele_WT + - Best_affinity_MHCI_9mer_allele + - Best_affinity_MHCI_9mer_allele_WT + - Best_affinity_MHCI_allele + - Best_affinity_MHCI_allele_WT + Best_rank_MHCI_9mer_allele_WT: + display-mode: detail + plot: + heatmap: + scale: ordinal + color-scheme: category20 + aux-domain-columns: + - PRIME_best_allele + - Best_rank_MHCI_9mer_allele + - Best_rank_MHCI_9mer_allele_WT + - Best_affinity_MHCI_9mer_allele + - Best_affinity_MHCI_9mer_allele_WT + - Best_affinity_MHCI_allele + - Best_affinity_MHCI_allele_WT + Best_affinity_MHCI_9mer_score: + display-mode: detail + plot: + ticks: + scale: linear + aux-domain-columns: + - Best_affinity_MHCI_9mer_score_WT + Best_affinity_MHCI_9mer_score_WT: + display-mode: detail + plot: + ticks: + scale: linear + aux-domain-columns: + - Best_affinity_MHCI_9mer_score + Best_affinity_MHCI_allele: + display-mode: detail + plot: + heatmap: + scale: ordinal + color-scheme: category20 + aux-domain-columns: + - PRIME_best_allele + - Best_affinity_MHCII_allele + - Best_affinity_MHCII_allele_WT + - Best_affinity_MHCI_9mer_allele + - Best_affinity_MHCI_9mer_allele_WT + - Best_affinity_MHCI_allele + - Best_affinity_MHCI_allele_WT + Best_affinity_MHCI_allele_WT: + display-mode: detail + plot: + heatmap: + scale: ordinal + color-scheme: category20 + aux-domain-columns: + - PRIME_best_allele + - Best_affinity_MHCII_allele + - Best_affinity_MHCII_allele_WT + - Best_affinity_MHCI_9mer_allele + - Best_affinity_MHCI_9mer_allele_WT + - Best_affinity_MHCI_allele + - Best_affinity_MHCI_allele_WT + Best_affinity_MHCI_score: + display-mode: detail + plot: + ticks: + scale: linear + aux-domain-columns: + - Best_affinity_MHCI_score_WT + Best_affinity_MHCI_score_WT: + display-mode: detail + plot: + ticks: + scale: linear + aux-domain-columns: + - Best_affinity_MHCI_score + +name: ?f"Neopeptide candidates for 
{wildcards.group}, tumor sample {wildcards.tumor_alias}" + +default-view: "overview" + +datasets: + neoprint: + path: ?input.neopeptides + separator: "\t" + +views: + overview: + desc: | + Neopeptide candidates with annotations gathered and provided by NeoFox. + + ### Column descriptions + * **Recognition_Potential_MHCI_9mer:** The recognition potential of a neoantigen is the likelihood that it is effectively recognized by the TCR repertoire (definition: Amplitude_MHCI_affinity_9mer x Pathogensimiliarity_MHCI_affinity_9mer) \[[runup to equation (5) in Luksza et al. 2017](https://doi.org/10.1038/nature24473)\]. + * **Selfsimilarity_MHCI_conserved_binder:** Score for k-mer based similarity between Best_rank_MHCI_score_epitope and Best_affinity_MHCI_epitope_WT. For conserved binders only (i.e. NOT improved binders), where this score is considered indicative of immunogenicity \[[page 3 of Bjerregaard et al., 2017, Front Immunol.](https://doi.org/10.3389/fimmu.2017.01566)\]. + * **Improved_Binder_MHCI:** Cutoff of 1.2 on the ratio between normal and mutated peptide rank scores to designate a peptide as an improved binder (as opposed to a conserved binder) (definition: (Best_rank_MHCI_score_WT / Best_rank_MHCI_score ) > 1.2) \[[page 4 of Bjerregaard et al., 2017, Front Immunol.](https://doi.org/10.3389/fimmu.2017.01566)\] + + dataset: neoprint + + render-table: + columns: + ?for col, coldef in coldefs.items(): + __definitions__: + - | + coldef_ = deepcopy(coldef) + if col not in important_cols: + coldef_["display-mode"] = "detail" + ?col: ?coldef_ + ?for col in cols: + ?if col not in coldefs and col not in important_cols: + ?col: + display-mode: detail \ No newline at end of file diff --git a/workflow/rules/HLAtyping.smk b/workflow/rules/HLAtyping.smk deleted file mode 100644 index 682808ef..00000000 --- a/workflow/rules/HLAtyping.smk +++ /dev/null @@ -1,98 +0,0 @@ -rule HLA_LA: - input: - bam="results/recal/{sample}.sorted.bam", - bai="results/recal/{sample}.sorted.bam.bai", - index="resources/graphs/PRG_MHC_GRCh38_withIMGT/serializedGRAPH", - output: - "results/HLA-LA/output/{sample}/hla/R1_bestguess_G.txt", - threads: 7 - log: - "logs/HLA-LA/{sample}.log", - params: - graph=lambda w, input: os.path.basename(os.path.dirname(input.index)), - graphdir=lambda w, input: os.path.dirname(os.path.dirname(input.index)), - conda: - "../envs/hla_la.yaml" - shell: - "HLA-LA.pl --bam {input.bam} --sampleID {wildcards.sample} --graph {params.graph} --customGraphDir {params.graphdir} --workingDir results/HLA-LA/output --maxThreads {threads} > {log} 2>&1" - - -rule parse_HLA_LA: - input: - "results/HLA-LA/output/{sample}/hla/R1_bestguess_G.txt", - output: - report( - "results/HLA-LA/hlaI_{sample}.tsv", - caption="../report/HLA_Types.rst", - category="HLA-Typing(HLA-LA)", - ), - report( - "results/HLA-LA/hlaII_{sample}.tsv", - caption="../report/HLA_Types.rst", - category="HLA-Typing(HLA-LA)", - ), - log: - "logs/parse-HLA-LA/{sample}.log", - script: - "../scripts/parse_HLA_types.py" - - -rule razers3: - input: - reads="results/merged/DNA/{sample}_{fq}.fastq.gz", - output: - bam="results/razers3/bam/{sample}_{fq}.bam", - threads: 8 - log: - "logs/razers3/{sample}_{fq}.log", - params: - genome=config["HLAtyping"]["optitype_data"], - extra=config["params"]["razers3"], - wrapper: - "0.61.0/bio/razers3" - - -rule bam2fq: - input: - "results/razers3/bam/{sample}_{fq}.bam", - output: - "results/razers3/fastq/{sample}_{fq}.fished.fastq", - params: - "", - log: - "logs/razers3-bam2fq/{sample}-{fq}.log", - threads: 1 
- wrapper: - "0.61.0/bio/samtools/bam2fq/interleaved" - - -rule OptiType: - input: - reads=get_optitype_reads_input, - output: - multiext( - "results/optitype/{sample}/{sample}", "_coverage_plot.pdf", "_result.tsv" - ), - log: - "logs/optitype/{sample}.log", - params: - extra=config["params"]["optitype"], - sequencing_type="dna", - wrapper: - "0.63.0/bio/optitype" - - -rule parse_Optitype: - input: - "results/optitype/{sample}/{sample}_result.tsv", - output: - report( - "results/optitype/{sample}/hla_alleles_{sample}.tsv", - caption="../report/HLA_Types.rst", - category="HLA-Typing(Optitype)", - ), - log: - "logs/parse-optitype/{sample}.log", - shell: - "cut {input} -f2-7 | awk 'NR == 1 {{print}} NR>1 {{for (i = 1; i<=6; ++i) sub(/^/, \"&HLA-\", $i); print}}' " - '| sed -e s/[*,:]//g | sed "s/ /\t/g" > {output} 2> {log}' diff --git a/workflow/rules/MHC_binding.smk b/workflow/rules/MHC_binding.smk deleted file mode 100644 index 2d95e343..00000000 --- a/workflow/rules/MHC_binding.smk +++ /dev/null @@ -1,127 +0,0 @@ -# rule mhcflurry: -# input: -# peptides="results/microphaser/fasta/{sample}/filtered/{sample}.{chr}.{group}.fa", -# alleles="results/optitype/{sample}/hla_alleles_{sample}.tsv", -# wt_alleles=get_germline_optitype -# output: -# "results/mhcflurry/{sample}/{chr}/output.{group}.csv" -# log: -# "logs/mhcflurry/{sample}-{chr}-{group}.log" -# run: -# if "wt" in input.peptides: -# alleles = ",".join(pd.read_csv(input.wt_alleles, sep="\t").iloc[0]) -# else: -# alleles = ",".join(pd.read_csv(input.alleles, sep="\t").iloc[0]) -# cmd = "if [ -s {input.peptides} ]; then mhctools --mhc-predictor mhcflurry --mhc-alleles {alleles} --input-fasta-file {input.peptides} --output-csv {output} > {log}; else touch {output}; fi" -# shell(cmd) - - -rule netMHCpan: - input: - peptides="results/microphaser/fasta/{sample}/filtered/netMHCpan/{sample}.{chr}.{group}.fa", - alleles=get_alleles_MHCI, - output: - "results/netMHCpan/{sample}/{chr}/{sample}.{chr}.{group}.xls", - log: - "logs/netMHCpan/{sample}-{chr}-{group}.log", - params: - extra=config["affinity"]["netMHCpan"]["params"], - netMHC=config["affinity"]["netMHCpan"]["location"], - run: - alleles = ",".join(pd.read_csv(input.alleles, sep="\t").iloc[0]) - cmd = "if [ -s {input.peptides} ]; then {params.netMHC}/netMHCpan {params.extra} -xlsfile {output} -a {alleles} -f {input.peptides} > {log}; else touch {output}; fi" - shell(cmd) - - -rule netMHCIIpan: - input: - peptides="results/microphaser/fasta/{sample}/filtered/netMHCIIpan/{sample}.{chr}.{group}.fa", - alleles=get_alleles_MHCII, - output: - "results/netMHCIIpan/{sample}/{chr}/{sample}.{chr}.{group}.xls", - log: - "logs/netMHCIIpan/{sample}-{chr}-{group}.log", - params: - extra=config["affinity"]["netMHCIIpan"]["params"], - netMHC=config["affinity"]["netMHCIIpan"]["location"], - run: - alleles = ",".join(pd.read_csv(input.alleles, sep="\t")["Allele"].tolist()) - cmd = "if [ -s {input.peptides} ]; then {params.netMHC}/netMHCIIpan {params.extra} -xlsfile {output} -a {alleles} -f {input.peptides} > {log}; else touch {output}; fi" - shell(cmd) - - -rule parse_mhc_out: - input: - expand( - "results/{{mhc}}/{{sample}}/{chr}/{{sample}}.{chr}.{{group}}.xls", - chr=contigs, - ), - output: - "results/{mhc}/{sample}/{sample}.mhc.{group}.tsv", - log: - "logs/parse-mhc/{mhc}-{sample}-{group}.log", - wildcard_constraints: - group="wt|mt", - script: - "../scripts/group_mhc_output.py" - - -# rule parse_mhcflurry: -# input: -# expand("results/mhcflurry/{{sample}}/{chr}/output.{{group}}.csv", chr=contigs) -# 
output: -# "results/mhcflurry/{sample}/{sample}.mhc.{group}.csv" -# wildcard_constraints: -# group="wt|mt" -# log: -# "logs/parse-mhc/mhcflurry-{sample}-{group}.log" -# conda: -# "../envs/xsv.yaml" -# shell: -# "xsv cat rows -d ',' {input} | cut --complement -f2,7,8 > {output}" - - -rule mhc_csv_table: - input: - info="results/microphaser/info/{sample}/filtered/{mhc}/{sample}.tsv", - mt="results/{mhc}/{sample}/{sample}.mhc.mt.tsv", - wt="results/{mhc}/{sample}/{sample}.mhc.wt.tsv", - output: - report( - "results/neoantigens/{mhc}/{sample}.DNA.tsv", - caption="../report/WES_results.rst", - category="Results WES (netMHC)", - ), - log: - "logs/create-mhc-table/{mhc}-{sample}.log", - script: - "../scripts/merge_data.py" - - -# rule mhcflurry_table: -# input: -# info="results/microphaser/info/{sample}/filtered/mhcflurry/{sample}.tsv", -# mt="results/mhcflurry/{sample}/{sample}.mhc.mt.tsv", -# wt="results/mhcflurry/{sample}/{sample}.mhc.wt.tsv" -# output: -# report("results/neoantigens/mhcflurry/{sample}.WES.tsv", caption="../report/WES_results.rst", category="Results WES (MHCFlurry)") -# script: -# "../scripts/merge_mhcflurry.py" - - -rule add_RNA_info: - input: - counts="results/kallisto/{sample}", - table="results/neoantigens/{mhc}/{sample}.DNA.tsv", - output: - report( - "results/neoantigens/{mhc}/{sample}.RNA.tsv", - caption="../report/RNA_results.rst", - category="Results RNA", - ), - params: - abundance=lambda wc, input: "{}/abundance.tsv".format(input.counts), - log: - "logs/add-RNA/{mhc}-{sample}.log", - script: - "../scripts/add_rna_info.py" diff --git a/workflow/rules/RNA.smk b/workflow/rules/RNA.smk deleted file mode 100644 index 940d7c02..00000000 --- a/workflow/rules/RNA.smk +++ /dev/null @@ -1,71 +0,0 @@ -rule kallisto_quant: - input: - fastq=get_quant_reads_input, - index="resources/kallisto/transcripts.idx", - output: - directory("results/kallisto/{sample}"), - params: - extra=kallisto_params, - log: - "results/logs/kallisto/quant/{sample}.log", - wrapper: - "0.60.1/bio/kallisto/quant" - - -rule STAR_align: - input: - "resources/STAR_index", - fq1=lambda wc: units.loc[(wc.sample, "RNA"), "fq1"], - fq2=lambda wc: units.loc[(wc.sample, "RNA"), "fq2"], - gtf="resources/genome.gtf", - output: - # see STAR manual for additional output files - "results/star/{sample}/Aligned.out.bam", - "results/star/{sample}/ReadsPerGene.out.tab", - log: - "logs/star/{sample}.log", - params: - # path to STAR reference genome index - index="STAR_index", - # optional parameters - designed to get chimeric alignments for fusion detection - extra=lambda wc, input: "--quantMode GeneCounts --sjdbGTFfile {} {}".format( - input.gtf, config["params"]["star"] - ), - threads: 8 - wrapper: - "0.42.0/bio/star/align" - - -rule arriba: - input: - bam="results/star/{sample}/Aligned.out.bam", - genome="resources/genome.fasta", - annotation="resources/genome.gtf", - output: - fusions="results/arriba/{sample}.fusions.tsv", - discarded="results/arriba/{sample}.fusions.discarded.tsv", - params: - blacklist=config["fusion"]["arriba"]["blacklist"], - extra=config["fusion"]["arriba"]["params"], - log: - "results/logs/arriba/{sample}.log", - threads: 1 - wrapper: - "0.60.1/bio/arriba" - - -## TODO: Update -# rule fusioncatcher: -# input: -# fq1=lambda w: units.loc[(w.sample, "RNA"), "fq1"], -# fq2=lambda w: units.loc[(w.sample, "RNA"), "fq2"] -# output: -# directory("fusioncatcher/{sample}") -# params: -# extra="-T tmp -d ../../fusioncatcher_data" -# log: -# "logs/fusioncatcher/{sample}.log" -# threads: -# 8 -# shell: -# 
"fusioncatcher -i {input.fq1},{input.fq2} -o {output} {params.extra} -p {threads} > {log}" diff --git a/workflow/rules/annotate_neoantigens.smk b/workflow/rules/annotate_neoantigens.smk new file mode 100644 index 00000000..70e1a492 --- /dev/null +++ b/workflow/rules/annotate_neoantigens.smk @@ -0,0 +1,124 @@ +rule prepare_neo_fox_config_and_resources: + output: + config="resources/neo_fox/neo_fox_config.txt", + # we cannot put the exact files generated into the + # output, as snakemake will generate the respective + # subdirectories and NeoFox has default exist_ok=False + # set for os.makedirs: + # https://github.com/TRON-Bioinformatics/neofox/blob/fb6cdf9f10e77c409d0fa44657ef520eedca6994/neofox/references/installer.py#L221 + references=directory("resources/neo_fox/references/"), + log: + "logs/neo_fox/neo_fox_config.log", + conda: + "../envs/neo_fox_deps.yaml" + params: + hla_alleles=config["params"]["neo_fox"]["hla_alleles"], + shell: + """ + # environment variables necessary for neofox-configure + # NOTE: we have to provide all binaries with hard-coded + # paths, because NeoFox checks that the file at the path + # exists (and not simply that a binary exists): + # https://github.com/TRON-Bioinformatics/neofox/blob/629443b637fc41b1ab81f4f770e7a8a1c976d3f2/neofox/references/references.py#L90 + CONDA_BIN=$CONDA_PREFIX/bin + + ## pre-installed via conda + export NEOFOX_MAKEBLASTDB=$CONDA_BIN/makeblastdb + echo "NEOFOX_MAKEBLASTDB=$CONDA_BIN/makeblastdb" > {output.config} + export NEOFOX_RSCRIPT=$CONDA_BIN/Rscript + echo "NEOFOX_RSCRIPT=$CONDA_BIN/Rscript" >> {output.config} + + ## pre-installed into conda environment via post-deploy script + export NEOFOX_NETMHCPAN=$CONDA_BIN/netMHCpan + echo "NEOFOX_NETMHCPAN=$CONDA_BIN/netMHCpan" >> {output.config} + export NEOFOX_NETMHC2PAN=$CONDA_BIN/netMHCIIpan + echo "NEOFOX_NETMHC2PAN=$CONDA_BIN/netMHCIIpan" >> {output.config} + + ## specification of hla_allele link via config.yaml + export NEOFOX_HLA_DATABASE={params.hla_alleles} + + neofox-configure --reference-folder {output.references} + echo "NEOFOX_REFERENCE_FOLDER={output.references}" >> {output.config} + + # further environment variables needed for the config file + + ## pre-installed via conda + echo "NEOFOX_BLASTP=$CONDA_BIN/blastp" >> {output.config} + + ## pre-installed into conda environment via post-deploy script + echo "NEOFOX_MIXMHCPRED=$CONDA_BIN/MixMHCpred" >> {output.config} + echo "NEOFOX_MIXMHC2PRED=$CONDA_BIN/MixMHC2pred_unix" >> {output.config} + echo "NEOFOX_PRIME=$CONDA_BIN/PRIME" >> {output.config} + """ + + +rule adjust_microphaser_output_for_neo_fox: + input: + microphaser="results/microphaser/info/filtered/{group}.{tumor_alias}.merged_tumor_normal.pep_len_{peptide_length}.tsv", + output: + neo_fox="results/neo_fox/candidates/{group}.{tumor_alias}.merged_tumor_normal.pep_len_{peptide_length}.tsv", + log: + "logs/neo_fox/candidates/{group}.{tumor_alias}.merged_tumor_normal.pep_len_{peptide_length}.log", + threads: 1 + conda: + "../envs/pandas.yaml" + script: + "../scripts/adjust_microphaser_output_for_neo_fox.py" + + +rule create_neo_fox_group_sheet: + input: + hla_la_bestguess="results/hla_la/output/{group}_{tumor_alias}/hla/R1_bestguess_G.txt", + output: + group_sheet="results/neo_fox/patient_data/{group}.{tumor_alias}.hla_alleles.tumor_type.tsv", + log: + "logs/neo_fox/patient_data/{group}.{tumor_alias}.hla_alleles.tumor_type.log", + conda: + "../envs/pandas.yaml" + params: + group=lambda wc: group_annotation.loc[wc.group], + script: + 
"../scripts/create_neo_fox_group_sheet.py" + + +rule neo_fox: + input: + config="resources/neo_fox/neo_fox_config.txt", + candidates=expand( + "results/neo_fox/candidates/{{group}}.{{tumor_alias}}.merged_tumor_normal.pep_len_{peptide_length}.tsv", + peptide_length=config["params"]["neo_fox"]["peptide_len"], + ), + group_sheet="results/neo_fox/patient_data/{group}.{tumor_alias}.hla_alleles.tumor_type.tsv", + output: + tsv="results/neo_fox/annotated/{group}.{tumor_alias}.annotated_neoantigens.tsv", + json="results/neo_fox/annotated/{group}.{tumor_alias}.annotated_neoantigens.json", + log: + "logs/neo_fox/annotated/{group}.{tumor_alias}.log", + threads: 8 + conda: + "../envs/neo_fox_deps.yaml" + params: + folder=lambda wc, output: path.dirname(output.tsv), + prefix=lambda wc, output: path.splitext(path.basename(output.tsv))[0], + organism="human" + if config["ref"]["species"] == "homo_sapiens" + else "mouse" + if config["ref"]["species"] == "mus_musculus" + else "unsupported", + shell: + "(neofox " + " --num-cpus {threads} " + " --config {input.config} " + " --candidate-file {input.candidates} " + " --patient-data {input.group_sheet} " + " --with-table " + " --with-json " + " --organism {params.organism} " + " --output-folder {params.folder} " + " --output-prefix {params.prefix} ; " + " mv {params.folder}/{params.prefix}_neoantigen_candidates_annotated.tsv {output.tsv}; " + " mv {params.folder}/{params.prefix}_neoantigen_candidates_annotated.json {output.json}; " + # this implicitly created log does not seem to contain all of stderr, + # so we rather do our own capturing below + " rm {params.folder}/{params.prefix}.log; " + ") 2> {log} " diff --git a/workflow/rules/annotation.smk b/workflow/rules/annotation.smk deleted file mode 100644 index 64088375..00000000 --- a/workflow/rules/annotation.smk +++ /dev/null @@ -1,44 +0,0 @@ -rule annotate_variants: - input: - calls="results/calls/{group}.{scatteritem}.bcf", - cache="resources/vep/cache", - plugins="resources/vep/plugins", - output: - calls="results/calls/{group}.{scatteritem}.annotated.bcf", - stats=report( - "results/calls/{group}.{scatteritem}.stats.html", - caption="../report/stats.rst", - category="QC", - ), - params: - # Pass a list of plugins to use, see https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html - # Plugin args can be added as well, e.g. via an entry "MyPlugin,1,FOO", see docs. - plugins=config["annotations"]["vep"]["plugins"], - extra="{} --vcf_info_field ANN".format(config["annotations"]["vep"]["params"]), - log: - "logs/vep/{group}.{scatteritem}.annotate.log", - wrapper: - "0.59.2/bio/vep/annotate" - - -rule annotate_strelka_variants: - input: - calls="results/strelka/{calls}.bcf", - cache="resources/vep/cache", - plugins="resources/vep/plugins", - output: - calls="results/strelka/{calls}.annotated.bcf", - stats=report( - "results/strelka/{calls}.annotated.stats.html", - caption="../report/stats.rst", - category="QC", - ), - params: - # Pass a list of plugins to use, see https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html - # Plugin args can be added as well, e.g. via an entry "MyPlugin,1,FOO", see docs. 
- plugins=[], #config["annotations"]["vep"]["plugins"], - extra="--vcf_info_field ANN --hgvs --symbol --canonical", - log: - "logs/vep/{calls}.strelka.annotate.log", - wrapper: - "0.59.2/bio/vep/annotate" diff --git a/workflow/rules/calling.smk b/workflow/rules/calling.smk deleted file mode 100644 index 98ef992f..00000000 --- a/workflow/rules/calling.smk +++ /dev/null @@ -1,147 +0,0 @@ -rule strelka_somatic: - input: - normal=get_normal_bam, - normal_index=get_normal_bai, - tumor="results/recal/{sample}.sorted.bam", - tumor_index="results/recal/{sample}.sorted.bam.bai", - fasta="resources/genome.fasta", - fasta_index="resources/genome.fasta.fai", - callregions="resources/genome.callregions.bed.gz", - output: - "results/strelka/somatic/{sample}/results/variants/somatic.snvs.vcf.gz", - "results/strelka/somatic/{sample}/results/variants/somatic.indels.vcf.gz", - log: - "logs/calling/strelka_somatic/{sample}.log", - params: - config_extra="--callRegions {} {}".format( - "resources/genome.callregions.bed.gz", config["params"]["strelka"]["config"] - ), - run_extra=config["params"]["strelka"]["run"], - threads: 22 - wrapper: - "0.65.0/bio/strelka/somatic" - - -rule strelka_germline: - input: - bam="results/recal/{normal}.sorted.bam", - normal_index="results/recal/{normal}.sorted.bam.bai", - fasta="resources/genome.fasta", - fasta_index="resources/genome.fasta.fai", - callregions="resources/genome.callregions.bed.gz", - output: - "results/strelka/germline/{normal}/results/variants/variants.vcf.gz", - log: - "logs/calling/strelka_germline/{normal}.log", - params: - config_extra="--callRegions {} {}".format( - "resources/genome.callregions.bed.gz", config["params"]["strelka"]["config"] - ), - run_extra="", - threads: 22 - wrapper: - "0.65.0/bio/strelka/germline" - - -rule vcf_to_bcf: - input: - "{variants}.vcf.gz", - output: - "{variants}.output.bcf", - log: - "logs/bcftools/to-bcf/{variants}.log", - params: - "-O b -f PASS", - wrapper: - "0.60.0/bio/bcftools/view" - - -rule concat_somatic: - input: - calls=expand( - "results/strelka/somatic/{{sample}}/results/variants/somatic.{type}.output.bcf", - type=["snvs", "indels"], - ), - indices=expand( - "results/strelka/somatic/{{sample}}/results/variants/somatic.{type}.output.bcf.csi", - type=["snvs", "indels"], - ), - output: - "results/strelka/somatic/{sample}/results/variants/somatic.complete.bcf", - log: - "bcftools/concat-somatic/{sample}.log", - params: - "-O b -a", - wrapper: - "0.60.0/bio/bcftools/concat" - - -rule get_tumor_from_somatic: - input: - "results/strelka/somatic/{sample}/results/variants/somatic.complete.bcf", - output: - "results/strelka/somatic/{sample}/results/variants/somatic.complete.tumor.bcf", - log: - "logs/bcftools/view-TUMOR/{sample}.log", - params: - "-O b -s TUMOR", - wrapper: - "0.60.0/bio/bcftools/view" - - -rule reheader_germline: - input: - vcf="{germline}/variants.output.bcf", - samples="resources/sampleheader.txt", - output: - "{germline}/variants.reheader.bcf", - log: - "logs/bcftools/reheader/{germline}.log", - params: - extra="", - view_extra="-O b", - wrapper: - "0.60.0/bio/bcftools/reheader" - - -rule concat_variants: - input: - calls=lambda w: get_pair_variants(w, index=False), - index=lambda w: get_pair_variants(w, index=True), - output: - "results/strelka/merged/{sample}/all_variants.bcf", - log: - "bcftools/concat-all/{sample}.log", - params: - extra="-O b -a", - wrapper: - "0.64.0/bio/bcftools/concat" - - -rule preprocess_variants: - input: - variants="{variants}.bcf", - output: - "{variants}.prepy.bcf", - 
params: - extra="-L --somatic", - genome="resources/genome.fasta", - log: - "logs/prepy/{variants}.log", - threads: 2 - wrapper: - "0.60.0/bio/hap.py/pre.py" - - -rule norm_vcf: - input: - "{prefix}.bcf", - genome="resources/genome.fasta", - output: - "{prefix}.norm.bcf", - log: - "logs/bcftools/norm/{prefix}.log", - params: - "-f {} -O b -m-".format("resources/genome.fasta"), # optional parameters for bcftools norm (except -o) - wrapper: - "0.65.0/bio/bcftools/norm" diff --git a/workflow/rules/candidate_calling.smk b/workflow/rules/candidate_calling.smk deleted file mode 100644 index 74386763..00000000 --- a/workflow/rules/candidate_calling.smk +++ /dev/null @@ -1,28 +0,0 @@ -rule freebayes: - input: - ref="resources/genome.fasta", - # you can have a list of samples here - samples=get_paired_bams, - output: - "results/candidate-calls/{pair}.freebayes.bcf", - log: - "logs/{pair}.log", - params: - extra=config["params"].get("freebayes", ""), - chunksize=100000, - threads: 60 - wrapper: - "0.65.0/bio/freebayes" - - -rule scatter_candidates: - input: - "results/candidate-calls/{pair}.{caller}.bcf", - output: - scatter.calling("results/candidate-calls/{{pair}}.{{caller}}.{scatteritem}.bcf"), - log: - "logs/scatter-candidates/{pair}.{caller}.log", - conda: - "../envs/rbt.yaml" - shell: - "rbt vcf-split {input} {output}" diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 3bc6c60d..23a4b97e 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -1,6 +1,7 @@ import glob import pandas as pd +from os import path from snakemake.remote import FTP from snakemake.utils import validate @@ -10,16 +11,56 @@ ftp = FTP.RemoteProvider() validate(config, schema="../schemas/config.schema.yaml") -##### sample sheets ##### - -samples = pd.read_csv(config["samples"], sep="\t").set_index("sample", drop=False) +##### samples sheet ##### + +samples = ( + pd.read_csv( + config["samples"], + sep="\t", + dtype={ + "sample_name": str, + "group": str, + "alias": str, + }, + comment="#", + ) + .set_index("sample_name", drop=False) + .sort_index() +) validate(samples, schema="../schemas/samples.schema.yaml") -units = pd.read_csv(config["units"], dtype=str, sep="\t").set_index( - ["sample", "sequencing_type", "unit"], drop=False +##### units sheet ##### + +units = ( + pd.read_csv( + config["units"], + sep="\t", + dtype={ + "sample_name": str, + "sequencing_type": str, + "unit_name": str, + "adapters": str, + }, + comment="#", + ) + .set_index(["sample_name", "sequencing_type", "unit_name"], drop=False) + .sort_index() ) validate(units, schema="../schemas/units.schema.yaml") +groups = samples["group"].unique() + +if "groups" in config: + group_annotation = ( + pd.read_csv(config["groups"], sep="\t", dtype={"group": str}) + .set_index("group") + .sort_index() + ) + group_annotation = group_annotation.loc[groups] +else: + group_annotation = pd.DataFrame({"group": groups}).set_index("group") + + contigs = [c for c in range(1, 23)] contigs.extend(["X", "Y"]) @@ -27,10 +68,29 @@ contigs.extend(["X", "Y"]) wildcard_constraints: - pair="|".join(samples[samples.type == "tumor"]["sample"]), - sample="|".join(samples["sample"]), + cancer_sample="|".join(samples[samples.alias != "normal"]["sample_name"]), + normal_sample="|".join(samples[samples.alias == "normal"]["sample_name"]), + sample="|".join(samples["sample_name"]), + unit="|".join(units["unit_name"]), + alias="|".join(pd.unique(samples["alias"])), + tumor_alias="|".join( + pd.unique(samples.loc[samples["alias"].str.match("tumor"), 
"alias"]) + ), + normal_alias="normal", + tumor_set=config["params"]["microphaser"]["events"]["tumor"], + normal_set=config["params"]["microphaser"]["events"]["normal"], + set="|".join( + [ + config["params"]["microphaser"]["events"]["tumor"], + config["params"]["microphaser"]["events"]["normal"], + ] + ), + group="|".join(pd.unique(samples["group"])), caller="|".join(["freebayes", "delly"]), - event="somatic|germline|complete", + peptide_type="|".join(["normal", "neo"]), + event="|".join(["somatic", "germline", "complete"]), + read="|".join(["single", "R1", "R2"]), + seqtype="|".join(["DNA", "RNA"]), ### Output generation ### @@ -44,60 +104,57 @@ def is_activated(xpath): def get_final_output(): - if config["epitope_prediction"]["activate"]: - final_output = expand( - "results/neoantigens/{mhc}/{S.sample}.{S.sequencing_type}.xlsx", - S=units.loc[samples[samples.type == "tumor"]["sample"]] - .drop_duplicates(["sample", "sequencing_type"]) - .itertuples(), - mhc=list( - filter( - None, - [ - "netMHCpan" if is_activated("affinity/netMHCpan") else None, - "netMHCIIpan" if is_activated("affinity/netMHCIIpan") else None, - ], + final_output = [] + for group in pd.unique(samples["group"]): + smps = samples.loc[samples["group"] == group, "sample_name"] + tumor_aliases = samples.loc[ + (samples["group"] == group) & (samples["alias"].str.match("tumor")), + "alias", + ] + if config["neoantigen_prediction"]["activate"]: + final_output.extend( + expand( + "results/datavzrd/neoprint/{group}.{tumor_alias}.{mhc}", + group=group, + tumor_alias=tumor_aliases, + mhc=["I", "II"], ) - ), - ) - else: - if config["HLAtyping"]["HLA_LA"]["activate"]: - final_output = expand( - [ - "results/optitype/{sample}/hla_alleles_{sample}.tsv", - "results/HLA-LA/hlaI_{sample}.tsv", - "results/HLA-LA/hlaII_{sample}.tsv", - ], - sample=samples["sample"], ) + # sequencing_types = pd.unique( + # units.loc[units["sample_name"].isin(smps), "sequencing_type"] + # ) + # final_output.extend( + # expand( + # "results/neoantigens/{group}.{tumor_alias}.merged_tumor_normal.{mhc}.{seqtype}.tsv", + # group=group, + # tumor_alias=tumor_aliases, + # mhc=list( + # filter( + # None, + # [ + # "net_mhc_pan" + # if is_activated("params/net_mhc_pan") + # else None, + # "net_mhc_two_pan" + # if is_activated("params/net_mhc_two_pan") + # else None, + # ], + # ) + # ), + # seqtype=sequencing_types, + # ) + # ) else: final_output = expand( - "results/optitype/{sample}/hla_alleles_{sample}.tsv", - sample=samples["sample"], + [ + "results/hla_la/{group}.{tumor_alias}.hlaI.tsv", + "results/hla_la/{group}.{tumor_alias}.hlaII.tsv", + ], + group=group, + tumor_alias=tumor_aliases, ) - return final_output - - -def get_fusion_output(): - if config["fusion"]["arriba"]["activate"]: - fusion_output = expand( - "results/fusion/arriba/{sample}.fusions.tsv", - sample=units[units["sequencing_type"] == "RNA"]["sample"], - ) - else: - fusion_output = [] - return fusion_output - -def get_tmb_targets(): - if is_activated("tmb"): - return expand( - "results/plots/tmb/{group}.{mode}.svg", - group=samples[(samples.type == "tumor")]["sample"], - mode=config["tmb"].get("mode", "curve"), - ) - else: - return [] + return final_output caller = list( @@ -106,47 +163,6 @@ caller = list( ### helper functions ### -## alignment ## - - -def get_cutadapt_input(wildcards): - unit = units.loc[wildcards.sample].loc[wildcards.unit].loc[wildcards.seqtype] - - if pd.isna(unit["fq1"]): - # SRA sample (always paired-end for now) - accession = unit["sra"] - return 
expand("sra/{accession}_{read}.fastq", accession=accession, read=[1, 2]) - - if unit["fq1"].endswith("gz"): - ending = ".gz" - else: - ending = "" - - if pd.isna(unit["fq2"]): - # single end local sample - return "pipe/cutadapt/{S}/{T}/{U}.fq1.fastq{E}".format( - S=unit.sample, U=unit.unit, T=unit.sequencing_type, E=ending - ) - else: - # paired end local sample - return expand( - "pipe/cutadapt/{S}/{T}/{U}.{{read}}.fastq{E}".format( - S=unit.sample, U=unit.unit, T=unit.sequencing_type, E=ending - ), - read=["fq1", "fq2"], - ) - - -def get_cutadapt_pipe_input(wildcards): - pattern = ( - units.loc[wildcards.sample] - .loc[wildcards.seqtype] - .loc[wildcards.unit, wildcards.fq] - ) - files = list(sorted(glob.glob(pattern))) - assert len(files) > 0, "no files found at {}".format(pattern) - return files - def is_paired_end(sample, seqtype): sample_units = units.loc[sample].loc[seqtype] @@ -163,249 +179,82 @@ def is_paired_end(sample, seqtype): return all_paired -def get_fastqs(wc): - if config["trimming"]["activate"]: - return expand( - "results/trimmed/{sample}/{seqtype}/{unit}_{read}.fastq.gz", - unit=units.loc[wc.seqtype].loc[wc.sample, "unit_name"], - sample=wc.sample, - read=wc.read, - seqtype=wc.seqtype, - ) - unit = units.loc[wc.sample].loc[wc.seqtype] - if all(pd.isna(unit["fq1"])): - # SRA sample (always paired-end for now) - accession = unit["sra"] - return expand( - "sra/{accession}_{read}.fastq", accession=accession, read=wc.read[-1] - ) - fq = "fq{}".format(wc.read[-1]) - return units.loc[wc.sample].loc[wc.seqtype, fq].tolist() - - -def get_map_reads_input(wildcards): - if is_paired_end(wildcards.sample, "DNA"): - return [ - "results/merged/DNA/{sample}_R1.fastq.gz", - "results/merged/DNA/{sample}_R2.fastq.gz", - ] - return "results/merged/DNA/{sample}_single.fastq.gz" - - -def get_read_group(wildcards): - """Denote sample name and platform in read group.""" - return r"-R '@RG\tID:{sample}\tSM:{sample}\tPL:{platform}'".format( - sample=wildcards.sample, platform=samples.loc[wildcards.sample, "platform"] - ) - - -def get_recalibrate_quality_input(wildcards, bai=False): - ext = ".bai" if bai else "" - if is_activated("remove_duplicates"): - return "results/dedup/{}.sorted.bam{}".format(wildcards.sample, ext) - else: - return "results/mapped/{}.sorted.bam{}".format(wildcards.sample, ext) - - -## HLA Typing ## - - -def get_optitype_reads_input(wildcards): - if is_activated("HLAtyping/optitype_prefiltering"): - if is_paired_end(wildcards.sample, "DNA"): - return expand( - "results/razers3/fastq/{sample}_{fq}.fished.fastq", - sample=wildcards.sample, - fq=["R1", "R2"], - ) - return "results/razers3/fastq/{sample}_single.fastq" - else: - return get_map_reads_input(wildcards) - - -def get_oncoprint_batch(wildcards): - if wildcards.batch == "all": - groups = samples[samples["type"] == "tumor"]["sample"].unique() - else: - groups = samples.loc[ - samples[config["oncoprint"]["stratify"]["by-column"]] == wildcards.batch, - "group", - ].unique() - return expand( - "results/merged-calls/{group}.{{event}}.fdr-controlled.bcf", group=groups - ) - - -## variant calls ## - - -def get_annotated_bcf(wildcards): - selection = ".annotated" - return "results/calls/{pair}.{scatteritem}{selection}.bcf".format( - pair=wildcards.pair, selection=selection, scatteritem=wildcards.scatteritem - ) - - -def get_scattered_calls(ext=".bcf"): - def inner(wildcards): - return expand( - "results/calls/{{pair}}.{caller}.{{scatteritem}}.sorted{ext}", - caller=caller, - ext=ext, - ) - - return inner - - -def 
get_fdr_control_params(wildcards):
-    query = config["calling"]["fdr-control"]["events"][wildcards.event]
-    threshold = query.get(
-        "threshold", config["calling"]["fdr-control"].get("threshold", 0.05)
-    )
-    events = query["varlociraptor"]
-    return {"threshold": threshold, "events": events}
-
-
-def get_pair_variants(wildcards, index):
-    if index:
-        ext = ".csi"
-    else:
-        ext = ""
-    variants = [
-        "results/strelka/somatic/{}/results/variants/somatic.complete.tumor.bcf{}".format(
-            wildcards.sample, ext
-        )
-    ]
-    variants.append(
-        "results/strelka/germline/{}/results/variants/variants.reheader.bcf{}".format(
-            get_normal(wildcards), ext
-        )
-    )
-    return variants
-
+def get_sample_from_group_and_alias(group, alias):
+    sample = samples.loc[
+        (samples["group"] == group) & (samples["alias"] == alias), "sample_name"
+    ].squeeze()
+    return sample
 
-def get_pair_observations(wildcards):
-    return expand(
-        "results/observations/{pair}/{sample}.{caller}.{scatteritem}.bcf",
-        caller=wildcards.caller,
-        pair=wildcards.pair,
-        scatteritem=wildcards.scatteritem,
-        sample=get_paired_samples(wildcards),
-    )
 
-
-def get_merge_input(ext=".bcf"):
+def get_bam_from_group_and_alias(ext=".bam"):
     def inner(wildcards):
-        return expand(
-            "results/calls/{{pair}}.{vartype}.{{event}}.fdr-controlled{ext}",
-            ext=ext,
-            vartype=["SNV", "INS", "DEL", "MNV"],
-            filter=config["calling"]["fdr-control"]["events"][wildcards.event],
+        alias = wildcards.get(
+            "alias",
+            wildcards.get("tumor_alias", wildcards.get("normal_alias", "unknown")),
        )
+        if alias == "unknown":
+            raise ValueError(
+                "get_bam_from_group_and_alias() requires one of the following wildcards: 'alias', 'tumor_alias', 'normal_alias'."
+            )
+        sample = get_sample_from_group_and_alias(wildcards.group, alias)
+        return f"results/recal/{sample}.sorted{ext}"
 
     return inner
 
 
-def get_pair_aliases(wildcards):
-    return [
-        samples.loc[samples.loc[wildcards.pair, "matched_normal"], "type"],
-        samples.loc[wildcards.pair, "type"],
-    ]
-
-
-def get_tabix_params(wildcards):
-    if wildcards.format == "vcf":
-        return "-p vcf"
-    if wildcards.format == "txt":
-        return "-s 1 -b 2 -e 2"
-    raise ValueError("Invalid format for tabix: {}".format(wildcards.format))
-
-
-## RNA ##
-
-
-def get_quant_reads_input(wildcards):
-    if is_paired_end(wildcards.sample, "RNA"):
-        return [
-            "results/merged/RNA/{sample}_R1.fastq.gz",
-            "results/merged/RNA/{sample}_R2.fastq.gz",
-        ]
-    return "results/merged/RNA/{sample}_single.fastq.gz"
-
-
-def kallisto_params(wildcards, input):
-    extra = config["params"]["kallisto"]
-    if len(input.fastq) == 1:
-        extra += " --single"
-        extra += (
-            " --fragment-length {unit.fragment_len_mean} " "--sd {unit.fragment_len_sd}"
-        ).format(unit=units.loc[(wildcards.sample, wildcards.unit)])
-    else:
-        extra += " --fusion"
-    return extra
-
-
-## helper functions ##
-
-
-def get_paired_samples(wildcards):
-    return [
-        samples.loc[(wildcards.pair), "matched_normal"],
-        samples.loc[wildcards.pair, "sample"],
-    ]
-
-
-def get_paired_bams(wildcards):
-    return expand(
-        "results/recal/{sample}.sorted.bam", sample=get_paired_samples(wildcards)
-    )
-
-
-def get_paired_bais(wildcards):
+def get_alleles_MHCI(wildcards):
+    alias = "normal" if wildcards.peptide_type == "normal" else wildcards.tumor_alias
     return expand(
-        "results/recal/{sample}.sorted.bam.bai", sample=get_paired_samples(wildcards)
+        "results/hla_la/{group}.{alias}.hlaI.tsv",
+        group=wildcards.group,
+        alias=alias,
    )
 
 
-def get_normal(wildcards):
-    return samples.loc[(wildcards.sample), "matched_normal"]
-
-
-def 
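
The group/alias helpers above replace the old `{pair}`/`matched_normal` lookups: a rule now resolves its BAM by naming the sample's group and alias instead of relying on hardcoded tumor/normal pairings. A minimal standalone sketch of that lookup, using a toy samples sheet with the new column layout (the sample names here are made up):

```python
import pandas as pd

# Toy samples sheet following the new samples.schema.yaml columns.
samples = pd.DataFrame(
    {
        "sample_name": ["patientA_T", "patientA_N"],
        "alias": ["tumor", "normal"],
        "group": ["patientA", "patientA"],
    }
).set_index("sample_name", drop=False)


def get_sample_from_group_and_alias(group, alias):
    # .squeeze() collapses the single matching row into a scalar sample name.
    return samples.loc[
        (samples["group"] == group) & (samples["alias"] == alias), "sample_name"
    ].squeeze()


assert get_sample_from_group_and_alias("patientA", "tumor") == "patientA_T"
# get_bam_from_group_and_alias() then only interpolates the resolved name:
print(f"results/recal/{get_sample_from_group_and_alias('patientA', 'normal')}.sorted.bam")
```
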
get_reads(wildcards): - return get_seperate(wildcards.sample, wildcards.group) - - -def get_seperate(sample, group): - return units.loc[(sample, "DNA"), "fq{}".format(str(group))] - - -def get_proteome(wildcards): +def get_alleles_MHCII(wildcards): + alias = "normal" if wildcards.peptide_type == "normal" else wildcards.tumor_alias return expand( - "results/microphaser/fasta/germline/{normal}/{mhc}/reference_proteome.bin", - normal=get_normal(wildcards), - mhc=wildcards.mhc, + # TODO: check that hlaII is correct here, and not hlaI which it previously was + "results/hla_la/{group}.{alias}.hlaII.tsv", + group=wildcards.group, + alias=alias, ) -def get_alleles_MHCI(wildcards): - if wildcards.group == "wt": - return "results/optitype/{S}/hla_alleles_{S}.tsv".format( - S=get_normal(wildcards) - ) - else: - return "results/optitype/{S}/hla_alleles_{S}.tsv".format(S=wildcards.sample) - - -def get_alleles_MHCII(wildcards): - if wildcards.group == "wt": - return "results/HLA-LA/hlaI_{S}.tsv".format(S=get_normal(wildcards)) - else: - return "results/HLA-LA/hlaI_{S}.tsv".format(S=wildcards.sample) - - -def get_normal_bam(wildcards): - return expand("results/recal/{normal}.sorted.bam", normal=get_normal(wildcards)) - - -def get_normal_bai(wildcards): - return expand("results/recal/{normal}.sorted.bam.bai", normal=get_normal(wildcards)) +##### Other stuff #### + +neofox_important_cols = { + "general": [ + "gene", + "mutation_mutatedXmer", + "mutation_wildTypeXmer", + "purity_adjusted_DNA_VAF", + "imputedGeneExpression", + ], + "I": [ + "PRIME_best_rank", + "PRIME_best_score", + "PRIME_best_peptide", + "PRIME_best_allele", + "Recognition_Potential_MHCI_9mer", + "Improved_Binder_MHCI", + "Selfsimilarity_MHCI_conserved_binder", + "Best_rank_MHCI_9mer_score", + "Best_rank_MHCI_9mer_score_WT", + "Best_rank_MHCI_9mer_epitope", + "Best_rank_MHCI_9mer_epitope_WT", + "Best_rank_MHCI_9mer_allele", + "Best_rank_MHCI_9mer_allele_WT", + ], + "II": [ + "MixMHC2pred_best_rank", + "MixMHC2pred_best_peptide", + "MixMHC2pred_best_allele", + "Best_rank_MHCII_score", + "Best_rank_MHCII_score_WT", + "Best_rank_MHCII_score_epitope", + "Best_rank_MHCII_score_epitope_WT", + "Best_rank_MHCII_score_allele", + "Best_rank_MHCII_score_allele_WT", + ] +} \ No newline at end of file diff --git a/workflow/rules/datavzrd.smk b/workflow/rules/datavzrd.smk new file mode 100644 index 00000000..d09c9cd7 --- /dev/null +++ b/workflow/rules/datavzrd.smk @@ -0,0 +1,51 @@ +rule prepare_neoprint: + input: + neopeptides="results/neo_fox/annotated/{group}.{tumor_alias}.annotated_neoantigens.tsv", + output: + mhc_one="results/tables/neoprint/{group}.{tumor_alias}.annotated_neopeptides.I.sorted.tsv", + mhc_two="results/tables/neoprint/{group}.{tumor_alias}.annotated_neopeptides.II.sorted.tsv", + log: + "logs/prepare_neoprint/{group}.{tumor_alias}.log", + params: + purity = lambda wc: samples.loc[(samples["group"] == wc.group) & (samples["alias"] == wc.tumor_alias), "purity"].squeeze(), + neofox_important_cols=neofox_important_cols, + conda: + "../envs/pandas.yaml" + script: + "../scripts/prepare_neoprint.py" + + +rule render_datavzrd_neoprint_config: + input: + template=workflow.source_path( + "../resources/datavzrd/neo_fox-neoantigens-template.datavzrd.yaml" + ), + neopeptides="results/tables/neoprint/{group}.{tumor_alias}.annotated_neopeptides.{mhc}.sorted.tsv", + output: + "resources/datavzrd/{group}.{tumor_alias}.datavzrd_neoprint.{mhc}.yaml", + params: + neofox_important_cols=neofox_important_cols, + log: + 
"logs/datavzrd_render_neoprint/{group}.{tumor_alias}.{mhc}.log", + template_engine: + "yte" + + +rule datavzrd_neoprint: + input: + neopeptides="results/tables/neoprint/{group}.{tumor_alias}.annotated_neopeptides.{mhc}.sorted.tsv", + config="resources/datavzrd/{group}.{tumor_alias}.datavzrd_neoprint.{mhc}.yaml", + output: + report( + directory("results/datavzrd/neoprint/{group}.{tumor_alias}.{mhc}"), + htmlindex="index.html", + caption="../report/neopeptides.rst", + category="Neopeptides", + labels=lambda wc: {"group": wc.group, "sample_type": wc.tumor_alias, "MHC": wc.mhc}, + ), + conda: + "../envs/datavzrd.yaml" + log: + "logs/datavzrd_neoprint/{group}.{tumor_alias}.{mhc}.log", + shell: + "datavzrd {input.config} --output {output} &> {log}" diff --git a/workflow/rules/filtering.smk b/workflow/rules/filtering.smk deleted file mode 100644 index f32c203d..00000000 --- a/workflow/rules/filtering.smk +++ /dev/null @@ -1,106 +0,0 @@ -rule filter_by_annotation: - input: - "{prefix}.bcf", - output: - "{prefix}.{filter}.filtered_ann.bcf", - log: - "logs/filter-calls/annotation/{prefix}.{filter}.log", - params: - filter=lambda w: config["calling"]["filter"][w.filter], - conda: - "../envs/vembrane.yaml" - shell: - 'vembrane filter --output-fmt bcf --output {output} "{params.filter}" {input} &> {log}' - - -rule filter_odds: - input: - get_annotated_bcf, - output: - "results/calls/{pair}.{event}.{scatteritem}.filtered_odds.bcf", - params: - events=lambda wc: config["calling"]["fdr-control"]["events"][wc.event][ - "varlociraptor" - ], - log: - "logs/filter-calls/posterior_odds/{pair}.{scatteritem}.{event}.log", - conda: - "../envs/varlociraptor.yaml" - shell: - "varlociraptor filter-calls posterior-odds --events {params.events} --odds barely < {input} > {output} 2> {log}" - - -rule gather_calls: - input: - calls=gather.calling( - "results/calls/{{pair}}.{{event}}.{scatteritem}.filtered_odds.bcf" - ), - idx=gather.calling( - "results/calls/{{pair}}.{{event}}.{scatteritem}.filtered_odds.bcf.csi" - ), - output: - "results/calls/{pair}.{event}.filtered_odds.bcf", - log: - "logs/gather-calls/{pair}.{event}.log", - params: - "-a -Ob", - wrapper: - "0.67.0/bio/bcftools/concat" - - -rule control_fdr: - input: - "results/calls/{pair}.{event}.filtered_odds.bcf", - output: - "results/calls/{pair}.{vartype}.{event}.fdr-controlled.bcf", - log: - "logs/control-fdr/{pair}.{vartype}.{event}.log", - params: - query=get_fdr_control_params, - conda: - "../envs/varlociraptor.yaml" - shell: - "varlociraptor filter-calls control-fdr {input} --var {wildcards.vartype} " - "--events {params.query[events]} --fdr {params.query[threshold]} > {output} 2> {log}" - - -rule merge_calls: - input: - calls=get_merge_input(".bcf"), - idx=get_merge_input(".bcf.csi"), - output: - "results/merged-calls/{pair}.{event}.fdr-controlled.bcf", - log: - "logs/merge-calls/{pair}.{event}.log", - params: - "-a -Ob", - wrapper: - "0.59.2/bio/bcftools/concat" - - -rule change_samplenames: - input: - call="results/merged-calls/{pair}.{event}.fdr-controlled.bcf", - output: - temp("results/merged-calls/{pair}.{event}.renaming.txt"), - log: - "logs/change-samplenames/{pair}.{event}.log", - params: - prefix=lambda w, input: os.path.basename(input["call"]).split(".")[0], - shell: - "echo -e 'normal {params.prefix}_N\ntumor {params.prefix}_T' > {output}" - - -rule reheader_varlociraptor: - input: - vcf="results/merged-calls/{pair}.{event}.fdr-controlled.bcf", - samples="results/merged-calls/{pair}.{event}.renaming.txt", - output: - 
"results/merged-calls/{pair}.{event}.reheader.bcf", - log: - "logs/reheader-calls/{pair}.{event}.log", - params: - extra="", - view_extra="-O b", - wrapper: - "0.60.0/bio/bcftools/reheader" diff --git a/workflow/rules/hla_typing.smk b/workflow/rules/hla_typing.smk new file mode 100644 index 00000000..98034abf --- /dev/null +++ b/workflow/rules/hla_typing.smk @@ -0,0 +1,69 @@ +rule hla_la: + input: + bam=get_bam_from_group_and_alias(), + bai=get_bam_from_group_and_alias(ext=".bai"), + index="resources/graphs/PRG_MHC_GRCh38_withIMGT/serializedGRAPH", + ext_idx="resources/graphs/PRG_MHC_GRCh38_withIMGT/extendedReferenceGenome/extendedReferenceGenome.pac", + output: + "results/hla_la/output/{group}_{alias}/hla/R1_bestguess_G.txt", + threads: 7 + log: + "logs/hla_la/{group}_{alias}.log", + params: + graph=lambda w, input: os.path.basename(os.path.dirname(input.index)), + graphdir=lambda w, input: os.path.dirname(os.path.dirname(input.index)), + workdir=lambda w, output: os.path.dirname( + os.path.dirname(os.path.dirname(output[0])) + ), + conda: + "../envs/hla_la.yaml" + shell: + "HLA-LA.pl --bam {input.bam} --sampleID {wildcards.group}_{wildcards.alias} --graph {params.graph} --customGraphDir {params.graphdir} --workingDir {params.workdir} --maxThreads {threads} > {log} 2>&1" + + +rule net_mhc_pan_alleles: + output: + mhc_one_alleles="resources/hla_alleles/available_alleles.net_mhc_pan.txt", + conda: + "../envs/tcsh.yaml" + log: + "logs/net_mhc_pan/available_alleles.net_mhc_pan.log", + params: + net_mhc=config["params"]["net_mhc_pan"]["location"], + shell: + "{params.net_mhc}/netMHCpan -listMHC > {output.mhc_one_alleles} 2> {log}" + + +rule net_mhc_two_pan_alleles: + output: + mhc_two_alleles="resources/hla_alleles/available_alleles.net_mhc_two_pan.txt", + conda: + "../envs/tcsh.yaml" + log: + "logs/net_mhc_pan/available_alleles.net_mhc_two_pan.log", + params: + net_mhc=config["params"]["net_mhc_two_pan"]["location"], + shell: + "{params.net_mhc}/netMHCIIpan -list > {output.mhc_two_alleles} 2> {log}" + + +rule parse_and_filter_hla_alleles_for_netmhc: + input: + hla_la_bestguess="results/hla_la/output/{group}_{alias}/hla/R1_bestguess_G.txt", + mhc_one_alleles="resources/hla_alleles/available_alleles.net_mhc_pan.txt", + mhc_two_alleles="resources/hla_alleles/available_alleles.net_mhc_two_pan.txt", + output: + hlaI=report( + "results/hla_la/{group}.{alias}.hlaI.tsv", + caption="../report/hla_alleles.rst", + category="HLA alleles", + ), + hlaII=report( + "results/hla_la/{group}.{alias}.hlaII.tsv", + caption="../report/hla_alleles.rst", + category="HLA alleles", + ), + log: + "logs/parse_hla_la/{group}.{alias}.log", + script: + "../scripts/parse_and_filter_hla_alleles_for_netmhc.py" diff --git a/workflow/rules/mapping.smk b/workflow/rules/mapping.smk deleted file mode 100644 index 8cfcb51c..00000000 --- a/workflow/rules/mapping.smk +++ /dev/null @@ -1,70 +0,0 @@ -rule map_reads: - input: - reads=get_map_reads_input, - idx=rules.bwa_index.output, - output: - temp("results/mapped/{sample}.sorted.bam"), - log: - "logs/bwa_mem/{sample}.log", - params: - index=lambda w, input: os.path.splitext(input.idx[0])[0], - extra=get_read_group, - sort="samtools", - sort_order="coordinate", - threads: 8 - wrapper: - "0.56.0/bio/bwa/mem" - - -rule mark_duplicates: - input: - "results/mapped/{sample}.sorted.bam", - output: - bam=temp("results/dedup/{sample}.sorted.bam"), - metrics="results/qc/dedup/{sample}.metrics.txt", - log: - "logs/picard/dedup/{sample}.log", - params: - 
config["params"]["picard"]["MarkDuplicates"], - wrapper: - "0.39.0/bio/picard/markduplicates" - - -rule recalibrate_base_qualities: - input: - bam=get_recalibrate_quality_input, - bai=lambda w: get_recalibrate_quality_input(w, bai=True), - ref="resources/genome.fasta", - ref_dict="resources/genome.dict", - ref_fai="resources/genome.fasta.fai", - known="resources/variation.noiupac.vcf.gz", - tbi="resources/variation.noiupac.vcf.gz.tbi", - output: - recal_table=temp("results/recal/{sample}.grp"), - params: - extra=config["params"]["gatk"]["BaseRecalibrator"], - java_opts="", - log: - "logs/gatk/baserecalibrator/{sample}.log", - threads: 8 - wrapper: - "0.62.0/bio/gatk/baserecalibratorspark" - - -rule apply_bqsr: - input: - bam=get_recalibrate_quality_input, - bai=lambda w: get_recalibrate_quality_input(w, bai=True), - ref="resources/genome.fasta", - ref_dict="resources/genome.dict", - ref_fai="resources/genome.fasta.fai", - recal_table="results/recal/{sample}.grp", - output: - bam=protected("results/recal/{sample}.sorted.bam"), - log: - "logs/gatk/gatk_applybqsr/{sample}.log", - params: - extra=config["params"]["gatk"]["applyBQSR"], # optional - java_opts="", # optional - wrapper: - "0.62.0/bio/gatk/applybqsr" diff --git a/workflow/rules/mhc_binding.smk b/workflow/rules/mhc_binding.smk new file mode 100644 index 00000000..535015fc --- /dev/null +++ b/workflow/rules/mhc_binding.smk @@ -0,0 +1,109 @@ +rule net_mhc_pan: + input: + peptides=expand( + "results/microphaser/fasta/filtered/contigs/{{group}}.{{tumor_alias}}.merged_tumor_normal.pep_len_{peptide_length}.{{contig}}.{{peptide_type}}.fa", + peptide_length=config["params"]["net_mhc_pan"]["peptide_len"], + ), + alleles=get_alleles_MHCI, + output: + "results/net_mhc_pan/contigs/{group}.{tumor_alias}.merged_tumor_normal.{contig}.{peptide_type}.tsv", + log: + "logs/net_mhc_pan/{group}.{tumor_alias}.merged_tumor_normal.{contig}.{peptide_type}.log", + conda: + "../envs/tcsh.yaml" + params: + extra=config["params"]["net_mhc_pan"]["extra"], + netMHC=config["params"]["net_mhc_pan"]["location"], + length=config["params"]["net_mhc_pan"]["peptide_len"], + alleles=lambda wc, input: ",".join( + pd.read_csv(input.alleles[0], header=None)[0] + ), + shell: + "( " + "if [ -s {input.peptides} ]; " + "then " + " {params.netMHC}/netMHCpan {params.extra} -BA -s -l {params.length} -xls -xlsfile {output} -a {params.alleles} -f {input.peptides} > {log}; " + "else " + " touch {output}; " + "fi " + " ) 2> {log}" + + +rule net_mhc_two_pan: + input: + peptides=expand( + "results/microphaser/fasta/filtered/contigs/{{group}}.{{tumor_alias}}.merged_tumor_normal.pep_len_{peptide_length}.{{contig}}.{{peptide_type}}.fa", + peptide_length=config["params"]["net_mhc_two_pan"]["peptide_len"], + ), + alleles=get_alleles_MHCII, + output: + "results/net_mhc_two_pan/contigs/{group}.{tumor_alias}.merged_tumor_normal.{contig}.{peptide_type}.tsv", + log: + "logs/net_mhc_two_pan/{group}.{tumor_alias}.merged_tumor_normal.{contig}.{peptide_type}.log", + conda: + "../envs/tcsh.yaml" + params: + extra=config["params"]["net_mhc_two_pan"]["extra"], + netMHC=config["params"]["net_mhc_two_pan"]["location"], + length=config["params"]["net_mhc_two_pan"]["peptide_len"], + alleles=lambda wc, input: ",".join( + pd.read_csv(input.alleles[0], header=None)[0] + ), + shell: + "( " + "if [ -s {input.peptides} ]; " + "then " + " {params.netMHC}/netMHCIIpan {params.extra} -BA -s -length {params.length} -xls -xlsfile {output} -a {params.alleles} -f {input.peptides} > {log}; " + "else " + " touch {output}; 
" + "fi " + " ) 2> {log}" + + +rule tidy_mhc_out: + input: + expand( + "results/{{mhc}}/contigs/{{group}}.{{tumor_alias}}.merged_tumor_normal.{contig}.{{peptide_type}}.tsv", + contig=contigs, + ), + output: + joined_mhc_out="results/{mhc}/{group}.{tumor_alias}.merged_tumor_normal.mhc.{peptide_type}.tsv", + log: + "logs/parse_mhc_out/{mhc}/{group}.{tumor_alias}.merged_tumor_normal.{peptide_type}.log", + script: + "../scripts/tidy_mhc_output.py" + + +rule merge_neoantigen_info: + input: + info="results/microphaser/info/filtered/{group}.{tumor_alias}.merged_tumor_normal.{mhc}.tsv", + neo="results/{mhc}/{group}.{tumor_alias}.merged_tumor_normal.mhc.neo.tsv", + normal="results/{mhc}/{group}.{tumor_alias}.merged_tumor_normal.mhc.normal.tsv", + output: + report( + "results/neoantigens/{group}.{tumor_alias}.merged_tumor_normal.{mhc}.DNA.tsv", + caption="../report/neoantigens.dna.rst", + category="Neopeptides", + ), + log: + "logs/mhc_csv_table/{group}.{tumor_alias}.merged_tumor_normal.{mhc}.log", + script: + "../scripts/merge_neoantigen_info.py" + + +rule add_rna_info: + input: + counts="results/kallisto/{group}.{tumor_alias}", + table="results/neoantigens/{group}.{tumor_alias}.merged_tumor_normal.{mhc}.DNA.tsv", + output: + report( + "results/neoantigens/{group}.{tumor_alias}.merged_tumor_normal.{mhc}.RNA.tsv", + caption="../report/neoantigens.rna.rst", + category="Neopeptides", + ), + params: + abundance=lambda wc, input: "{}/abundance.tsv".format(input.counts), + log: + "logs/add_rna/{group}.{tumor_alias}.merged_tumor_normal.{mhc}.log", + script: + "../scripts/add_rna_info.py" diff --git a/workflow/rules/microphaser.smk b/workflow/rules/microphaser.smk index bd9e6510..a52d72ef 100644 --- a/workflow/rules/microphaser.smk +++ b/workflow/rules/microphaser.smk @@ -1,117 +1,175 @@ -rule microphaser_somatic: +rule norm_bcf: input: - vcf="results/strelka/merged/{sample}/all_variants.norm.annotated.bcf", - bam="results/recal/{sample}.sorted.bam", - bai="results/recal/{sample}.sorted.bam.bai", + "results/final-calls/{group}.{set}.bcf", + ref="resources/genome.fasta", + output: + "results/final-calls/{group}.{set}.norm.bcf", + log: + "logs/bcftools/norm/{group}.{set}.log", + params: + extra="-m-" + wrapper: + "v1.12.0/bio/bcftools/norm" + + +rule add_somatic_flag: + input: + bcf="results/final-calls/{group}.{set}.norm.bcf", + header_line="resources/somatic_flag_header_line.txt", + flag_bed="resources/genome.somatic_flag.bed.gz", + flag_bed_idx="resources/genome.somatic_flag.bed.gz.tbi", + output: + "results/final-calls/{group}.{set}.somatic_flag.norm.bcf", + log: + "logs/bcftools_annotate/{group}.{set}.somatic_flag.norm.log", + conda: + "../envs/bcftools.yaml" + shell: + "( bcftools annotate " + " --annotations {input.flag_bed} " + " --mark-sites +SOMATIC " + " --columns CHROM,FROM,TO " + " --header-lines {input.header_line} " + " -O b -o {output} " + " {input.bcf} " + ") 2> {log}" + + +rule merge_tumor_normal: + input: + calls=expand( + "results/final-calls/{{group}}.{sets}.norm.bcf", + sets=[ + config["params"]["microphaser"]["events"]["normal"], + config["params"]["microphaser"]["events"]["tumor"] + ".somatic_flag", + ], + ), + index=expand( + "results/final-calls/{{group}}.{sets}.norm.bcf.csi", + sets=[ + config["params"]["microphaser"]["events"]["normal"], + config["params"]["microphaser"]["events"]["tumor"] + ".somatic_flag", + ], + ), + output: + "results/final-calls/{group}.merged_tumor_normal.norm.bcf", + log: + "logs/bcftools/concat-tumor-normal/{group}.merged_tumor_normal.log", + params: + 
extra="-a", + wrapper: + "v1.14.1/bio/bcftools/concat" + + +rule microphaser_tumor: + input: + bcf="results/final-calls/{group}.merged_tumor_normal.norm.annotated.bcf", + bam=get_bam_from_group_and_alias(), + bai=get_bam_from_group_and_alias(ext=".bai"), track="resources/annotation/{contig}.gtf", ref="resources/genome.fasta", output: - mt_fasta="results/microphaser/fasta/{sample}/{sample}.{contig}.mt.fa", - wt_fasta="results/microphaser/fasta/{sample}/{sample}.{contig}.wt.fa", - tsv="results/microphaser/info/{sample}/{sample}.{contig}.tsv", + mt_fasta="results/microphaser/fasta/contigs/{group}.{tumor_alias}.merged_tumor_normal.pep_len_{peptide_length}.{contig}.neo.fa", + wt_fasta="results/microphaser/fasta/contigs/{group}.{tumor_alias}.merged_tumor_normal.pep_len_{peptide_length}.{contig}.normal.fa", + tsv="results/microphaser/info/contigs/{group}.{tumor_alias}.merged_tumor_normal.pep_len_{peptide_length}.{contig}.tsv", log: - "logs/microphaser/somatic/{sample}-{contig}.log", + "logs/microphaser_tumor/{group}/{tumor_alias}.merged_tumor_normal.pep_len_{peptide_length}.{contig}.log", conda: "../envs/microphaser.yaml" params: - window_length=config["params"]["microphaser"]["window_len"], + window_length=lambda wc: int(wc.peptide_length) * 3, shell: - "microphaser somatic {input.bam} --variants {input.vcf} --ref {input.ref} --tsv {output.tsv} -n {output.wt_fasta} -w {params.window_length} " + "microphaser somatic {input.bam} --variants {input.bcf} --ref {input.ref} --tsv {output.tsv} -n {output.wt_fasta} -w {params.window_length} " "< {input.track} > {output.mt_fasta} 2> {log}" -rule microphaser_germline: +rule microphaser_normal: input: - vcf="results/strelka/germline/{normal}/results/variants/variants.reheader.norm.bcf", - bam="results/recal/{normal}.sorted.bam", - bai="results/recal/{normal}.sorted.bam.bai", + bcf="results/final-calls/{group}.{normal_set}.variants.reheader.norm.bcf", + bam=get_bam_from_group_and_alias(), + bai=get_bam_from_group_and_alias(ext=".bai"), track="resources/annotation/{contig}.gtf", ref="resources/genome.fasta", output: wt_fasta=( - "results/microphaser/fasta/germline/{normal}/{normal}.germline.{contig}.fa" + "results/microphaser/fasta/contigs/{group}.{normal_alias}.{normal_set}.pep_len_{peptide_length}.{contig}.fa" ), wt_tsv=( - "results/microphaser/info/germline/{normal}/{normal}.germline.{contig}.tsv" + "results/microphaser/info/contigs/{group}.{normal_alias}.{normal_set}.pep_len_{peptide_length}.{contig}.tsv" ), log: - "logs/microphaser/germline/{normal}-{contig}.log", + "logs/microphaser_normal/contigs/{group}/{normal_alias}.{normal_set}.pep_len_{peptide_length}.{contig}.log", conda: "../envs/microphaser.yaml" params: - window_length=config["params"]["microphaser"]["window_len"], + window_length=lambda wc: int(wc.peptide_length) * 3, shell: - "microphaser normal {input.bam} --variants {input.vcf} --ref {input.ref} -t {output.wt_tsv} -w {params.window_length} " + "microphaser normal {input.bam} --variants {input.bcf} --ref {input.ref} -t {output.wt_tsv} -w {params.window_length} " "< {input.track} > {output.wt_fasta} 2> {log}" -rule concat_proteome: +rule concat_normal_proteome: input: expand( - "results/microphaser/fasta/germline/{{normal}}/{{normal}}.germline.{contig}.fa", + "results/microphaser/fasta/contigs/{{group}}.normal.{{normal_set}}.pep_len_{{peptide_length}}.{contig}.fa", contig=contigs, ), output: - "results/microphaser/fasta/germline/{normal}/reference_proteome.fa", + 
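
Via the `add_somatic_flag` and `merge_tumor_normal` rules above, every record that originates from the tumor set carries a `SOMATIC` INFO flag in the merged BCF, so downstream steps can separate somatic from germline calls within a single file. A small inspection sketch, assuming `pysam` is available and using an illustrative group name:

```python
import pysam

# Illustrative path following the merge_tumor_normal output pattern.
bcf = pysam.VariantFile("results/final-calls/patientA.merged_tumor_normal.norm.bcf")

somatic = germline = 0
for rec in bcf:
    # A Flag-type INFO field is simply present or absent on each record.
    if "SOMATIC" in rec.info:
        somatic += 1
    else:
        germline += 1
print(f"somatic: {somatic}, germline: {germline}")
```
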
"results/microphaser/fasta/{group}.{normal_set}.normal_proteome.pep_len_{peptide_length}.fa", log: - "logs/microphaser/concat-ref-proteome/{normal}.log", + "logs/microphaser_concat_normal_proteome/{group}.{normal_set}.pep_len_{peptide_length}.log", shell: "cat {input} > {output} 2> {log}" -rule build_germline_proteome: +rule build_normal_proteome_db: input: - "results/microphaser/fasta/germline/{normal}/reference_proteome.fa", + "results/microphaser/fasta/{group}.{normal_set}.normal_proteome.pep_len_{peptide_length}.fa", output: - bin="results/microphaser/fasta/germline/{normal}/{mhc}/reference_proteome.bin", - fasta="results/microphaser/fasta/germline/{normal}/{mhc}/reference_proteome.peptides.fasta", + bin="results/microphaser/bin/{group}.{normal_set}.normal_proteome.pep_len_{peptide_length}.bin", + fasta="results/microphaser/fasta/{group}.{normal_set}.normal_proteome.pep_len_{peptide_length}.peptides.fasta", log: - "logs/microphaser/build-ref-proteome-db/{normal}-{mhc}.log", + "logs/microphaser_build_normal_proteome_db/{group}.{normal_set}.pep_len_{peptide_length}.log", conda: "../envs/microphaser.yaml" - params: - length=lambda wildcards: config["params"]["microphaser"]["peptide_len"][ - wildcards.mhc - ], shell: - "microphaser build_reference -r {input} -o {output.bin} -l {params.length} --peptides {output.fasta} > {log} 2>&1" + "( microphaser build_reference -r {input} -o {output.bin} -l {wildcards.peptide_length} > {output.fasta} ) 2> {log}" rule microphaser_filter: input: - tsv="results/microphaser/info/{sample}/{sample}.{contig}.tsv", - proteome=get_proteome, + tsv="results/microphaser/info/contigs/{group}.{tumor_alias}.merged_tumor_normal.pep_len_{peptide_length}.{contig}.tsv", + proteome=expand( + "results/microphaser/bin/{{group}}.{normal_set}.normal_proteome.pep_len_{{peptide_length}}.bin", + normal_set=config["params"]["microphaser"]["events"]["normal"], + ), output: mt_fasta=( - "results/microphaser/fasta/{sample}/filtered/{mhc}/{sample}.{contig}.mt.fa" + "results/microphaser/fasta/filtered/contigs/{group}.{tumor_alias}.merged_tumor_normal.pep_len_{peptide_length}.{contig}.neo.fa" ), wt_fasta=( - "results/microphaser/fasta/{sample}/filtered/{mhc}/{sample}.{contig}.wt.fa" + "results/microphaser/fasta/filtered/contigs/{group}.{tumor_alias}.merged_tumor_normal.pep_len_{peptide_length}.{contig}.normal.fa" ), - tsv="results/microphaser/info/{sample}/filtered/{mhc}/{sample}.{contig}.tsv", - removed="results/microphaser/info/{sample}/removed/{mhc}/{sample}.{contig}.removed.tsv", + tsv="results/microphaser/info/filtered/contigs/{group}.{tumor_alias}.merged_tumor_normal.pep_len_{peptide_length}.{contig}.tsv", + removed="results/microphaser/info/removed/contigs/{group}.{tumor_alias}.merged_tumor_normal.pep_len_{peptide_length}.{contig}.removed.tsv", log: - "logs/microphaser/filter/{sample}-{mhc}-{contig}.log", + "logs/microphaser_filter/{group}.{tumor_alias}.merged_tumor_normal.pep_len_{peptide_length}.{contig}.log", conda: "../envs/microphaser.yaml" - params: - length=lambda wildcards: config["params"]["microphaser"]["peptide_len"][ - wildcards.mhc - ], shell: - "microphaser filter -r {input.proteome} -t {input.tsv} -o {output.tsv} -n {output.wt_fasta} -s {output.removed} -l {params.length} > {output.mt_fasta} 2>{log}" + "microphaser filter -r {input.proteome} -t {input.tsv} -o {output.tsv} -n {output.wt_fasta} -s {output.removed} -l {wildcards.peptide_length} > {output.mt_fasta} 2>{log}" rule concat_tsvs: input: expand( - 
"results/microphaser/info/{{sample}}/filtered/{{mhc}}/{{sample}}.{contig}.tsv", + "results/microphaser/info/filtered/contigs/{{group}}.{{tumor_alias}}.merged_tumor_normal.pep_len_{{peptide_length}}.{contig}.tsv", contig=contigs, ), output: - "results/microphaser/info/{sample}/filtered/{mhc}/{sample}.tsv", + "results/microphaser/info/filtered/{group}.{tumor_alias}.merged_tumor_normal.pep_len_{peptide_length}.tsv", log: - "logs/concat-tsv/{sample}-{mhc}.log", + "logs/microphaser_concat_tsvs/{group}.{tumor_alias}.merged_tumor_normal.pep_len_{peptide_length}.log", conda: "../envs/xsv.yaml" shell: diff --git a/workflow/rules/oncoprint.smk b/workflow/rules/oncoprint.smk deleted file mode 100644 index aa670044..00000000 --- a/workflow/rules/oncoprint.smk +++ /dev/null @@ -1,28 +0,0 @@ -rule build_oncoprint_table: - input: - bcf=get_oncoprint_batch, - output: - "plots/oncoprint/{batch}.{event}.tsv", - log: - "logs/oncoprint/{batch}.{event}.table.log", - conda: - "../envs/oncoprinttable.yaml" - script: - "../scripts/build_oncoprint_matrix.py" - - -rule plot_oncoprint: - input: - "plots/oncoprint/{batch}.{event}.tsv", - output: - report( - "plots/oncoprint/{batch}.{event}.pdf", - category="Oncoprint", - caption="../report/oncoprint.rst", - ), - log: - "logs/oncoprint/{batch}.{event}.plot.log", - conda: - "../envs/oncoprint.yaml" - script: - "../scripts/oncoprint.R" diff --git a/workflow/rules/phylogeny.smk b/workflow/rules/phylogeny.smk index 872247ac..d7279042 100644 --- a/workflow/rules/phylogeny.smk +++ b/workflow/rules/phylogeny.smk @@ -1,7 +1,7 @@ def get_somatic_calls(wildcards): return expand( - "results/strelka/somatic/{sample}/results/variants/somatic.complete.tumor.bcf", - sample=samples[samples.type == "tumor"]["sample"], + "results/final-calls/somatic/{sample}/results/variants/somatic.complete.tumor.bcf", + sample=samples[samples.alias == "tumor"]["sample_name"], ) @@ -9,20 +9,20 @@ rule merge_snvs: input: calls=get_somatic_calls, output: - "results/strelka/merged_calls.vcf", + "results/final-calls/merged_calls.vcf", log: "results/logs/bcftools/merge.log", params: - "--use-header strelka/sampleheader.txt --force-samples", + "--use-header final-calls/sampleheader.txt --force-samples", wrapper: - "0.36.0/bio/bcftools/merge" + "v1.12.0/bio/bcftools/merge" rule query: input: - "results/strelka/merged_calls.vcf", + "results/final-calls/merged_calls.vcf", output: - "results/strelka/call_matrix.tsv", + "results/final-calls/call_matrix.tsv", log: "results/logs/bcftools/query.log", params: @@ -35,7 +35,7 @@ rule query: rule nj_tree: input: - matrix="results/strelka/call_matrix.tsv", + matrix="results/final-calls/call_matrix.tsv", output: pdf="results/plots/phylogeny_njtree.pdf", log: diff --git a/workflow/rules/ref.smk b/workflow/rules/ref.smk index 86b209c3..80e62380 100644 --- a/workflow/rules/ref.smk +++ b/workflow/rules/ref.smk @@ -10,7 +10,7 @@ rule get_genome: release=config["ref"]["release"], cache: True wrapper: - "0.45.1/bio/reference/ensembl-sequence" + "v1.12.0/bio/reference/ensembl-sequence" rule get_cdna: @@ -25,21 +25,7 @@ rule get_cdna: release=config["ref"]["release"], cache: True wrapper: - "0.45.1/bio/reference/ensembl-sequence" - - -rule kallisto_index: - input: - "resources/genome.cdna.fasta", - output: - "resources/kallisto/transcripts.idx", - params: - extra="", - log: - "logs/kallisto/index.log", - cache: True - wrapper: - "0.60.1/bio/kallisto/index" + "v1.12.0/bio/reference/ensembl-sequence" rule get_annotation: @@ -55,29 +41,24 @@ rule get_annotation: log: 
"logs/get-annotation.log", wrapper: - "0.45.1/bio/reference/ensembl-annotation" + "v1.12.0/bio/reference/ensembl-annotation" -rule STAR_index: +# TODO: remove this rule, once microphaser is fixed to make gene_name optional +rule remove_records_with_gene_name_missing: input: - fasta="resources/genome.fasta", - gtf="resources/genome.gtf", + "resources/genome.gtf", output: - directory("resources/STAR_index"), - params: - sjdb_overhang="100", - extra="", + "resources/genome.records_with_gene_name.gtf", log: - "logs/star/index.log", - threads: 32 - cache: True - wrapper: - "0.42.0/bio/star/index" + "logs/remove_records_with_gene_name_missing.log", + shell: + '( grep "gene_name" {input} > {output} ) 2> {log}' rule split_annotation: input: - "resources/genome.gtf", + "resources/genome.records_with_gene_name.gtf", output: "resources/annotation/{contig}.gtf", log: @@ -95,142 +76,126 @@ rule genome_faidx: "logs/genome-faidx.log", cache: True wrapper: - "0.45.1/bio/samtools/faidx" + "v1.12.0/bio/samtools/faidx" -rule genome_dict: - input: - "resources/genome.fasta", +rule create_somatic_flag_header_line: output: - "resources/genome.dict", + "resources/somatic_flag_header_line.txt", log: - "logs/picard/create-dict.log", - cache: True - wrapper: - "0.45.1/bio/picard/createsequencedictionary" + "logs/create_somatic_flag_header_line.log", + shell: + """ + ( echo '##INFO=' > {output} ) 2> {log} + """ -rule get_callregions: +rule create_genome_somatic_flag_bed: input: "resources/genome.fasta.fai", output: - "resources/genome.callregions.bed.gz", + "resources/genome.somatic_flag.bed", log: - "logs/get-callregions.log", - params: - n_contigs=config["ref"]["n_chromosomes"], + "logs/create_genome_somatic_flag_bed.log", conda: - "../envs/index.yaml" + "../envs/gawk.yaml" + cache: True shell: - "paste <(cut -f1 {input}) <(yes 0 | head -n {params.n_contigs}) <(cut -f2 {input})" - " | head -n {params.n_contigs} | bgzip -c > {output} && tabix -p bed {output}" + """ + ( awk 'BEGIN {{ OFS="\\t" }} {{ print $1,0,$2 }}' {input} > {output} ) 2> {log} + """ -rule get_known_variants: +rule bgzip_genome_somatic_flag_bed: input: - # use fai to annotate contig lengths for GATK BQSR - fai="resources/genome.fasta.fai", + "resources/genome.somatic_flag.bed", output: - vcf="resources/variation.vcf.gz", + "resources/genome.somatic_flag.bed.gz", log: - "logs/get-known-variants.log", - params: - species=config["ref"]["species"], - release=config["ref"]["release"], - build=config["ref"]["build"], - type="all", - cache: True + "logs/bgzip/genome.somatic_flag.log", wrapper: - "0.59.2/bio/reference/ensembl-variation" + "v1.12.0/bio/bgzip" -rule remove_iupac_codes: +rule tabix_genome_somatic_flag_bed: input: - "resources/variation.vcf.gz", + "resources/genome.somatic_flag.bed.gz", output: - "resources/variation.noiupac.vcf.gz", - log: - "logs/fix-iupac-alleles.log", + "resources/genome.somatic_flag.bed.gz.tbi", conda: - "../envs/rbt.yaml" - cache: True + "../envs/htslib.yaml" + log: + "logs/tabix/genome.somatic_flag.log", shell: - "rbt vcf-fix-iupac-alleles < {input} | bcftools view -Oz > {output}" + "( tabix -p bed {input} ) 2> {log}" -rule bwa_index: +rule genome_dict: input: "resources/genome.fasta", output: - multiext("resources/genome.fasta", ".amb", ".ann", ".bwt", ".pac", ".sa"), + "resources/genome.dict", log: - "logs/bwa_index.log", + "logs/picard/create-dict.log", cache: True wrapper: - "0.45.1/bio/bwa/index" + "v1.12.0/bio/picard/createsequencedictionary" -rule download_HLALA_graph: +rule download_hla_la_graph: output: 
directory("resources/graphs/PRG_MHC_GRCh38_withIMGT/PRG"), - directory("resources/graphs/PRG_MHC_GRCh38_withIMGT/extendedReferenceGenome"), directory("resources/graphs/PRG_MHC_GRCh38_withIMGT/knownReferences"), directory("resources/graphs/PRG_MHC_GRCh38_withIMGT/mapping"), directory("resources/graphs/PRG_MHC_GRCh38_withIMGT/mapping_PRGonly"), directory("resources/graphs/PRG_MHC_GRCh38_withIMGT/referenceGenomeSimulations"), directory("resources/graphs/PRG_MHC_GRCh38_withIMGT/sampledReferenceGenomes"), directory("resources/graphs/PRG_MHC_GRCh38_withIMGT/translation"), + "resources/graphs/PRG_MHC_GRCh38_withIMGT/extendedReferenceGenome/extendedReferenceGenome.fa", "resources/graphs/PRG_MHC_GRCh38_withIMGT/sequences.txt", + params: + graphs_dir=lambda w, output: output[0].replace( + "graphs/PRG_MHC_GRCh38_withIMGT/PRG", "graphs" + ), log: - "logs/download-HLA-LA-graph.log", - cache: True + "logs/download_hla_la_graph.log", shell: - "cd resources/graphs && wget http://www.well.ox.ac.uk/downloads/PRG_MHC_GRCh38_withIMGT.tar.gz " - "&& tar -xvzf PRG_MHC_GRCh38_withIMGT.tar.gz" + "( cd {params.graphs_dir} && wget http://www.well.ox.ac.uk/downloads/PRG_MHC_GRCh38_withIMGT.tar.gz " + "&& tar -xvzf PRG_MHC_GRCh38_withIMGT.tar.gz && rm PRG_MHC_GRCh38_withIMGT.tar.gz ) 2> {log}" -rule index_HLALA: +rule index_hla_la: input: "resources/graphs/PRG_MHC_GRCh38_withIMGT/sequences.txt", output: "resources/graphs/PRG_MHC_GRCh38_withIMGT/serializedGRAPH", - "resources/graphs/PRG_MHC_GRCh38_withIMGT/serializedGRAPH_preGapPathindex", - cache: True + "resources/graphs/PRG_MHC_GRCh38_withIMGT/serializedGRAPH_preGapPathIndex", conda: "../envs/hla_la.yaml" params: path=lambda wc, input: os.path.dirname(os.path.dirname(input[0])), graph=lambda wc, input: os.path.basename(os.path.dirname(input[0])), log: - "logs/index-HLA-LA-graph.log", + "logs/index_hla_la_graph.log", shell: "HLA-LA.pl --prepareGraph 1 --customGraphDir {params.path} --graph {params.graph} > {log} 2>&1" -rule get_vep_cache: - output: - directory("resources/vep/cache"), - params: - species=config["ref"]["species"], - build=config["ref"]["build"], - release=config["ref"]["release"], - log: - "logs/vep/cache.log", - cache: True - wrapper: - "0.59.2/bio/vep/cache" - - -rule get_vep_plugins: +rule index_hla_la_extended_ref: + input: + "resources/graphs/PRG_MHC_GRCh38_withIMGT/extendedReferenceGenome/extendedReferenceGenome.fa", output: - directory("resources/vep/plugins"), - params: - release=config["ref"]["release"], + "resources/graphs/PRG_MHC_GRCh38_withIMGT/extendedReferenceGenome/extendedReferenceGenome.amb", + "resources/graphs/PRG_MHC_GRCh38_withIMGT/extendedReferenceGenome/extendedReferenceGenome.ann", + "resources/graphs/PRG_MHC_GRCh38_withIMGT/extendedReferenceGenome/extendedReferenceGenome.bwt", + "resources/graphs/PRG_MHC_GRCh38_withIMGT/extendedReferenceGenome/extendedReferenceGenome.pac", + "resources/graphs/PRG_MHC_GRCh38_withIMGT/extendedReferenceGenome/extendedReferenceGenome.sa", + conda: + "../envs/hla_la.yaml" log: - "logs/vep/plugins.log", - cache: True - wrapper: - "0.59.2/bio/vep/plugins" + "logs/index_hla_la_extended_ref.log", + shell: + "bwa index {input} > {log} 2>&1" rule make_sampleheader: diff --git a/workflow/rules/tmb.smk b/workflow/rules/tmb.smk deleted file mode 100644 index 2ef064a1..00000000 --- a/workflow/rules/tmb.smk +++ /dev/null @@ -1,20 +0,0 @@ -if config["tmb"]["activate"]: - - rule estimate_tmb: - input: - "results/merged-calls/{pair}.somatic.fdr-controlled.bcf", - output: - 
"results/plots/tmb/{pair}.{plotmode}.vl.json", - conda: - "../envs/varlociraptor.yaml" - log: - "logs/tmb/{pair}-{plotmode}.log", - params: - **config["tmb"], - shell: - "varlociraptor estimate tmb " - " --plot-mode {wildcards.plotmode} " - "--coding-genome-size {params.coding_genome_size} " - "--somatic-tumor-events {params.somatic_events} " - "--tumor-sample {params.tumor_sample} " - "< {input} > {output} 2> {log}" diff --git a/workflow/rules/trim.smk b/workflow/rules/trim.smk deleted file mode 100644 index f26ed6f3..00000000 --- a/workflow/rules/trim.smk +++ /dev/null @@ -1,71 +0,0 @@ -rule get_sra: - output: - "sra/{accession}_1.fastq", - "sra/{accession}_2.fastq", - log: - "logs/get-sra/{accession}.log", - wrapper: - "0.56.0/bio/sra-tools/fasterq-dump" - - -rule cutadapt_pipe: - input: - get_cutadapt_pipe_input, - output: - pipe("pipe/cutadapt/{sample}/{seqtype}/{unit}.{fq}.{ext}"), - log: - "logs/pipe-fastqs/cutadapt/{sample}-{seqtype}-{unit}.{fq}.{ext}.log", - wildcard_constraints: - ext=r"fastq|fastq\.gz", - threads: 0 # this does not need CPU - shell: - "cat {input} > {output} 2> {log}" - - -rule cutadapt_pe: - input: - get_cutadapt_input, - output: - fastq1="results/trimmed/{sample}/{seqtype}/{unit}_R1.fastq.gz", - fastq2="results/trimmed/{sample}/{seqtype}/{unit}_R2.fastq.gz", - qc="results/trimmed/{sample}/{seqtype}/{unit}.paired.qc.txt", - log: - "logs/cutadapt/{sample}-{seqtype}-{unit}.log", - params: - others=config["params"]["cutadapt"], - adapters=lambda w: str( - units.loc[w.sample].loc[w.seqtype].loc[w.unit, "adapters"] - ), - threads: 8 - wrapper: - "0.59.2/bio/cutadapt/pe" - - -rule cutadapt_se: - input: - get_cutadapt_input, - output: - fastq="results/trimmed/{sample}/{seqtype}/{unit}.single.fastq.gz", - qc="results/trimmed/{sample}/{seqtype}/{unit}.single.qc.txt", - log: - "logs/cutadapt/{sample}-{seqtype}-{unit}.log", - params: - others=config["params"]["cutadapt"], - adapters_r1=lambda w: str(units.loc[w.sample].loc[w.unit, "adapters"]), - threads: 8 - wrapper: - "0.59.2/bio/cutadapt/se" - - -rule merge_fastqs: - input: - get_fastqs, - output: - "results/merged/{seqtype}/{sample}_{read}.fastq.gz", - log: - "logs/merge-fastqs/{seqtype}_{sample}_{read}.log", - wildcard_constraints: - read="single|R1|R2", - seqtype="DNA|RNA", - shell: - "cat {input} > {output} 2> {log}" diff --git a/workflow/rules/utils.smk b/workflow/rules/utils.smk index e7586ef0..c8b11474 100644 --- a/workflow/rules/utils.smk +++ b/workflow/rules/utils.smk @@ -13,38 +13,13 @@ rule bam_index: input: "{prefix}.bam", output: - "{prefix}.bam.bai", + "{prefix}.bai", log: "logs/bam-index/{prefix}.log", wrapper: "0.59.2/bio/samtools/index" -rule tabix_known_variants: - input: - "resources/{prefix}.{format}.gz", - output: - "resources/{prefix}.{format}.gz.tbi", - log: - "logs/tabix/{prefix}.{format}.log", - params: - get_tabix_params, - cache: True - wrapper: - "0.59.2/bio/tabix" - - -rule gzip_fastq: - input: - "{prefix}.fastq", - output: - "{prefix}.fastq.gz", - log: - "logs/gz-fastq/{prefix}.log", - shell: - "gzip < {input} > {output}" - - rule tsv_to_excel: input: tsv="results/{x}.tsv", diff --git a/workflow/rules/varlociraptor.smk b/workflow/rules/varlociraptor.smk deleted file mode 100644 index 4cb6013a..00000000 --- a/workflow/rules/varlociraptor.smk +++ /dev/null @@ -1,89 +0,0 @@ -rule render_scenario: - input: - config["calling"]["scenario"], - output: - report( - "results/scenarios/{pair}.yaml", - caption="../report/scenario.rst", - category="Variant calling scenarios", - ), - params: - 
samples=samples, - log: - "logs/scenarious/{pair}.log", - conda: - "../envs/render_scenario.yaml" - script: - "../scripts/render-scenario.py" - - -rule varlociraptor_preprocess: - input: - ref="resources/genome.fasta", - ref_idx="resources/genome.fasta.fai", - candidates="results/candidate-calls/{pair}.{caller}.{scatteritem}.bcf", - bam="results/recal/{sample}.sorted.bam", - bai="results/recal/{sample}.sorted.bai", - output: - "results/observations/{pair}/{sample}.{caller}.{scatteritem}.bcf", - params: - omit_isize="", - log: - "logs/varlociraptor/preprocess/{pair}/{sample}.{caller}.{scatteritem}.log", - conda: - "../envs/varlociraptor.yaml" - shell: - "varlociraptor preprocess variants {params.omit_isize} --candidates {input.candidates} " - "{input.ref} --bam {input.bam} --output {output} 2> {log}" - - -rule varlociraptor_call: - input: - obs=get_pair_observations, - scenario="results/scenarios/{pair}.yaml", - output: - temp("results/calls/{pair}.{caller}.{scatteritem}.bcf"), - log: - "logs/varlociraptor/call/{pair}.{caller}.{scatteritem}.log", - params: - obs=lambda w, input: [ - "{}={}".format(s, f) for s, f in zip(get_pair_aliases(w), input.obs) - ], - conda: - "../envs/varlociraptor.yaml" - benchmark: - "benchmarks/varlociraptor/call/{pair}.{caller}.{scatteritem}.tsv" - shell: - "varlociraptor " - "call variants generic --obs {params.obs} " - "--scenario {input.scenario} > {output} 2> {log}" - - -rule sort_calls: - input: - "results/calls/{pair}.{caller}.{scatteritem}.bcf", - output: - temp("results/calls/{pair}.{caller}.{scatteritem}.sorted.bcf"), - log: - "logs/bcf-sort/{pair}.{caller}.{scatteritem}.log", - conda: - "../envs/bcftools.yaml" - resources: - mem_mb=8000, - shell: - "bcftools sort --max-mem {resources.mem_mb}M --temp-dir `mktemp -d` " - "-Ob {input} > {output} 2> {log}" - - -rule bcftools_concat: - input: - calls=get_scattered_calls(), - indexes=get_scattered_calls(ext=".bcf.csi"), - output: - "results/calls/{pair}.{scatteritem}.bcf", - log: - "logs/concat-calls/{pair}.{scatteritem}.log", - params: - "-a -Ob", # TODO Check this - wrapper: - "0.59.2/bio/bcftools/concat" diff --git a/workflow/rules/vega.smk b/workflow/rules/vega.smk deleted file mode 100644 index 4199e8fc..00000000 --- a/workflow/rules/vega.smk +++ /dev/null @@ -1,15 +0,0 @@ -rule vg2svg: - input: - "{prefix}.vl.json", - output: - report( - "{prefix}.svg", - caption="../report/tmb.rst", - category="Tumor Mutational Burden", - ), - log: - "logs/vega/{prefix}.log", - conda: - "../envs/vega.yaml" - shell: - "vl2svg {input} {output} > {log} 2>&1" diff --git a/workflow/schemas/config.schema.yaml b/workflow/schemas/config.schema.yaml index 6fafb49f..05e55b83 100644 --- a/workflow/schemas/config.schema.yaml +++ b/workflow/schemas/config.schema.yaml @@ -4,21 +4,6 @@ description: snakemake configuration file type: object -definitions: - filterentry: - type: object - additionalProperties: - type: string - evententry: - type: object - properties: - varlociraptor: - type: array - items: - type: string - filter: - type: string - properties: samples: type: string @@ -42,198 +27,61 @@ properties: - build - n_chromosomes - - tmb: - type: object - properties: - activate: - type: boolean - coding_genome_size: - # TODO allow integer here! 
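
The config schema below is reduced to `samples`/`units`/`ref`, `neoantigen_prediction`, and `params` (the microphaser events plus the two netMHC tools). The workflow validates the config with `snakemake.utils.validate` (see `common.smk`); the same check can be reproduced standalone with `jsonschema`. A minimal sketch against an abridged stand-in schema, not the full `config.schema.yaml`:

```python
from jsonschema import validate

# Abridged stand-in for workflow/schemas/config.schema.yaml.
schema = {
    "type": "object",
    "properties": {
        "neoantigen_prediction": {
            "type": "object",
            "properties": {"activate": {"type": "boolean"}},
        },
        "params": {
            "type": "object",
            "properties": {
                "net_mhc_pan": {
                    "type": "object",
                    "properties": {
                        "activate": {"type": "boolean"},
                        "peptide_len": {"type": "integer"},
                        "location": {"type": "string"},
                    },
                    "required": ["activate"],
                },
            },
        },
    },
    "required": ["neoantigen_prediction", "params"],
}

config = {
    "neoantigen_prediction": {"activate": True},
    "params": {
        "net_mhc_pan": {"activate": True, "peptide_len": 9, "location": "../netMHCpan-4.1"},
    },
}

validate(instance=config, schema=schema)  # raises ValidationError on a mismatch
print("config fragment is valid")
```
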
- type: string - tumor_sample: - type: string - somatic_events: - type: array - items: - type: string - required: - - activate - - coding_genome_size - - tumor_sample - - somatic_events - - - calling: - type: object - properties: - freebayes: - type: object - properties: - activate: - type: boolean - scenario: - type: string - fdr-control: - type: object - properties: - threshold: - type: number - minimum: 0.0 - maximum: 1.0 - events: - $ref: "#/definitions/evententry" - description: "a map of pairs" - required: - - threshold - - events - required: - - freebayes - - scenario - - fdr-control - - remove_duplicates: - type: object - properties: - activate: - type: boolean - - trimming: - type: object - properties: - activate: - type: boolean - - epitope_prediction: + neoantigen_prediction: type: object properties: activate: type: boolean - affinity: + params: type: object properties: - netMHCpan: + microphaser: type: object properties: - activate: - type: boolean - params: - type: string - netMHCIIpan: + events: + type: object + properties: + normal: + type: string + tumor: + type: string + required: + - normal + - tumor + net_mhc_pan: type: object properties: activate: type: boolean - params: + peptide_len: + type: integer + extra: type: string - - HLAtyping: - type: object - properties: - HLA_LA: - type: object - properties: - activate: - type: boolean - - annotations: - type: object - properties: - vep: - properties: - params: + location: type: string - plugins: - type: array - items: - type: string required: - - params - - plugins - required: - - vep - - fusion: - type: object - properties: - arriba: + - activate + net_mhc_two_pan: type: object properties: activate: type: boolean - blacklist: - type: string - params: - type: string - - params: - type: object - properties: - cutadapt: - type: string - bwa: - type: string - gatk: - type: object - properties: - BaseRecalibrator: - type: string - applyBQSR: - type: string - required: - - BaseRecalibrator - - applyBQSR - picard: - type: object - properties: - MarkDuplicates: + peptide_len: + type: integer + extra: type: string required: - - MarkDuplicates - strelka: - type: object - properties: - config: - type: string - run: - type: string - razers3: - type: string - optitype: - type: string - microphaser: - type: object - properties: - window_len: - type: integer - peptide_len: - type: object - properties: - netMHCpan: - type: integer - netMHCIIpan: - type: integer - kallisto: - type: string - star: - type: string + - activate required: - - bwa - - gatk - - picard - - strelka - - razers3 - microphaser - - optitype + - net_mhc_pan + - net_mhc_two_pan + required: - samples - units - ref - - tmb - - calling - params - - annotations - - epitope_prediction - - affinity + - neoantigen_prediction diff --git a/workflow/schemas/samples.schema.yaml b/workflow/schemas/samples.schema.yaml index e626f60f..e795cdee 100644 --- a/workflow/schemas/samples.schema.yaml +++ b/workflow/schemas/samples.schema.yaml @@ -2,12 +2,18 @@ $schema: "http://json-schema.org/draft-04/schema#" description: an entry in the sample sheet properties: - sample: + sample_name: type: string - description: sample name/identifier - type: + description: sample name/identifier (alphanumeric string, that may additionally contain '_' and '-') + pattern: "^[a-zA-Z_0-9-]+$" + alias: type: string - description: healthy or tumor sample + description: sample name within the VCF/BCF files generated for a group (e.g. tumor, normal, etc.) 
(alphanumeric string, that may additionally contain '_' and '-') + pattern: "^[a-zA-Z_0-9-]+$" + group: + type: string + description: group of samples called jointly (alphanumeric string, that may additionally contain '_' and '-') + pattern: "^[a-zA-Z_0-9-]+$" matched_normal: type: string description: the corresponding healthy control to this tumor sample @@ -18,11 +24,25 @@ properties: description: Purity to use for tumor/normal groups. platform: type: string - enum: ["CAPILLARY", "LS454", "ILLUMINA", "SOLID", "HELICOS", "IONTORRENT", "ONT", "PACBIO"] - + enum: + - "CAPILLARY" + - "LS454" + - "ILLUMINA" + - "SOLID" + - "HELICOS" + - "IONTORRENT" + - "ONT" + - "PACBIO" + description: used sequencing platform + purity: + type: number + minimum: 0.0 + maximum: 1.0 + description: Purity to use for tumor/normal groups. required: - - sample - - type + - sample_name + - alias + - group - platform diff --git a/workflow/schemas/units.schema.yaml b/workflow/schemas/units.schema.yaml index 6fb9a6cf..4fec2074 100644 --- a/workflow/schemas/units.schema.yaml +++ b/workflow/schemas/units.schema.yaml @@ -2,18 +2,22 @@ $schema: "http://json-schema.org/draft-04/schema#" description: row of the units.tsv, representing a sequencing unit, i.e. single-end or paired-end data type: object properties: - sample: + sample_name: type: string - description: sample name/id the unit has been sequenced from + pattern: "^[a-zA-Z_0-9-]+$" + description: sample name/id the unit has been sequenced from (alphanumeric string, that may additionally contain '_' and '-') sequencing_type: type: string enum: ["DNA", "RNA"] - unit: + description: type of sequenced material ('DNA' or 'RNA') + unit_name: type: string - description: unit or lane name + pattern: "^[a-zA-Z_0-9-]+$" + description: unit id (alphanumeric string, that may additionally contain '_' and '-') fq1: type: string - description: path to FASTQ file + pattern: "^[^ \t]+$" + description: path to FASTQ file (may not contain whitespace) fq2: type: string description: path to second FASTQ file (leave empty in case of single-end) @@ -22,9 +26,9 @@ properties: description: SRA id for automatic download of unit adapters: type: string - description: adapter trimming settings to use (for cutadapt) + description: cutadapt adapter trimming settings to use (see https://cutadapt.readthedocs.io) required: - - sample - - unit + - sample_name + - unit_name - sequencing_type diff --git a/workflow/scripts/add_rna_info.py b/workflow/scripts/add_rna_info.py index bdcdcac2..0eb2298e 100644 --- a/workflow/scripts/add_rna_info.py +++ b/workflow/scripts/add_rna_info.py @@ -1,13 +1,19 @@ +import sys + +sys.stderr = open(snakemake.log[0], "w") + import pandas as pd ## load data table -data = pd.read_csv(snakemake.input["table"], sep='\t') +data = pd.read_csv(snakemake.input["table"], sep="\t") ## Merge transcript count -transcript_count = pd.read_csv(snakemake.params["abundance"], sep='\t') +transcript_count = pd.read_csv(snakemake.params["abundance"], sep="\t") transcript_count = transcript_count[["target_id", "tpm"]] transcript_count.columns = ["Transcript_ID", "TPM"] -transcript_count["Transcript_ID"] = transcript_count["Transcript_ID"].str.split('.', expand=True)[0] +transcript_count["Transcript_ID"] = transcript_count["Transcript_ID"].str.split( + ".", expand=True +)[0] data = data.merge(transcript_count, on="Transcript_ID", how="left") -data.to_csv(snakemake.output[0], sep='\t', index=False) +data.to_csv(snakemake.output[0], sep="\t", index=False) diff --git 
a/workflow/scripts/adjust_microphaser_output_for_neo_fox.py b/workflow/scripts/adjust_microphaser_output_for_neo_fox.py new file mode 100644 index 00000000..4806d56e --- /dev/null +++ b/workflow/scripts/adjust_microphaser_output_for_neo_fox.py @@ -0,0 +1,20 @@ +import sys + +sys.stderr = open(snakemake.log[0], "w") + +import pandas as pd + +columns_mapping = { + "gene_name": "gene", + "normal_peptide": "mutation.wildTypeXmer", + "tumor_peptide": "mutation.mutatedXmer", + "freq": "dnaVariantAlleleFrequency", +} + +candidates = ( + pd.read_csv(snakemake.input.microphaser, sep="\t", quoting=3) + .rename(columns=columns_mapping) + .assign(patientIdentifier=snakemake.wildcards.group) +) + +candidates.to_csv(snakemake.output.neo_fox, sep="\t", quoting=3, index=False) diff --git a/workflow/scripts/build_oncoprint_matrix.py b/workflow/scripts/build_oncoprint_matrix.py deleted file mode 100644 index 2b65721b..00000000 --- a/workflow/scripts/build_oncoprint_matrix.py +++ /dev/null @@ -1,42 +0,0 @@ -import pysam -import pandas as pd -import os -import sys - -# redirect output to log file -log = open(snakemake.log[0], "w") -sys.stdout = log -sys.stderr = log - -input_files = snakemake.input - -df = pd.DataFrame(columns=["Sample"]) -for sample_file in input_files: - variant_file = pysam.VariantFile(sample_file) - sample_name = os.path.basename(sample_file).split(".")[0] - gene_variant_dict = {"Sample": [sample_name]} - for rec in variant_file.fetch(): - for sample in rec.samples: - allele_frequencies = rec.samples[sample]["AF"] #Can be multiple entries - for allele_frequency in allele_frequencies: - variant = rec.info["SVLEN"] - if variant[0]: - variant_type = "INDEL" - else: - variant_type = "SNV" - transcripts = rec.info["ANN"] - for transcript in transcripts: - gene = transcript.split("|")[3] - impact = transcript.split("|")[2] - if gene and impact != "MODIFIER": - if gene not in gene_variant_dict: - gene_variant_dict[gene] = set() - gene_variant_dict[gene].add(variant_type) - break - for key, value in gene_variant_dict.items(): - gene_variant_dict[key] = ','.join(value) - sample_df = pd.DataFrame(gene_variant_dict, index=[0]) - df = pd.concat([df, sample_df], join="outer", ignore_index=False, sort=False) -df.set_index("Sample", inplace=True) -with open(snakemake.output[0], 'w') as output_f: - print(df.to_csv(sep="\t", index=True), file=output_f) diff --git a/workflow/scripts/count_neoantigen_occurrences.py b/workflow/scripts/count_neoantigen_occurrences.py index 1b6050e4..eb9c917a 100644 --- a/workflow/scripts/count_neoantigen_occurrences.py +++ b/workflow/scripts/count_neoantigen_occurrences.py @@ -1,3 +1,7 @@ +import sys + +sys.stderr = open(snakemake.log[0], "w") + import pandas as pd import glob @@ -5,16 +9,21 @@ dfs, dfs_id = dict(), [] -for f in files: - df = pd.read_csv(f, sep = '\t') - dfs[f] = df - df = df[["ID_tumor"]] - dfs_id.append(df) +for f in files: + df = pd.read_csv(f, sep="\t") + dfs[f] = df + df = df[["ID_tumor"]] + dfs_id.append(df) ids = pd.concat(dfs_id) -ids=ids.groupby(ids.columns.tolist(), as_index = False).size().reset_index().rename(columns = {0:"occurrences"}) -ids.to_csv("out/tables/candidate_occurrences.tsv", sep = '\t', index = False) +ids = ( + ids.groupby(ids.columns.tolist(), as_index=False) + .size() + .reset_index() + .rename(columns={0: "occurrences"}) +) +ids.to_csv("out/tables/candidate_occurrences.tsv", sep="\t", index=False) for k, v in dfs.items(): - df = v.merge(ids, on = "ID_tumor") - df.to_csv(k.replace("filtered.tsv", "filtered.counts.csv"), sep = ',', 
index = False) + df = v.merge(ids, on="ID_tumor") + df.to_csv(k.replace("filtered.tsv", "filtered.counts.csv"), sep=",", index=False) diff --git a/workflow/scripts/create_neo_fox_group_sheet.py b/workflow/scripts/create_neo_fox_group_sheet.py new file mode 100644 index 00000000..4242d684 --- /dev/null +++ b/workflow/scripts/create_neo_fox_group_sheet.py @@ -0,0 +1,64 @@ +import sys + +sys.stderr = open(snakemake.log[0], "w") + +import pandas as pd + +HLA_SUFFIXES_REGEX = r"[NLSCAQ]?" + +# allowed loci according to NeoFox input data documentation: +# https://neofox.readthedocs.io/en/latest/03_01_input_data.html#file-with-patient-data +# * mhcIAlleles: comma separated MHC I alleles of the patient for HLA-A, HLA-B and +# HLA-C. If homozygous, the allele should be added twice. +# * mhcIIAlleles: comma separated MHC II alleles of the patient for HLA-DRB1, HLA-DQA1, +# HLA-DQB1, HLA-DPA1 and HLA-DPB1. If homozygous, the allele should be added twice. +ALLOWED_LOCI = { + "A", + "B", + "C", + "DRB1", + "DQA1", + "DQB1", + "DPA1", + "DPB1", +} + +mhc_alleles = pd.read_csv( + snakemake.input.hla_la_bestguess, + sep="\t", + ) +# the Allele column can contain multiple ";"-separated entries for the +# same locus -- NeoFox does a hard assertion that only two alleles per +# gene exist, so we chose to only ever keep the first of such multiple +# possibilities +mhc_alleles.loc[:, "Allele"] = mhc_alleles["Allele"].str.split(pat=";") +mhc_alleles = mhc_alleles.explode(["Allele"]).drop_duplicates(subset=["Locus", "Chromosome"]) +mhc_alleles = mhc_alleles[mhc_alleles["Locus"].isin(ALLOWED_LOCI)] +mhc_alleles.loc[:, "Allele"] = ( + mhc_alleles["Allele"] + .str + .replace( + r"([A-Z]+\d?)\*(\d+):(\d+)(:\d+)*G?(" + HLA_SUFFIXES_REGEX + r")", + r"HLA-\1*\2:\3\5", + regex=True, + ) +) +# the multiple ";"-separated entries from above can be identical after reducing +# to the allele group (1st number) and specific HLA protein (2nd number) +mhc_alleles = mhc_alleles.drop_duplicates(subset=["Chromosome", "Allele"]) + +mhc_one_alleles = ",".join( mhc_alleles.loc[ mhc_alleles["Locus"].str.len() == 1, "Allele"] ) +mhc_two_alleles = ",".join( mhc_alleles.loc[ mhc_alleles["Locus"].str.len() > 1, "Allele"] ) + +patient_info = pd.DataFrame( + data={ + "identifier": [ snakemake.params.group.name ], + "tumorType": [ snakemake.params.group["tumorType"] ], + "mhcIAlleles": [ mhc_one_alleles ], + "mhcIIAlleles": [ mhc_two_alleles ], + } +# This is required for cases where no tumorType is available, as NeoFox does not +# seem to be able to handle empty entries, here -- so we remove the whole column +).dropna(axis="columns") + +patient_info.to_csv(snakemake.output.group_sheet, sep="\t", quoting=3, index=False) diff --git a/workflow/scripts/group_mhc_output.py b/workflow/scripts/group_mhc_output.py deleted file mode 100644 index 5ce0cf15..00000000 --- a/workflow/scripts/group_mhc_output.py +++ /dev/null @@ -1,28 +0,0 @@ -import sys -import os -import pandas as pd -import numpy as np - -first = True -out = open(snakemake.output[0], "w") -for e in snakemake.input: - if os.path.getsize(e) > 0: - mhcout = open(e, 'r') - alleles = next(mhcout).split('\t') - header = next(mhcout).rstrip().split('\t') - if first: - allele = '' - for i in range(0, len(header)): - #print(header[i].rstrip()) - if i < len(alleles): - #print(alleles[i]) - if alleles[i] != '': - allele = alleles[i].rstrip() + '_' - header[i] = allele + header[i].rstrip() - header[len(header) -1] = "NB" - first = False - #print(header) - out.write('\t'.join(header) + '\n') - for 
line in mhcout: - out.write(line) -out.close() diff --git a/workflow/scripts/merge_data.py b/workflow/scripts/merge_data.py deleted file mode 100644 index b90af1cb..00000000 --- a/workflow/scripts/merge_data.py +++ /dev/null @@ -1,89 +0,0 @@ -import sys -import os -import pandas as pd -import numpy as np - - -def select_columns(mhc): - rank_cols = [c for c in mhc.columns if "Rank" in c] - affinity_cols = [c for c in mhc.columns if "nM" in c] - mhc_cols = ["Pos"] + ["ID"] + ["Peptide"] + rank_cols + affinity_cols + ["NB"] - mhc = mhc[mhc_cols] - mhc["Rank_min"] = mhc[rank_cols].min(axis=1) - mhc["Aff_min"] = mhc[affinity_cols].min(axis=1) - mhc["Top_rank_HLA"] = mhc[rank_cols].idxmin(axis=1) - mhc["Top_affinity_HLA"] = mhc[affinity_cols].idxmin(axis=1) - mhc["Top_rank_HLA"] = mhc["Top_rank_HLA"].str.replace("_Rank","") - mhc["Top_affinity_HLA"] = mhc["Top_affinity_HLA"].str.replace("_nM","") - return mhc - -def merge(info, tumor, normal, outfile): - tumor = select_columns(tumor) - normal = select_columns(normal) - id_length = len(tumor.ID[0]) - print(info.columns) - info["ID"] = info["id"].astype(str).str[:id_length] - merged_mhc = tumor.merge(normal,how='left', on=['Pos','ID']) - merged_mhc = merged_mhc.rename(columns={col: col.replace("_y","_normal") for col in merged_mhc.columns}).rename(columns={col: col.replace("_x","_tumor") for col in merged_mhc.columns}) - info = info.rename(columns={"gene_id":"Gene_ID","gene_name":"Gene_Symbol","strand":"Strand","positions":"Variant_Position","chrom":"Chromosome","somatic_aa_change":"Somatic_AminoAcid_Change"}) - merged_dataframe = merged_mhc.merge(info, how='left', on = 'ID') - - merged_dataframe["Peptide_tumor"] = merged_dataframe[["Peptide_tumor","Peptide_normal"]].apply(lambda x: diffEpitope(*x), axis=1) - ## Are all possible variants in the peptide ("Cis") or not ("Trans") - merged_dataframe["Variant_Orientation"] = "Cis" - trans = merged_dataframe.nvariant_sites > merged_dataframe.nvar - merged_dataframe.loc[trans, "Variant_Orientation"] = "Trans" - - ## check misssense/silent mutation status - nonsilent = merged_dataframe.Peptide_tumor != merged_dataframe.Peptide_normal - merged_dataframe = merged_dataframe[nonsilent] - merged_dataframe = merged_dataframe.drop_duplicates(subset=["transcript","offset","Peptide_tumor","Somatic_AminoAcid_Change"]) - - data = merged_dataframe[["ID","transcript","Gene_ID","Gene_Symbol","Chromosome","offset","freq","depth", - "Somatic_AminoAcid_Change", "nvar", "nsomatic", "somatic_positions", "Peptide_tumor","NB_tumor","Rank_min_tumor","Aff_min_tumor", - "Top_rank_HLA_tumor","Top_affinity_HLA_tumor","Peptide_normal","NB_normal", - "Rank_min_normal","Aff_min_normal","Top_rank_HLA_normal","Top_affinity_HLA_normal"]] - - data.columns = ["ID","Transcript_ID","Gene_ID","Gene_Symbol","Chromosome","Position","Frequency","Read_Depth", - "Somatic_AminoAcid_Change", "nvar", "nsomatic", "somatic_positions", "Peptide_tumor","BindingHLAs_tumor","Rank_min_tumor","Affinity_min_tumor", - "Top_rank_HLA_tumor","Top_affinity_HLA_tumor","Peptide_normal","BindingHLAs_normal", - "Rank_min_normal","Aff_min_normal","Top_rank_HLA_normal","Top_affinity_HLA_normal"] - - # data = data[data.BindingHLAs_tumor > 0] - # data = data[(data.NB_normal.isna()) | (data.NB_normal == 0)] - #data = data[(data.BindingHLAs_normal == 0)] - - ### Delete Stop-Codon including peptides - data = data[data.Peptide_tumor.str.count("x") == 0] - data = data[data.Peptide_tumor.str.count("X") == 0] - data.sort_values(["Chromosome", "somatic_positions"], inplace=True) 
- ### Remove Duplicate kmers - data = data.drop_duplicates(["Transcript_ID", "Peptide_tumor", "Somatic_AminoAcid_Change", "Peptide_normal"]) - - data.to_csv(outfile, index=False, sep = '\t') - - -## highlight the difference between mutated neopeptide and wildtype -def diffEpitope(e1,e2): - if str(e2) == 'nan' or str(e2) == '': - return(e1) - e1 = str(e1) - e2 = str(e2) - diff_pos = [i for i in range(len(e1)) if e1[i] != e2[i]] - e_new = e1 - e2_new = e2 - for p in diff_pos: - e_new = e_new[:p] + e_new[p].lower() + e_new[p+1:] - e2_new = e2_new[:p] + e2_new[p].lower() + e2_new[p+1:] - return(e_new) - - -def main(): - info = pd.read_csv(snakemake.input[0], sep = '\t', dtype=str) - tumor = pd.read_csv(snakemake.input[1], sep = '\t') - normal = pd.read_csv(snakemake.input[2], sep = '\t') - outfile = snakemake.output[0] - merge(info, tumor, normal, outfile) - -if __name__ == '__main__': - sys.exit(main()) diff --git a/workflow/scripts/merge_mhcflurry.py b/workflow/scripts/merge_mhcflurry.py deleted file mode 100644 index fcc6aba4..00000000 --- a/workflow/scripts/merge_mhcflurry.py +++ /dev/null @@ -1,63 +0,0 @@ -import sys -import os -import pandas as pd -import numpy as np - - -## highlight the difference between mutated neopeptide and wildtype -def diffEpitope(e1,e2): - if str(e2) == 'nan': - return(e1) - e1 = str(e1) - e2 = str(e2) - diff_pos = [i for i in range(len(e1)) if e1[i] != e2[i]] - e_new = e1 - e2_new = e2 - for p in diff_pos: - e_new = e_new[:p] + e_new[p].lower() + e_new[p+1:] - e2_new = e2_new[:p] + e2_new[p].lower() + e2_new[p+1:] - return(e_new) - -info = pd.read_csv(snakemake.input[0]) -tumor = pd.read_csv(snakemake.input[1]) -normal = pd.read_csv(snakemake.input[2]) -outfile = snakemake.output[0] - - -tumor = tumor[["source_sequence_name","peptide","allele","affinity","percentile_rank"]] -tumor = tumor.pivot_table(["affinity","percentile_rank"],["source_sequence_name","peptide"],"allele").reset_index() -tumor.columns = tumor.columns.map("-".join) -tumor = tumor.rename(columns={col: col.replace("-","") for col in tumor.columns if col.endswith("-")}) - -normal = normal[["source_sequence_name","peptide","allele","affinity","percentile_rank"]] -normal = normal.pivot_table(["affinity","percentile_rank"],["source_sequence_name","peptide"],"allele").reset_index() -normal.columns = normal.columns.map("-".join) -normal = normal.rename(columns={col: col.replace("-","") for col in normal.columns if col.endswith("-")}) - -merged = tumor.merge(normal, how="left", on=["source_sequence_name"]) - -merged = merged.rename(columns={col: col.replace("_y","_normal") for col in merged.columns}).rename(columns={col: col.replace("_x","_tumor") for col in merged.columns}) -## add info -info = info.rename(columns={"id":"ID","gene_id":"Gene_ID","gene_name":"Gene_Symbol","strand":"Strand","positions":"Variant_Position","chrom":"Chromosome","somatic_aa_change":"Somatic_AminoAcid_Change"}) -merged_dataframe = merged.merge(info, how="left", left_on="source_sequence_name", right_on="ID") - -merged_dataframe["peptide_tumor"]=merged_dataframe[["peptide_tumor","peptide_normal"]].apply(lambda x: diffEpitope(*x), axis=1) -## Are all possible variants in the peptide ("Cis") or not ("Trans") -merged_dataframe["Variant_Orientation"] = "Cis" -trans = merged_dataframe.nvariant_sites > merged_dataframe.nvar -merged_dataframe.loc[trans, "Variant_Orientation"] = "Trans" - -## check misssense/silent mutation status -nonsilent = merged_dataframe.peptide_tumor != merged_dataframe.peptide_normal -merged_dataframe = 
merged_dataframe[nonsilent]
-data = merged_dataframe.drop_duplicates(subset=["Gene_ID","offset","peptide_tumor","Somatic_AminoAcid_Change"])
-
-### Delete Stop-Codon including peptides
-data = data[data.peptide_tumor.str.count("x") == 0]
-data = data[data.peptide_tumor.str.count("X") == 0]
-data.to_csv(outfile, index=False, sep = '\t')
-
-
-
-
-
diff --git a/workflow/scripts/merge_neoantigen_info.py b/workflow/scripts/merge_neoantigen_info.py
new file mode 100644
index 00000000..669a7716
--- /dev/null
+++ b/workflow/scripts/merge_neoantigen_info.py
@@ -0,0 +1,295 @@
+import sys
+
+sys.stderr = open(snakemake.log[0], "w")
+
+import pandas as pd
+from typing import List, Tuple
+
+
+def get_best_rank_per_peptide(df: pd.DataFrame, rank_type: str) -> pd.DataFrame:
+    df = df.set_index(["id", "pos_in_id_seq", "pep_seq", "allele"])
+    rank_col = f"{rank_type}_rank"
+    score_col = f"{rank_type}_score"
+    prefix = f"top_{rank_col}_"
+    columns_to_keep = ["bind_core", rank_col, score_col]
+    return (
+        df.loc[df.groupby(["pep_seq", "id"])[rank_col].idxmin(), columns_to_keep]
+        .reset_index(level="allele")
+        .add_prefix(prefix)
+        .reset_index()
+        .drop_duplicates()
+    )
+
+
+def get_filtered_per_alias(sample: pd.DataFrame, alias: str) -> pd.DataFrame:
+    merge_cols = ["id", "pos_in_id_seq", "pep_seq"]
+    common_info = sample.loc[
+        :, merge_cols + ["ave_el_score", "num_binders"]
+    ].drop_duplicates()
+    sample_el = get_best_rank_per_peptide(sample, "el")
+    sample_ba = get_best_rank_per_peptide(sample, "ba")
+    sample_filtered = sample_el.merge(sample_ba, on=merge_cols)
+    return sample_filtered.merge(common_info, how="left", on=merge_cols).assign(
+        alias=alias
+    )
+
+
+def highlight_peptides_diff(tumor_p: str, normal_p: str) -> Tuple[str, str]:
+    """
+    Highlight the difference between mutated neopeptide and normal peptide
+    """
+    if normal_p == "nan" or normal_p == "":
+        return (tumor_p, normal_p)
+    assert len(tumor_p) == len(
+        normal_p
+    ), f"Tumor peptide '{tumor_p}' and normal peptide '{normal_p}' have different lengths."
+    diff_pos = [i for i in range(len(tumor_p)) if tumor_p[i] != normal_p[i]]
+    tp_changed = tumor_p
+    np_changed = normal_p
+    for p in diff_pos:
+        tp_changed = tp_changed[:p] + tp_changed[p].lower() + tp_changed[p + 1 :]
+        np_changed = np_changed[:p] + np_changed[p].lower() + np_changed[p + 1 :]
+    return (tp_changed, np_changed)
+
+
+def diff_tumor_normal_peptides(
+    group: pd.DataFrame, column: str, tumor_alias: str
+) -> pd.DataFrame:
+    normal_pep = group.loc[group["alias"] == "normal", column].fillna("")
+    if normal_pep.empty:
+        normal_pep = ""
+    else:
+        normal_pep = normal_pep.squeeze()
+    tumor_pep = group.loc[group["alias"] == tumor_alias, column].fillna("").squeeze()
+    # Remove groups where the tumor peptide contains a stop codon.
+    # TODO: Maybe this should be a hard fail complaining to fix this upstream?
+    if "X" in tumor_pep:
+        print(
+            f"Warning: tumor peptide '{tumor_pep}' contains a stop codon, "
+            "removing it from the further analysis of this group.",
+            file=sys.stderr,
+        )
+        return group.loc[[], :]
+    # Silent mutations should not be included in microphaser output.
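+    # (If one slipped through, normal_pep and tumor_pep below would be identical.)
+    # For reference, a sketch of the highlight step applied further below, using
+    # the well-known example epitope SYFPEITHI as a purely illustrative value:
+    #   highlight_peptides_diff("SYFPEITHL", "SYFPEITHI")
+    #   returns ("SYFPEITHl", "SYFPEITHi"), lower-casing only differing positions.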
+    if normal_pep == tumor_pep:
+        raise ValueError(
+            f"For peptide '{group['id'].iloc[0]}' the normal and the tumor peptide have an identical sequence ({normal_pep}).\n"
+            "Please fix this upstream or comment out this check to ignore this problem.\n"
+        )
+    t_diff, n_diff = highlight_peptides_diff(tumor_pep, normal_pep)
+    group.loc[group["alias"] == tumor_alias, column] = t_diff
+    group.loc[group["alias"] == "normal", column] = n_diff
+    return group
+
+
+def tidy_info(info: pd.DataFrame, tumor_alias: str) -> pd.DataFrame:
+    """
+    Get the -o info output of the microphaser filter command into tidy data format.
+    """
+    info = info.rename(columns={"credible_interval": "freq_credible_interval"})
+    # Aggregate multiple identical entries that differ only in the remaining columns
+    # into one, '|'-joining the 'transcript', 'depth', 'freq' and 'freq_credible_interval'
+    # values and re-expanding the collected 'id's into one row per 'id' via explode().
+    cols = [c for c in info.columns if c not in ["id", "transcript", "depth", "freq", "freq_credible_interval"]]
+    aggregation_functions = {
+        "id": lambda i: list(i),
+        "transcript": lambda t: "|".join(list(t)),
+        "depth": lambda d: "|".join(list(d)),
+        "freq": lambda f: "|".join(list(f)),
+        "freq_credible_interval": lambda c: "|".join(list(c)),
+    }
+    info = (
+        info.groupby(cols, dropna=False)
+        .agg(aggregation_functions)
+        .reset_index()
+        .explode("id")
+        .set_index(
+            [
+                "id",
+                "transcript",
+                "gene_id",
+                "gene_name",
+                "chrom",
+                "offset",
+                "frame",
+                "freq",
+                "freq_credible_interval",
+                "depth",
+                "strand",
+            ]
+        )
+    )
+    int_cols = ["nvar", "nsomatic", "nvariant_sites", "nsomvariant_sites"]
+    info[int_cols] = info[int_cols].astype("int32")
+    # TODO: Ensure that microphaser output contains only one entry per id.
+    # If there is more than one entry per index, ensure that they are identical
+    if len(info.groupby(info.index).filter(lambda g: (g.nunique() > 1).any())) > 0:
+        raise ValueError(
+            f"Found multiple differing entries for an 'id' in file '{snakemake.input.info}'. Please ensure that entries are unique per 'id'.\n"
+        )
+    # Always take the first entry for each index.
+    info = info.groupby(info.index).head(1)
+    # TODO: Ensure that microphaser output is tidy data, with one row each for tumor and normal.
+    # TODO: factor out tidying of column pairs into a function.
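+    # One possible shape for that helper (a sketch only, not used below; the name
+    # `melt_column_pair` and the `value_to_alias` mapping are illustrative):
+    # def melt_column_pair(df, value_name, value_to_alias):
+    #     return (
+    #         df.melt(
+    #             var_name="alias",
+    #             value_name=value_name,
+    #             value_vars=list(value_to_alias),
+    #             ignore_index=False,
+    #         )
+    #         .replace(value_to_alias)
+    #         .set_index("alias", append=True)
+    #     )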
+ num_var_in_pep_tidy = ( + info.assign(ngermline=lambda x: x.nvar - x.nsomatic) + .melt( + var_name="alias", + value_name="num_var_in_pep", + value_vars=["ngermline", "nsomatic"], + ignore_index=False, + ) + .replace({"ngermline": "normal", "nsomatic": tumor_alias}) + .set_index("alias", append=True) + ) + num_var_sites_tidy = ( + info.assign(ngermvariant_sites=lambda x: x.nvariant_sites - x.nsomvariant_sites) + .melt( + var_name="alias", + value_name="num_var_sites", + value_vars=["ngermvariant_sites", "nsomvariant_sites"], + ignore_index=False, + ) + .replace({"ngermvariant_sites": "normal", "nsomvariant_sites": tumor_alias}) + .set_index("alias", append=True) + ) + genomic_pos_tidy = ( + info.melt( + var_name="alias", + value_name="genomic_pos", + value_vars=["somatic_positions", "germline_positions"], + ignore_index=False, + ) + .replace({"somatic_positions": tumor_alias, "germline_positions": "normal"}) + .set_index("alias", append=True) + ) + aa_changes_tidy = ( + info.melt( + var_name="alias", + value_name="aa_changes", + value_vars=["somatic_aa_change", "germline_aa_change"], + ignore_index=False, + ) + .replace({"somatic_aa_change": tumor_alias, "germline_aa_change": "normal"}) + .set_index("alias", append=True) + ) + nt_seq_tidy = ( + info.melt( + var_name="alias", + value_name="nt_seq", + value_vars=["normal_sequence", "mutant_sequence"], + ignore_index=False, + ) + .replace({"normal_sequence": "normal", "mutant_sequence": tumor_alias}) + .set_index("alias", append=True) + ) + all_tidy = num_var_in_pep_tidy.join( + [num_var_sites_tidy, genomic_pos_tidy, aa_changes_tidy, nt_seq_tidy] + ) + return all_tidy.reset_index() + + +def check_duplicates(df: pd.DataFrame, cols: List[str], specific_error: str = ""): + if sum(df.duplicated(subset=cols)) > 0: + duplicates = df[ + df.duplicated( + subset=cols, + keep=False, + ) + ] + cols_str = '", "'.join(cols) + raise ValueError( + f'Found multiple rows with identical [ "{cols_str}" ] entries.\n' + "This indicates an upstream issue, please fix this.\n" + f"{specific_error}" + "The offending entries are:\n" + f"{duplicates}\n" + ) + + +def merge_data_frames( + info: pd.DataFrame, tumor: pd.DataFrame, normal: pd.DataFrame, tumor_alias: str +) -> pd.DataFrame: + # get and merge tumor and normal + tumor_filtered = get_filtered_per_alias(tumor, tumor_alias) + normal_filtered = get_filtered_per_alias(normal, "normal") + all_filtered = ( + pd.concat([tumor_filtered, normal_filtered]) + .groupby("id", group_keys=False) + .apply(diff_tumor_normal_peptides, column="pep_seq", tumor_alias=tumor_alias) + ) + info_tidy = tidy_info(info, tumor_alias) + + # netMHCpan 4.1 truncates the fasta entry IDs, so we have to cut down the IDs + # that microphaser originally provided to make the following .join() work + len_tumor_id = len(tumor_filtered["id"][0]) + len_normal_id = len(normal_filtered["id"][0]) + assert ( + len_tumor_id == len_normal_id + ), f"'id's' are of different length, tumor: {len_tumor_id}, normal: {len_normal_id}, please check your input data.\n" + info_tidy["id"] = info_tidy["id"].str[:len_tumor_id] + # Double-check for duplicates resulting from the id truncation + check_duplicates( + info_tidy, + ["id", "alias"], + specific_error="Here, the problem is most likely the truncation of 'id's by netMHCpan.\n", + ) + + all_annotated = all_filtered.merge(info_tidy, how="left", on=["id", "alias"]) + + # Double-check for weird duplicates, as previously done in Jan's code. 
+ # Jan's code was only checking for ["transcript", "offset", "pep_seq", "aa_changes"], + # we check for everything except id. + cols_without_id = [c for c in all_annotated.columns if c not in ["id"]] + all_annotated = all_annotated.drop_duplicates(subset=cols_without_id) + check_duplicates(all_annotated, cols_without_id) + + column_order = [ + "id", + "pep_seq", + "pos_in_id_seq", + "alias", + "num_binders", + "freq", + "depth", + "num_var_sites", + "num_var_in_pep", + "top_el_rank_allele", + "top_el_rank_bind_core", + "top_el_rank_el_rank", + "top_el_rank_el_score", + "ave_el_score", + "top_ba_rank_allele", + "top_ba_rank_bind_core", + "top_ba_rank_ba_rank", + "top_ba_rank_ba_score", + "aa_changes", + "genomic_pos", + "nt_seq", + "gene_name", + "gene_id", + "transcript", + "chrom", + "offset", + "frame", + "strand", + "freq_credible_interval", + ] + + def get_id_rank(group: pd.DataFrame, tumor_alias: str): + return group.loc[group["alias"] == tumor_alias, "top_el_rank_el_rank"].squeeze() + + sort_rank = ( + all_annotated.groupby("id").apply(get_id_rank, tumor_alias).rename("sort_rank") + ) + all_sorted = ( + all_annotated.merge(sort_rank, on=["id"], how="left") + .sort_values(["sort_rank", "id", "alias"], ascending=[True, True, False]) + .drop(columns="sort_rank") + ) + + return all_sorted.reindex(columns=column_order) + + +info = pd.read_csv(snakemake.input.info, sep="\t", dtype=str) +tumor = pd.read_csv(snakemake.input.neo, sep="\t", dtype={"pos_in_id_seq": str}) +normal = pd.read_csv(snakemake.input.normal, sep="\t", dtype={"pos_in_id_seq": str}) +data = merge_data_frames(info, tumor, normal, snakemake.wildcards.tumor_alias) +data.to_csv(snakemake.output[0], index=False, sep="\t") diff --git a/workflow/scripts/oncoprint.R b/workflow/scripts/oncoprint.R deleted file mode 100644 index 5476b305..00000000 --- a/workflow/scripts/oncoprint.R +++ /dev/null @@ -1,56 +0,0 @@ -# log to file -log <- file(snakemake@log[[1]], open="wt") -sink(log) -sink(log, type="message") - -library(ComplexHeatmap) -library(ggplot2) - -table = read.table(snakemake@input[[1]], sep="\t", header=TRUE, row.names=1) -mat = as.matrix(table) -mat = t(mat) -## remove "full" lines -mat = mat[rowSums(mat == "") > 0,] -## remove single lines -mat = mat[rowSums(mat != "") > 0,] - -col = c(SNV = "blue", INDEL = "red") - -alter_fun = list( - SNV = function(x, y, w, h) grid.rect(x, y, w*0.9, h*0.9, - gp = gpar(fill = col["SNV"], col = NA)), - INDEL = function(x, y, w, h) grid.rect(x, y, w*0.9, h*0.4, - gp = gpar(fill = col["INDEL"], col = NA)) - ) - - -heatmap_legend_param = list(title = "Alterations", at = c("SNV", "INDEL"), - labels = c("SNV", "INDEL")) -if (ncol(mat) > 1 ){ - mat <- mat[order(apply(mat, 1, function(row) sum(row != "")), decreasing = T), ] -} - -i = 0 -c = 0 -matlist = list() -while (i + 2000 < nrow(mat)) { - m <- mat[(i + 1):(i + 2000), , drop=FALSE] - rows_matrix <- nrow(m) - height_plot <- (rows_matrix/5) - if (height_plot < 4) { - height_plot <- 4 - } - pdf(file = sub("0.pdf", paste(c, ".pdf", sep=''), snakemake@output[[1]]), height=height_plot) - if (rows_matrix > 0) { - oncoprint <- oncoPrint(m, - alter_fun = alter_fun, col = col, - remove_empty_columns = FALSE, remove_empty_rows = TRUE, - pct_side = "right", row_names_side = "left", - show_column_names=T, - column_title = "OncoPrint", heatmap_legend_param = heatmap_legend_param) - draw(oncoprint, newpage=F) - } - dev.off() - i = i + 2000 - c = c + 1 -} \ No newline at end of file diff --git a/workflow/scripts/parse_HLA_types.py 
b/workflow/scripts/parse_HLA_types.py deleted file mode 100644 index ff1906f3..00000000 --- a/workflow/scripts/parse_HLA_types.py +++ /dev/null @@ -1,20 +0,0 @@ -import pandas as pd - -hlaI = ["A","B","C"] - -hlaII = ["DRB1", "DPA1", "DPB1", "DQA1", "DQB1"] - -hlas = pd.read_csv(snakemake.input[0], sep='\t') - -hlasI = hlas[hlas.Locus.isin(hlaI)] -hlasI["Allele"]="HLA-" + hlasI.Allele.str.split(":", expand=True)[[0,1]].apply(lambda x: ''.join(x), axis=1).str.replace('*','') -hlasI = hlasI[["Allele"]].drop_duplicates() -hlasI.to_csv(snakemake.output[0], sep='\t', index=False) - -hlasII = hlas[hlas.Locus.isin(hlaII)] -hlasII["HLA"] = hlasII.Locus.str[0:2] -hlasII["Allele"] = hlasII.Allele.str.split(":", expand=True)[[0,1]].apply(lambda x: ''.join(x), axis=1).str.replace('*','') - -hlasII = pd.DataFrame("HLA-" + hlasII.groupby(["HLA","Chromosome"])["Allele"].apply(lambda x: "-".join(x)).reset_index()["Allele"]).drop_duplicates() -hlasII.loc[hlasII.Allele.str.contains("DRB"), "Allele"] = hlasII[hlasII.Allele.str.contains("DRB")]["Allele"].str.replace("HLA-DRB1","DRB1_") -hlasII.to_csv(snakemake.output[1], sep='\t', index=False) diff --git a/workflow/scripts/parse_and_filter_hla_alleles_for_netmhc.py b/workflow/scripts/parse_and_filter_hla_alleles_for_netmhc.py new file mode 100644 index 00000000..79c6994c --- /dev/null +++ b/workflow/scripts/parse_and_filter_hla_alleles_for_netmhc.py @@ -0,0 +1,145 @@ +import sys + +sys.stderr = open(snakemake.log[0], "w") + +import pandas as pd + +# # read in available alleles + + +def read_allele_list(filename: str): + with open(filename, "r") as alleles_in: + alleles = set() + for line in alleles_in: + if not (line.startswith("#") or line == "\n"): + alleles.add(line.strip()) + return alleles + + +HLA_SUFFIXES_REGEX = r"[NLSCAQ]?" + +# netMHCpan alleles and loci +HLA_ONE_NET_MHC_ALLELES = read_allele_list(snakemake.input.mhc_one_alleles) +hla_one_net_mhc_alleles = pd.Series(list(HLA_ONE_NET_MHC_ALLELES)) +HLA_ONE_LOCI = set( + hla_one_net_mhc_alleles[hla_one_net_mhc_alleles.str.startswith("HLA-")] + .str.replace(r"HLA-([A-Z])[\d:]+" + HLA_SUFFIXES_REGEX, r"\1", regex=True) + .drop_duplicates() +) + + +# netMHCIIpan alleles and loci +HLA_TWO_NET_MHC_ALLELES = read_allele_list(snakemake.input.mhc_two_alleles) +hla_two_net_mhc_alleles = pd.Series(list(HLA_TWO_NET_MHC_ALLELES)) +# DRB alleles need to be formatted differently from DP and DQ alleles, +# so we extract them separately. 
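+# For instance (allele values purely illustrative): an HLA-LA call like
+# "DRB1*15:01" is rendered as "DRB1_1501" for netMHCIIpan, whereas an alpha/beta
+# pair like "DQA1*01:02" plus "DQB1*06:02" becomes "HLA-DQA10102-DQB10602".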
+DRB_LOCI = set(
+    hla_two_net_mhc_alleles[hla_two_net_mhc_alleles.str.startswith("DRB")]
+    .str.replace(r"(DRB\d)_\d+" + HLA_SUFFIXES_REGEX, r"\1", regex=True)
+    .drop_duplicates()
+)
+
+ALPHA_BETA_LOCI = set(
+    hla_two_net_mhc_alleles[hla_two_net_mhc_alleles.str.startswith("HLA-")]
+    .str.replace(
+        r"HLA-(D[A-Z]A\d)\d+"
+        + HLA_SUFFIXES_REGEX
+        + r"-(D[A-Z]B\d)\d+"
+        + HLA_SUFFIXES_REGEX,
+        r"\1_\2",
+        regex=True,
+    )
+    .str.split("_")
+    .explode()
+    .drop_duplicates()
+)
+
+# read in alleles as determined by HLA-LA
+hlas = pd.read_csv(snakemake.input.hla_la_bestguess, sep="\t")
+
+# the Allele column can contain multiple ";"-separated entries for the
+# same locus
+hlas.loc[:, "Allele"] = hlas["Allele"].str.split(pat=";")
+hlas["alternative"] = hlas["Allele"].apply(lambda x: range(len(x)))
+hlas = hlas.explode(["Allele", "alternative"])
+
+# reformat to netMHCpan allele list format:
+# * https://services.healthtech.dtu.dk/services/NetMHCpan-4.1/allele.list
+# it needs to be in the format of the first column of the above list, as explained in
+# the "Instructions" tab under "MHC SELECTION" point "2)" at:
+# * https://services.healthtech.dtu.dk/service.php?NetMHCpan-4.1
+hla_one_alleles = (
+    hlas.loc[hlas["Locus"].isin(HLA_ONE_LOCI), "Allele"]
+    .str.replace(
+        r"([A-Z])\*(\d+):(\d+)(:\d+)*G?(" + HLA_SUFFIXES_REGEX + r")",
+        r"HLA-\1\2:\3\5",
+        regex=True,
+    )
+    .drop_duplicates()
+)
+hla_one_alleles[hla_one_alleles.isin(HLA_ONE_NET_MHC_ALLELES)].to_csv(
+    snakemake.output.hlaI, sep="\t", index=False, header=False
+)
+
+# reformat to netMHCIIpan allele list format:
+# * https://services.healthtech.dtu.dk/services/netMHCIIpan-4.1/alleles_name.list
+# contrary to the format in that list, alleles actually need to be formatted like this,
+# with <gene name>s found in the HLA-LA "Locus" column and syntax for the sub-numbering (only
+# the 1st and 2nd sub-number are used) according to the official nomenclature (see:
+# http://www.hla.alleles.org/nomenclature/naming.html ):
+# * DRB alleles: "<gene name>_<allele group><protein>"
+# * DP and DQ alleles (alpha means A and beta means B in the gene name, for example DPA):
+#   "HLA-<alpha gene name><allele group><protein>-<beta gene name><allele group><protein>"
+# This format was determined by manually selecting combinations above the
+# "type a list of molecules names" field of the "Submission" tab at:
+# * https://services.healthtech.dtu.dk/service.php?netMHCIIpan-4.1
+
+# TODO: check whether Jan's previous parsing of DRB alleles into this format is necessary:
+# * example: DRB1_1501-DRB30101-DRB40301N
+# * "DRB1_<allele group><protein>-DRB3<allele group><protein>-DRB4<allele group><protein>"
+drb_alleles = hlas.loc[hlas["Locus"].isin(DRB_LOCI)]
+hla_two_alleles = (
+    drb_alleles["Allele"]
+    .str.replace(
+        r"([A-Z]+\d)\*(\d+):(\d+)(:\d+)*G?(" + HLA_SUFFIXES_REGEX + r")",
+        r"\1_\2\3\5",
+        regex=True,
+    )
+    .drop_duplicates()
+)
+
+# handle alleles where a combination of alpha and beta always exists
+alpha_beta_alleles = hlas.loc[hlas["Locus"].isin(ALPHA_BETA_LOCI)]
+alpha_beta_alleles.loc[:, "Allele"] = alpha_beta_alleles.Allele.str.replace(
+    r"([A-Z]+\d)\*(\d+):(\d+)(:\d+)*G?(" + HLA_SUFFIXES_REGEX + r")",
+    r"\1\2\3\5",
+    regex=True,
+)
+# we need a variable to group alpha and beta of the same gene combination together
+alpha_beta_alleles.loc[:, "gene_group"] = alpha_beta_alleles["Locus"].str.replace(
+    r"(D[A-Z])[AB]\d", r"\1", regex=True
+)
+# we need to handle cases where we had multiple allele entries in an
+# alpha or beta locus, adding in a duplicate of the corresponding locus
+alleles_to_duplicate = alpha_beta_alleles.loc[
+    alpha_beta_alleles["alternative"] > 0,
+    ["Locus", "Chromosome", "alternative"],
+].replace(regex={"Locus": {"(D[A-Z])A(\d)": r"\1B\2", "(D[A-Z])B(\d)": r"\1A\2"}}) +alleles_to_insert = alleles_to_duplicate.merge( + alpha_beta_alleles.drop("alternative", axis="columns"), + on=["Locus", "Chromosome"], + how="left", +) +alpha_beta_alleles = pd.concat( + [alpha_beta_alleles, alleles_to_insert] +).drop_duplicates() + +hla_two_alleles = hla_two_alleles.append( + alpha_beta_alleles.groupby(["gene_group", "Chromosome", "alternative"])["Allele"] + .transform(lambda x: f"HLA-{'-'.join(x)}") + .drop_duplicates() +) + +hla_two_alleles[hla_two_alleles.isin(HLA_TWO_NET_MHC_ALLELES)].to_csv( + snakemake.output.hlaII, sep="\t", index=False, header=False +) diff --git a/workflow/scripts/phylogeny.R b/workflow/scripts/phylogeny.R index 7f26becf..9e32c131 100644 --- a/workflow/scripts/phylogeny.R +++ b/workflow/scripts/phylogeny.R @@ -1,3 +1,7 @@ +log <- file(snakemake@log[[1]], open="wt") +sink(log) +sink(log, type="message") + library(phangorn) ## read the variant matrix diff --git a/workflow/scripts/prepare_neoprint.py b/workflow/scripts/prepare_neoprint.py new file mode 100644 index 00000000..0a535d14 --- /dev/null +++ b/workflow/scripts/prepare_neoprint.py @@ -0,0 +1,88 @@ +import sys +import json + +sys.stderr = open(snakemake.log[0], "w") + +import pandas as pd +from typing import Tuple + +def highlight_peptides_diff(tumor_p: str, normal_p: str) -> Tuple[str, str]: + """ + Highlight the difference between mutated neopeptide and normal peptide + """ + if normal_p == "nan" or normal_p == "NA" or normal_p == "": + return (tumor_p, normal_p) + assert len(tumor_p) == len( + normal_p + ), f"Tumor peptide '{tumor_p}' and normal peptide '{normal_p}' have different lengths." + diff_pos = [i for i in range(len(tumor_p)) if tumor_p[i] != normal_p[i]] + tp_changed = tumor_p + np_changed = normal_p + for p in diff_pos: + tp_changed = tp_changed[:p] + tp_changed[p].lower() + tp_changed[p + 1 :] + np_changed = np_changed[:p] + np_changed[p].lower() + np_changed[p + 1 :] + return (tp_changed, np_changed) + +all_neopeptides = pd.read_csv(snakemake.input.neopeptides, sep="\t") + +# If we leave in any dots, datavzrd will interpret this as attributes +all_neopeptides.columns = all_neopeptides.columns.str.replace(".", "_", regex=False) + +# Aggregate multiple identical entries that differ only in 'id' and 'transcript' +# into one, taking the first 'id' and collecting all 'transcript's into a '|'-separated +# list. +# TODO: Remove this redundancy from microphaser output before passing it along to other +# tools. 
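+# Toy example of the aggregation below (values hypothetical): two rows identical
+# in every other column, one with (id="x1", transcript="ENST0001") and one with
+# (id="x2", transcript="ENST0002"), collapse into a single row with id "x1" and
+# transcript "ENST0001|ENST0002".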
+cols = [c for c in all_neopeptides.columns if c not in ["id", "transcript"]] +aggregation_functions = { + "id": lambda i: list(i)[0], + "transcript": lambda t: "|".join(list(t)), +} +all_neopeptides = ( + all_neopeptides.groupby(cols, dropna=False) + .agg(aggregation_functions) + .reset_index() + .explode("id") +) + +# highlight mutations in the original Xmers +all_neopeptides[["mutation_mutatedXmer", "mutation_wildTypeXmer"]] = ( + pd.DataFrame( + all_neopeptides + .fillna({"mutation_wildTypeXmer": ""}) + .apply(lambda row: highlight_peptides_diff(row["mutation_mutatedXmer"], row["mutation_wildTypeXmer"]), axis="columns") + .tolist() + ) +) + +# create new purity-adjusted DNA variant allele frequency +all_neopeptides["purity_adjusted_DNA_VAF"] = all_neopeptides["dnaVariantAlleleFrequency"] / snakemake.params.purity + +# round all floats to the specified decimals +all_neopeptides = all_neopeptides.round(decimals=5) + +# define important columns to move to the left of the table + +neofox_important_cols = snakemake.params.neofox_important_cols + +important_cols = neofox_important_cols["general"] + neofox_important_cols["I"] + neofox_important_cols["II"] + + +mhc_one = ( + all_neopeptides[ important_cols + [ col for col in all_neopeptides.columns if col not in important_cols ] ] + .sort_values(by = ["PRIME_best_rank", "MixMHC2pred_best_rank"]) + .groupby("PRIME_best_rank") + .head(n=1) +) + +mhc_one.to_csv(snakemake.output.mhc_one, sep="\t", index=False) + +# move important columns to the left of the table +mhc_two = ( + all_neopeptides[ neofox_important_cols["general"] + neofox_important_cols["II"] + neofox_important_cols["I"] + [ col for col in all_neopeptides.columns if col not in important_cols ] ] + .sort_values(by = ["MixMHC2pred_best_rank", "PRIME_best_rank"]) + .groupby("MixMHC2pred_best_rank") + .head(n=1) +) + +mhc_two.to_csv(snakemake.output.mhc_two, sep="\t", index=False) diff --git a/workflow/scripts/render-scenario.py b/workflow/scripts/render-scenario.py deleted file mode 100644 index 692b0ee5..00000000 --- a/workflow/scripts/render-scenario.py +++ /dev/null @@ -1,8 +0,0 @@ -from jinja2 import Template -import pandas as pd - -with open(snakemake.input[0]) as template, open(snakemake.output[0], "w") as out: - samples = snakemake.params.samples - out.write(Template(template.read()).render( - samples=samples[(samples["sample"] == snakemake.wildcards.pair) | (samples["sample"] == samples.loc[snakemake.wildcards.pair, "matched_normal"])] - )) diff --git a/workflow/scripts/sample_comp_plot.py b/workflow/scripts/sample_comp_plot.py index 3c7c287d..ffbd7aaa 100644 --- a/workflow/scripts/sample_comp_plot.py +++ b/workflow/scripts/sample_comp_plot.py @@ -1,3 +1,7 @@ +import sys + +sys.stderr = open(snakemake.log[0], "w") + import os import glob import pandas as pd @@ -6,8 +10,10 @@ import matplotlib.pyplot as plt from pysam import VariantFile -variant_df = pd.read_csv(snakemake.input[0], sep='\t').fillna(0.0) -variant_df = variant_df[["CHROM", "POS"] + [c for c in variant_df.columns if c.endswith("Freq")]] +variant_df = pd.read_csv(snakemake.input[0], sep="\t").fillna(0.0) +variant_df = variant_df[ + ["CHROM", "POS"] + [c for c in variant_df.columns if c.endswith("Freq")] +] ## tidy data - for facet plot tidy_df = variant_df.melt(id_vars=["CHROM", "POS"], var_name="Sample", value_name="VAF") g = sns.FacetGrid(tidy_df, col="Sample") @@ -15,25 +21,27 @@ g.savefig(snakemake.output["facet"]) plt.close() ## pairplot -sns.pairplot(variant_df.drop(["CHROM", "POS"],axis=1), diag_kind="kde") 
+sns.pairplot(variant_df.drop(["CHROM", "POS"], axis=1), diag_kind="kde") plt.savefig(snakemake.output["pairplot"]) plt.close() + def overlap_pct(x, y, **kws): n = 0 - for i in range(0,len(x)): + for i in range(0, len(x)): if (x[i] > 0) & (y[i] > 0): n += 1 - overlap=n/len([e for e in x if e > 0]) + overlap = n / len([e for e in x if e > 0]) ax = plt.gca() - ax.annotate("Shared Fraction: {:.2f}".format(overlap), xy=(.2, .4)) - ax.annotate("Shared Variants: {}".format(n), xy=(.2, .6)) + ax.annotate("Shared Fraction: {:.2f}".format(overlap), xy=(0.2, 0.4)) + ax.annotate("Shared Variants: {}".format(n), xy=(0.2, 0.6)) + def variants(x, **kws): positive = len([e for e in x if e > 0]) - ax = plt.gca() - ax.annotate("#Variants: {}".format(positive), xy=(.2, .5), -xycoords=ax.transAxes) + ax = plt.gca() + ax.annotate("#Variants: {}".format(positive), xy=(0.2, 0.5), xycoords=ax.transAxes) + g = sns.PairGrid(variant_df.drop(["CHROM", "POS"], axis=1)) @@ -42,7 +50,7 @@ def variants(x, **kws): g.savefig(snakemake.output["grid"]) plt.close() -#def neg_overlap_pct(x, y, **kws): +# def neg_overlap_pct(x, y, **kws): # n = 0 # for i in range(0,len(x)): # if (x[i] == 0) & (y[i] == 0): @@ -52,25 +60,24 @@ def variants(x, **kws): # ax.annotate("Shared Fraction: {:.2f}".format(overlap), xy=(.2, .4)) # ax.annotate("Shared missing variants: {}".format(n), xy=(.2, .6)) -#def neg_variants(x, **kws): +# def neg_variants(x, **kws): # zero = len([e for e in x if e == 0]) -# ax = plt.gca() +# ax = plt.gca() # ax.annotate("#Missing Variants: {}".format(zero), xy=(.2, .5), -#xycoords=ax.transAxes) +# xycoords=ax.transAxes) -#g = sns.PairGrid(variant_df.drop(["CHROM", "POS"], axis=1)) +# g = sns.PairGrid(variant_df.drop(["CHROM", "POS"], axis=1)) -#g.map_offdiag(neg_overlap_pct) -#g.map_diag(neg_variants) -#g.savefig("plots/Mssing_Variant_table.pdf") -#plt.close() +# g.map_offdiag(neg_overlap_pct) +# g.map_diag(neg_variants) +# g.savefig("plots/Mssing_Variant_table.pdf") +# plt.close() -for c in variant_df.drop(["CHROM", "POS"], axis=1).columns: - sns.distplot(variant_df[[c]][variant_df[c] > 0]) - plt.title(c) - plt.savefig("plots/positive_" + c +".distplot.pdf") +for c in variant_df.drop(["CHROM", "POS"], axis=1).columns: + sns.distplot(variant_df[[c]][variant_df[c] > 0]) + plt.title(c) + plt.savefig("plots/positive_" + c + ".distplot.pdf") plt.close() sns.distplot(variant_df[[c]]) - plt.savefig("plots/all_" + c +".distplot.pdf") + plt.savefig("plots/all_" + c + ".distplot.pdf") plt.close() - diff --git a/workflow/scripts/tidy_mhc_output.py b/workflow/scripts/tidy_mhc_output.py new file mode 100644 index 00000000..fa129c0a --- /dev/null +++ b/workflow/scripts/tidy_mhc_output.py @@ -0,0 +1,125 @@ +import sys + +sys.stderr = open(snakemake.log[0], "w") + +import os +import pandas as pd + +from itertools import cycle + +# assumptions of this script about netMHCpan or netMHCIIpan: +# * version 4.1 +# * output generated via `-xls` option +# * generated with the `-BA` option to include binding affinity prediction + +# The mapping of index column names used here to original names in netMHCpan files +# is (please excuse the pd.NA tuples, they make header and index handling +# easier further down the line): +INDEX_NAMES = { + (pd.NA, "Pos"): "pos_in_id_seq", + (pd.NA, "Peptide"): "pep_seq", + (pd.NA, "ID"): "id", + (pd.NA, "Ave"): "ave_el_score", + (pd.NA, "NB"): "num_binders", +} + +if snakemake.wildcards.mhc == "net_mhc_pan": + # The mapping of column names used here to original names in netMHCpan files is: + COLUMN_NAMES = { 
+        "BA-score": "ba_score",
+        "BA_Rank": "ba_rank",
+        "EL-score": "el_score",
+        "EL_Rank": "el_rank",
+        "core": "bind_core",
+    }
+elif snakemake.wildcards.mhc == "net_mhc_two_pan":
+    # The mapping of column names used here to original names in netMHCIIpan files is:
+    COLUMN_NAMES = {
+        "Score_BA": "ba_score",
+        "Rank_BA": "ba_rank",
+        "Score": "el_score",
+        "Rank": "el_rank",
+        "Core": "bind_core",
+    }
+else:
+    sys.exit(f"Wildcard `mhc` has unknown value: {snakemake.wildcards.mhc}")
+
+COLUMNS_TO_DROP = {
+    # I have not found any docs or indication in the manuscript, what the column
+    # `Target` is. It might be the column `Exp_Bind` from Figure 1B here (it's
+    # the only column that only appears in netMHCIIpan output and not netMHCpan
+    # output):
+    # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7319546/figure/F1/
+    # If so, it is only for benchmarking purposes according to the docs. And it
+    # is always NA, even in their manuscript. So we simply remove it here.
+    "Target",
+    # There doesn't seem to be an `icore` equivalent in netMHCIIpan output.
+    "icore",
+    # There doesn't seem to be an `nM` equivalent in netMHCpan output.
+    "nM",
+}
+
+
+def parse_file(mhc_in: str):
+    """
+    Parse a netMHCpan or netMHCIIpan output file from the `-xls -xlsfile <filename>`
+    directive into a tidy pandas data frame.
+    """
+    if os.path.getsize(mhc_in) == 0:
+        # Short-circuit empty files, but generate correct header.
+        return pd.DataFrame(
+            columns=list(COLUMN_NAMES.values())
+            + ["allele"]
+            + list(INDEX_NAMES.values())
+        )
+
+    # We need to fix the utterly broken headers first
+
+    # parse first header line into pandas.Series and name it
+    first_header_line = pd.read_csv(mhc_in, nrows=1, header=None, sep="\t").iloc[0, :]
+    first_header_line.name = "allele"
+
+    # parse second header line into pandas.Series and name it
+    second_header_line = pd.read_csv(
+        mhc_in, skiprows=1, nrows=1, header=None, sep="\t"
+    ).iloc[0, :]
+    second_header_line.name = "column_name"
+
+    header = pd.concat([first_header_line, second_header_line], axis="columns")
+    header = header.fillna(method="ffill")
+    header.loc[
+        header.column_name.isin([index_col for (_, index_col) in INDEX_NAMES.keys()]),
+        "allele",
+    ] = pd.NA
+
+    # It's a compound header over two rows and a compound row index in the initial
+    # three and final two columns of the table. For some reason, the final two
+    # columns are added to the index but not removed from the table, so we read
+    # the table without an index here and set the row index manually below.
+    data = pd.read_csv(mhc_in, sep="\t", skiprows=2, header=None)
+
+    data.columns = pd.MultiIndex.from_frame(header)
+
+    # remove columns only present in one of the two tools' output
+    columns_to_keep = [
+        col
+        for col in list(data.columns.get_level_values("column_name"))
+        if col not in COLUMNS_TO_DROP
+    ]
+    idx = pd.IndexSlice
+    data = data.loc[:, idx[:, columns_to_keep]]
+
+    # properly set row index columns (values to repeat in every row while doing the stack below)
+    data = data.set_index(list(INDEX_NAMES.keys()))
+    data.index.set_names(INDEX_NAMES, inplace=True)
+
+    # Turn into longer table with one HLA Allele per row instead of MultiIndex
+    # header, rename columns to something readable and turn index into columns.
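+    # Schematically (allele names purely illustrative), a single wide row
+    #   (pos_in_id_seq, pep_seq, id, ...) | (HLA-A*02:01, el_rank) | (HLA-B*07:02, el_rank) | ...
+    # becomes one long-format row per allele:
+    #   pos_in_id_seq, pep_seq, id, ..., allele="HLA-A*02:01", el_rank=...
+    #   pos_in_id_seq, pep_seq, id, ..., allele="HLA-B*07:02", el_rank=...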
+ data = data.stack(level="allele").rename(columns=COLUMN_NAMES).reset_index() + + return data + + +all_data = pd.concat((parse_file(f) for f in snakemake.input), axis="index") + +all_data.to_csv(snakemake.output.joined_mhc_out, sep="\t", index=False) diff --git a/workflow/scripts/tsv_to_xlsx.py b/workflow/scripts/tsv_to_xlsx.py index 28a24594..f99e77ec 100644 --- a/workflow/scripts/tsv_to_xlsx.py +++ b/workflow/scripts/tsv_to_xlsx.py @@ -1,7 +1,8 @@ import sys -import pandas as pd sys.stderr = open(snakemake.log[0], "w") +import pandas as pd + data = pd.read_csv(snakemake.input.tsv, sep="\t") -data.to_excel(snakemake.output.xlsx, index=False) \ No newline at end of file +data.to_excel(snakemake.output.xlsx, index=False)
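+# Note: importing pandas only after redirecting sys.stderr (as done above) means
+# that any import-time warnings are also captured in the Snakemake log file.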