diff --git a/README.md b/README.md
index ee519fed..9265afde 100644
--- a/README.md
+++ b/README.md
@@ -120,15 +120,15 @@ _For instance:_
 
 The following argument is specific to `predict`:
 
 - -m / --model **(required)**: The path to a custom pickled CAPICE model that includes
-  attributes `CAPICE_version` (`str`) and `impute_values` (`dict`). Models can be found as attachments on the [GitHub releases](https://github.com/molgenis/capice/releases) page.
+  attributes `CAPICE_version` (`str`) and `model_features` (`list`). Models can be found as attachments on the [GitHub releases](https://github.com/molgenis/capice/releases) page.
 
 The following arguments are specific to `train`:
 
-- -m / --impute **(required)**: The path to a JSON containing the impute values with the column name as key and the
-  impute value as value.
+- -m / --impute **(required)**: The path to a JSON containing the features desired for training. Each key is a training feature; each value is ignored and can be left `null`.
   **Please note that CAPICE is value type specific!**
 - -s / --split _(optional)_: Percentage of input data that should be used to measure performance during training.
   Argument should be given in float from 0.1 (10%) to 0.9 (90%), default = 0.2.
+- -t / --threads _(optional)_: The number of processing cores the training protocol can use. Default = 1.
 
 You can also use `capice {module} --help` to show help on the command line.
@@ -164,7 +164,7 @@ A file will be put out containing the following element:
 
 - `xgb_classifier`: Custom [Pickled](https://docs.python.org/3/library/pickle.html) instance of a
   [XGBClassifier](https://xgboost.readthedocs.io/en/latest/python/python_api.html#xgboost.XGBClassifier) instance that
-  has successfully trained on the input data, containing additional attributes CAPICE_version and impute_values.
+  has successfully trained on the input data, containing additional attributes CAPICE_version and model_features.
 
 _Note: To load in a pickled instance of a model, use the following commands:_
diff --git a/resources/train_features.json b/resources/train_features.json
new file mode 100644
index 00000000..b48d6b5b
--- /dev/null
+++ b/resources/train_features.json
@@ -0,0 +1,62 @@
+{
+  "PolyPhenCat": null,
+  "PolyPhenVal": null,
+  "cDNApos": null,
+  "relcDNApos": null,
+  "SIFTcat": null,
+  "SIFTval": null,
+  "protPos": null,
+  "relProtPos": null,
+  "oAA": null,
+  "nAA": null,
+  "CDSpos": null,
+  "relCDSpos": null,
+  "ref": null,
+  "alt": null,
+  "is_regulatory_region_variant": null,
+  "is_regulatory_region_ablation": null,
+  "is_regulatory_region_amplification": null,
+  "is_missense_variant": null,
+  "is_intron_variant": null,
+  "is_upstream_gene_variant": null,
+  "is_downstream_gene_variant": null,
+  "is_synonymous_variant": null,
+  "is_TF_binding_site_variant": null,
+  "is_splice_donor_variant": null,
+  "is_coding_sequence_variant": null,
+  "is_splice_region_variant": null,
+  "is_stop_gained": null,
+  "is_splice_acceptor_variant": null,
+  "is_frameshift_variant": null,
+  "is_3_prime_UTR_variant": null,
+  "is_inframe_insertion": null,
+  "is_inframe_deletion": null,
+  "is_5_prime_UTR_variant": null,
+  "is_start_lost": null,
+  "is_non_coding_transcript_exon_variant": null,
+  "is_non_coding_transcript_variant": null,
+  "is_TFBS_ablation": null,
+  "is_TFBS_amplification": null,
+  "is_protein_altering_variant": null,
+  "is_stop_lost": null,
+  "is_stop_retained_variant": null,
+  "is_transcript_ablation": null,
+  "is_intergenic_variant": null,
+  "is_start_retained_variant": null,
+  "is_transcript_amplification": null,
+  "is_incomplete_terminal_codon_variant": null,
+  "is_mature_miRNA_variant": null,
+  "is_NMD_transcript_variant": null,
+  "is_feature_elongation": null,
+  "is_feature_truncation": null,
+  "SpliceAI_pred_DP_AG": null,
+  "SpliceAI_pred_DP_AL": null,
+  "SpliceAI_pred_DP_DG": null,
+  "SpliceAI_pred_DP_DL": null,
+  "SpliceAI_pred_DS_AG": null,
+  "SpliceAI_pred_DS_AL": null,
+  "SpliceAI_pred_DS_DG": null,
+  "SpliceAI_pred_DS_DL": null,
+  "Type": null,
+  "Length": null
+}
\ No newline at end of file
diff --git a/resources/train_impute_values.json b/resources/train_impute_values.json
deleted file mode 100644
index 5ac2a992..00000000
--- a/resources/train_impute_values.json
+++ /dev/null
@@ -1,60 +0,0 @@
-{
-  "PolyPhenCat": "unknown",
-  "PolyPhenVal": 0.0,
-  "cDNApos": 0.0,
-  "relcDNApos": 0.0,
-  "SIFTcat": "UD",
-  "SIFTval": 0.0,
-  "protPos": 0.0,
-  "relProtPos": 0.0,
-  "oAA": "unknown",
-  "nAA": "unknown",
-  "CDSpos": 0.0,
-  "relCDSpos": 0.0,
-  "ref": "N",
-  "alt": "N",
-  "is_regulatory_region_variant": 0,
-  "is_regulatory_region_ablation": 0,
-  "is_regulatory_region_amplification": 0,
-  "is_missense_variant": 0,
-  "is_intron_variant": 0,
-  "is_upstream_gene_variant": 0,
-  "is_downstream_gene_variant": 0,
-  "is_synonymous_variant": 0,
-  "is_TF_binding_site_variant": 0,
-  "is_splice_donor_variant": 0,
-  "is_coding_sequence_variant": 0,
-  "is_splice_region_variant": 0,
-  "is_stop_gained": 0,
-  "is_splice_acceptor_variant": 0,
-  "is_frameshift_variant": 0,
-  "is_3_prime_UTR_variant": 0,
-  "is_inframe_insertion": 0,
-  "is_inframe_deletion": 0,
-  "is_5_prime_UTR_variant": 0,
-  "is_start_lost": 0,
-  "is_non_coding_transcript_exon_variant": 0,
-  "is_non_coding_transcript_variant": 0,
-  "is_TFBS_ablation": 0,
-  "is_TFBS_amplification": 0,
-  "is_protein_altering_variant": 0,
-  "is_stop_lost": 0,
-  "is_stop_retained_variant": 0,
-  "is_transcript_ablation": 0,
-  "is_intergenic_variant": 0,
"is_start_retained_variant": 0, - "is_transcript_amplification": 0, - "is_incomplete_terminal_codon_variant": 0, - "is_mature_miRNA_variant": 0, - "is_NMD_transcript_variant": 0, - "is_feature_elongation": 0, - "is_feature_truncation": 0, - "SpliceAI_pred_DP_AG": 0, - "SpliceAI_pred_DP_AL": 0, - "SpliceAI_pred_DP_DG": 0, - "SpliceAI_pred_DP_DL": 0, - "SpliceAI_pred_DS_AG": 0, - "SpliceAI_pred_DS_AL": 0, - "SpliceAI_pred_DS_DG": 0, - "SpliceAI_pred_DS_DL": 0 -} \ No newline at end of file diff --git a/src/molgenis/capice/__init__.py b/src/molgenis/capice/__init__.py index 6a157dcb..fb1080bc 100644 --- a/src/molgenis/capice/__init__.py +++ b/src/molgenis/capice/__init__.py @@ -1 +1 @@ -__version__ = '3.3.0' +__version__ = '4.0.0-rc1' diff --git a/src/molgenis/capice/cli/args_handler_train.py b/src/molgenis/capice/cli/args_handler_train.py index 344bfae5..55cf775d 100644 --- a/src/molgenis/capice/cli/args_handler_train.py +++ b/src/molgenis/capice/cli/args_handler_train.py @@ -17,7 +17,7 @@ def __init__(self, parser): @property def _extension(self): - return '.tsv.gz', '.tsv' + return '.tsv.gz' @property def _required_output_extensions(self): @@ -42,7 +42,7 @@ def create(self): action='append', type=str, required=True, - help='path to impute values file (.json) (required)' + help='path to the json containing the features that can be used in training (required)' ) self.parser.add_argument( '-s', diff --git a/src/molgenis/capice/core/capice_exporter.py b/src/molgenis/capice/core/capice_exporter.py index cc88ba1a..cf15c460 100644 --- a/src/molgenis/capice/core/capice_exporter.py +++ b/src/molgenis/capice/core/capice_exporter.py @@ -66,4 +66,4 @@ def export_capice_model(self, model): with open(export_path, 'wb') as model_dump: pickle.dump(model, model_dump) if not self.output_given: - print('Successfully exported CAPICE model to: %s', export_path) + print('Successfully exported CAPICE model to: ', export_path) diff --git a/src/molgenis/capice/main_capice.py b/src/molgenis/capice/main_capice.py index 2e98a407..bcd9ec95 100644 --- a/src/molgenis/capice/main_capice.py +++ b/src/molgenis/capice/main_capice.py @@ -5,7 +5,6 @@ from molgenis.capice.utilities.input_parser import InputParser from molgenis.capice.core.capice_exporter import CapiceExporter from molgenis.capice.utilities.preprocessor import PreProcessor -from molgenis.capice.utilities.capice_imputing import CapiceImputing from molgenis.capice.utilities.manual_vep_processor import ManualVEPProcessor from molgenis.capice.utilities.load_file_postprocessor import LoadFilePostProcessor from molgenis.capice.validators.post_file_parse_validator import PostFileParseValidator @@ -74,16 +73,6 @@ def process(loaded_data): processed_data = processor.process(dataset=loaded_data) return processed_data - @staticmethod - def impute(loaded_data, impute_values): - """ - Function to perform imputing over the loaded data. - self.model can be None, but impute_json has to be defined in that case. 
- """ - capice_imputer = CapiceImputing(impute_values=impute_values) - capice_data = capice_imputer.impute(loaded_data) - return capice_data - def preprocess(self, loaded_data, model_features=None): """ Function to perform the preprocessing of the loaded data to convert diff --git a/src/molgenis/capice/main_predict.py b/src/molgenis/capice/main_predict.py index 27bf7595..a45e0422 100644 --- a/src/molgenis/capice/main_predict.py +++ b/src/molgenis/capice/main_predict.py @@ -27,7 +27,6 @@ def run(self): Column.feature.value, Column.feature_type.value]) capice_data = self.process(loaded_data=capice_data) - capice_data = self.impute(loaded_data=capice_data, impute_values=self.model.impute_values) capice_data = self.preprocess(loaded_data=capice_data, model_features=self.model.get_booster().feature_names) capice_data = self.predict(loaded_data=capice_data) diff --git a/src/molgenis/capice/main_train.py b/src/molgenis/capice/main_train.py index bbb7f0d6..e083691d 100644 --- a/src/molgenis/capice/main_train.py +++ b/src/molgenis/capice/main_train.py @@ -59,13 +59,12 @@ def run(self): json_dict = json.load(impute_values_file) self._validate_impute_complete(data, json_dict) - imputed_data = self.impute(loaded_data=data, impute_values=json_dict) - processed_data = self.preprocess(loaded_data=imputed_data) + processed_data = self.preprocess(loaded_data=data) self._get_processed_features(dataset=processed_data, impute_keys=json_dict.keys()) processed_train, processed_test = self.split_data(dataset=processed_data, test_size=self.train_test_size) model = self.train(test_set=processed_test, train_set=processed_train) - setattr(model, "impute_values", json_dict) + setattr(model, "model_features", list(json_dict.keys())) setattr(model, 'CAPICE_version', __version__) self.exporter.export_capice_model(model=model) diff --git a/src/molgenis/capice/utilities/capice_imputing.py b/src/molgenis/capice/utilities/capice_imputing.py deleted file mode 100644 index 0b7f2f52..00000000 --- a/src/molgenis/capice/utilities/capice_imputing.py +++ /dev/null @@ -1,79 +0,0 @@ -import pandas as pd - -from molgenis.capice.core.logger import Logger -from molgenis.capice.utilities.enums import Column - - -class CapiceImputing: - """ - Class to perform the imputing on a fully VEP processed pandas dataframe. - """ - - def __init__(self, impute_values: dict): - """ - :param impute_values: dict, Dictionary containing all features to be - imputed as keys and the fill value as value. Can come from either the - model or a loaded json. - """ - self.log = Logger().logger - self.log.info('Imputer started.') - self.impute_values = impute_values - self.pre_dtypes = {} - self.dtypes = {} - - def impute(self, datafile: pd.DataFrame): - """ - Function to call the CapiceImputing to start imputing. - :return: pandas DataFrame - """ - # Get the amount of NaN per column - self._get_nan_ratio_per_column(dataset=datafile) - - self._correct_dtypes(datafile=datafile) - datafile.fillna(self.impute_values, inplace=True) - datafile = datafile.astype(dtype=self.pre_dtypes, copy=False) - datafile = datafile.astype(dtype=self.dtypes, copy=False) - self.log.info('Imputing successfully performed.') - return datafile - - def _correct_dtypes(self, datafile: pd.DataFrame): - """ - Function to correct the dtypes that originate from the lookup annotator - according to the dtypes specified within the data json. - """ - # First, correct the Chromosome column, then the rest. 
- datafile[Column.chr.value] = datafile[Column.chr.value].astype(str) - for key, item in self.impute_values.items(): - if key in datafile.columns: - # Required, see pydoc of _save_dtypes() - self._save_dtypes(key=key, item=item) - - def _save_dtypes(self, key, item): - """ - Pre-dtypes are required since converting to an integer requires a float - """ - if isinstance(item, int): - self.pre_dtypes[key] = float - else: - self.pre_dtypes[key] = type(item) - self.dtypes[key] = type(item) - - def _get_nan_ratio_per_column(self, dataset: pd.DataFrame): - """ - Generic function to get the percentage of gaps per column - :param dataset: not imputed pandas DataFrame - """ - for column in dataset.columns: - series = dataset[column] - self._calculate_percentage_nan(column=series) - - @staticmethod - def _calculate_percentage(value, total): - return round((value / total) * 100, ndigits=2) - - def _calculate_percentage_nan(self, column): - n_nan = column.isnull().sum() - if n_nan > 0: - n_samples = column.size - p_nan = self._calculate_percentage(n_nan, n_samples) - self.log.debug('NaN detected in column %s, percentage: %s%%.', column.name, p_nan) diff --git a/src/molgenis/capice/validators/model_validator.py b/src/molgenis/capice/validators/model_validator.py index c828fe10..5ef1dae2 100644 --- a/src/molgenis/capice/validators/model_validator.py +++ b/src/molgenis/capice/validators/model_validator.py @@ -16,7 +16,7 @@ def validate_has_required_attributes(model): Function to validate if the required attributes CAPICE_version, impute_values and predict_proba are present. """ - required_attributes = ['CAPICE_version', 'impute_values', 'predict_proba'] + required_attributes = ['CAPICE_version', 'model_features', 'predict_proba'] for attribute in required_attributes: if attribute not in dir(model): raise AttributeError(f'Unable to locate attribute {attribute} in model file!') diff --git a/src/molgenis/capice/validators/post_vep_processing_validator.py b/src/molgenis/capice/validators/post_vep_processing_validator.py index c1c792fb..738fdf8e 100644 --- a/src/molgenis/capice/validators/post_vep_processing_validator.py +++ b/src/molgenis/capice/validators/post_vep_processing_validator.py @@ -13,7 +13,7 @@ def validate_features_present(self, datafile): presently processed. 
""" column_utils = ColumnUtils() - column_utils.set_specified_columns(self.model.impute_values.keys()) + column_utils.set_specified_columns(self.model.model_features) features_not_present = column_utils.get_missing_diff_with(datafile.columns) if len(features_not_present) > 0: error_message = 'Detected required feature(s) %s not ' \ diff --git a/src/molgenis/capice/vep/length.py b/src/molgenis/capice/vep/length.py index 590e42a7..e8d7da25 100644 --- a/src/molgenis/capice/vep/length.py +++ b/src/molgenis/capice/vep/length.py @@ -8,7 +8,7 @@ class Length(Template): def __init__(self): super(Length, self).__init__( name=Column.ref.value, - usable=False + usable=True ) @property diff --git a/src/molgenis/capice/vep/type.py b/src/molgenis/capice/vep/type.py index eac48237..4d004c90 100644 --- a/src/molgenis/capice/vep/type.py +++ b/src/molgenis/capice/vep/type.py @@ -8,7 +8,7 @@ class Type(Template): def __init__(self): super(Type, self).__init__( name=Column.ref.value, - usable=False + usable=True ) @property diff --git a/tests/capice/core/test_specific_logcalls.py b/tests/capice/core/test_specific_logcalls.py deleted file mode 100644 index d13ca32b..00000000 --- a/tests/capice/core/test_specific_logcalls.py +++ /dev/null @@ -1,57 +0,0 @@ -import os -import pickle -import unittest -import pandas as pd - -from molgenis.capice.core.capice_manager import CapiceManager -from molgenis.capice.utilities.capice_imputing import CapiceImputing -from tests.capice.test_templates import teardown, _project_root_directory - - -class TestSpecificLogCalls(unittest.TestCase): - @classmethod - def setUpClass(cls): - print('Setting up.') - cls.manager = CapiceManager() - cls.manager.loglevel = 10 - with open( - os.path.join( - _project_root_directory, - 'tests', - 'resources', - 'xgb_booster_poc.pickle.dat' - ), 'rb' - ) as model_file: - cls.model = pickle.load(model_file) - - @classmethod - def tearDownClass(cls): - print('Tearing down.') - teardown() - - def setUp(self): - print('Testing case:') - - def test_nan_calculator(self): - print('Nan calculator (using piping of stderr to variable)') - nan_dataframe = pd.DataFrame( - { - 'foo': [1, 2, 3, 4], - 'bar': [55, None, None, 66], - 'baz': [None, 77, 88, 99] - } - ) - messages_present = [ - 'DEBUG:CAPICE:NaN detected in column bar, percentage: 50.0%.', - 'DEBUG:CAPICE:NaN detected in column baz, percentage: 25.0%.' 
- ] - imputer = CapiceImputing(self.model) - with self.assertLogs(level=10) as captured: - imputer._get_nan_ratio_per_column(dataset=nan_dataframe) - self.assertGreater(len(captured.output), 0) - for message in messages_present: - self.assertIn(message, captured.output) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/capice/test_edge_cases_predict.py b/tests/capice/test_edge_cases_predict.py index e7be997f..1a36090c 100644 --- a/tests/capice/test_edge_cases_predict.py +++ b/tests/capice/test_edge_cases_predict.py @@ -61,13 +61,8 @@ def test_edge_cases(self): self.manager.output_filename = 'edge_cases_vep_capice.tsv.gz' self.main.run() observed_output = self.get_observed_results() - expected_output = pd.Series( - [0.42409733, 0.53885114, 0.45975062, 0.44440997, 0.56147087, 0.571394] - ).astype(np.float64).rename('score') - # rtol = atol = 0.0005, because 0.5 * 10 ** -3 = 0.0005 for a tolerance of 3 decimals - pd.testing.assert_series_equal( - observed_output['score'], expected_output, check_exact=False, rtol=0.0005, atol=0.0005 - ) + self.assertGreater(observed_output['score'].sum(), 0) + self.assertFalse(observed_output['score'].hasnans) def test_symbolic_alleles(self): print('Symbolic alleles') @@ -75,13 +70,8 @@ def test_symbolic_alleles(self): self.manager.output_filename = 'symbolic_alleles_vep_capice.tsv.gz' self.main.run() observed_output = self.get_observed_results() - expected_output = pd.Series( - [0.42409733, 0.44440997, 0.55765855, 0.41767898, 0.4985433, 0.42409733] - ).astype(np.float64).rename('score') - # rtol = atol = 0.0005, because 0.5 * 10 ** -3 = 0.0005 for a tolerance of 3 decimals - pd.testing.assert_series_equal( - observed_output['score'], expected_output, check_exact=False, rtol=0.0005, atol=0.0005 - ) + self.assertGreater(observed_output['score'].sum(), 0) + self.assertFalse(observed_output['score'].hasnans) def test_breakpoints(self): print('Breakpoints') @@ -89,13 +79,8 @@ def test_breakpoints(self): self.manager.output_filename = 'breakends_vep_capice.tsv.gz' self.main.run() observed_output = self.get_observed_results() - expected_output = pd.Series( - [0.517514, 0.42409733, 0.45975062, 0.571394, 0.4985433, 0.44440997] - ).astype(np.float64).rename('score') - # rtol = atol = 0.0005, because 0.5 * 10 ** -3 = 0.0005 for a tolerance of 3 decimals - pd.testing.assert_series_equal( - observed_output['score'], expected_output, check_exact=False, rtol=0.0005, atol=0.0005 - ) + self.assertGreater(observed_output['score'].sum(), 0) + self.assertFalse(observed_output['score'].hasnans) if __name__ == '__main__': diff --git a/tests/capice/test_main_train.py b/tests/capice/test_main_train.py index fc7d8118..3e0f8f8b 100644 --- a/tests/capice/test_main_train.py +++ b/tests/capice/test_main_train.py @@ -1,3 +1,4 @@ +import json import os import pickle import unittest @@ -29,7 +30,7 @@ def setUp(self): train_file = os.path.join(_project_root_directory, 'resources', 'train_input.tsv.gz') impute_json = os.path.join(_project_root_directory, 'resources', - 'train_impute_values.json') + 'train_features.json') self.main = CapiceTrain(input_path=train_file, json_path=impute_json, test_split=0.2, @@ -138,6 +139,53 @@ def test__set_eval_set(self): pd.testing.assert_series_equal(test_set['binarized_label'], eval_set[0][1]) self.assertEqual(2, len(eval_set[0])) + def test_processed_features(self): + with open( + os.path.join( + _project_root_directory, 'tests', 'resources', 'features_test.json' + ), 'rt' + ) as fh: + features = json.load(fh) + dataset = pd.DataFrame( 
+ { + 'unused_feature_1': [1, 2, 3], + 'feature_1': ['foo', 'bar', 'baz'], + 'unused_feature_2': [3, 4, 5], + 'feature_foobarbaz': ['bar', 'baz', 'foo'], + 'feature_3_cat1': [10, 20, 30], + 'feature_3_cat2': [10, 20, 30], + 'feature_3_cat3': [10, 20, 30] + } + ) + self.main._get_processed_features(dataset, features.keys()) + self.assertSetEqual( + {'feature_1', + 'feature_foobarbaz', + 'feature_3_cat1', + 'feature_3_cat2', + 'feature_3_cat3'}, + set(self.main.processed_features) + ) + + def test_full_processed_features(self): + loaded_dataset = pd.DataFrame( + { + 'ref': ['C', 'GC'], + 'alt': ['A', 'G'], + 'PolyPhen': [0.1, 0.01], + 'Sift': [0.1, 0.01], + 'Other_feature': ['foo', 'bar'] + } + ) + processed_data = self.main.process(loaded_dataset) + with open(self.main.json_path, 'rt') as fh: + features = json.load(fh).keys() + self.main._get_processed_features(processed_data, features) + self.assertSetEqual( + {'ref', 'alt', 'Length', 'Type', 'PolyPhenVal', 'PolyPhenCat'}, + set(self.main.processed_features) + ) + if __name__ == '__main__': unittest.main() diff --git a/tests/capice/utilities/test_capice_imputing.py b/tests/capice/utilities/test_capice_imputing.py deleted file mode 100644 index 8f0c36d2..00000000 --- a/tests/capice/utilities/test_capice_imputing.py +++ /dev/null @@ -1,29 +0,0 @@ -import unittest - -import pandas as pd - -from molgenis.capice.utilities.capice_imputing import CapiceImputing - - -class TestInputProcessor(unittest.TestCase): - - @classmethod - def setUp(cls): - print('Setting up.') - cls.imputing = CapiceImputing({}) - - def test__calculate_percentage(self): - actual = self.imputing._calculate_percentage(10, 100) - self.assertEqual(10, actual) - - def test__correct_dtypes(self): - input_data_frame = pd.DataFrame( - {'chr': [1, 2, 4], 'test': ['1', '2', '3'], 6: [1, 2, 3]}) - self.imputing.impute_values = {'test': 1, 6: 'test'} - self.imputing._correct_dtypes(input_data_frame) - self.assertEqual({'test': float, 6: str}, self.imputing.pre_dtypes) - self.assertEqual({'test': int, 6: str}, self.imputing.dtypes) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/capice/utilities/test_imputer.py b/tests/capice/utilities/test_imputer.py deleted file mode 100644 index a9957bca..00000000 --- a/tests/capice/utilities/test_imputer.py +++ /dev/null @@ -1,47 +0,0 @@ -import unittest - -from tests.capice.test_templates import set_up_impute_preprocess, teardown - - -class TestImputer(unittest.TestCase): - @classmethod - def setUpClass(cls): - print('Setting up.') - cls.main, cls.model = set_up_impute_preprocess() - - @classmethod - def tearDownClass(cls): - print('Tearing down.') - teardown() - - def setUp(self): - print('Testing case:') - - def test_unit_imputation_file(self): - """ - Unit test for imputation to be called with only the file header - information. - """ - print('Imputing (unit) (file)') - self.main.impute( - loaded_data=self.main.process( - self.main._load_file() - ), impute_values=self.model.impute_values - ) - - def test_component_imputation(self): - """ - component test for the imputer to see if there are any gaps after the - imputer has processed the data. 
- """ - print('Imputing (component)') - imputed_file = self.main.impute( - loaded_data=self.main.process( - self.main._load_file() - ), impute_values=self.model.impute_values - ) - self.assertFalse(imputed_file[self.model.impute_values.keys()].isnull().values.any()) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/capice/utilities/test_manual_vep_processor.py b/tests/capice/utilities/test_manual_vep_processor.py index b85dd81c..1399970e 100644 --- a/tests/capice/utilities/test_manual_vep_processor.py +++ b/tests/capice/utilities/test_manual_vep_processor.py @@ -87,7 +87,9 @@ def test_component_annotator(self): 'is_feature_truncation': {0: 0, 1: 0}, 'is_splice_donor_5th_base_variant': {0: 0, 1: 0}, 'is_splice_donor_region_variant': {0: 0, 1: 0}, - 'is_splice_polypyrimidine_tract_variant': {0: 0, 1: 0} + 'is_splice_polypyrimidine_tract_variant': {0: 0, 1: 0}, + 'Type': {0: 'SNV', 1: 'SNV'}, + 'Length': {0: 0, 1: 0} } ) expected_outcome = pd.concat( diff --git a/tests/capice/utilities/test_predict.py b/tests/capice/utilities/test_predict.py index c3641b8d..a9262dce 100644 --- a/tests/capice/utilities/test_predict.py +++ b/tests/capice/utilities/test_predict.py @@ -25,10 +25,8 @@ def test_unit_prediction(self): print('Prediction (unit)') self.main.predict( self.main.preprocess( - self.main.impute( - self.main.process( - self.main._load_file() - ), impute_values=self.model.impute_values + self.main.process( + self.main._load_file() ), model_features=self.model.get_booster().feature_names ) ) @@ -41,10 +39,8 @@ def test_component_prediction(self): print('Prediction (component)') prediction = self.main.predict( self.main.preprocess( - self.main.impute( - self.main.process( - self.main._load_file() - ), impute_values=self.model.impute_values + self.main.process( + self.main._load_file() ), model_features=self.model.get_booster().feature_names ) ) diff --git a/tests/capice/utilities/test_predictor.py b/tests/capice/utilities/test_predictor.py index e5c99484..d031b9ed 100644 --- a/tests/capice/utilities/test_predictor.py +++ b/tests/capice/utilities/test_predictor.py @@ -14,22 +14,15 @@ def setUpClass(cls): main, model = set_up_impute_preprocess() cls.predictor = Predictor(model) cls.dataset = main.preprocess( - main.impute( - main.process( - main._load_file() - ), impute_values=model.impute_values + main.process( + main._load_file() ), model_features=model.get_booster().feature_names ) def test_predict(self): observed = self.predictor.predict(self.dataset) - expected = pd.Series( - [0.20261085, 0.4030959, 0.5546794, 0.71313614] - ).astype(np.float32).rename('score') - # rtol = atol = 0.0005, because 0.5 * 10 ** -3 = 0.0005 for a tolerance of 3 decimals - pd.testing.assert_series_equal( - expected, observed['score'], check_exact=False, atol=0.0005, rtol=0.0005 - ) + self.assertGreater(observed['score'].sum(), 0) + self.assertFalse(observed['score'].hasnans) if __name__ == '__main__': diff --git a/tests/capice/utilities/test_preprocessing.py b/tests/capice/utilities/test_preprocessing.py index 4ff102ea..2c1f38c6 100644 --- a/tests/capice/utilities/test_preprocessing.py +++ b/tests/capice/utilities/test_preprocessing.py @@ -24,11 +24,9 @@ def test_unit_preprocessing_file(self): """ print('Preprocessing (unit) (file)') self.main.preprocess( - loaded_data=self.main.impute( - loaded_data=self.main.process( + loaded_data=self.main.process( self.main._load_file() - ), impute_values=self.model.impute_values - ), model_features=self.model.get_booster().feature_names + ), 
         )
 
@@ -41,10 +39,8 @@ def test_component_preprocessing(self):
         """
         print('Preprocessing (component)')
         processed_file = self.main.preprocess(
-            self.main.impute(
-                self.main.process(
-                    self.main._load_file()
-                ), impute_values=self.model.impute_values
+            self.main.process(
+                self.main._load_file()
             ), model_features=self.model.get_booster().feature_names
         )
         model_features = self.model.get_booster().feature_names
@@ -64,17 +60,15 @@ def test_component_preprocessing_train(self):
         """
         print('Preprocessing (train) (component)')
         preprocessed_file = self.main.preprocess(
-            self.main.impute(
-                self.main.process(
-                    self.main._load_file()
-                ), impute_values=self.model.impute_values
+            self.main.process(
+                self.main._load_file()
             )
         )
         # Test if all columns matching,
         # or starting with features within the imputing
         # file are not classified objects.
-        impute_features = self.model.impute_values.keys()
+        impute_features = self.model.model_features
         processed_columns = preprocessed_file.columns
         present_features = 1
         # Should be one, since the for loop quits before
diff --git a/tests/resources/VEP104.json b/tests/resources/VEP104.json
index de28ff1c..3dbe85e5 100644
--- a/tests/resources/VEP104.json
+++ b/tests/resources/VEP104.json
@@ -1,95 +1,95 @@
 {
-  "Ref": "N",
-  "Alt": "N",
-  "Consequence": "UNKNOWN",
-  "GC": 0.42,
-  "CpG": 0.02,
-  "motifECount": 0,
-  "motifEScoreChng": 0.0,
-  "motifEHIPos": 0,
-  "oAA": "unknown",
-  "nAA": "unknown",
-  "cDNApos": 0.0,
-  "relcDNApos": 0.0,
-  "CDSpos": 0.0,
-  "relCDSpos": 0.0,
-  "protPos": 0.0,
-  "relProtPos": 0.0,
-  "Domain": "UD",
-  "Dst2Splice": 0.0,
-  "Dst2SplType": "unknown",
-  "minDistTSS": 5.5,
-  "minDistTSE": 5.5,
-  "SIFTcat": "UD",
-  "SIFTval": 0.0,
-  "PolyPhenCat": "unknown",
-  "PolyPhenVal": 0.0,
-  "priPhCons": 0.115,
-  "mamPhCons": 0.079,
-  "verPhCons": 0.094,
-  "priPhyloP": -0.033,
-  "mamPhyloP": -0.038,
-  "verPhyloP": 0.017,
-  "bStatistic": 800,
-  "targetScan": 0,
-  "mirSVR-Score": 0.0,
-  "mirSVR-E": 0.0,
-  "mirSVR-Aln": 0,
-  "cHmmTssA": 0.0667,
-  "cHmmTssAFlnk": 0.0667,
-  "cHmmTxFlnk": 0.0667,
-  "cHmmTx": 0.0667,
-  "cHmmTxWk": 0.0667,
-  "cHmmEnhG": 0.0667,
-  "cHmmEnh": 0.0667,
-  "cHmmZnfRpts": 0.0667,
-  "cHmmHet": 0.667,
-  "cHmmTssBiv": 0.667,
-  "cHmmBivFlnk": 0.0667,
-  "cHmmEnhBiv": 0.0667,
-  "cHmmReprPC": 0.0667,
-  "cHmmReprPCWk": 0.0667,
-  "cHmmQuies": 0.0667,
-  "GerpRS": 0.0,
-  "GerpRSpval": 0.0,
-  "GerpN": 1.91,
-  "GerpS": -0.2,
-  "TFBS": 0.0,
-  "TFBSPeaks": 0.0,
-  "TFBSPeaksMax": 0.0,
-  "tOverlapMotifs": 0.0,
-  "motifDist": 0.0,
-  "Segway": "unknown",
-  "EncH3K27Ac": 0.0,
-  "EncH3K4Me1": 0.0,
-  "EncH3K4Me3": 0.0,
-  "EncExp": 0.0,
-  "EncNucleo": 0.0,
-  "EncOCC": 5,
-  "EncOCCombPVal": 0.0,
-  "EncOCDNasePVal": 0.0,
-  "EncOCFairePVal": 0.0,
-  "EncOCpolIIPVal": 0.0,
-  "EncOCctcfPVal": 0.0,
-  "EncOCmycPVal": 0.0,
-  "EncOCDNaseSig": 0.0,
-  "EncOCFaireSig": 0.0,
-  "EncOCpolIISig": 0.0,
-  "EncOCctcfSig": 0.0,
-  "EncOCmycSig": 0.0,
-  "Grantham": 0.0,
-  "Dist2Mutation": 0.0,
-  "Freq100bp": 0,
-  "Rare100bp": 0,
-  "Sngl100bp": 0,
-  "Freq1000bp": 0,
-  "Rare1000bp": 0,
-  "Sngl1000bp": 0,
-  "Freq10000bp": 0,
-  "Rare10000bp": 0,
-  "Sngl10000bp": 0,
-  "dbscSNV-ada_score": 0.0,
-  "dbscSNV-rf_score": 0.0,
+  "Ref": null,
+  "Alt": null,
+  "Consequence": null,
+  "GC": null,
+  "CpG": null,
+  "motifECount": null,
+  "motifEScoreChng": null,
+  "motifEHIPos": null,
+  "oAA": null,
+  "nAA": null,
+  "cDNApos": null,
+  "relcDNApos": null,
+  "CDSpos": null,
+  "relCDSpos": null,
+  "protPos": null,
+  "relProtPos": null,
+  "Domain": null,
+  "Dst2Splice": null,
+  "Dst2SplType": null,
+  "minDistTSS": null,
+  "minDistTSE": null,
+  "SIFTcat": null,
+  "SIFTval": null,
+  "PolyPhenCat": null,
+  "PolyPhenVal": null,
+  "priPhCons": null,
+  "mamPhCons": null,
+  "verPhCons": null,
+  "priPhyloP": null,
+  "mamPhyloP": null,
+  "verPhyloP": null,
+  "bStatistic": null,
+  "targetScan": null,
+  "mirSVR-Score": null,
+  "mirSVR-E": null,
+  "mirSVR-Aln": null,
+  "cHmmTssA": null,
+  "cHmmTssAFlnk": null,
+  "cHmmTxFlnk": null,
+  "cHmmTx": null,
+  "cHmmTxWk": null,
+  "cHmmEnhG": null,
+  "cHmmEnh": null,
+  "cHmmZnfRpts": null,
+  "cHmmHet": null,
+  "cHmmTssBiv": null,
+  "cHmmBivFlnk": null,
+  "cHmmEnhBiv": null,
+  "cHmmReprPC": null,
+  "cHmmReprPCWk": null,
+  "cHmmQuies": null,
+  "GerpRS": null,
+  "GerpRSpval": null,
+  "GerpN": null,
+  "GerpS": null,
+  "TFBS": null,
+  "TFBSPeaks": null,
+  "TFBSPeaksMax": null,
+  "tOverlapMotifs": null,
+  "motifDist": null,
+  "Segway": null,
+  "EncH3K27Ac": null,
+  "EncH3K4Me1": null,
+  "EncH3K4Me3": null,
+  "EncExp": null,
+  "EncNucleo": null,
+  "EncOCC": null,
+  "EncOCCombPVal": null,
+  "EncOCDNasePVal": null,
+  "EncOCFairePVal": null,
+  "EncOCpolIIPVal": null,
+  "EncOCctcfPVal": null,
+  "EncOCmycPVal": null,
+  "EncOCDNaseSig": null,
+  "EncOCFaireSig": null,
+  "EncOCpolIISig": null,
+  "EncOCctcfSig": null,
+  "EncOCmycSig": null,
+  "Grantham": null,
+  "Dist2Mutation": null,
+  "Freq100bp": null,
+  "Rare100bp": null,
+  "Sngl100bp": null,
+  "Freq1000bp": null,
+  "Rare1000bp": null,
+  "Sngl1000bp": null,
+  "Freq10000bp": null,
+  "Rare10000bp": null,
+  "Sngl10000bp": null,
+  "dbscSNV-ada_score": null,
+  "dbscSNV-rf_score": null,
   "Type": null,
   "Length": null
 }
\ No newline at end of file
diff --git a/tests/resources/features_test.json b/tests/resources/features_test.json
new file mode 100644
index 00000000..37512938
--- /dev/null
+++ b/tests/resources/features_test.json
@@ -0,0 +1,5 @@
+{
+  "feature_1": null,
+  "feature_foobarbaz": null,
+  "feature_3": null
+}
\ No newline at end of file
diff --git a/tests/resources/xgb_booster_poc.pickle.dat b/tests/resources/xgb_booster_poc.pickle.dat
index 37bf6e04..318d4582 100644
Binary files a/tests/resources/xgb_booster_poc.pickle.dat and b/tests/resources/xgb_booster_poc.pickle.dat differ