diff --git a/stages-tlr/text-extraction/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/textextraction/TextExtractionTest.java b/stages-tlr/text-extraction/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/textextraction/TextExtractionTest.java
index c91ebde..764e953 100644
--- a/stages-tlr/text-extraction/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/textextraction/TextExtractionTest.java
+++ b/stages-tlr/text-extraction/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/textextraction/TextExtractionTest.java
@@ -24,6 +24,7 @@
 import edu.kit.kastel.mcse.ardoco.core.execution.runner.AnonymousRunner;
 import edu.kit.kastel.mcse.ardoco.core.pipeline.AbstractPipelineStep;
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
+import edu.kit.kastel.mcse.ardoco.core.tests.eval.HistoricProject;
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.Project;
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.helper.StageTest;
 import edu.kit.kastel.mcse.ardoco.tlr.text.providers.TextPreprocessingAgent;
@@ -40,9 +41,7 @@ protected TextExtractionResult runComparable(TextProject project, SortedMap<Stri
         var wordAbbreviations = DataRepositoryHelper.getTextState(dataRepository).getWordAbbreviations();
         var phraseAbbreviations = DataRepositoryHelper.getTextState(dataRepository).getPhraseAbbreviations();
 
-        var result = new TextExtractionResult(wordAbbreviations, phraseAbbreviations);
-
-        return result;
+        return new TextExtractionResult(wordAbbreviations, phraseAbbreviations);
     }
 
     @Override
@@ -87,14 +86,6 @@ void evaluateNonHistoricalDiagramRecognition(TextProject project) {
         runComparable(project);
     }
 
-    @DisplayName("Evaluate Text Extraction (Historical)")
-    @ParameterizedTest(name = "{0}")
-    @EnumSource(value = TextProject.class, mode = EnumSource.Mode.MATCH_ALL, names = "^.*HISTORICAL$")
-    @Order(2)
-    void evaluateHistoricalDiagramRecognition(TextProject project) {
-        runComparable(project);
-    }
-
     public enum TextProject implements GoldStandardProject {
         MEDIASTORE(//
                 Project.MEDIASTORE, //
@@ -117,26 +108,26 @@ public enum TextProject implements GoldStandardProject {
                         "FreeSWITCH Event Socket Layer" }), new Disambiguation("SVG", new String[] { "scalable vector graphics" }))  //
         ), //
         TEASTORE_HISTORICAL( //
-                Project.TEASTORE_HISTORICAL, //
+                HistoricProject.TEASTORE_HISTORICAL, //
                 List.of(new Disambiguation("REST", new String[] { "representational state transfer" }), new Disambiguation("JSP", new String[] {
                         "Java Server Page" }), new Disambiguation("JSPs", new String[] { "Java Server Pages" }), new Disambiguation("OPEN.xtrace",
                                 new String[] { "Open Execution Trace " + "Exchange" })) //
         ), //
         TEAMMATES_HISTORICAL( //
-                Project.TEAMMATES_HISTORICAL, //
+                HistoricProject.TEAMMATES_HISTORICAL, //
                 List.of(new Disambiguation("GAE", new String[] { "Google App Engine" }), new Disambiguation("JSP", new String[] { "Java Server Pages" }),
                         new Disambiguation("POJOs", new String[] { "Plain Old Java Objects" }), new Disambiguation("CRUD", new String[] {
                                 "Create Read Update Delete" })) //
         ), //
         BIGBLUEBUTTON_HISTORICAL( //
-                Project.BIGBLUEBUTTON_HISTORICAL, //
+                HistoricProject.BIGBLUEBUTTON_HISTORICAL, //
                 List.of(new Disambiguation("LMS", new String[] { "learning management system" })) //
         );
 
-        private final Project project;
+        private final GoldStandardProject project;
         private final ImmutableList<Disambiguation> disambiguations;
 
-        TextProject(Project project, List<Disambiguation> disambiguations) {
+        TextProject(GoldStandardProject project, List<Disambiguation> disambiguations) {
             this.project = project;
             this.disambiguations = Lists.immutable.ofAll(disambiguations);
         }
diff --git a/tests/integration-tests/tests-base/pom.xml b/tests/integration-tests/tests-base/pom.xml
index ebad13e..279977a 100644
--- a/tests/integration-tests/tests-base/pom.xml
+++ b/tests/integration-tests/tests-base/pom.xml
@@ -17,6 +17,12 @@
       <groupId>com.tngtech.archunit</groupId>
       <artifactId>archunit-junit5</artifactId>
     </dependency>
+    <dependency>
+      <groupId>io.github.ardoco</groupId>
+      <artifactId>metrics</artifactId>
+      <version>0.1.1-SNAPSHOT</version>
+      <scope>compile</scope>
+    </dependency>
     <dependency>
       <groupId>io.github.ardoco.core</groupId>
       <artifactId>common</artifactId>
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/TestUtil.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/TestUtil.java
index ba2a76c..7adba91 100644
--- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/TestUtil.java
+++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/TestUtil.java
@@ -3,17 +3,14 @@
 
 import java.util.Locale;
 import java.util.Set;
-import java.util.stream.Collectors;
 
 import org.eclipse.collections.api.collection.ImmutableCollection;
-import org.eclipse.collections.api.factory.Lists;
-import org.eclipse.collections.api.list.ImmutableList;
 import org.slf4j.Logger;
 
 import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult;
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ExpectedResults;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ResultMatrix;
+import edu.kit.kastel.mcse.ardoco.metrics.ClassificationMetricsCalculator;
 
 /**
  * This utility class provides methods for running the tests, especially regarding the evaluations.
@@ -37,20 +34,18 @@ public static <T> EvaluationResults<T> compareTLR(ArDoCoResult arDoCoResult, Imm
         Set<T> distinctTraceLinks = new java.util.LinkedHashSet<>(results.castToCollection());
         Set<T> distinctGoldStandard = new java.util.LinkedHashSet<>(goldStandard.castToCollection());
 
-        // True Positives are the trace links that are contained on both lists
-        Set<T> truePositives = distinctTraceLinks.stream().filter(distinctGoldStandard::contains).collect(Collectors.toSet());
-        ImmutableList<T> truePositivesList = Lists.immutable.ofAll(truePositives);
+        int sentences = arDoCoResult.getText().getSentences().size();
+        int modelElements = 0;
+        for (var model : arDoCoResult.getModelIds()) {
+            modelElements += arDoCoResult.getModelState(model).getInstances().size();
+        }
 
-        // False Positives are the trace links that are only contained in the result set
-        Set<T> falsePositives = distinctTraceLinks.stream().filter(tl -> !distinctGoldStandard.contains(tl)).collect(Collectors.toSet());
-        ImmutableList<T> falsePositivesList = Lists.immutable.ofAll(falsePositives);
+        int confusionMatrixSum = sentences * modelElements;
 
-        // False Negatives are the trace links that are only contained in the gold standard
-        Set<T> falseNegatives = distinctGoldStandard.stream().filter(tl -> !distinctTraceLinks.contains(tl)).collect(Collectors.toSet());
-        ImmutableList<T> falseNegativesList = Lists.immutable.ofAll(falseNegatives);
+        var calculator = ClassificationMetricsCalculator.getInstance();
 
-        int trueNegatives = TestUtil.calculateTrueNegativesForTLR(arDoCoResult, truePositives.size(), falsePositives.size(), falseNegatives.size());
-        return EvaluationResults.createEvaluationResults(new ResultMatrix<>(truePositivesList, trueNegatives, falsePositivesList, falseNegativesList));
+        var classification = calculator.calculateMetrics(distinctTraceLinks, distinctGoldStandard, confusionMatrixSum);
+        return new EvaluationResults<>(classification);
     }
 
     /**
@@ -67,57 +62,11 @@ public static <T> EvaluationResults<T> compareInconsistencies(ArDoCoResult arDoC
         Set<T> distinctTraceLinks = new java.util.LinkedHashSet<>(results.castToCollection());
         Set<T> distinctGoldStandard = new java.util.LinkedHashSet<>(goldStandard.castToCollection());
 
-        // True Positives are the trace links that are contained on both lists
-        Set<T> truePositives = distinctTraceLinks.stream().filter(distinctGoldStandard::contains).collect(Collectors.toSet());
-        ImmutableList<T> truePositivesList = Lists.immutable.ofAll(truePositives);
-
-        // False Positives are the trace links that are only contained in the result set
-        Set<T> falsePositives = distinctTraceLinks.stream().filter(tl -> !distinctGoldStandard.contains(tl)).collect(Collectors.toSet());
-        ImmutableList<T> falsePositivesList = Lists.immutable.ofAll(falsePositives);
-
-        // False Negatives are the trace links that are only contained in the gold standard
-        Set<T> falseNegatives = distinctGoldStandard.stream().filter(tl -> !distinctTraceLinks.contains(tl)).collect(Collectors.toSet());
-        ImmutableList<T> falseNegativesList = Lists.immutable.ofAll(falseNegatives);
-
-        int trueNegatives = TestUtil.calculateTrueNegativesForInconsistencies(arDoCoResult, truePositives.size(), falsePositives.size(), falseNegatives.size());
-        return EvaluationResults.createEvaluationResults(new ResultMatrix<>(truePositivesList, trueNegatives, falsePositivesList, falseNegativesList));
-    }
-
-    /**
-     * Calculates the number of true negatives based on the given {@link ArDoCoResult} and the calculated {@link EvaluationResults evaluation results}. Uses the
-     * total sum of all entries in the confusion matrix and then substracts the true positives, false positives, and false negatives.
-     *
-     * @param arDoCoResult   the output of ArDoCo
-     * @param truePositives  nr of true positives
-     * @param falsePositives nr of false positives
-     * @param falseNegatives nr of false negatives
-     * @return the number of true negatives
-     */
-    public static int calculateTrueNegativesForTLR(ArDoCoResult arDoCoResult, int truePositives, int falsePositives, int falseNegatives) {
-        int sentences = arDoCoResult.getText().getSentences().size();
-        int modelElements = 0;
-        for (var model : arDoCoResult.getModelIds()) {
-            modelElements += arDoCoResult.getModelState(model).getInstances().size();
-        }
-
-        int confusionMatrixSum = sentences * modelElements;
-        return confusionMatrixSum - (truePositives + falsePositives + falseNegatives);
-    }
-
-    /**
-     * Calculates the number of true negatives based on the given {@link ArDoCoResult} and the calculated {@link EvaluationResults evaluation results}. Uses the
-     * total sum of all sentences in the {@link ArDoCoResult} and then substracts the true positives, false positives, and false negatives.
-     *
-     * @param arDoCoResult   the output of ArDoCo
-     * @param truePositives  nr of true positives
-     * @param falsePositives nr of false positives
-     * @param falseNegatives nr of false negatives
-     * @return the number of true negatives
-     */
-    public static int calculateTrueNegativesForInconsistencies(ArDoCoResult arDoCoResult, int truePositives, int falsePositives, int falseNegatives) {
-        int numberOfSentences = arDoCoResult.getText().getSentences().size();
-        return numberOfSentences - (truePositives + falsePositives + falseNegatives);
+        int confusionMatrixSum = arDoCoResult.getText().getSentences().size();
 
+        var calculator = ClassificationMetricsCalculator.getInstance();
+        var classification = calculator.calculateMetrics(distinctTraceLinks, distinctGoldStandard, confusionMatrixSum);
+        return new EvaluationResults<>(classification);
     }
 
     /**
@@ -168,19 +117,6 @@ public static void logExplicitResults(Logger logger, String name, EvaluationResu
         logger.info(logString);
     }
 
-    /**
-     * Log the provided {@link EvaluationResults} using the provided logger and name. Additionally, provided the expected results.
-     *
-     * @param logger          Logger to use
-     * @param name            Name to show in the output
-     * @param results         the results
-     * @param expectedResults the expected results
-     */
-    public static void logResultsWithExpected(Logger logger, String name, EvaluationResults<?> results, ExpectedResults expectedResults) {
-        var infoString = String.format(Locale.ENGLISH, "%n%s:%n%s", name, results.getResultStringWithExpected(expectedResults));
-        logger.info(infoString);
-    }
-
     public static void logExtendedResultsWithExpected(Logger logger, Object testClass, String name, EvaluationResults<?> results,
             ExpectedResults expectedResults) {
         var infoString = String.format(Locale.ENGLISH, """
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/architecture/DeterministicArDoCoTest.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/architecture/DeterministicArDoCoTest.java
index ac86729..6dfab30 100644
--- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/architecture/DeterministicArDoCoTest.java
+++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/architecture/DeterministicArDoCoTest.java
@@ -50,7 +50,7 @@ public class DeterministicArDoCoTest {
 
     @ArchTest
     public static final ArchRule forbidUnorderedSetsAndMaps = noClasses().that()
-            .resideOutsideOfPackages("..tests..")
+            .resideOutsideOfPackages("..tests..", "..metrics..")
             .and(areNotDirectlyAnnotatedWith(Deterministic.class))
             .should()
             .accessClassesThat(areForbiddenClasses())
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/CodeProject.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/CodeProject.java
index 31255a6..bab51bb 100644
--- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/CodeProject.java
+++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/CodeProject.java
@@ -5,6 +5,7 @@
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.UncheckedIOException;
 import java.nio.file.Files;
 import java.nio.file.Paths;
 import java.util.List;
@@ -159,8 +160,7 @@ public String getCodeModelDirectory() {
             loadCodeModelFromResourcesIfNeeded();
             return getTemporaryCodeLocation().getAbsolutePath();
         } catch (IOException e) {
-            logger.error(e.getMessage(), e);
-            return null;
+            throw new UncheckedIOException(e);
         }
     }
 
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/EvaluationMetrics.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/EvaluationMetrics.java
deleted file mode 100644
index cf9c0a2..0000000
--- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/EvaluationMetrics.java
+++ /dev/null
@@ -1,212 +0,0 @@
-/* Licensed under MIT 2022-2024. */
-package edu.kit.kastel.mcse.ardoco.core.tests.eval;
-
-import java.math.BigDecimal;
-import java.math.MathContext;
-
-public class EvaluationMetrics {
-    private EvaluationMetrics() throws IllegalAccessException {
-        throw new IllegalAccessException();
-    }
-
-    /**
-     * Checks the provided recall. Returns 1.0 if it is NaN, because this means that there was no missing
-     * classification.
-     *
-     * @param recall the precision
-     * @return 1.0 if recall is NaN, else the original value
-     */
-    public static double checkAndRepairRecall(double recall) {
-        if (Double.isNaN(recall)) {
-            return 1.0;
-        }
-        return recall;
-    }
-
-    /**
-     * Calculates the recall for the given True Positives (TPs) and False Negatives (FNs). If TP+NP=0, then returns 1.0
-     * because there was no missing element.
-     *
-     * @param truePositives  number of TPs
-     * @param falseNegatives number of FNs
-     * @return the Recall; 1.0 iff TP+NP=0
-     */
-    public static double calculateRecall(int truePositives, int falseNegatives) {
-        double denominator = (truePositives + falseNegatives);
-        var recall = 1.0 * truePositives / denominator;
-        return checkAndRepairRecall(recall);
-    }
-
-    /**
-     * Checks the provided precision. Returns 1.0 if it is NaN, because this means that there was no wrong
-     * classification.
-     *
-     * @param precision the precision
-     * @return 1.0 if precision is NaN, else the original value
-     */
-    public static double checkAndRepairPrecision(double precision) {
-        if (Double.isNaN(precision)) {
-            return 1.0;
-        }
-        return precision;
-    }
-
-    /**
-     * Calculates the precision for the given True Positives (TPs) and False Positives (FPs). If TP+FP=0, then returns
-     * 1.0 because there was no wrong classification.
-     *
-     * @param truePositives  number of TPs
-     * @param falsePositives number of FPs
-     * @return the Precision; 1.0 iff TP+FP=0
-     */
-    public static double calculatePrecision(int truePositives, int falsePositives) {
-        double denominator = (truePositives + falsePositives);
-        var precision = 1.0 * truePositives / denominator;
-        return checkAndRepairPrecision(precision);
-    }
-
-    /**
-     * Checks the provided F1-score. Iff it is NaN, returns 0.0, otherwise returns the original value
-     *
-     * @param f1 the f1-score to check
-     * @return Iff score is NaN, returns 0.0, otherwise returns the original value
-     */
-    public static double checkAndRepairF1(double f1) {
-        if (Double.isNaN(f1)) {
-            return 0.0;
-        }
-        return f1;
-    }
-
-    /**
-     * Calculates the F1-score using the provided precision and recall. If precision+recall=0, returns 0.0.
-     *
-     * @param precision the precision
-     * @param recall    the recall
-     * @return the F1-Score; 0.0 iff precision+recall=0
-     */
-    public static double calculateF1(double precision, double recall) {
-        var f1 = 2 * precision * recall / (precision + recall);
-        return checkAndRepairF1(f1);
-    }
-
-    /**
-     * Calculates the F1-score using the provided True Positives (TPs), False Positives (FPs), and False Negatives
-     * (FNs). If intermediate calculation shows that precision+recall=0, returns 0.0.
-     *
-     * @param truePositives  number of TPs
-     * @param falsePositives number of FPs
-     * @param falseNegatives number of FNs
-     * @return the F1-score. See also {@link #calculateF1(double, double)}
-     */
-    public static double calculateF1(int truePositives, int falsePositives, int falseNegatives) {
-        var precision = calculatePrecision(truePositives, falsePositives);
-        var recall = calculateRecall(truePositives, falseNegatives);
-        return calculateF1(precision, recall);
-    }
-
-    /**
-     * Calculates the accuracy based on the true positives, false positives, false negatives, and true negatives.
-     *
-     * @see <a href="https://en.wikipedia.org/wiki/Accuracy_and_precision">Wikipedia: Accuracy and Precision</a>
-     * @return the accuracy
-     */
-    public static double calculateAccuracy(int truePositives, int falsePositives, int falseNegatives, int trueNegatives) {
-        double numerator = truePositives + trueNegatives;
-        double denominator = truePositives + falsePositives + falseNegatives + trueNegatives;
-        return numerator / denominator;
-    }
-
-    /**
-     * Returns the Phi Coefficient (also known as mean square contingency coefficient (MCC)) based on the true positives, false positives, false negatives, and
-     * true negatives.
-     * The return value lies between -1 and +1. -1 show perfect disagreement, +1 shows perfect agreement and 0 indicates no relationship.
-     * Therefore, good values should be close to +1.
-     *
-     * @see <a href="https://en.wikipedia.org/wiki/Phi_coefficient">Wikipedia: Phi coefficient</a>
-     *
-     * @return the value for Phi Coefficient (or MCC)
-     */
-    public static double calculatePhiCoefficient(int truePositives, int falsePositives, int falseNegatives, int trueNegatives) {
-        var tp = BigDecimal.valueOf(truePositives);
-        var fp = BigDecimal.valueOf(falsePositives);
-        var fn = BigDecimal.valueOf(falseNegatives);
-        var tn = BigDecimal.valueOf(trueNegatives);
-
-        var num = tp.multiply(tn).subtract((fp.multiply(fn)));
-
-        var a = tp.add(fp);
-        var b = tp.add(fn);
-        var c = tn.add(fp);
-        var d = tn.add(fn);
-        if (a.equals(BigDecimal.ZERO) || b.equals(BigDecimal.ZERO) || c.equals(BigDecimal.ZERO) || d.equals(BigDecimal.ZERO)) {
-            return 0d;
-        }
-
-        var productOfSumsInDenominator = a.multiply(b).multiply(c).multiply(d);
-        var denominator = productOfSumsInDenominator.sqrt(MathContext.DECIMAL128);
-
-        return num.divide(denominator, MathContext.DECIMAL128).doubleValue();
-    }
-
-    /**
-     * Calculates the maximum possible value of the phi coefficient given the four values of the confusion matrix (TP, FP, FN, TN).
-     *
-     * @see <a href="https://link.springer.com/article/10.1007/BF02288588">Paper about PhiMax by Ferguson (1941)</a>
-     * @see <a href="https://journals.sagepub.com/doi/abs/10.1177/001316449105100403">Paper about Phi/PhiMax by Davenport et al. (1991)</a>
-     * @param truePositives  number of true positives
-     * @param falsePositives number of false positives
-     * @param falseNegatives number of false negatives
-     * @param trueNegatives  number of true negatives
-     * @return The maximum possible value of phi.
-     */
-    public static double calculatePhiCoefficientMax(int truePositives, int falsePositives, int falseNegatives, int trueNegatives) {
-        var tp = BigDecimal.valueOf(truePositives);
-        var fp = BigDecimal.valueOf(falsePositives);
-        var fn = BigDecimal.valueOf(falseNegatives);
-        var tn = BigDecimal.valueOf(trueNegatives);
-
-        var test = fn.add(tp).compareTo(fp.add(tp)) >= 0;
-        var nominator = (fp.add(tn)).multiply(tp.add(fp)).sqrt(MathContext.DECIMAL128);
-        var denominator = (fn.add(tn)).multiply(tp.add(fn)).sqrt(MathContext.DECIMAL128);
-        if (test) {
-            // standard case
-            return nominator.divide(denominator, MathContext.DECIMAL128).doubleValue();
-        } else {
-            // if test is not true, you have to swap nominator and denominator as then you have to mirror the confusion matrix (,i.e., swap TP and TN)
-            return denominator.divide(nominator, MathContext.DECIMAL128).doubleValue();
-        }
-    }
-
-    /**
-     * Calculates the normalized phi correlation coefficient value that is phi divided by its maximum possible value.
-     * 
-     * @see <a href="https://journals.sagepub.com/doi/abs/10.1177/001316449105100403">Paper about Phi/PhiMax</a>
-     * @param truePositives  number of true positives
-     * @param falsePositives number of false positives
-     * @param falseNegatives number of false negatives
-     * @param trueNegatives  number of true negatives
-     * @return The value of Phi/PhiMax
-     */
-    public static double calculatePhiOverPhiMax(int truePositives, int falsePositives, int falseNegatives, int trueNegatives) {
-        var phi = calculatePhiCoefficient(truePositives, falsePositives, falseNegatives, trueNegatives);
-        var phiMax = calculatePhiCoefficientMax(truePositives, falsePositives, falseNegatives, trueNegatives);
-        return phi / phiMax;
-    }
-
-    /**
-     * Calculates the specificity, also known as selectivity or true negative rate, based on the number of true negatives and false positives.
-     *
-     * @see <a href="https://en.wikipedia.org/wiki/Sensitivity_and_specificity">Wikipedia: Sensitivity and specificity</a>
-     * @param trueNegatives  the number of true negatives
-     * @param falsePositives the number of false positives
-     * @return the specificity
-     */
-    public static double calculateSpecificity(int trueNegatives, int falsePositives) {
-        double specificity = trueNegatives / ((double) trueNegatives + falsePositives);
-        if (Double.isNaN(specificity)) {
-            return 1.0;
-        }
-        return specificity;
-    }
-}
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/HistoricProject.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/HistoricProject.java
new file mode 100644
index 0000000..b28cc51
--- /dev/null
+++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/HistoricProject.java
@@ -0,0 +1,222 @@
+/* Licensed under MIT 2021-2024. */
+package edu.kit.kastel.mcse.ardoco.core.tests.eval;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.eclipse.collections.api.factory.Lists;
+import org.eclipse.collections.api.list.ImmutableList;
+import org.eclipse.collections.api.list.MutableList;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import edu.kit.kastel.mcse.ardoco.core.api.models.ArchitectureModelType;
+import edu.kit.kastel.mcse.ardoco.core.api.models.arcotl.ArchitectureModel;
+import edu.kit.kastel.mcse.ardoco.core.execution.ConfigurationHelper;
+import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ExpectedResults;
+
+/**
+ * This enum captures the different case studies that are used for evaluation in the integration tests (only historic versions).
+ */
+public enum HistoricProject implements GoldStandardProject {
+    TEASTORE_HISTORICAL( //
+            "TS-H", //
+            "/benchmark/teastore/model_2020/pcm/teastore.repository", //
+            "/benchmark/teastore/text_2018/teastore_2018_AB.txt", //
+            "/benchmark/teastore/goldstandards/goldstandard_sad_2018-sam_2020_AB.csv", //
+            "/configurations/ts/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
+            "/benchmark/teastore/goldstandards/goldstandard_sad_2018-sam_2020_AB_UME.csv", //
+            new ExpectedResults(.999, .740, .850, .984, .853, .999), //
+            new ExpectedResults(.082, .983, .153, .332, .147, .291) //
+    ), //
+    TEAMMATES_HISTORICAL( //
+            "TM-H", //
+            "/benchmark/teammates/model_2021/pcm/teammates.repository", //
+            "/benchmark/teammates/text_2015/teammates_2015.txt", //
+            "/benchmark/teammates/goldstandards/goldstandard_sad_2015-sam_2021.csv", //
+            "/configurations/tm/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
+            "/benchmark/teammates/goldstandards/goldstandard_sad_2015-sam_2021_UME.csv", //
+            new ExpectedResults(.524, .695, .597, .970, .589, .979), //
+            new ExpectedResults(.161, .695, .262, .878, .295, .884) //
+    ), //
+    BIGBLUEBUTTON_HISTORICAL( //
+            "BBB-H", "/benchmark/bigbluebutton/model_2021/pcm/bbb.repository", //
+            "/benchmark/bigbluebutton/text_2015/bigbluebutton_2015.txt", //
+            "/benchmark/bigbluebutton/goldstandards/goldstandard_sad_2015-sam_2021.csv", //
+            "/configurations/bbb/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
+            "/benchmark/bigbluebutton/goldstandards/goldstandard_sad_2015-sam_2021_UME.csv", //
+            new ExpectedResults(.807, .617, .699, .978, .695, .993), //
+            new ExpectedResults(.048, .176, .076, .829, .018, .857) //
+    ), //
+
+    JABREF_HISTORICAL( //
+            "JR-H", "/benchmark/jabref/model_2021/pcm/jabref.repository", //
+            "/benchmark/jabref/text_2016/jabref_2016.txt", //
+            "/benchmark/jabref/goldstandards/goldstandard_sad_2016-sam_2021.csv", //
+            "/configurations/jabref/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
+            "/benchmark/jabref/goldstandards/goldstandard_sad_2016-sam_2021_UME.csv", //
+            new ExpectedResults(.817, .999, .899, .966, .886, .960), //
+            new ExpectedResults(.083, .111, .095, .683, -0.094, .784) //
+    );
+
+    private static final Logger logger = LoggerFactory.getLogger(HistoricProject.class);
+
+    private final String alias;
+    private final String model;
+    private final String textFile;
+    private final String configurationsFile;
+    private final String goldStandardTraceabilityLinkRecovery;
+    private final String goldStandardMissingTextForModelElement;
+    private final ExpectedResults expectedTraceLinkResults;
+    private final ExpectedResults expectedInconsistencyResults;
+    private final SortedSet<String> resourceNames;
+
+    HistoricProject(String alias, String model, String textFile, String goldStandardTraceabilityLinkRecovery, String configurationsFile,
+            String goldStandardMissingTextForModelElement, ExpectedResults expectedTraceLinkResults, ExpectedResults expectedInconsistencyResults) {
+        this.alias = alias;
+        this.model = model;
+        this.textFile = textFile;
+        this.configurationsFile = configurationsFile;
+        this.goldStandardTraceabilityLinkRecovery = goldStandardTraceabilityLinkRecovery;
+        this.goldStandardMissingTextForModelElement = goldStandardMissingTextForModelElement;
+        this.expectedTraceLinkResults = expectedTraceLinkResults;
+        this.expectedInconsistencyResults = expectedInconsistencyResults;
+        resourceNames = new TreeSet<>(List.of(model, textFile, goldStandardTraceabilityLinkRecovery, configurationsFile,
+                goldStandardMissingTextForModelElement));
+    }
+
+    @Override
+    public String getAlias() {
+        return alias;
+    }
+
+    @Override
+    public File getModelFile() {
+        return ProjectHelper.loadFileFromResources(model);
+    }
+
+    @Override
+    public String getModelResourceName() {
+        return model;
+    }
+
+    /** Loads the model file for the given model type by resolving its resource name first. */
+    @Override
+    public File getModelFile(ArchitectureModelType modelType) {
+        // getModelResourceName yields the plain model path for PCM and the "/uml/" + ".uml" rewritten path for UML.
+        String resourceName = getModelResourceName(modelType);
+        return ProjectHelper.loadFileFromResources(resourceName);
+    }
+
+    @Override
+    public String getModelResourceName(ArchitectureModelType modelType) {
+        return switch (modelType) {
+        case PCM -> model;
+        case UML -> model.replace("/pcm/", "/uml/").replace(".repository", ".uml");
+        };
+    }
+
+    @Override
+    public File getTextFile() {
+        return ProjectHelper.loadFileFromResources(textFile);
+    }
+
+    @Override
+    public String getTextResourceName() {
+        return textFile;
+    }
+
+    @Override
+    public SortedMap<String, String> getAdditionalConfigurations() {
+        return ConfigurationHelper.loadAdditionalConfigs(getAdditionalConfigurationsFile());
+    }
+
+    @Override
+    public File getAdditionalConfigurationsFile() {
+        return ProjectHelper.loadFileFromResources(this.configurationsFile);
+    }
+
+    @Override
+    public String getAdditionalConfigurationsResourceName() {
+        return configurationsFile;
+    }
+
+    @Override
+    public File getTlrGoldStandardFile() {
+        return ProjectHelper.loadFileFromResources(goldStandardTraceabilityLinkRecovery);
+    }
+
+    @Override
+    public String getTlrGoldStandardResourceName() {
+        return goldStandardTraceabilityLinkRecovery;
+    }
+
+    /** Returns the TLR gold standard lines without the header row and blank lines; throws UncheckedIOException if the file is unreadable. */
+    @Override
+    public ImmutableList<String> getTlrGoldStandard() {
+        var path = Paths.get(this.getTlrGoldStandardFile().toURI());
+        try {
+            // skip(1) drops the header row and, unlike removeFirst(), tolerates an empty file.
+            var goldLinks = Files.readAllLines(path).stream().skip(1).filter(line -> !line.isBlank()).toList();
+            return Lists.immutable.ofAll(goldLinks);
+        } catch (IOException e) {
+            // Propagate the I/O failure with its cause; continuing with an empty gold standard would hide it.
+            throw new java.io.UncheckedIOException("Could not read TLR gold standard " + path, e);
+        }
+    }
+
+    @Override
+    public GoldStandard getTlrGoldStandard(ArchitectureModel architectureModel) {
+        return new GoldStandard(getTlrGoldStandardFile(), architectureModel);
+    }
+
+    /** Returns the gold standard lines for missing text, minus header and blank lines; throws UncheckedIOException if unreadable. */
+    @Override
+    public MutableList<String> getMissingTextForModelElementGoldStandard() {
+        var path = Paths.get(this.getMissingTextForModelElementGoldStandardFile().toURI());
+        try {
+            List<String> goldLinks = Files.readAllLines(path);
+            goldLinks.remove("missingModelElementID"); // header row of the CSV
+            goldLinks.removeIf(String::isBlank);
+            return Lists.mutable.ofAll(goldLinks);
+        } catch (IOException e) { // an empty gold standard would silently distort the evaluation, so fail instead
+            throw new java.io.UncheckedIOException("Could not read gold standard " + path, e);
+        }
+    }
+
+    @Override
+    public File getMissingTextForModelElementGoldStandardFile() {
+        return ProjectHelper.loadFileFromResources(goldStandardMissingTextForModelElement);
+    }
+
+    @Override
+    public String getMissingTextForModelElementGoldStandardResourceName() {
+        return goldStandardMissingTextForModelElement;
+    }
+
+    @Override
+    public ExpectedResults getExpectedTraceLinkResults() {
+        return expectedTraceLinkResults;
+    }
+
+    @Override
+    public ExpectedResults getExpectedInconsistencyResults() {
+        return expectedInconsistencyResults;
+    }
+
+    @Override
+    public String getProjectName() {
+        return this.name();
+    }
+
+    @Override
+    public SortedSet<String> getResourceNames() {
+        return new TreeSet<>(resourceNames);
+    }
+}
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/Project.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/Project.java
index b2929bd..d6d6b27 100644
--- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/Project.java
+++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/Project.java
@@ -33,7 +33,7 @@ public enum Project implements GoldStandardProject {
             "/configurations/ms/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
             "/benchmark/mediastore/goldstandards/goldstandard_sad_2016-sam_2016_UME.csv", //
             new ExpectedResults(.999, .620, .765, .978, .778, .999), //
-            new ExpectedResults(.212, .792, .328, .702, .227, .690) //
+            new ExpectedResults(.127, .793, .220, .685, .227, .679) //
     ), //
     TEASTORE( //
             "TS", //
@@ -43,17 +43,7 @@ public enum Project implements GoldStandardProject {
             "/configurations/ts/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
             "/benchmark/teastore/goldstandards/goldstandard_sad_2020-sam_2020_UME.csv", //
             new ExpectedResults(.999, .740, .850, .984, .853, .999), //
-            new ExpectedResults(.962, .703, .784, .957, .808, .994) //
-    ), //
-    TEASTORE_HISTORICAL( //
-            "TS-H", //
-            "/benchmark/teastore/model_2020/pcm/teastore.repository", //
-            "/benchmark/teastore/text_2018/teastore_2018_AB.txt", //
-            "/benchmark/teastore/goldstandards/goldstandard_sad_2018-sam_2020_AB.csv", //
-            "/configurations/ts/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
-            "/benchmark/teastore/goldstandards/goldstandard_sad_2018-sam_2020_AB_UME.csv", //
-            new ExpectedResults(.999, .740, .850, .984, .853, .999), //
-            new ExpectedResults(.163, .982, .278, .376, .146, .289) //
+            new ExpectedResults(.950, .703, .808, .980, .808, .998) //
     ), //
     TEAMMATES( //
             "TM", //
@@ -63,17 +53,7 @@ public enum Project implements GoldStandardProject {
             "/configurations/tm/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
             "/benchmark/teammates/goldstandards/goldstandard_sad_2021-sam_2021_UME.csv", //
             new ExpectedResults(.555, .882, .681, .965, .688, .975), //
-            new ExpectedResults(.175, .745, .279, .851, .287, .851) //
-    ), //
-    TEAMMATES_HISTORICAL( //
-            "TM-H", //
-            "/benchmark/teammates/model_2021/pcm/teammates.repository", //
-            "/benchmark/teammates/text_2015/teammates_2015.txt", //
-            "/benchmark/teammates/goldstandards/goldstandard_sad_2015-sam_2021.csv", //
-            "/configurations/tm/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
-            "/benchmark/teammates/goldstandards/goldstandard_sad_2015-sam_2021_UME.csv", //
-            new ExpectedResults(.524, .695, .597, .970, .589, .979), //
-            new ExpectedResults(.168, .629, .263, .863, .260, .870) //
+            new ExpectedResults(.147, .745, .245, .852, .287, .856) //
     ), //
     BIGBLUEBUTTON( //
             "BBB", "/benchmark/bigbluebutton/model_2021/pcm/bbb.repository", //
@@ -82,16 +62,7 @@ public enum Project implements GoldStandardProject {
             "/configurations/bbb/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
             "/benchmark/bigbluebutton/goldstandards/goldstandard_sad_2021-sam_2021_UME.csv", //
             new ExpectedResults(.875, .826, .850, .985, .835, .985), //
-            new ExpectedResults(.887, .461, .429, .956, .534, .984) //
-    ), //
-    BIGBLUEBUTTON_HISTORICAL( //
-            "BBB-H", "/benchmark/bigbluebutton/model_2021/pcm/bbb.repository", //
-            "/benchmark/bigbluebutton/text_2015/bigbluebutton_2015.txt", //
-            "/benchmark/bigbluebutton/goldstandards/goldstandard_sad_2015-sam_2021.csv", //
-            "/configurations/bbb/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
-            "/benchmark/bigbluebutton/goldstandards/goldstandard_sad_2015-sam_2021_UME.csv", //
-            new ExpectedResults(.807, .617, .699, .978, .695, .993), //
-            new ExpectedResults(.085, .175, .111, .813, .018, .869) //
+            new ExpectedResults(.666, .461, .545, .960, .535, .988) //
     ), //
     JABREF( //
             "JR", "/benchmark/jabref/model_2021/pcm/jabref.repository", //
@@ -100,16 +71,7 @@ public enum Project implements GoldStandardProject {
             "/configurations/jabref/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
             "/benchmark/jabref/goldstandards/goldstandard_sad_2021-sam_2021_UME.csv", //
             new ExpectedResults(.899, .999, .946, .973, .932, .966), //
-            new ExpectedResults(1.0, .443, .443, .845, .616, 1.0) //
-    ), //
-    JABREF_HISTORICAL( //
-            "JR-H", "/benchmark/jabref/model_2021/pcm/jabref.repository", //
-            "/benchmark/jabref/text_2016/jabref_2016.txt", //
-            "/benchmark/jabref/goldstandards/goldstandard_sad_2016-sam_2021.csv", //
-            "/configurations/jabref/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
-            "/benchmark/jabref/goldstandards/goldstandard_sad_2016-sam_2021_UME.csv", //
-            new ExpectedResults(.817, .999, .899, .966, .886, .960), //
-            new ExpectedResults(.110, .110, .110, .366, -.249, .475) //
+            new ExpectedResults(1.0, .444, .615, .871, .617, 1.0) //
     );
 
     private static final Logger logger = LoggerFactory.getLogger(Project.class);
@@ -179,11 +141,7 @@ public String getTextResourceName() {
         return textFile;
     }
 
-    /**
-     * Return the map of additional configuration options
-     *
-     * @return the map of additional configuration options
-     */
+    @Override
     public SortedMap<String, String> getAdditionalConfigurations() {
         return ConfigurationHelper.loadAdditionalConfigs(getAdditionalConfigurationsFile());
     }
@@ -217,7 +175,7 @@ public ImmutableList<String> getTlrGoldStandard() {
         } catch (IOException e) {
             logger.error(e.getMessage(), e);
         }
-        goldLinks.remove(0);
+        goldLinks.removeFirst();
         goldLinks.removeIf(String::isBlank);
         return Lists.immutable.ofAll(goldLinks);
     }
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/ProjectHelper.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/ProjectHelper.java
index d2e23c5..45a5465 100644
--- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/ProjectHelper.java
+++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/ProjectHelper.java
@@ -5,11 +5,9 @@
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.UncheckedIOException;
 import java.util.concurrent.atomic.AtomicBoolean;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 /**
  * Helper class for {@link GoldStandardProject} implementations.
  */
@@ -18,7 +16,6 @@ public class ProjectHelper {
      * If set to false. The CodeProject will place the codeModel.acm file from the benchmark to the project directory.
      */
     public static final AtomicBoolean ANALYZE_CODE_DIRECTLY = new AtomicBoolean(false);
-    private static final Logger logger = LoggerFactory.getLogger(ProjectHelper.class);
 
     private ProjectHelper() {
         throw new IllegalAccessError();
@@ -33,7 +30,7 @@ private ProjectHelper() {
     public static File loadFileFromResources(String resource) {
         InputStream is = ProjectHelper.class.getResourceAsStream(resource);
         if (is == null)
-            return null;
+            throw new IllegalArgumentException("Resource not found: " + resource);
         try {
             File temporaryFile = File.createTempFile("ArDoCo", ".tmp");
             temporaryFile.deleteOnExit();
@@ -44,8 +41,7 @@ public static File loadFileFromResources(String resource) {
             }
             return temporaryFile;
         } catch (IOException e) {
-            logger.error(e.getMessage(), e);
-            return null;
+            throw new UncheckedIOException(e);
         }
     }
 }
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResultVector.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResultVector.java
deleted file mode 100644
index 39a360b..0000000
--- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResultVector.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/* Licensed under MIT 2023-2024. */
-package edu.kit.kastel.mcse.ardoco.core.tests.eval.results;
-
-import org.eclipse.collections.api.factory.Lists;
-
-/**
- * used to form the average of several {@link EvaluationResults}
- * 
- * @param <T> type of the {@link EvaluationResults}
- */
-public class EvaluationResultVector<T> {
-    private double precision = 0.0;
-    private double recall = 0.0;
-    private double f1 = 0.0;
-    private double accuracy = 0.0;
-    private double phiCoefficient = 0.0;
-    private double specificity = 0.0;
-    private double phiCoefficientMax = 0.0;
-    private double phiOverPhiMax = 0.0;
-
-    public void add(EvaluationResults<T> results) {
-        precision += results.precision();
-        recall += results.recall();
-        f1 += results.f1();
-        accuracy += results.accuracy();
-        specificity += results.specificity();
-        phiCoefficient += results.phiCoefficient();
-        phiCoefficientMax += results.phiCoefficientMax();
-        phiOverPhiMax += results.phiOverPhiMax();
-    }
-
-    public void scale(double scale) {
-        precision /= scale;
-        recall /= scale;
-        f1 /= scale;
-        accuracy /= scale;
-        specificity /= scale;
-        phiCoefficient /= scale;
-        phiCoefficientMax /= scale;
-        phiOverPhiMax /= scale;
-    }
-
-    public void addWeighted(EvaluationResults<T> results, int weight) {
-        precision += results.precision() * weight;
-        recall += results.recall() * weight;
-        f1 += results.f1() * weight;
-        accuracy += results.accuracy() * weight;
-        specificity += results.specificity() * weight;
-        phiCoefficient += results.phiCoefficient() * weight;
-        phiCoefficientMax += results.phiCoefficientMax() * weight;
-        phiOverPhiMax += results.phiOverPhiMax() * weight;
-    }
-
-    public EvaluationResults<T> toEvaluationResults() {
-        return new EvaluationResults<>(precision, recall, f1, Lists.immutable.empty(), 0, Lists.immutable.empty(), Lists.immutable.empty(), accuracy,
-                phiCoefficient, specificity, phiCoefficientMax, phiOverPhiMax);
-    }
-}
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResults.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResults.java
index 9332bb8..0db56b3 100644
--- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResults.java
+++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResults.java
@@ -1,30 +1,29 @@
 /* Licensed under MIT 2023-2024. */
 package edu.kit.kastel.mcse.ardoco.core.tests.eval.results;
 
+import java.util.List;
 import java.util.Locale;
 
 import org.eclipse.collections.api.factory.Lists;
 import org.eclipse.collections.api.list.ImmutableList;
 import org.eclipse.collections.api.list.MutableList;
 
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.EvaluationMetrics;
+import edu.kit.kastel.mcse.ardoco.metrics.result.SingleClassificationResult;
 
-public record EvaluationResults<T>(double precision, double recall, double f1, ImmutableList<T> truePositives, int trueNegatives,
-                                   ImmutableList<T> falseNegatives, ImmutableList<T> falsePositives, double accuracy, double phiCoefficient, double specificity,
-                                   double phiCoefficientMax, double phiOverPhiMax) {
+public record EvaluationResults<T>(SingleClassificationResult<T> classificationResult) {
 
     public String toRow() {
         return String.format(Locale.ENGLISH, """
                 %4s & %4s & %4s & %4s & %4s & %4s & %4s
-                %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f""", "P", "R", "F1", "Acc", "Spec", "Phi", "PhiN", precision, recall, f1, accuracy,
-                specificity, phiCoefficient, phiOverPhiMax);
+                %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f""", "P", "R", "F1", "Acc", "Spec", "Phi", "PhiN", precision(), recall(), f1(), accuracy(),
+                specificity(), phiCoefficient(), phiOverPhiMax());
     }
 
     public String toRow(String headerKey, String headerVal) {
         return String.format(Locale.ENGLISH, """
                 %10s & %4s & %4s & %4s & %4s & %4s & %4s & %4s
-                %10s & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f""", headerKey, "P", "R", "F1", "Acc", "Spec", "Phi", "PhiN", headerVal, precision,
-                recall, f1, accuracy, specificity, phiCoefficient, phiOverPhiMax);
+                %10s & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f""", headerKey, "P", "R", "F1", "Acc", "Spec", "Phi", "PhiN", headerVal,
+                precision(), recall(), f1(), accuracy(), specificity(), phiCoefficient(), phiOverPhiMax());
     }
 
     @Override
@@ -37,7 +36,7 @@ public String toString() {
                 \tSpecificity:%6.2f
                 \tPhi Coef.:%8.2f
                 \tPhi/PhiMax:%7.2f (Phi Max: %.2f)
-                %s""", precision, recall, f1, accuracy, specificity, phiCoefficient, phiOverPhiMax, phiCoefficientMax, toRow());
+                %s""", precision(), recall(), f1(), accuracy(), specificity(), phiCoefficient(), phiOverPhiMax(), phiCoefficientMax(), toRow());
     }
 
     public String getResultStringWithExpected(ExpectedResults expectedResults) {
@@ -45,7 +44,7 @@ public String getResultStringWithExpected(ExpectedResults expectedResults) {
                 \tPrecision:%8.2f (min. expected: %.2f)
                 \tRecall:%11.2f (min. expected: %.2f)
                 \tF1:%15.2f (min. expected: %.2f)
-                %s""", precision, expectedResults.precision(), recall, expectedResults.recall(), f1, expectedResults.f1(), toRow());
+                %s""", precision(), expectedResults.precision(), recall(), expectedResults.recall(), f1(), expectedResults.f1(), toRow());
     }
 
     public String getExtendedResultStringWithExpected(ExpectedResults expectedResults) {
@@ -53,16 +52,17 @@ public String getExtendedResultStringWithExpected(ExpectedResults expectedResult
         outputBuilder.append(String.format(Locale.ENGLISH, """
                 \tPrecision:%8.2f (min. expected: %.2f)
                 \tRecall:%11.2f (min. expected: %.2f)
-                \tF1:%15.2f (min. expected: %.2f)""", precision, expectedResults.precision(), recall, expectedResults.recall(), f1, expectedResults.f1()));
+                \tF1:%15.2f (min. expected: %.2f)""", precision(), expectedResults.precision(), recall(), expectedResults.recall(), f1(), expectedResults
+                .f1()));
         outputBuilder.append(String.format(Locale.ENGLISH, """
 
                 \tAccuracy:%9.2f (min. expected: %.2f)
-                \tSpecificity:%6.2f (min. expected: %.2f)""", accuracy, expectedResults.accuracy(), specificity, expectedResults.specificity()));
+                \tSpecificity:%6.2f (min. expected: %.2f)""", accuracy(), expectedResults.accuracy(), specificity(), expectedResults.specificity()));
         outputBuilder.append(String.format(Locale.ENGLISH, """
 
                 \tPhi Coef.:%8.2f (min. expected: %.2f)
                 \tPhi/PhiMax:%7.2f (Phi Max: %.2f)
-                %s""", phiCoefficient, expectedResults.phiCoefficient(), phiOverPhiMax, phiCoefficientMax, toRow()));
+                %s""", phiCoefficient(), expectedResults.phiCoefficient(), phiOverPhiMax(), phiCoefficientMax(), toRow()));
         return outputBuilder.toString();
     }
 
@@ -73,64 +73,62 @@ public String getExplicitResultString() {
                 \tTN:%15d
                 \tFN:%15d
                 \tP:%16d
-                \tN:%16d""", truePositives.size(), falsePositives.size(), trueNegatives, falseNegatives.size(), truePositives.size() + falseNegatives.size(),
-                trueNegatives + falsePositives.size());
-    }
-
-    /**
-     * returns the weight (truePos + falseNeg)
-     *
-     * @return the weight
-     */
-    public int getWeight() {
-        return this.truePositives().size() + this.falseNegatives().size();
+                \tN:%16d""", truePositives().size(), falsePositives().size(), trueNegatives(), falseNegatives().size(), truePositives()
+                .size() + falseNegatives().size(), trueNegatives() + falsePositives().size());
     }
 
     public ImmutableList<T> getFound() {
         MutableList<T> found = Lists.mutable.empty();
-        found.addAll(truePositives.castToCollection());
-        found.addAll(falsePositives.castToCollection());
+        found.addAll(classificationResult.getTruePositives());
+        found.addAll(classificationResult.getFalsePositives());
         return found.toImmutable();
     }
 
-    /**
-     * creates new {@link EvaluationResults} from a {@link ResultMatrix}
-     *
-     * @param matrix the {@link ResultMatrix}
-     * @return new {@link EvaluationResults}
-     */
-    public static <T> EvaluationResults<T> createEvaluationResults(ResultMatrix<T> matrix) {
-        int nrTruePos = matrix.truePositives().size();
-        int nrTrueNeg = matrix.trueNegatives();
-        int nrFalsePos = matrix.falsePositives().size();
-        int nrFalseNeg = matrix.falseNegatives().size();
-
-        double precision = EvaluationMetrics.calculatePrecision(nrTruePos, nrFalsePos);
-        double recall = EvaluationMetrics.calculateRecall(nrTruePos, nrFalseNeg);
-        double f1 = EvaluationMetrics.calculateF1(precision, recall);
-
-        double accuracy = 0;
-        double phiCoefficient = 0;
-        double specificity = 0;
-        double phiCoefficientMax = 0;
-        double phiOverPhiMax = 0;
-
-        if (nrTruePos + nrFalsePos + nrFalseNeg + nrTrueNeg != 0) {
-            accuracy = EvaluationMetrics.calculateAccuracy(nrTruePos, nrFalsePos, nrFalseNeg, nrTrueNeg);
-        }
-        phiCoefficient = EvaluationMetrics.calculatePhiCoefficient(nrTruePos, nrFalsePos, nrFalseNeg, nrTrueNeg);
-        if (nrTrueNeg + nrFalsePos != 0) {
-            specificity = EvaluationMetrics.calculateSpecificity(nrTrueNeg, nrFalsePos);
-        }
-        if ((nrFalseNeg + nrTrueNeg) * (nrTruePos + nrFalseNeg) != 0) {
-            phiCoefficientMax = EvaluationMetrics.calculatePhiCoefficientMax(nrTruePos, nrFalsePos, nrFalseNeg, nrTrueNeg);
-        }
-        if (phiCoefficientMax != 0) {
-            phiOverPhiMax = EvaluationMetrics.calculatePhiOverPhiMax(nrTruePos, nrFalsePos, nrFalseNeg, nrTrueNeg);
-        }
-
-        return new EvaluationResults<>(precision, recall, f1, matrix.truePositives(), matrix.trueNegatives(), matrix.falseNegatives(), matrix.falsePositives(),
-                accuracy, phiCoefficient, specificity, phiCoefficientMax, phiOverPhiMax);
+    public double precision() {
+        return classificationResult.getPrecision();
+    }
+
+    public double recall() {
+        return classificationResult.getRecall();
+    }
+
+    public double f1() {
+        return classificationResult.getF1();
+    }
+
+    public double accuracy() {
+        return classificationResult.getAccuracy();
+    }
+
+    public double specificity() {
+        return classificationResult.getSpecificity();
+    }
+
+    public double phiCoefficient() {
+        return classificationResult.getPhiCoefficient();
     }
 
+    public double phiOverPhiMax() {
+        return classificationResult.getPhiOverPhiMax();
+    }
+
+    public double phiCoefficientMax() {
+        return classificationResult.getPhiCoefficientMax();
+    }
+
+    public List<T> truePositives() {
+        return classificationResult.getTruePositives().stream().toList();
+    }
+
+    public List<T> falsePositives() {
+        return classificationResult.getFalsePositives().stream().toList();
+    }
+
+    public List<T> falseNegatives() {
+        return classificationResult.getFalseNegatives().stream().toList();
+    }
+
+    public int trueNegatives() {
+        return java.util.Objects.requireNonNullElse(classificationResult.getTrueNegatives(), 0); // count may be null (aggregated results): use 0
+    }
 }
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/ResultMatrix.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/ResultMatrix.java
deleted file mode 100644
index 70dcab7..0000000
--- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/ResultMatrix.java
+++ /dev/null
@@ -1,15 +0,0 @@
-/* Licensed under MIT 2023-2024. */
-package edu.kit.kastel.mcse.ardoco.core.tests.eval.results;
-
-import org.eclipse.collections.api.list.ImmutableList;
-
-/**
- * reprensents the results in the form of a matrix
- * 
- * @param truePositives  the true positives
- * @param trueNegatives  the true negatives
- * @param falsePositives the false positives
- * @param falseNegatives the false negatives
- */
-public record ResultMatrix<T>(ImmutableList<T> truePositives, int trueNegatives, ImmutableList<T> falsePositives, ImmutableList<T> falseNegatives) {
-}
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/calculator/ResultCalculatorUtil.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/calculator/ResultCalculatorUtil.java
index 6ba099e..91dc84d 100644
--- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/calculator/ResultCalculatorUtil.java
+++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/calculator/ResultCalculatorUtil.java
@@ -1,95 +1,73 @@
 /* Licensed under MIT 2023-2024. */
 package edu.kit.kastel.mcse.ardoco.core.tests.eval.results.calculator;
 
-import org.eclipse.collections.api.factory.Lists;
+import java.util.List;
+
+import org.eclipse.collections.api.factory.Sets;
 import org.eclipse.collections.api.list.ImmutableList;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.EvaluationMetrics;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResultVector;
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
+import edu.kit.kastel.mcse.ardoco.metrics.ClassificationMetricsCalculator;
+import edu.kit.kastel.mcse.ardoco.metrics.result.AggregatedClassificationResult;
+import edu.kit.kastel.mcse.ardoco.metrics.result.AggregationType;
+import edu.kit.kastel.mcse.ardoco.metrics.result.SingleClassificationResult;
 
 /**
  * This utility class provides methods to form the average of several {@link EvaluationResults}
  */
 public final class ResultCalculatorUtil {
+    private static final Logger logger = LoggerFactory.getLogger(ResultCalculatorUtil.class);
 
     private ResultCalculatorUtil() {
         throw new IllegalAccessError();
     }
 
-    public static <T> EvaluationResults<T> calculateAverageResults(ImmutableList<EvaluationResults<T>> results) {
-        int norm = results.size();
-        EvaluationResultVector<T> vector = new EvaluationResultVector<>();
-
-        for (var result : results) {
-            var weight = result.getWeight();
-            if (weight <= 0) {
-                norm--;
-                continue;
-            }
-            vector.add(result);
-        }
+    public static <T> EvaluationResults<T> calculateMacroAverageResults(ImmutableList<EvaluationResults<T>> results) {
+        var averages = getAverages(results);
+        if (averages == null)
+            return null;
 
-        vector.scale(norm);
-        return vector.toEvaluationResults();
+        var macroAverage = averages.stream().filter(it -> it.getType() == AggregationType.MACRO_AVERAGE).findFirst().orElseThrow();
+        return evaluationResults(macroAverage);
     }
 
     public static <T> EvaluationResults<T> calculateWeightedAverageResults(ImmutableList<EvaluationResults<T>> results) {
-        double weight = 0.0;
-        double precision = .0;
-        double recall = 0.0;
-        double f1 = 0.0;
-        double accuracy = 0.0;
-        double specificity = 0.0;
-        double phi = 0.0;
-        double phiMax = 0.0;
-        double phiOverPhiMax = 0.0;
-        int truePositives = 0;
-        int trueNegatives = 0;
-        int falsePositives = 0;
-        int falseNegatives = 0;
-
-        for (var result : results) {
-            double localWeight = result.getWeight();
-            weight += localWeight;
-
-            precision += localWeight * result.precision();
-            recall += localWeight * result.recall();
-            f1 += localWeight * result.f1();
-
-            accuracy += localWeight * result.accuracy();
-            specificity += localWeight * result.specificity();
-            phi += localWeight * result.phiCoefficient();
-            phiMax += localWeight * result.phiCoefficientMax();
-            phiOverPhiMax += localWeight * result.phiOverPhiMax();
-
-            truePositives += result.truePositives().size();
-            falseNegatives += result.falseNegatives().size();
-            falsePositives += result.falsePositives().size();
-            trueNegatives += result.trueNegatives();
+        var averages = getAverages(results);
+        if (averages == null)
+            return null;
 
-        }
+        var weightedAverage = averages.stream().filter(it -> it.getType() == AggregationType.WEIGHTED_AVERAGE).findFirst().orElseThrow();
+        return evaluationResults(weightedAverage); // wrap the weighted aggregate as EvaluationResults
+    }
 
-        precision = precision / weight;
-        recall = recall / weight;
-        f1 = f1 / weight;
-        accuracy = accuracy / weight;
-        specificity = specificity / weight;
+    public static <T> EvaluationResults<T> calculateMicroAverageResults(ImmutableList<EvaluationResults<T>> results) {
+        var averages = getAverages(results);
+        if (averages == null)
+            return null;
 
-        if (truePositives > 0) {
-            phi = EvaluationMetrics.calculatePhiCoefficient(truePositives, falsePositives, falseNegatives, trueNegatives);
-            phiMax = EvaluationMetrics.calculatePhiCoefficientMax(truePositives, falsePositives, falseNegatives, trueNegatives);
-            phiOverPhiMax = EvaluationMetrics.calculatePhiOverPhiMax(truePositives, falsePositives, falseNegatives, trueNegatives);
+        var microAverage = averages.stream().filter(it -> it.getType() == AggregationType.MICRO_AVERAGE).findFirst().orElseThrow();
+        return evaluationResults(microAverage); // wrap the micro aggregate as EvaluationResults
+    }
 
-            return new EvaluationResults<>(precision, recall, f1, Lists.immutable.empty(), 0, Lists.immutable.empty(), Lists.immutable.empty(), accuracy, phi,
-                    specificity, phiMax, phiOverPhiMax);
+    private static <T> EvaluationResults<T> evaluationResults(AggregatedClassificationResult average) {
+        // Wrap the aggregate's metric values only: the three element sets stay empty and the fourth argument is null.
+        var averageAsSingle = new SingleClassificationResult<T>(Sets.mutable.empty(), Sets.mutable.empty(), Sets.mutable.empty(), null, //
+                average.getPrecision(), average.getRecall(), average.getF1(), average.getAccuracy(), average.getSpecificity(), //
+                average.getPhiCoefficient(), average.getPhiCoefficientMax(), average.getPhiOverPhiMax());
+        return new EvaluationResults<>(averageAsSingle);
+    }
+
+    private static <T> List<AggregatedClassificationResult> getAverages(ImmutableList<EvaluationResults<T>> results) {
+        if (results.isEmpty()) {
+            throw new IllegalArgumentException("No results to calculate average from");
         }
 
-        phi = phi / weight;
-        phiMax /= weight;
-        phiOverPhiMax /= weight;
-        return new EvaluationResults<>(precision, recall, f1, Lists.immutable.empty(), 0, Lists.immutable.empty(), Lists.immutable.empty(), accuracy, phi,
-                specificity, phiMax, phiOverPhiMax);
+        var calculator = ClassificationMetricsCalculator.getInstance();
+        var classifications = results.stream().map(EvaluationResults::classificationResult).toList();
 
+        return calculator.calculateAverages(classifications, null);
     }
+
 }
diff --git a/tests/integration-tests/tests-base/src/test/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/EvaluationMetricsTest.java b/tests/integration-tests/tests-base/src/test/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/EvaluationMetricsTest.java
deleted file mode 100644
index bd522c8..0000000
--- a/tests/integration-tests/tests-base/src/test/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/EvaluationMetricsTest.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/* Licensed under MIT 2022-2024. */
-package edu.kit.kastel.mcse.ardoco.core.tests.eval;
-
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Test;
-
-class EvaluationMetricsTest {
-
-    @Test
-    void calculatePrecisionTest() {
-        Assertions.assertAll( //
-                () -> Assertions.assertEquals(.5, EvaluationMetrics.calculatePrecision(10, 10), 1e-3), //
-                () -> Assertions.assertEquals(.857, EvaluationMetrics.calculatePrecision(6, 1), 1e-3), //
-                () -> Assertions.assertEquals(.154, EvaluationMetrics.calculatePrecision(10, 55), 1e-3), //
-                () -> Assertions.assertEquals(.905, EvaluationMetrics.calculatePrecision(210, 22), 1e-3) //
-        );
-    }
-
-    @Test
-    void calculateRecallTest() {
-        Assertions.assertAll( //
-                () -> Assertions.assertEquals(.5, EvaluationMetrics.calculateRecall(10, 10), 1e-3), //
-                () -> Assertions.assertEquals(.75, EvaluationMetrics.calculateRecall(6, 2), 1e-3), //
-                () -> Assertions.assertEquals(.154, EvaluationMetrics.calculateRecall(10, 55), 1e-3), //
-                () -> Assertions.assertEquals(.871, EvaluationMetrics.calculateRecall(210, 31), 1e-3) //
-        );
-    }
-
-    @Test
-    void calculateF1FromPrecisionRecallTest() {
-        Assertions.assertAll( //
-                () -> Assertions.assertEquals(1.0, EvaluationMetrics.calculateF1(1., 1.), 1e-2), //
-                () -> Assertions.assertEquals(0.0, EvaluationMetrics.calculateF1(0., 1.), 1e-2), //
-                () -> Assertions.assertEquals(0.0, EvaluationMetrics.calculateF1(1., 0.), 1e-2), //
-                () -> Assertions.assertEquals(0.18, EvaluationMetrics.calculateF1(.9, .1), 1e-2), //
-                () -> Assertions.assertEquals(0.48, EvaluationMetrics.calculateF1(.6, .4), 1e-2), //
-                () -> Assertions.assertEquals(0.42, EvaluationMetrics.calculateF1(.3, .7), 1e-2), //
-                () -> Assertions.assertEquals(0.9, EvaluationMetrics.calculateF1(.9, .9), 1e-2), //
-                () -> Assertions.assertEquals(0.48, EvaluationMetrics.calculateF1(.4, .6), 1e-2) //
-        );
-    }
-
-    @Test
-    void calculateF1Test() {
-        Assertions.assertAll( //
-                () -> Assertions.assertEquals(.5, EvaluationMetrics.calculateF1(10, 10, 10), 1e-3), //
-                () -> Assertions.assertEquals(.8, EvaluationMetrics.calculateF1(6, 1, 2), 1e-3), //
-                () -> Assertions.assertEquals(.154, EvaluationMetrics.calculateF1(10, 55, 55), 1e-3), //
-                () -> Assertions.assertEquals(.888, EvaluationMetrics.calculateF1(210, 22, 31), 1e-3) //
-        );
-    }
-
-    @Test
-    void calculateAccuracyTest() {
-        Assertions.assertAll( //
-                () -> Assertions.assertEquals(.5, EvaluationMetrics.calculateAccuracy(10, 10, 10, 10), 1e-3), //
-                () -> Assertions.assertEquals(.75, EvaluationMetrics.calculateAccuracy(6, 1, 2, 3), 1e-3), //
-                () -> Assertions.assertEquals(.214, EvaluationMetrics.calculateAccuracy(10, 55, 55, 20), 1e-3), //
-                () -> Assertions.assertEquals(.967, EvaluationMetrics.calculateAccuracy(210, 22, 31, 1337), 1e-3) //
-        );
-    }
-
-    @Test
-    void calculatePhiCoefficientTest() {
-        Assertions.assertAll( //
-                () -> Assertions.assertEquals(.0, EvaluationMetrics.calculatePhiCoefficient(10, 10, 10, 10), 1e-3), //
-                () -> Assertions.assertEquals(.478, EvaluationMetrics.calculatePhiCoefficient(6, 1, 2, 3), 1e-3), //
-                () -> Assertions.assertEquals(-.579, EvaluationMetrics.calculatePhiCoefficient(10, 55, 55, 20), 1e-3), //
-                () -> Assertions.assertEquals(.869, EvaluationMetrics.calculatePhiCoefficient(210, 22, 31, 1337), 1e-3), //
-                () -> Assertions.assertEquals(.0, EvaluationMetrics.calculatePhiCoefficient(0, 0, 11, 11), 1e-3), //
-                () -> Assertions.assertEquals(.0, EvaluationMetrics.calculatePhiCoefficient(11, 0, 11, 0), 1e-3) //
-        );
-    }
-
-    @Test
-    void calculateSpecificityTest() {
-        Assertions.assertAll( //
-                () -> Assertions.assertEquals(.5, EvaluationMetrics.calculateSpecificity(1, 1), 1e-3), //
-                () -> Assertions.assertEquals(.76, EvaluationMetrics.calculateSpecificity(1337, 420), 1e-3), //
-                () -> Assertions.assertEquals(.0, EvaluationMetrics.calculateSpecificity(0, 20), 1e-3), //
-                () -> Assertions.assertEquals(1., EvaluationMetrics.calculateSpecificity(20, 0), 1e-3), //
-                () -> Assertions.assertEquals(1., EvaluationMetrics.calculateSpecificity(0, 0), 1e-3), //
-                () -> Assertions.assertEquals(.375, EvaluationMetrics.calculateSpecificity(3, 5), 1e-3) //
-        );
-    }
-
-}
diff --git a/tests/integration-tests/tests-inconsistency/src/test/java/edu/kit/kastel/mcse/ardoco/id/tests/integration/InconsistencyDetectionEvaluationIT.java b/tests/integration-tests/tests-inconsistency/src/test/java/edu/kit/kastel/mcse/ardoco/id/tests/integration/InconsistencyDetectionEvaluationIT.java
index 3295007..c3f57b8 100644
--- a/tests/integration-tests/tests-inconsistency/src/test/java/edu/kit/kastel/mcse/ardoco/id/tests/integration/InconsistencyDetectionEvaluationIT.java
+++ b/tests/integration-tests/tests-inconsistency/src/test/java/edu/kit/kastel/mcse/ardoco/id/tests/integration/InconsistencyDetectionEvaluationIT.java
@@ -2,10 +2,8 @@
 package edu.kit.kastel.mcse.ardoco.id.tests.integration;
 
 import java.io.IOException;
-import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.nio.file.StandardOpenOption;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Locale;
@@ -23,7 +21,6 @@
 import org.junit.jupiter.api.DisplayName;
 import org.junit.jupiter.api.MethodOrderer;
 import org.junit.jupiter.api.Order;
-import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.TestMethodOrder;
 import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
 import org.junit.jupiter.params.ParameterizedTest;
@@ -31,7 +28,6 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import edu.kit.kastel.mcse.ardoco.core.api.inconsistency.InconsistentSentence;
 import edu.kit.kastel.mcse.ardoco.core.api.inconsistency.ModelInconsistency;
 import edu.kit.kastel.mcse.ardoco.core.api.models.ModelElement;
 import edu.kit.kastel.mcse.ardoco.core.api.models.arcotl.ArchitectureModel;
@@ -43,7 +39,6 @@
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.Project;
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ExpectedResults;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ResultMatrix;
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.calculator.ResultCalculatorUtil;
 import edu.kit.kastel.mcse.ardoco.id.tests.integration.inconsistencyhelper.HoldBackRunResultsProducer;
 import edu.kit.kastel.mcse.ardoco.id.types.MissingModelInstanceInconsistency;
@@ -57,30 +52,13 @@
  * are the spots of inconsistency then. We run this multiple times so each element was held back once.
  */
 @TestMethodOrder(MethodOrderer.OrderAnnotation.class)
-public class InconsistencyDetectionEvaluationIT {
+class InconsistencyDetectionEvaluationIT {
     private static final Logger logger = LoggerFactory.getLogger(InconsistencyDetectionEvaluationIT.class);
 
     private static final String OUTPUT = "target/testout";
     public static final String DIRECTORY_NAME = "ardoco_eval_id";
 
-    /**
-     * missing models in model
-     */
-    private static final MutableList<EvaluationResults<String>> OVERALL_MME_RESULTS = Lists.mutable.empty(); //
-    private static final MutableList<EvaluationResults<String>> OVERALL_MME_RESULTS_BASELINE = Lists.mutable.empty();
-
-    /**
-     * undocumented models
-     */
-    private static final MutableList<EvaluationResults<String>> OVERALL_UME_RESULTS = Lists.mutable.empty();
-
-    private static final Map<GoldStandardProject, Pair<EvaluationResults<String>, ExpectedResults>> MME_RESULTS = new LinkedHashMap<>();
-    private static final Map<GoldStandardProject, EvaluationResults<String>> MME_RESULTS_BASELINE = new LinkedHashMap<>();
-    private static final Map<GoldStandardProject, EvaluationResults<String>> UME_RESULTS = new LinkedHashMap<>();
-
     private static final String LINE_SEPARATOR = System.lineSeparator();
-    private static boolean ranBaseline = false;
-    private static final Map<GoldStandardProject, ImmutableList<InconsistentSentence>> inconsistentSentencesPerProject = new LinkedHashMap<>();
     private static final Map<GoldStandardProject, ArDoCoResult> arDoCoResults = new LinkedHashMap<>();
 
     /**
@@ -94,18 +72,9 @@ public class InconsistencyDetectionEvaluationIT {
      */
     @DisplayName("Evaluating MME-Inconsistency Detection")
     @ParameterizedTest(name = "Evaluating MME-Inconsistency for {0}")
-    @EnumSource(value = Project.class, mode = EnumSource.Mode.MATCH_NONE, names = "^.*HISTORICAL$")
+    @EnumSource(Project.class)
     @Order(1)
-    protected void missingModelElementInconsistencyIT(GoldStandardProject goldStandardProject) {
-        runMissingModelElementInconsistencyEval(goldStandardProject, goldStandardProject.getExpectedInconsistencyResults());
-    }
-
-    @EnabledIfEnvironmentVariable(named = "testHistoric", matches = ".*")
-    @DisplayName("Evaluating MME-Inconsistency Detection (Historic)")
-    @ParameterizedTest(name = "Evaluating MME-Inconsistency for {0}")
-    @EnumSource(value = Project.class, mode = EnumSource.Mode.MATCH_ALL, names = "^.*HISTORICAL$")
-    @Order(2)
-    protected void missingModelElementInconsistencyHistoricIT(GoldStandardProject goldStandardProject) {
+    void missingModelElementInconsistencyIT(GoldStandardProject goldStandardProject) {
         runMissingModelElementInconsistencyEval(goldStandardProject, goldStandardProject.getExpectedInconsistencyResults());
     }
 
@@ -115,11 +84,8 @@ protected void runMissingModelElementInconsistencyEval(GoldStandardProject goldS
 
         var results = calculateEvaluationResults(goldStandardProject, runs);
 
-        OVERALL_MME_RESULTS.addAll(results);
-
-        EvaluationResults<String> weightedResults = ResultCalculatorUtil.calculateWeightedAverageResults(results.toImmutable());
+        EvaluationResults<String> weightedResults = ResultCalculatorUtil.calculateMicroAverageResults(results.toImmutable());
 
-        MME_RESULTS.put(goldStandardProject, Tuples.pair(weightedResults, expectedInconsistencyResults));
         logResultsMissingModelInconsistency(goldStandardProject, weightedResults, expectedInconsistencyResults);
         checkResults(weightedResults, expectedInconsistencyResults);
 
@@ -135,24 +101,14 @@ protected void runMissingModelElementInconsistencyEval(GoldStandardProject goldS
     @EnabledIfEnvironmentVariable(named = "testBaseline", matches = ".*")
     @DisplayName("Evaluating MME-Inconsistency Detection Baseline")
     @ParameterizedTest(name = "Evaluating Baseline for {0}")
-    @EnumSource(value = Project.class, mode = EnumSource.Mode.MATCH_NONE, names = "^.*HISTORICAL$")
+    @EnumSource(Project.class)
     @Order(5)
-    protected void missingModelElementInconsistencyBaselineIT(GoldStandardProject goldStandardProject) {
-        runMissingModelElementInconsistencyBaselineEval(goldStandardProject);
-    }
-
-    @EnabledIfEnvironmentVariable(named = "testBaseline", matches = ".*")
-    @DisplayName("Evaluating MME-Inconsistency Detection Baseline (Historical)")
-    @ParameterizedTest(name = "Evaluating Baseline for {0}")
-    @EnumSource(value = Project.class, mode = EnumSource.Mode.MATCH_ALL, names = "^.*HISTORICAL$")
-    @Order(6)
-    protected void missingModelElementInconsistencyBaselineHistoricIT(GoldStandardProject goldStandardProject) {
+    void missingModelElementInconsistencyBaselineIT(GoldStandardProject goldStandardProject) {
         runMissingModelElementInconsistencyBaselineEval(goldStandardProject);
     }
 
     protected void runMissingModelElementInconsistencyBaselineEval(GoldStandardProject goldStandardProject) {
         logger.info("Start evaluation of MME-inconsistency baseline for {}", goldStandardProject.getProjectName());
-        ranBaseline = true;
 
         HoldBackRunResultsProducer holdBackRunResultsProducer = new HoldBackRunResultsProducer();
         Map<ModelElement, ArDoCoResult> runs = holdBackRunResultsProducer.produceHoldBackRunResults(goldStandardProject, true);
@@ -160,10 +116,8 @@ protected void runMissingModelElementInconsistencyBaselineEval(GoldStandardProje
         Assertions.assertTrue(runs != null && !runs.isEmpty());
 
         var results = calculateEvaluationResults(goldStandardProject, runs);
-        OVERALL_MME_RESULTS_BASELINE.addAll(results);
 
         var weightedResults = ResultCalculatorUtil.calculateWeightedAverageResults(results.toImmutable());
-        MME_RESULTS_BASELINE.put(goldStandardProject, weightedResults);
 
         if (logger.isInfoEnabled()) {
             String name = goldStandardProject.getProjectName() + " missing model inconsistency";
@@ -178,18 +132,9 @@ protected void runMissingModelElementInconsistencyBaselineEval(GoldStandardProje
      */
     @DisplayName("Evaluate Inconsistency Analyses For MissingTextForModelElementInconsistencies")
     @ParameterizedTest(name = "Evaluating UME-inconsistency for {0}")
-    @EnumSource(value = Project.class, mode = EnumSource.Mode.MATCH_NONE, names = "^.*HISTORICAL$")
+    @EnumSource(Project.class)
     @Order(10)
-    protected void missingTextInconsistencyIT(GoldStandardProject goldStandardProject) {
-        runMissingTextInconsistencyEval(goldStandardProject);
-    }
-
-    @EnabledIfEnvironmentVariable(named = "testHistoric", matches = ".*")
-    @DisplayName("Evaluate Inconsistency Analyses For MissingTextForModelElementInconsistencies " + "(Historical)")
-    @ParameterizedTest(name = "Evaluating UME-inconsistency for {0}")
-    @EnumSource(value = Project.class, mode = EnumSource.Mode.MATCH_ALL, names = "^.*HISTORICAL$")
-    @Order(11)
-    protected void missingTextInconsistencyHistoricIT(GoldStandardProject goldStandardProject) {
+    void missingTextInconsistencyIT(GoldStandardProject goldStandardProject) {
         runMissingTextInconsistencyEval(goldStandardProject);
     }
 
@@ -205,9 +150,6 @@ private void runMissingTextInconsistencyEval(GoldStandardProject goldStandardPro
         var inconsistentModelElements = projectResults.getAllModelInconsistencies().collect(ModelInconsistency::getModelInstanceUid).toList();
         var results = TestUtil.compareInconsistencies(projectResults, inconsistentModelElements.toImmutable(), expectedInconsistentModelElements.toImmutable());
 
-        OVERALL_UME_RESULTS.add(results);
-        UME_RESULTS.put(goldStandardProject, results);
-
         String name = goldStandardProject.getProjectName() + " missing text inconsistency";
         TestUtil.logExplicitResults(logger, name, results);
         writeOutResults(goldStandardProject, results);
@@ -228,77 +170,6 @@ protected HoldBackRunResultsProducer getHoldBackRunResultsProducer() {
         return new HoldBackRunResultsProducer();
     }
 
-    @EnabledIfEnvironmentVariable(named = "overallResults", matches = ".*")
-    @Test
-    @Order(999)
-    void overAllResultsIT() {
-        var weightedResults = ResultCalculatorUtil.calculateWeightedAverageResults(OVERALL_MME_RESULTS.toImmutable());
-        var macroResults = ResultCalculatorUtil.calculateAverageResults(OVERALL_MME_RESULTS.toImmutable());
-
-        Assertions.assertNotNull(weightedResults);
-        Assertions.assertNotNull(macroResults);
-
-        var weightedUMEResults = ResultCalculatorUtil.calculateWeightedAverageResults(OVERALL_UME_RESULTS.toImmutable());
-        var macroUMEResults = ResultCalculatorUtil.calculateAverageResults(OVERALL_UME_RESULTS.toImmutable());
-
-        Assertions.assertNotNull(weightedUMEResults);
-        Assertions.assertNotNull(macroUMEResults);
-
-        if (logger.isInfoEnabled()) {
-            var mmeBaselineMacro = ResultCalculatorUtil.calculateAverageResults(OVERALL_MME_RESULTS_BASELINE.toImmutable());
-            var mmeBaselineWeighted = ResultCalculatorUtil.calculateWeightedAverageResults(OVERALL_MME_RESULTS_BASELINE.toImmutable());
-            ;
-
-            var mmeOverallWeightedName = "MME Overall Weighted";
-            TestUtil.logResults(logger, mmeOverallWeightedName, weightedResults);
-
-            var mmeOverallMacroName = "MME Overall Macro";
-            TestUtil.logResults(logger, mmeOverallMacroName, macroResults);
-
-            var mmeBaselineOverallWeightedName = "MME BASELINE Overall Weighted";
-            var mmeBaselineOverallMacroName = "MME BASELINE Overall Macro";
-            if (ranBaseline) {
-                TestUtil.logResults(logger, mmeBaselineOverallWeightedName, mmeBaselineWeighted);
-                TestUtil.logResults(logger, mmeBaselineOverallMacroName, mmeBaselineMacro);
-            }
-
-            var umeOverallWeightedName = "Undoc. Model Element Overall Weighted";
-            TestUtil.logResults(logger, umeOverallWeightedName, weightedUMEResults);
-            var umeOverallMacroName = "Undoc. Model Element Overall Macro";
-            TestUtil.logResults(logger, umeOverallMacroName, macroUMEResults);
-
-            logger.info("MME");
-            for (var entry : MME_RESULTS.entrySet()) {
-                TestUtil.logExtendedResultsAsRow(logger, "Proj", entry.getKey().getAlias(), entry.getValue().getOne());
-            }
-            TestUtil.logExtendedResultsAsRow(logger, "-", "Macro", macroResults);
-            TestUtil.logExtendedResultsAsRow(logger, "-", "Weighted", weightedResults);
-
-            if (ranBaseline) {
-                logger.info("MME Baseline");
-                for (var entry : MME_RESULTS_BASELINE.entrySet()) {
-                    TestUtil.logExtendedResultsAsRow(logger, "Proj", entry.getKey().getAlias(), entry.getValue());
-                }
-                TestUtil.logExtendedResultsAsRow(logger, "-", "Macro", mmeBaselineMacro);
-                TestUtil.logExtendedResultsAsRow(logger, "-", "Weighted", mmeBaselineWeighted);
-            }
-
-            logger.info("UME");
-            for (var entry : UME_RESULTS.entrySet()) {
-                TestUtil.logExtendedResultsAsRow(logger, "Proj", entry.getKey().getAlias(), entry.getValue());
-            }
-            TestUtil.logExtendedResultsAsRow(logger, "-", "Macro", macroUMEResults);
-            TestUtil.logExtendedResultsAsRow(logger, "-", "Weighted", weightedUMEResults);
-        }
-
-        try {
-            writeOutput(weightedResults, macroResults);
-            writeOverallOutputMissingTextInconsistency(weightedUMEResults, macroUMEResults);
-        } catch (IOException e) {
-            logger.error(e.getMessage(), e.getCause());
-        }
-    }
-
     private MutableList<EvaluationResults<String>> calculateEvaluationResults(GoldStandardProject goldStandardProject, Map<ModelElement, ArDoCoResult> runs) {
 
         Map<ModelElement, EvaluationResults<String>> results = Maps.mutable.empty();
@@ -310,15 +181,14 @@ private MutableList<EvaluationResults<String>> calculateEvaluationResults(GoldSt
             if (runEvalResults != null) {
                 results.put(modelInstance, runEvalResults);
             } else {
-                // for the base case, instead of calculating results, save the found inconsistencies.
-                inconsistentSentencesPerProject.put(goldStandardProject, arDoCoResult.getInconsistentSentences());
+                logger.error("Evaluation results for {} are null.", modelInstance);
             }
         }
         return Lists.mutable.ofAll(results.values());
     }
 
     private EvaluationResults<String> evaluateRun(GoldStandardProject goldStandardProject, ModelElement removedElement, ArDoCoResult arDoCoResult) {
-        var modelId = arDoCoResult.getModelIds().get(0);
+        var modelId = arDoCoResult.getModelIds().getFirst();
 
         ImmutableList<MissingModelInstanceInconsistency> inconsistencies = arDoCoResult.getInconsistenciesOfTypeForModel(modelId,
                 MissingModelInstanceInconsistency.class);
@@ -436,49 +306,6 @@ private static Pair<StringBuilder, StringBuilder> createOutput(GoldStandardProje
         return Tuples.pair(outputBuilder, detailedOutputBuilder);
     }
 
-    private static void writeOutput(EvaluationResults<String> weightedResults, EvaluationResults<String> macroResults) throws IOException {
-        var evalDir = Path.of(OUTPUT).resolve(DIRECTORY_NAME);
-        Files.createDirectories(evalDir);
-        var outputFile = evalDir.resolve("base_results.md");
-
-        var outputBuilder = new StringBuilder("# Inconsistency Detection").append(LINE_SEPARATOR);
-
-        var resultString = TestUtil.createResultLogString("Overall Weighted", weightedResults);
-        outputBuilder.append(resultString).append(LINE_SEPARATOR);
-        resultString = TestUtil.createResultLogString("Overall Macro", macroResults);
-        outputBuilder.append(resultString).append(LINE_SEPARATOR);
-        outputBuilder.append(LINE_SEPARATOR);
-
-        for (var entry : inconsistentSentencesPerProject.entrySet()) {
-            var goldStandardProject = entry.getKey();
-            outputBuilder.append("## ").append(goldStandardProject.getProjectName());
-            outputBuilder.append(LINE_SEPARATOR);
-            var inconsistentSentences = entry.getValue();
-            for (var inconsistentSentence : inconsistentSentences) {
-                outputBuilder.append(inconsistentSentence.getInfoString());
-                outputBuilder.append(LINE_SEPARATOR);
-            }
-        }
-
-        Files.writeString(outputFile, outputBuilder.toString(), StandardCharsets.UTF_8, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
-    }
-
-    //FIXME Something is wrong with this.
-    private static void writeOverallOutputMissingTextInconsistency(EvaluationResults<String> weightedResults, EvaluationResults<String> macroResults)
-            throws IOException {
-        var evalDir = Path.of(OUTPUT).resolve(DIRECTORY_NAME);
-        Files.createDirectories(evalDir);
-        var outputFile = evalDir.resolve("_MissingTextInconsistency_Overall_Results.md");
-
-        var outputBuilder = new StringBuilder("# Inconsistency Detection - Missing Text For Model " + "Element").append(LINE_SEPARATOR);
-
-        var resultString = TestUtil.createResultLogString("Overall Weighted", weightedResults);
-        outputBuilder.append(resultString).append(LINE_SEPARATOR);
-        resultString = TestUtil.createResultLogString("Overall Macro", macroResults);
-        outputBuilder.append(resultString).append(LINE_SEPARATOR);
-        outputBuilder.append(LINE_SEPARATOR);
-    }
-
     private static String getOverallResultsString(MutableList<EvaluationResults<String>> results) {
         StringBuilder outputBuilder = new StringBuilder();
         outputBuilder.append("###").append(LINE_SEPARATOR);
@@ -529,18 +356,15 @@ private static Pair<MutableList<EvaluationResults<String>>, StringBuilder> inspe
 
     private static void inspectRun(StringBuilder outputBuilder, StringBuilder detailedOutputBuilder, MutableList<EvaluationResults<String>> allResults,
             ArDoCoResult arDoCoResult, EvaluationResults<String> result) {
-        var truePositives = result.truePositives().toList();
+        var truePositives = result.truePositives();
         appendResults(truePositives, detailedOutputBuilder, "True Positives", arDoCoResult, outputBuilder);
 
-        var falsePositives = result.falsePositives().toList();
+        var falsePositives = result.falsePositives();
         appendResults(falsePositives, detailedOutputBuilder, "False Positives", arDoCoResult, outputBuilder);
 
-        var falseNegatives = result.falseNegatives().toList();
+        var falseNegatives = result.falseNegatives();
         appendResults(falseNegatives, detailedOutputBuilder, "False Negatives", arDoCoResult, outputBuilder);
-
-        var results = EvaluationResults.createEvaluationResults(new ResultMatrix<>(truePositives.toImmutable(), 0, falsePositives.toImmutable(), falseNegatives
-                .toImmutable()));
-        allResults.add(results);
+        allResults.add(result);
     }
 
     private static void appendResults(List<String> resultList, StringBuilder detailedOutputBuilder, String type, ArDoCoResult arDoCoResult,
@@ -588,7 +412,7 @@ private static String listToString(List<?> truePositives) {
     }
 
     private static ImmutableList<MissingModelInstanceInconsistency> getInitialInconsistencies(ArDoCoResult arDoCoResult) {
-        var id = arDoCoResult.getModelIds().get(0);
+        var id = arDoCoResult.getModelIds().getFirst();
         return arDoCoResult.getInconsistenciesOfTypeForModel(id, MissingModelInstanceInconsistency.class);
     }
 }
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SadSamTraceabilityLinkRecoveryEvaluation.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SadSamTraceabilityLinkRecoveryEvaluation.java
index a18a52b..6af59c3 100644
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SadSamTraceabilityLinkRecoveryEvaluation.java
+++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SadSamTraceabilityLinkRecoveryEvaluation.java
@@ -1,63 +1,36 @@
 /* Licensed under MIT 2021-2024. */
 package edu.kit.kastel.mcse.ardoco.tlr.tests.integration;
 
-import static edu.kit.kastel.mcse.ardoco.tlr.tests.integration.TraceLinkEvaluationIT.DATA_MAP;
 import static edu.kit.kastel.mcse.ardoco.tlr.tests.integration.TraceLinkEvaluationIT.OUTPUT;
-import static edu.kit.kastel.mcse.ardoco.tlr.tests.integration.TraceLinkEvaluationIT.PROJECT_RESULTS;
-import static edu.kit.kastel.mcse.ardoco.tlr.tests.integration.TraceLinkEvaluationIT.RESULTS;
 
 import java.io.File;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.stream.Stream;
 
-import org.eclipse.collections.api.collection.ImmutableCollection;
 import org.eclipse.collections.api.factory.Lists;
 import org.eclipse.collections.api.list.ImmutableList;
-import org.eclipse.collections.api.list.MutableList;
-import org.eclipse.collections.impl.tuple.Tuples;
-import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.MethodOrderer;
 import org.junit.jupiter.api.TestMethodOrder;
 
-import edu.kit.kastel.mcse.ardoco.core.api.PreprocessingData;
 import edu.kit.kastel.mcse.ardoco.core.api.models.ArchitectureModelType;
-import edu.kit.kastel.mcse.ardoco.core.api.models.ModelInstance;
-import edu.kit.kastel.mcse.ardoco.core.api.models.ModelStates;
 import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult;
-import edu.kit.kastel.mcse.ardoco.core.api.text.Sentence;
-import edu.kit.kastel.mcse.ardoco.core.common.util.FilePrinter;
 import edu.kit.kastel.mcse.ardoco.core.common.util.TraceLinkUtilities;
-import edu.kit.kastel.mcse.ardoco.core.data.DataRepository;
 import edu.kit.kastel.mcse.ardoco.core.execution.ConfigurationHelper;
 import edu.kit.kastel.mcse.ardoco.core.execution.runner.ArDoCoRunner;
-import edu.kit.kastel.mcse.ardoco.core.tests.TestUtil;
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ExpectedResults;
 import edu.kit.kastel.mcse.ardoco.tlr.execution.ArDoCoForSadSamTraceabilityLinkRecovery;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TLRUtil;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLGoldStandardFile;
 
 /**
  * Integration test that evaluates the traceability link recovery capabilities of ArDoCo.
  */
 @TestMethodOrder(MethodOrderer.OrderAnnotation.class)
 public class SadSamTraceabilityLinkRecoveryEvaluation<T extends GoldStandardProject> extends TraceabilityLinkRecoveryEvaluation<T> {
+
     @Override
     protected boolean resultHasRequiredData(ArDoCoResult arDoCoResult) {
         var traceLinks = arDoCoResult.getAllTraceLinks();
         return !traceLinks.isEmpty();
     }
 
-    @Override
-    protected ArDoCoResult runTraceLinkEvaluation(T project) {
-        var result = super.runTraceLinkEvaluation(project);
-        DATA_MAP.put(project, result);
-        return result;
-    }
-
     @Override
     protected ArDoCoRunner getAndSetupRunner(T project) {
         var additionalConfigsMap = ConfigurationHelper.loadAdditionalConfigs(project.getAdditionalConfigurationsFile());
@@ -104,27 +77,6 @@ protected int getConfusionMatrixSum(ArDoCoResult arDoCoResult) {
         return sentences * modelElements;
     }
 
-    @Override
-    protected EvaluationResults<String> calculateEvaluationResults(ArDoCoResult arDoCoResult, ImmutableCollection<String> goldStandard) {
-        var results = super.calculateEvaluationResults(arDoCoResult, goldStandard);
-        PROJECT_RESULTS.add(results);
-        return results;
-    }
-
-    public ArDoCoResult getArDoCoResult(T project) {
-        String name = project.getProjectName();
-        var inputModel = project.getModelFile();
-        var inputText = project.getTextFile();
-
-        var arDoCoResult = DATA_MAP.get(project);
-        if (arDoCoResult == null) {
-            File additionalConfigurations = project.getAdditionalConfigurationsFile();
-            arDoCoResult = getArDoCoResult(name, inputText, inputModel, ArchitectureModelType.PCM, additionalConfigurations);
-            DATA_MAP.put(project, arDoCoResult);
-        }
-        return arDoCoResult;
-    }
-
     protected ArDoCoResult getArDoCoResult(String name, File inputText, File inputModel, ArchitectureModelType architectureModelType,
             File additionalConfigurations) {
         var additionalConfigsMap = ConfigurationHelper.loadAdditionalConfigs(additionalConfigurations);
@@ -134,132 +86,4 @@ protected ArDoCoResult getArDoCoResult(String name, File inputText, File inputMo
         runner.setUp(inputText, inputModel, architectureModelType, additionalConfigsMap, outputDir);
         return runner.run();
     }
-
-    /**
-     * calculate {@link EvaluationResults} and compare to {@link ExpectedResults}
-     *
-     * @param project      the result's project
-     * @param arDoCoResult the result
-     */
-    public static void checkResults(GoldStandardProject project, ArDoCoResult arDoCoResult) {
-
-        var modelIds = arDoCoResult.getModelIds();
-        var modelId = modelIds.stream().findFirst().orElseThrow();
-
-        var goldStandard = project.getTlrGoldStandard();
-        EvaluationResults<String> results = calculateResults(goldStandard, arDoCoResult, modelId);
-
-        ExpectedResults expectedResults = project.getExpectedTraceLinkResults();
-
-        logAndSaveProjectResult(project, arDoCoResult, results, expectedResults);
-
-        compareResultWithExpected(results, expectedResults);
-
-    }
-
-    private static void logAndSaveProjectResult(GoldStandardProject project, ArDoCoResult arDoCoResult, EvaluationResults<String> results,
-            ExpectedResults expectedResults) {
-        if (logger.isInfoEnabled()) {
-            String projectName = project.getProjectName();
-            TestUtil.logExtendedResultsWithExpected(logger, SadSamTraceabilityLinkRecoveryEvaluation.class, projectName, results, expectedResults);
-
-            var data = arDoCoResult.dataRepository();
-            printDetailedDebug(results, data);
-            try {
-                RESULTS.add(Tuples.pair(project, TestUtil.compareTLR(DATA_MAP.get(project), TLRUtil.getTraceLinks(data), TLGoldStandardFile.loadLinks(project)
-                        .toImmutable())));
-                DATA_MAP.put(project, arDoCoResult);
-                PROJECT_RESULTS.add(results);
-            } catch (IOException e) {
-                // failing to save project results is irrelevant for test success
-                logger.warn("Failed to load file for gold standard", e);
-            }
-        }
-    }
-
-    private static void compareResultWithExpected(EvaluationResults<String> results, ExpectedResults expectedResults) {
-        Assertions.assertAll(//
-                () -> Assertions.assertTrue(results.precision() >= expectedResults.precision(), "Precision " + results
-                        .precision() + " is below the expected minimum value " + expectedResults.precision()), //
-                () -> Assertions.assertTrue(results.recall() >= expectedResults.recall(), "Recall " + results
-                        .recall() + " is below the expected minimum value " + expectedResults.recall()), //
-                () -> Assertions.assertTrue(results.f1() >= expectedResults.f1(), "F1 " + results
-                        .f1() + " is below the expected minimum value " + expectedResults.f1()));
-        Assertions.assertAll(//
-                () -> Assertions.assertTrue(results.accuracy() >= expectedResults.accuracy(), "Accuracy " + results
-                        .accuracy() + " is below the expected minimum value " + expectedResults.accuracy()), //
-                () -> Assertions.assertTrue(results.phiCoefficient() >= expectedResults.phiCoefficient(), "Phi coefficient " + results
-                        .phiCoefficient() + " is below the expected minimum value " + expectedResults.phiCoefficient()));
-    }
-
-    public static void writeDetailedOutput(GoldStandardProject project, ArDoCoResult arDoCoResult) {
-        String name = project.getProjectName();
-        var path = Path.of(OUTPUT).resolve(name);
-        try {
-            Files.createDirectories(path);
-        } catch (IOException e) {
-            logger.warn("Could not create directories.", e);
-        }
-        FilePrinter.printResultsInFiles(path, name, arDoCoResult);
-    }
-
-    private static EvaluationResults<String> calculateResults(ImmutableList<String> goldStandard, ArDoCoResult arDoCoResult, String modelId) {
-        var traceLinks = arDoCoResult.getTraceLinksForModelAsStrings(modelId);
-        logger.info("Found {} trace links", traceLinks.size());
-
-        return TestUtil.compareTLR(arDoCoResult, traceLinks, goldStandard);
-    }
-
-    private static void printDetailedDebug(EvaluationResults<String> results, DataRepository data) {
-        var falseNegatives = results.falseNegatives().stream().map(Object::toString);
-        var falsePositives = results.falsePositives().stream().map(Object::toString);
-
-        var sentences = data.getData(PreprocessingData.ID, PreprocessingData.class).orElseThrow().getText().getSentences();
-        var modelStates = data.getData(ModelStates.ID, ModelStates.class).orElseThrow();
-
-        for (String modelId : modelStates.modelIds()) {
-            var instances = modelStates.getModelExtractionState(modelId).getInstances();
-
-            var falseNegativeOutput = createOutputStrings(falseNegatives, sentences, instances);
-            var falsePositivesOutput = createOutputStrings(falsePositives, sentences, instances);
-
-            logger.debug("Model: \n{}", modelId);
-            if (!falseNegativeOutput.isEmpty()) {
-                logger.debug("False negatives:\n{}", String.join("\n", falseNegativeOutput));
-            }
-            if (!falsePositivesOutput.isEmpty()) {
-                logger.debug("False positives:\n{}", String.join("\n", falsePositivesOutput));
-            }
-        }
-
-    }
-
-    private static MutableList<String> createOutputStrings(Stream<String> traceLinkStrings, ImmutableList<Sentence> sentences,
-            ImmutableList<ModelInstance> instances) {
-        var outputList = Lists.mutable.<String>empty();
-        for (var traceLinkString : traceLinkStrings.toList()) {
-            var parts = traceLinkString.split(",", -1);
-            if (parts.length < 2) {
-                continue;
-            }
-            var id = parts[0];
-
-            var modelElement = instances.detect(instance -> instance.getUid().equals(id));
-
-            var sentence = parts[1];
-
-            var sentenceNo = -1;
-            try {
-                sentenceNo = Integer.parseInt(sentence);
-            } catch (NumberFormatException e) {
-                logger.debug("Having problems retrieving sentence, so skipping line: {}", traceLinkString);
-                continue;
-            }
-            var sentenceText = sentences.get(sentenceNo - 1);
-
-            outputList.add(String.format("%-20s - %s (%s)", modelElement.getFullName(), sentenceText.getText(), traceLinkString));
-        }
-        return outputList;
-    }
-
 }
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SamCodeTraceabilityLinkRecoveryEvaluation.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SamCodeTraceabilityLinkRecoveryEvaluation.java
index 07093be..ae136f1 100644
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SamCodeTraceabilityLinkRecoveryEvaluation.java
+++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SamCodeTraceabilityLinkRecoveryEvaluation.java
@@ -51,8 +51,7 @@ protected ImmutableList<String> createTraceLinkStringList(ArDoCoResult arDoCoRes
 
     @Override
     protected ImmutableList<String> getGoldStandard(CodeProject codeProject) {
-        ImmutableList<String> samCodeGoldStandard = codeProject.getSamCodeGoldStandard();
-        return samCodeGoldStandard;
+        return codeProject.getSamCodeGoldStandard();
     }
 
     @Override
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationIT.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationIT.java
index 244e246..25c165b 100644
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationIT.java
+++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationIT.java
@@ -3,63 +3,30 @@
 
 import static edu.kit.kastel.mcse.ardoco.core.tests.eval.ProjectHelper.ANALYZE_CODE_DIRECTLY;
 
-import java.io.File;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.LinkedHashMap;
+import java.util.Arrays;
 import java.util.List;
-import java.util.Map;
 import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.function.Predicate;
 
-import org.eclipse.collections.api.factory.Lists;
-import org.eclipse.collections.api.list.MutableList;
-import org.eclipse.collections.api.tuple.Pair;
 import org.junit.jupiter.api.*;
 import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.EnumSource;
 import org.junit.jupiter.params.provider.MethodSource;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
-import edu.kit.kastel.mcse.ardoco.core.api.models.ArchitectureModelType;
-import edu.kit.kastel.mcse.ardoco.core.api.models.tracelinks.SadSamTraceLink;
 import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult;
 import edu.kit.kastel.mcse.ardoco.core.common.RepositoryHandler;
-import edu.kit.kastel.mcse.ardoco.core.common.util.DataRepositoryHelper;
-import edu.kit.kastel.mcse.ardoco.core.execution.ArDoCo;
-import edu.kit.kastel.mcse.ardoco.core.tests.TestUtil;
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.CodeProject;
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.Project;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.calculator.ResultCalculatorUtil;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLDiffFile;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLLogFile;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLModelFile;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLPreviousFile;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLSentenceFile;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLSummaryFile;
 
 @TestMethodOrder(MethodOrderer.OrderAnnotation.class)
-public class TraceLinkEvaluationIT<T extends GoldStandardProject> {
-
-    protected static final Logger logger = LoggerFactory.getLogger(TraceLinkEvaluationIT.class);
+class TraceLinkEvaluationIT<T extends GoldStandardProject> {
 
     protected static final String OUTPUT = "target/testout-tlr-it";
 
     protected static final String LOGGING_ARDOCO_CORE = "org.slf4j.simpleLogger.log.edu.kit.kastel.mcse.ardoco.core";
     protected static AtomicBoolean analyzeCodeDirectly = ANALYZE_CODE_DIRECTLY;
 
-    protected static final List<Pair<GoldStandardProject, EvaluationResults<TestLink>>> RESULTS = new ArrayList<>();
-    protected static final MutableList<EvaluationResults<String>> PROJECT_RESULTS = Lists.mutable.empty();
-    protected static final Map<GoldStandardProject, ArDoCoResult> DATA_MAP = new LinkedHashMap<>();
-
     @BeforeAll
     static void beforeAll() {
         System.setProperty(LOGGING_ARDOCO_CORE, "info");
@@ -67,9 +34,6 @@ static void beforeAll() {
 
     @AfterAll
     static void afterAll() {
-        logOverallResultsForSadSamTlr();
-        writeOutputForSadSamTlr();
-
         System.setProperty(LOGGING_ARDOCO_CORE, "error");
     }
 
@@ -77,66 +41,12 @@ private static void cleanUpCodeRepository(CodeProject codeProject) {
         RepositoryHandler.removeRepository(codeProject.getCodeLocation());
     }
 
-    private static void logOverallResultsForSadSamTlr() {
-        if (logger.isInfoEnabled()) {
-            var name = "Overall Weighted";
-            var results = ResultCalculatorUtil.calculateWeightedAverageResults(PROJECT_RESULTS.toImmutable());
-            TestUtil.logResults(logger, name, results);
-
-            name = "Overall Macro";
-            results = ResultCalculatorUtil.calculateAverageResults(PROJECT_RESULTS.toImmutable());
-            TestUtil.logResults(logger, name, results);
-        }
-    }
-
-    private static void writeOutputForSadSamTlr() {
-        var evalDir = Path.of(OUTPUT).resolve("ardoco_eval_tl");
-        try {
-            Files.createDirectories(evalDir);
-
-            TLSummaryFile.save(evalDir.resolve("summary.txt"), RESULTS, DATA_MAP);
-            TLModelFile.save(evalDir.resolve("models.txt"), DATA_MAP);
-            TLSentenceFile.save(evalDir.resolve("sentences.txt"), DATA_MAP);
-            TLLogFile.append(evalDir.resolve("log.txt"), RESULTS);
-            TLPreviousFile.save(evalDir.resolve("previous.csv"), RESULTS, logger); // save before loading
-            TLDiffFile.save(evalDir.resolve("diff.txt"), RESULTS, TLPreviousFile.load(evalDir.resolve("previous.csv"), DATA_MAP), DATA_MAP);
-        } catch (IOException e) {
-            logger.error("Failed to write output.", e);
-        }
-    }
-
-    private static List<Project> getHistoricalProjects() {
-        return filterForHistoricalProjects(List.of(Project.values()));
-    }
-
-    private static List<CodeProject> getNonHistoricalCodeProjects() {
-        return filterForNonHistoricalProjects(List.of(CodeProject.values()));
-    }
-
-    private static <T extends Enum<T>> List<T> filterForHistoricalProjects(Collection<T> unfilteredProjects) {
-        return filterForProjects(unfilteredProjects, p -> p.name().endsWith("HISTORICAL"));
-    }
-
-    private static <T extends Enum<T>> List<T> filterForNonHistoricalProjects(Collection<T> unfilteredProjects) {
-        return filterForProjects(unfilteredProjects, p -> !p.name().endsWith("HISTORICAL"));
-    }
-
-    private static <T extends Enum<T>> List<T> filterForProjects(Collection<T> unfilteredProjects, Predicate<T> filter) {
-        List<T> projects = new ArrayList<>();
-        for (var project : unfilteredProjects) {
-            if (filter.test(project)) {
-                projects.add(project);
-            }
-        }
-        return projects;
-    }
-
     @EnabledIfEnvironmentVariable(named = "testCodeFull", matches = ".*")
     @DisplayName("Evaluate SAD-SAM-Code TLR (Full)")
     @ParameterizedTest(name = "{0}")
-    @MethodSource("getNonHistoricalCodeProjects")
+    @EnumSource(CodeProject.class)
     @Order(1)
-    protected void evaluateSadSamCodeTlrFullIT(CodeProject project) {
+    void evaluateSadSamCodeTlrFullIT(CodeProject project) {
         analyzeCodeDirectly.set(true);
         if (analyzeCodeDirectly.get())
             cleanUpCodeRepository(project);
@@ -149,9 +59,9 @@ protected void evaluateSadSamCodeTlrFullIT(CodeProject project) {
     @EnabledIfEnvironmentVariable(named = "testCodeFull", matches = ".*")
     @DisplayName("Evaluate SAM-Code TLR (Full)")
     @ParameterizedTest(name = "{0}")
-    @EnumSource(value = CodeProject.class, mode = EnumSource.Mode.MATCH_NONE, names = "^.*HISTORICAL$")
+    @EnumSource(value = CodeProject.class)
     @Order(2)
-    protected void evaluateSamCodeTlrFullIT(CodeProject project) {
+    void evaluateSamCodeTlrFullIT(CodeProject project) {
         analyzeCodeDirectly.set(true);
         if (analyzeCodeDirectly.get())
             cleanUpCodeRepository(project);
@@ -163,9 +73,9 @@ protected void evaluateSamCodeTlrFullIT(CodeProject project) {
 
     @DisplayName("Evaluate SAD-SAM-Code TLR")
     @ParameterizedTest(name = "{0}")
-    @MethodSource("getNonHistoricalCodeProjects")
+    @EnumSource(CodeProject.class)
     @Order(9)
-    protected void evaluateSadSamCodeTlrIT(CodeProject codeProject) {
+    void evaluateSadSamCodeTlrIT(CodeProject codeProject) {
         analyzeCodeDirectly.set(false);
         if (analyzeCodeDirectly.get())
             cleanUpCodeRepository(codeProject);
@@ -179,9 +89,9 @@ protected void evaluateSadSamCodeTlrIT(CodeProject codeProject) {
 
     @DisplayName("Evaluate SAM-Code TLR")
     @ParameterizedTest(name = "{0}")
-    @MethodSource("getNonHistoricalCodeProjects")
+    @EnumSource(CodeProject.class)
     @Order(10)
-    protected void evaluateSamCodeTlrIT(CodeProject project) {
+    void evaluateSamCodeTlrIT(CodeProject project) {
         analyzeCodeDirectly.set(false);
         if (analyzeCodeDirectly.get())
             cleanUpCodeRepository(project);
@@ -193,68 +103,15 @@ protected void evaluateSamCodeTlrIT(CodeProject project) {
 
     @DisplayName("Evaluate SAD-SAM TLR")
     @ParameterizedTest(name = "{0}")
-    @MethodSource("getNonHistoricalCodeProjects")
+    @MethodSource("getProjects")
     @Order(20)
-    protected void evaluateSadSamTlrIT(T project) {
+    void evaluateSadSamTlrIT(T project) {
         var evaluation = new SadSamTraceabilityLinkRecoveryEvaluation<>();
-        var results = evaluation.runTraceLinkEvaluation(project);
-        Assertions.assertNotNull(results);
-    }
-
-    @EnabledIfEnvironmentVariable(named = "testHistoric", matches = ".*")
-    @DisplayName("Evaluate TLR (Historical)")
-    @ParameterizedTest(name = "{0}")
-    @MethodSource("getHistoricalProjects")
-    @Order(21)
-    protected void evaluateSadSamTlrHistoricalIT(T project) {
-        var evaluation = new SadSamTraceabilityLinkRecoveryEvaluation<>();
-        ArDoCoResult arDoCoResult = evaluation.getArDoCoResult(project);
+        var arDoCoResult = evaluation.runTraceLinkEvaluation(project);
         Assertions.assertNotNull(arDoCoResult);
-
-        SadSamTraceabilityLinkRecoveryEvaluation.checkResults(project, arDoCoResult);
-        SadSamTraceabilityLinkRecoveryEvaluation.writeDetailedOutput(project, arDoCoResult);
     }
 
-    /**
-     * Test if the results from executing ArDoCo with UML are the same as with PCM
-     *
-     * @param project the project, provided by the EnumSource
-     */
-    @Disabled("Only enable this for local tests.")
-    @DisplayName("Compare TLR for UML/PCM")
-    @ParameterizedTest(name = "{0}")
-    @EnumSource(value = Project.class)
-    @Order(29)
-    protected void compareSadSamTlRForPcmAndUmlIT(Project project) {
-        String name = project.name();
-        var inputText = project.getTextFile();
-
-        var evaluation = new SadSamTraceabilityLinkRecoveryEvaluation<>();
-
-        var ardocoRunForPCM = evaluation.getArDoCoResult(project);
-        Assertions.assertNotNull(ardocoRunForPCM);
-
-        var arDoCo = ArDoCo.getInstance(name);
-        var preprocessingData = ardocoRunForPCM.getPreprocessingData();
-        DataRepositoryHelper.putPreprocessingData(arDoCo.getDataRepository(), preprocessingData);
-
-        File umlModelFile = project.getModelFile(ArchitectureModelType.UML);
-        File additionalConfigurations = project.getAdditionalConfigurationsFile();
-        var ardocoRunForUML = evaluation.getArDoCoResult(name, inputText, umlModelFile, ArchitectureModelType.UML, additionalConfigurations);
-        Assertions.assertNotNull(ardocoRunForUML);
-
-        var pcmTLs = ardocoRunForPCM.getAllTraceLinks()
-                .toList()
-                .sortThisBy(SadSamTraceLink::getModelElementUid)
-                .sortThisByInt(SadSamTraceLink::getSentenceNumber);
-        var umlTLs = ardocoRunForUML.getAllTraceLinks()
-                .toList()
-                .sortThisBy(SadSamTraceLink::getModelElementUid)
-                .sortThisByInt(SadSamTraceLink::getSentenceNumber);
-
-        Assertions.assertAll( //
-                () -> Assertions.assertEquals(pcmTLs.size(), umlTLs.size()), //
-                () -> Assertions.assertIterableEquals(pcmTLs, umlTLs) //
-        );
+    private static List<? extends GoldStandardProject> getProjects() {
+        return Arrays.asList(Project.values());
     }
 }
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationSadCodeDirectIT.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationSadCodeDirectIT.java
index 6944b61..522c963 100644
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationSadCodeDirectIT.java
+++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationSadCodeDirectIT.java
@@ -3,51 +3,29 @@
 
 import static edu.kit.kastel.mcse.ardoco.core.tests.eval.ProjectHelper.ANALYZE_CODE_DIRECTLY;
 
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
 import java.util.ArrayList;
-import java.util.Collection;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.function.Predicate;
 
-import org.eclipse.collections.api.factory.Lists;
-import org.eclipse.collections.api.list.MutableList;
 import org.eclipse.collections.api.tuple.Pair;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.DisplayName;
 import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.MethodSource;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import org.junit.jupiter.params.provider.EnumSource;
 
 import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult;
-import edu.kit.kastel.mcse.ardoco.core.tests.TestUtil;
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.CodeProject;
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.calculator.ResultCalculatorUtil;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLDiffFile;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLLogFile;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLModelFile;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLPreviousFile;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLSentenceFile;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLSummaryFile;
+import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.ModelElementSentenceLink;
 
 class TraceLinkEvaluationSadCodeDirectIT {
-    protected static final Logger logger = LoggerFactory.getLogger(TraceLinkEvaluationIT.class);
-
-    protected static final String OUTPUT = "target/testout";
-
     protected static final String LOGGING_ARDOCO_CORE = "org.slf4j.simpleLogger.log.edu.kit.kastel.mcse.ardoco.core";
 
-    protected static final List<Pair<GoldStandardProject, EvaluationResults<TestLink>>> RESULTS = new ArrayList<>();
-    protected static final MutableList<EvaluationResults<String>> PROJECT_RESULTS = Lists.mutable.empty();
+    protected static final List<Pair<GoldStandardProject, EvaluationResults<ModelElementSentenceLink>>> RESULTS = new ArrayList<>();
     protected static final Map<GoldStandardProject, ArDoCoResult> DATA_MAP = new LinkedHashMap<>();
 
     @BeforeAll
@@ -57,64 +35,16 @@ static void beforeAll() {
 
     @AfterAll
     static void afterAll() {
-        logOverallResultsForSadSamTlr();
-        writeOutputForSadSamTlr();
         System.setProperty(LOGGING_ARDOCO_CORE, "error");
     }
 
     @DisplayName("Evaluate SAD-Code TLR")
     @ParameterizedTest(name = "{0}")
-    @MethodSource("getNonHistoricalCodeProjects")
+    @EnumSource(CodeProject.class)
     void evaluateSadCodeTlrIT(CodeProject project) {
         ANALYZE_CODE_DIRECTLY.set(false);
         var evaluation = new SadCodeTraceabilityLinkRecoveryEvaluation();
         ArDoCoResult results = evaluation.runTraceLinkEvaluation(project);
         Assertions.assertNotNull(results);
     }
-
-    private static List<CodeProject> getNonHistoricalCodeProjects() {
-        return filterForNonHistoricalProjects(List.of(CodeProject.values()));
-    }
-
-    private static <T extends Enum<T>> List<T> filterForNonHistoricalProjects(Collection<T> unfilteredProjects) {
-        return filterForProjects(unfilteredProjects, p -> !p.name().endsWith("HISTORICAL"));
-    }
-
-    private static <T extends Enum<T>> List<T> filterForProjects(Collection<T> unfilteredProjects, Predicate<T> filter) {
-        List<T> projects = new ArrayList<>();
-        for (var project : unfilteredProjects) {
-            if (filter.test(project)) {
-                projects.add(project);
-            }
-        }
-        return projects;
-    }
-
-    private static void logOverallResultsForSadSamTlr() {
-        if (logger.isInfoEnabled()) {
-            var name = "Overall Weighted";
-            var results = ResultCalculatorUtil.calculateWeightedAverageResults(PROJECT_RESULTS.toImmutable());
-            TestUtil.logResults(logger, name, results);
-
-            name = "Overall Macro";
-            results = ResultCalculatorUtil.calculateAverageResults(PROJECT_RESULTS.toImmutable());
-            TestUtil.logResults(logger, name, results);
-        }
-    }
-
-    private static void writeOutputForSadSamTlr() {
-        var evalDir = Path.of(OUTPUT).resolve("ardoco_eval_tl");
-        try {
-            Files.createDirectories(evalDir);
-
-            TLSummaryFile.save(evalDir.resolve("summary.txt"), RESULTS, DATA_MAP);
-            TLModelFile.save(evalDir.resolve("models.txt"), DATA_MAP);
-            TLSentenceFile.save(evalDir.resolve("sentences.txt"), DATA_MAP);
-            TLLogFile.append(evalDir.resolve("log.txt"), RESULTS);
-            TLPreviousFile.save(evalDir.resolve("previous.csv"), RESULTS, logger); // save before loading
-            TLDiffFile.save(evalDir.resolve("diff.txt"), RESULTS, TLPreviousFile.load(evalDir.resolve("previous.csv"), DATA_MAP), DATA_MAP);
-        } catch (IOException e) {
-            logger.error("Failed to write output.", e);
-        }
-    }
 }
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceabilityLinkRecoveryEvaluation.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceabilityLinkRecoveryEvaluation.java
index b9e36f7..81658a0 100644
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceabilityLinkRecoveryEvaluation.java
+++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceabilityLinkRecoveryEvaluation.java
@@ -9,7 +9,6 @@
 import java.util.NoSuchElementException;
 import java.util.Objects;
 import java.util.Set;
-import java.util.stream.Collectors;
 
 import org.eclipse.collections.api.collection.ImmutableCollection;
 import org.eclipse.collections.api.factory.Lists;
@@ -31,7 +30,7 @@
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
 import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ExpectedResults;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ResultMatrix;
+import edu.kit.kastel.mcse.ardoco.metrics.ClassificationMetricsCalculator;
 
 public abstract class TraceabilityLinkRecoveryEvaluation<T extends GoldStandardProject> {
     protected static final Logger logger = LoggerFactory.getLogger(TraceabilityLinkRecoveryEvaluation.class);
@@ -40,9 +39,11 @@ public abstract class TraceabilityLinkRecoveryEvaluation<T extends GoldStandardP
     // If the path separator in the gold standards are changed, this needs to update
     public static final String GOLD_STANDARD_PATH_SEPARATOR = "/";
 
+    // This static map is shared by all subclasses, so a cached ArDoCoResult may have been produced by a different kind of evaluation.
+    // Therefore, #resultHasRequiredData is used to check whether a cached result is valid for the specific subclass before it is reused.
     protected static Map<GoldStandardProject, ArDoCoResult> resultMap = new LinkedHashMap<>();
 
-    protected ArDoCoResult runTraceLinkEvaluation(T project) {
+    protected final ArDoCoResult runTraceLinkEvaluation(T project) {
         ArDoCoResult result = resultMap.get(project);
         if (result == null || !resultHasRequiredData(result)) {
             ArDoCoRunner runner = getAndSetupRunner(project);
@@ -160,44 +161,16 @@ protected EvaluationResults<String> calculateEvaluationResults(ArDoCoResult arDo
 
         Set<String> distinctTraceLinks = new LinkedHashSet<>(results.castToCollection());
         Set<String> distinctGoldStandard = new LinkedHashSet<>(goldStandard.castToCollection());
+        int confusionMatrixSum = getConfusionMatrixSum(arDoCoResult);
+
+        var calculator = ClassificationMetricsCalculator.getInstance();
+        var classification = calculator.calculateMetrics(distinctTraceLinks, distinctGoldStandard, confusionMatrixSum);
+        return new EvaluationResults<>(classification);
 
-        // True Positives are the trace links that are contained on both lists
-        Set<String> truePositives = distinctTraceLinks.stream()
-                .filter(tl -> isTraceLinkContainedInGoldStandard(tl, distinctGoldStandard))
-                .collect(Collectors.toSet());
-        ImmutableList<String> truePositivesList = Lists.immutable.ofAll(truePositives);
-
-        // False Positives are the trace links that are only contained in the result set
-        Set<String> falsePositives = distinctTraceLinks.stream()
-                .filter(tl -> !isTraceLinkContainedInGoldStandard(tl, distinctGoldStandard))
-                .collect(Collectors.toSet());
-        ImmutableList<String> falsePositivesList = Lists.immutable.ofAll(falsePositives);
-
-        // False Negatives are the trace links that are only contained in the gold standard
-        Set<String> falseNegatives = distinctGoldStandard.stream()
-                .filter(gstl -> !isGoldStandardTraceLinkContainedInTraceLinks(gstl, distinctTraceLinks))
-                .collect(Collectors.toSet());
-        ImmutableList<String> falseNegativesList = Lists.immutable.ofAll(falseNegatives);
-
-        int trueNegatives = getConfusionMatrixSum(arDoCoResult) - truePositives.size() - falsePositives.size() - falseNegatives.size();
-        return EvaluationResults.createEvaluationResults(new ResultMatrix<>(truePositivesList, trueNegatives, falsePositivesList, falseNegativesList));
     }
 
     protected abstract ImmutableList<String> createTraceLinkStringList(ArDoCoResult arDoCoResult);
 
     protected abstract int getConfusionMatrixSum(ArDoCoResult arDoCoResult);
 
-    private static boolean areTraceLinksMatching(String goldStandardTraceLink, String traceLink) {
-        traceLink = traceLink.strip();
-        goldStandardTraceLink = goldStandardTraceLink.strip();
-        return (goldStandardTraceLink.equals(traceLink));
-    }
-
-    private static boolean isTraceLinkContainedInGoldStandard(String traceLink, Set<String> goldStandard) {
-        return goldStandard.stream().anyMatch(goldStandardTraceLink -> areTraceLinksMatching(goldStandardTraceLink, traceLink));
-    }
-
-    private static boolean isGoldStandardTraceLinkContainedInTraceLinks(String goldStandardTraceLink, Set<String> traceLinks) {
-        return traceLinks.stream().anyMatch(traceLink -> areTraceLinksMatching(goldStandardTraceLink, traceLink));
-    }
 }
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TestLink.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/ModelElementSentenceLink.java
similarity index 51%
rename from tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TestLink.java
rename to tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/ModelElementSentenceLink.java
index 42bb6c3..04ed6d4 100644
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TestLink.java
+++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/ModelElementSentenceLink.java
@@ -8,15 +8,15 @@
 /**
  * Represents a simple trace link by the id of the model and number of the sentence involved.
  */
-public record TestLink(String modelId, int sentenceNr) implements Comparable<TestLink> {
+public record ModelElementSentenceLink(String modelElementId, int sentenceNumber) implements Comparable<ModelElementSentenceLink> {
 
-    public TestLink(SadSamTraceLink traceLink) {
+    public ModelElementSentenceLink(SadSamTraceLink traceLink) {
         this(traceLink.getModelElementUid(), traceLink.getSentenceNumber());
     }
 
     @Override
-    public int compareTo(TestLink o) {
-        return Comparator.comparing(TestLink::modelId).thenComparing(TestLink::sentenceNr).compare(this, o);
+    public int compareTo(ModelElementSentenceLink o) {
+        return Comparator.comparing(ModelElementSentenceLink::modelElementId).thenComparing(ModelElementSentenceLink::sentenceNumber).compare(this, o);
     }
 
 }
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TLRUtil.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TLRUtil.java
index cf5ef6b..e21bf0d 100644
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TLRUtil.java
+++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TLRUtil.java
@@ -28,8 +28,8 @@ private TLRUtil() {
      * @param data the {@link EvaluationResults}
      * @return the trace links
      */
-    public static ImmutableList<TestLink> getTraceLinks(DataRepository data) {
-        var traceLinks = Lists.mutable.<TestLink>empty();
+    public static ImmutableList<ModelElementSentenceLink> getTraceLinks(DataRepository data) {
+        var traceLinks = Lists.mutable.<ModelElementSentenceLink>empty();
         var connectionStates = data.getData(ConnectionStates.ID, ConnectionStates.class).orElseThrow();
         var modelStates = data.getData(ModelStates.ID, ModelStates.class).orElseThrow();
 
@@ -40,7 +40,7 @@ public static ImmutableList<TestLink> getTraceLinks(DataRepository data) {
                 .map(connectionStates::getConnectionState)
                 .toList();
         for (var connectionState : connectionStatesList) {
-            traceLinks.addAll(connectionState.getTraceLinks().stream().map(TestLink::new).toList());
+            traceLinks.addAll(connectionState.getTraceLinks().stream().map(ModelElementSentenceLink::new).toList());
         }
         return traceLinks.toImmutable();
     }
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLDiffFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLDiffFile.java
deleted file mode 100644
index 5489385..0000000
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLDiffFile.java
+++ /dev/null
@@ -1,140 +0,0 @@
-/* Licensed under MIT 2022-2024. */
-package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.StandardOpenOption;
-import java.text.DecimalFormat;
-import java.util.Collection;
-import java.util.Comparator;
-import java.util.List;
-import java.util.Map;
-
-import org.eclipse.collections.api.tuple.Pair;
-
-import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult;
-import edu.kit.kastel.mcse.ardoco.core.common.util.CommonUtilities;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink;
-
-/**
- * This is a helper class to write out a diff-file for the evaluation results of TLR.
- */
-public class TLDiffFile {
-
-    private static final DecimalFormat NUMBER_FORMAT = new DecimalFormat("+##0.00%;-##0.00%");
-    private static final String LINE_SEPARATOR = System.lineSeparator();
-
-    private TLDiffFile() {
-        throw new IllegalAccessError("This constructor should not be called!");
-    }
-
-    /**
-     * Writes out the differences of new and old results.
-     *
-     * @param targetFile        file to write into
-     * @param newProjectResults new results
-     * @param oldProjectResults old results
-     * @param dataMap           the mapping of Project to ArDoCoResult of the new run
-     * @throws IOException if writing fails
-     */
-    public static void save(Path targetFile, Collection<Pair<GoldStandardProject, EvaluationResults<TestLink>>> newProjectResults,
-            Collection<Pair<GoldStandardProject, EvaluationResults<TestLink>>> oldProjectResults, Map<GoldStandardProject, ArDoCoResult> dataMap)
-            throws IOException {
-        // Assumption: Both collections contain the same projects
-
-        newProjectResults = newProjectResults.stream().sorted(Comparator.comparing(x -> x.getOne().getProjectName())).toList();
-        oldProjectResults = oldProjectResults.stream().sorted(Comparator.comparing(x -> x.getOne().getProjectName())).toList();
-
-        var builder = new StringBuilder();
-
-        builder.append("Time of evaluation: `").append(CommonUtilities.getCurrentTimeAsString()).append("`");
-        builder.append(LINE_SEPARATOR);
-
-        var newResults = newProjectResults.stream().map(Pair::getTwo).toList();
-        var oldResults = newProjectResults.stream().map(Pair::getTwo).toList();
-
-        // Append average differences in precision, recall, f1
-        var oldAvgPrecision = oldResults.stream().mapToDouble(EvaluationResults::precision).average().orElse(Double.NaN);
-        var oldAvgRecall = oldResults.stream().mapToDouble(EvaluationResults::recall).average().orElse(Double.NaN);
-        var oldAvgF1 = oldResults.stream().mapToDouble(EvaluationResults::f1).average().orElse(Double.NaN);
-        var newAvgPrecision = newResults.stream().mapToDouble(EvaluationResults::precision).average().orElse(Double.NaN);
-        var newAvgRecall = newResults.stream().mapToDouble(EvaluationResults::recall).average().orElse(Double.NaN);
-        var newAvgF1 = newResults.stream().mapToDouble(EvaluationResults::f1).average().orElse(Double.NaN);
-
-        builder.append("Ø ");
-        builder.append(NUMBER_FORMAT.format(newAvgPrecision - oldAvgPrecision)).append(" Precision,  ");
-        builder.append(NUMBER_FORMAT.format(newAvgRecall - oldAvgRecall)).append(" Recall,  ");
-        builder.append(NUMBER_FORMAT.format(newAvgF1 - oldAvgF1)).append(" F1");
-        builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
-
-        // Append project specific details
-        for (Pair<GoldStandardProject, EvaluationResults<TestLink>> oldProjectResult : oldProjectResults) {
-            var project = oldProjectResult.getOne();
-            var newResultOptional = newProjectResults.stream().filter(r -> r.getOne().equals(project)).findAny();
-            if (newResultOptional.isEmpty()) {
-                continue;
-            }
-            var newResult = newResultOptional.get().getTwo();
-            var data = dataMap.get(project);
-
-            builder.append("# ").append(project.getProjectName());
-            builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
-
-            var oldResult = oldProjectResult.getTwo();
-            builder.append(NUMBER_FORMAT.format(newResult.precision() - oldResult.precision())).append(" Precision,  ");
-            builder.append(NUMBER_FORMAT.format(newResult.recall() - oldResult.recall())).append(" Recall,  ");
-            builder.append(NUMBER_FORMAT.format(newResult.f1() - oldResult.f1())).append(" F1");
-            builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
-
-            var newTruePositives = findNewLinks(oldResult.truePositives().castToList(), newResult.truePositives().castToList());
-            appendList(builder, "New true positives", newTruePositives, data);
-
-            var newFalsePositives = findNewLinks(oldResult.falsePositives().castToList(), newResult.falsePositives().castToList());
-            appendList(builder, "New false positives", newFalsePositives, data);
-
-            var newFalseNegatives = findNewLinks(oldResult.falseNegatives().castToList(), newResult.falseNegatives().castToList());
-            appendList(builder, "New false negatives", newFalseNegatives, data);
-
-            var lostFalsePositives = findMissingLinks(oldResult.falsePositives().castToList(), newResult.falsePositives().castToList());
-            appendList(builder, "False positives that are now true negatives", lostFalsePositives, data);
-
-            builder.append(LINE_SEPARATOR);
-        }
-
-        Files.writeString(targetFile, builder.toString(), StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
-    }
-
-    private static List<TestLink> findNewLinks(List<TestLink> oldLinks, List<TestLink> newLinks) {
-        return newLinks.stream().filter(link -> !oldLinks.contains(link)).toList();
-    }
-
-    private static List<TestLink> findMissingLinks(List<TestLink> oldLinks, List<TestLink> newLinks) {
-        return oldLinks.stream().filter(link -> !newLinks.contains(link)).toList();
-    }
-
-    private static void appendList(StringBuilder builder, String description, List<TestLink> links, ArDoCoResult arDoCoResult) {
-        var text = arDoCoResult.getText();
-        if (links.isEmpty()) {
-            return;
-        }
-
-        builder.append(description).append(":");
-        builder.append(LINE_SEPARATOR);
-
-        for (TestLink link : links) {
-            for (var modelId : arDoCoResult.getModelIds()) {
-                var dataModel = arDoCoResult.getModelState(modelId);
-                var line = TLSummaryFile.format(link, text, dataModel);
-                if (line != null && !line.isBlank()) {
-                    builder.append("- ").append(line).append(LINE_SEPARATOR);
-                }
-            }
-        }
-
-        builder.append(LINE_SEPARATOR);
-    }
-
-}
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLGoldStandardFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLGoldStandardFile.java
deleted file mode 100644
index 626fd7b..0000000
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLGoldStandardFile.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/* Licensed under MIT 2022-2024. */
-package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.List;
-
-import org.eclipse.collections.api.factory.Lists;
-import org.eclipse.collections.api.list.MutableList;
-
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink;
-
-public class TLGoldStandardFile {
-
-    private TLGoldStandardFile() {
-        // no instantiation
-        throw new IllegalAccessError("No instantiation allowed");
-    }
-
-    public static MutableList<TestLink> loadLinks(GoldStandardProject goldStandardProject) throws IOException {
-        Path path = goldStandardProject.getTlrGoldStandardFile().toPath();
-        List<String> lines = Files.readAllLines(path);
-
-        return Lists.mutable.ofAll(lines.stream()
-                .skip(1) // skip csv header
-                .map(line -> line.split(",")) // modelElementId,sentenceNr
-                .map(array -> new TestLink(array[0], Integer.parseInt(array[1])))
-                .map(link -> new TestLink(link.modelId(), link.sentenceNr() - 1))
-                // ^ goldstandard sentences start with 1 while ISentences are zero indexed
-                .toList());
-    }
-
-}
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLLogFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLLogFile.java
deleted file mode 100644
index 7ef9dd0..0000000
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLLogFile.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/* Licensed under MIT 2022-2024. */
-package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.StandardOpenOption;
-import java.text.DecimalFormat;
-import java.util.ArrayList;
-import java.util.Comparator;
-import java.util.List;
-
-import org.eclipse.collections.api.tuple.Pair;
-
-import edu.kit.kastel.mcse.ardoco.core.common.util.CommonUtilities;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink;
-
-/**
- * This helper-class offer functionality to write out a log of the results for TLR.
- */
-public class TLLogFile {
-    private static final String LINE_SEPARATOR = System.lineSeparator();
-    private static final DecimalFormat NUMBER_FORMAT = new DecimalFormat("##0.00%");
-
-    private TLLogFile() {
-        throw new IllegalAccessError("This constructor should not be called!");
-    }
-
-    /**
-     * Appends the given results to the given file.
-     *
-     * @param targetFile     file to append to
-     * @param projectResults the results to write out
-     * @throws IOException if writing to file system fails
-     */
-    public static void append(Path targetFile, List<Pair<GoldStandardProject, EvaluationResults<TestLink>>> projectResults) throws IOException {
-        List<EvaluationResults<TestLink>> results = projectResults.stream().map(Pair::getTwo).toList();
-        var builder = new StringBuilder();
-
-        builder.append("- `").append(CommonUtilities.getCurrentTimeAsString()).append("` ");
-
-        // calc average
-        double avgPrecision = results.stream().mapToDouble(EvaluationResults::precision).average().orElse(Double.NaN);
-        double avgRecall = results.stream().mapToDouble(EvaluationResults::recall).average().orElse(Double.NaN);
-        double avgF1 = results.stream().mapToDouble(EvaluationResults::f1).average().orElse(Double.NaN);
-
-        builder.append(String.format("[`Ø`  %s  %s  %s]", NUMBER_FORMAT.format(avgPrecision), NUMBER_FORMAT.format(avgRecall), NUMBER_FORMAT.format(avgF1)));
-
-        var sortedResults = new ArrayList<>(projectResults);
-        sortedResults.sort(Comparator.comparing(x -> x.getOne().getProjectName()));
-        for (Pair<GoldStandardProject, EvaluationResults<TestLink>> projectResult : sortedResults) {
-            String alias = projectResult.getOne().getAlias();
-            EvaluationResults<TestLink> result = projectResult.getTwo();
-            String precision = NUMBER_FORMAT.format(result.precision());
-            String recall = NUMBER_FORMAT.format(result.recall());
-            String F1 = NUMBER_FORMAT.format(result.f1());
-
-            builder.append(String.format(" [`%s`  %s  %s  %s]", alias, precision, recall, F1));
-        }
-
-        builder.append(LINE_SEPARATOR);
-
-        Files.writeString(targetFile, builder.toString(), StandardOpenOption.CREATE, StandardOpenOption.APPEND);
-    }
-
-}
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLModelFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLModelFile.java
deleted file mode 100644
index 0afdc95..0000000
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLModelFile.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/* Licensed under MIT 2022-2024. */
-package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.StandardOpenOption;
-import java.util.Map;
-
-import edu.kit.kastel.mcse.ardoco.core.api.models.ModelInstance;
-import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
-
-/**
- * This helper-class offers functionality to write out information about the models as seen by ArDoCo after evaluation of TLR.
- */
-public class TLModelFile {
-
-    private static final String LINE_SEPARATOR = System.lineSeparator();
-
-    private TLModelFile() {
-        throw new IllegalAccessError("This constructor should not be called!");
-    }
-
-    /**
-     * Writes out information about models to the target file.
-     *
-     * @param targetFile the file to write to
-     * @param dataMap    the data map to extract model information for each project
-     * @throws IOException if writing to file system fails
-     */
-    public static void save(Path targetFile, Map<GoldStandardProject, ArDoCoResult> dataMap) throws IOException {
-        var projects = dataMap.keySet().stream().sorted().toList();
-        var builder = new StringBuilder();
-
-        for (GoldStandardProject project : projects) {
-            var projectData = dataMap.get(project);
-
-            builder.append("# ").append(project.getProjectName());
-            builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
-
-            for (var modelId : projectData.getModelIds()) {
-                var models = projectData.getModelState(modelId).getInstances();
-                builder.append("## ModelId: ").append(modelId);
-                builder.append(LINE_SEPARATOR);
-                for (ModelInstance model : models) {
-                    builder.append("- [")
-                            .append(model.getUid())
-                            .append("]: \"")
-                            .append(model.getFullName())
-                            .append("\" (")
-                            .append(model.getFullType())
-                            .append(") (")
-                            .append(String.join(", ", model.getNameParts()))
-                            .append(") (")
-                            .append(String.join(", ", model.getTypeParts()))
-                            .append(")")
-                            .append(LINE_SEPARATOR);
-                }
-            }
-
-            builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
-        }
-
-        Files.writeString(targetFile, builder.toString(), StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
-    }
-
-}
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLPreviousFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLPreviousFile.java
deleted file mode 100644
index 52c2802..0000000
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLPreviousFile.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/* Licensed under MIT 2022-2024. */
-package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.StandardOpenOption;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Comparator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.eclipse.collections.api.factory.Lists;
-import org.eclipse.collections.api.tuple.Pair;
-import org.eclipse.collections.impl.tuple.Tuples;
-import org.slf4j.Logger;
-
-import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult;
-import edu.kit.kastel.mcse.ardoco.core.tests.TestUtil;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.Project;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink;
-
-/**
- * This is a helper class to load and write out the results of the previous evaluation run for TLR results.
- */
-public class TLPreviousFile {
-    private static final String LINE_SEPARATOR = System.lineSeparator();
-
-    private TLPreviousFile() {
-        throw new IllegalAccessError("This constructor should not be called!");
-    }
-
-    /**
-     * Loads the previous results
-     *
-     * @param sourceFile file to load from
-     * @return the previous results
-     * @throws IOException if file access fails
-     */
-    public static Collection<Pair<GoldStandardProject, EvaluationResults<TestLink>>> load(Path sourceFile,
-            final Map<GoldStandardProject, ArDoCoResult> DATA_MAP) throws IOException {
-        List<String> lines = Files.readAllLines(sourceFile);
-        Map<Project, List<TestLink>> foundLinkMap = new LinkedHashMap<>();
-        List<Pair<GoldStandardProject, EvaluationResults<TestLink>>> results = new ArrayList<>();
-
-        for (String line : lines) {
-            var parts = line.split(",", -1);
-            Project project = Project.valueOf(parts[0]);
-            String modelId = parts[1];
-            int sentenceNr = Integer.parseInt(parts[2]);
-
-            var testLink = new TestLink(modelId, sentenceNr);
-
-            if (!foundLinkMap.containsKey(project)) {
-                foundLinkMap.put(project, new ArrayList<>());
-            }
-
-            foundLinkMap.get(project).add(testLink);
-        }
-
-        for (Project project : foundLinkMap.keySet()) {
-            var correctLinks = TLGoldStandardFile.loadLinks(project);
-            var foundLinks = foundLinkMap.get(project);
-
-            ArDoCoResult arDoCoResult = DATA_MAP.get(project);
-            if (arDoCoResult != null) {
-                results.add(Tuples.pair(project, TestUtil.compareTLR(arDoCoResult, Lists.immutable.ofAll(foundLinks), correctLinks.toImmutable())));
-            }
-        }
-
-        return results;
-    }
-
-    /**
-     * Saves the given results to the given file.
-     *
-     * @param targetFile     file to save to
-     * @param projectResults results to save
-     * @throws IOException if writing to file system fails
-     */
-    public static void save(Path targetFile, Collection<Pair<GoldStandardProject, EvaluationResults<TestLink>>> projectResults, Logger logger)
-            throws IOException {
-        if (Files.exists(targetFile)) {
-            logger.warn("File with the results of the previous evaluation run already exists.");
-            return; // do not overwrite
-        }
-
-        var sortedResults = new ArrayList<>(projectResults);
-        sortedResults.sort(Comparator.comparing(x -> x.getOne().getProjectName()));
-
-        var builder = new StringBuilder();
-
-        for (Pair<GoldStandardProject, EvaluationResults<TestLink>> projectResult : sortedResults) {
-            EvaluationResults<TestLink> result = projectResult.getTwo();
-            for (TestLink foundLink : result.getFound()) {
-                builder.append(projectResult.getOne().getProjectName());
-                builder.append(',');
-                builder.append(foundLink.modelId());
-                builder.append(',');
-                builder.append(foundLink.sentenceNr());
-                builder.append(LINE_SEPARATOR);
-            }
-        }
-
-        Files.writeString(targetFile, builder.toString(), StandardOpenOption.CREATE);
-    }
-
-}
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLSentenceFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLSentenceFile.java
deleted file mode 100644
index 0e24ada..0000000
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLSentenceFile.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/* Licensed under MIT 2022-2024. */
-package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.StandardOpenOption;
-import java.util.Map;
-
-import org.eclipse.collections.api.list.ImmutableList;
-
-import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult;
-import edu.kit.kastel.mcse.ardoco.core.api.text.Sentence;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
-
-/**
- * This helper-class offers functionality to write out the sentences as seen by ArDoCo after the evaluation runs for TLR are done.
- */
-public class TLSentenceFile {
-    private static final String LINE_SEPARATOR = System.lineSeparator();
-
-    private TLSentenceFile() {
-        throw new IllegalAccessError("This constructor should not be called!");
-    }
-
-    /**
-     * Write out the sentences from the given data map to the target file
-     *
-     * @param targetFile file to write to
-     * @param dataMap    data to extract the sentences from
-     * @throws IOException if writing to file system fails
-     */
-    public static void save(Path targetFile, Map<GoldStandardProject, ArDoCoResult> dataMap) throws IOException {
-        var projects = dataMap.keySet().stream().sorted().toList();
-        var builder = new StringBuilder();
-
-        for (GoldStandardProject project : projects) {
-            ImmutableList<Sentence> sentences = dataMap.get(project).getText().getSentences();
-
-            builder.append("# ").append(project.getProjectName());
-            builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
-
-            for (Sentence sentence : sentences) {
-                builder.append("- [").append(sentence.getSentenceNumber()).append("]: ").append(sentence.getText()).append(LINE_SEPARATOR);
-            }
-
-            builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
-        }
-
-        Files.writeString(targetFile, builder.toString(), StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
-    }
-
-}
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLSummaryFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLSummaryFile.java
deleted file mode 100644
index 7d9e650..0000000
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLSummaryFile.java
+++ /dev/null
@@ -1,147 +0,0 @@
-/* Licensed under MIT 2022-2024. */
-package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.StandardOpenOption;
-import java.text.DecimalFormat;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-
-import org.eclipse.collections.api.factory.Lists;
-import org.eclipse.collections.api.tuple.Pair;
-
-import edu.kit.kastel.mcse.ardoco.core.api.models.LegacyModelExtractionState;
-import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult;
-import edu.kit.kastel.mcse.ardoco.core.api.text.Text;
-import edu.kit.kastel.mcse.ardoco.core.common.util.CommonUtilities;
-import edu.kit.kastel.mcse.ardoco.core.tests.TestUtil;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.calculator.ResultCalculatorUtil;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink;
-
-/**
- * This helper class offers functionality to write out a summary of the TLR evaluation runs for all projects.
- */
-public class TLSummaryFile {
-    private static final DecimalFormat NUMBER_FORMAT = new DecimalFormat("##0.00%");
-    private static final String LINE_SEPARATOR = System.lineSeparator();
-
-    private TLSummaryFile() {
-        throw new IllegalAccessError("This constructor should not be called!");
-    }
-
-    /**
-     * Writes a summary of the given results, data etc. to the given file.
-     *
-     * @param targetFile file to write to
-     * @param results    results of the runs
-     * @param dataMap    the outcomes (data) of the runs
-     * @throws IOException if writing to file system fails
-     */
-    public static void save(Path targetFile, Collection<Pair<GoldStandardProject, EvaluationResults<TestLink>>> results,
-            Map<GoldStandardProject, ArDoCoResult> dataMap) throws IOException {
-        var sortedResults = results.stream().sorted().toList();
-        var builder = new StringBuilder();
-
-        builder.append("Time of evaluation: `").append(CommonUtilities.getCurrentTimeAsString()).append("`");
-        builder.append(LINE_SEPARATOR);
-
-        appendOverallResults(sortedResults, builder);
-
-        for (var result : sortedResults) {
-            appendProjectResultSummary(dataMap, builder, result);
-            builder.append(LINE_SEPARATOR);
-        }
-
-        Files.writeString(targetFile, builder.toString(), StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
-    }
-
-    private static void appendProjectResultSummary(Map<GoldStandardProject, ArDoCoResult> dataMap, StringBuilder builder,
-            Pair<GoldStandardProject, EvaluationResults<TestLink>> projectResult) {
-        var data = dataMap.get(projectResult.getOne());
-        var text = data.getText();
-
-        var result = projectResult.getTwo();
-
-        var precision = NUMBER_FORMAT.format(result.precision());
-        var recall = NUMBER_FORMAT.format(result.recall());
-        var f1Score = NUMBER_FORMAT.format(result.f1());
-        var truePosCount = result.truePositives().size();
-        var falsePositives = result.falsePositives();
-        var falsePosCount = falsePositives.size();
-        var falseNegatives = result.falseNegatives();
-        var falseNegCount = falseNegatives.size();
-
-        builder.append("# ").append(projectResult.getOne().getProjectName());
-        builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
-
-        builder.append("Summary:").append(LINE_SEPARATOR);
-        builder.append(String.format("- %s Precision / %s Recall / %s F1", precision, recall, f1Score));
-        builder.append(LINE_SEPARATOR);
-        builder.append(String.format("- %s True Positives / %s False Positives / %s False Negatives", truePosCount, falsePosCount, falseNegCount));
-        builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
-
-        if (!falsePositives.isEmpty()) {
-            var falsePositivesOutput = createFalseLinksOutput("False Positives", falsePositives.castToList(), data, text);
-            builder.append(falsePositivesOutput);
-        }
-
-        if (!falseNegatives.isEmpty()) {
-            var falseNegativesOutput = createFalseLinksOutput("False Negatives", falseNegatives.castToList(), data, text);
-            builder.append(falseNegativesOutput);
-        }
-    }
-
-    private static <T> void appendOverallResults(List<Pair<GoldStandardProject, EvaluationResults<T>>> projectResults, StringBuilder builder) {
-        var results = Lists.mutable.ofAll(projectResults.stream().map(Pair::getTwo).toList());
-        var weightedResults = ResultCalculatorUtil.calculateWeightedAverageResults(results.toImmutable());
-        var macroResults = ResultCalculatorUtil.calculateAverageResults(results.toImmutable());
-        var resultString = TestUtil.createResultLogString("Overall Weighted", weightedResults);
-        builder.append(resultString).append(LINE_SEPARATOR);
-        resultString = TestUtil.createResultLogString("Overall Macro", macroResults);
-        builder.append(resultString).append(LINE_SEPARATOR).append(LINE_SEPARATOR);
-    }
-
-    private static String createFalseLinksOutput(String type, List<TestLink> falseLinks, ArDoCoResult data, Text text) {
-        var builder = new StringBuilder();
-        builder.append(type).append(":").append(LINE_SEPARATOR);
-
-        for (TestLink falseLink : falseLinks) {
-            builder.append(createFalseLinkOutput(data, text, falseLink));
-        }
-
-        builder.append(LINE_SEPARATOR);
-        return builder.toString();
-    }
-
-    private static String createFalseLinkOutput(ArDoCoResult data, Text text, TestLink falseLink) {
-        var builder = new StringBuilder();
-        for (var modelId : data.getModelIds()) {
-            var dataModel = data.getModelState(modelId);
-            var line = format(falseLink, text, dataModel);
-            if (line != null && !line.isBlank()) {
-                builder.append("- ").append(line).append(LINE_SEPARATOR);
-            }
-        }
-        return builder.toString();
-    }
-
-    static String format(TestLink link, Text text, LegacyModelExtractionState modelState) {
-        var model = modelState.getInstances().stream().filter(m -> m.getUid().equals(link.modelId())).findAny().orElse(null);
-        var sentence = text.getSentences().stream().filter(s -> s.getSentenceNumber() == link.sentenceNr()).findAny().orElse(null);
-
-        if (model == null && sentence == null) {
-            return null;
-        }
-
-        var modelStr = model == null ? link.modelId() : "\"" + model.getFullName() + "\"";
-        var sentenceStr = sentence == null ? String.valueOf(link.sentenceNr()) : "\"" + sentence.getText() + "\"";
-
-        return String.format("%s ⇔ %s [%s,%s]", modelStr, sentenceStr, link.modelId(), link.sentenceNr());
-    }
-
-}