diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResults.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResults.java index b6e35a1..5c2c63b 100644 --- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResults.java +++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResults.java @@ -22,8 +22,8 @@ public String toRow() { public String toRow(String headerKey, String headerVal) { return String.format(Locale.ENGLISH, """ %10s & %4s & %4s & %4s & %4s & %4s & %4s & %4s - %10s & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f""", headerKey, "P", "R", "F1", "Acc", "Spec", "Phi", "PhiN", headerVal, precision(), - recall(), f1(), accuracy(), specificity(), phiCoefficient(), phiOverPhiMax()); + %10s & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f""", headerKey, "P", "R", "F1", "Acc", "Spec", "Phi", "PhiN", headerVal, + precision(), recall(), f1(), accuracy(), specificity(), phiCoefficient(), phiOverPhiMax()); } @Override @@ -52,7 +52,8 @@ public String getExtendedResultStringWithExpected(ExpectedResults expectedResult outputBuilder.append(String.format(Locale.ENGLISH, """ \tPrecision:%8.2f (min. expected: %.2f) \tRecall:%11.2f (min. expected: %.2f) - \tF1:%15.2f (min. expected: %.2f)""", precision(), expectedResults.precision(), recall(), expectedResults.recall(), f1(), expectedResults.f1())); + \tF1:%15.2f (min. expected: %.2f)""", precision(), expectedResults.precision(), recall(), expectedResults.recall(), f1(), expectedResults + .f1())); outputBuilder.append(String.format(Locale.ENGLISH, """ \tAccuracy:%9.2f (min. expected: %.2f) @@ -72,8 +73,8 @@ public String getExplicitResultString() { \tTN:%15d \tFN:%15d \tP:%16d - \tN:%16d""", truePositives().size(), falsePositives().size(), trueNegatives(), falseNegatives().size(), truePositives().size() + falseNegatives().size(), - trueNegatives() + falsePositives().size()); + \tN:%16d""", truePositives().size(), falsePositives().size(), trueNegatives(), falseNegatives().size(), truePositives() + .size() + falseNegatives().size(), trueNegatives() + falsePositives().size()); } public ImmutableList getFound() { diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/calculator/ResultCalculatorUtil.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/calculator/ResultCalculatorUtil.java index 53e8067..709b62c 100644 --- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/calculator/ResultCalculatorUtil.java +++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/calculator/ResultCalculatorUtil.java @@ -1,11 +1,16 @@ /* Licensed under MIT 2023-2024. */ package edu.kit.kastel.mcse.ardoco.core.tests.eval.results.calculator; +import java.util.List; + import org.eclipse.collections.api.factory.Sets; import org.eclipse.collections.api.list.ImmutableList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults; import edu.kit.kastel.mcse.ardoco.metrics.ClassificationMetricsCalculator; +import edu.kit.kastel.mcse.ardoco.metrics.result.AggregatedClassificationResult; import edu.kit.kastel.mcse.ardoco.metrics.result.AggregationType; import edu.kit.kastel.mcse.ardoco.metrics.result.SingleClassificationResult; @@ -13,37 +18,47 @@ * This utility class provides methods to form the average of several {@link EvaluationResults} */ public final class ResultCalculatorUtil { + private static final Logger logger = LoggerFactory.getLogger(ResultCalculatorUtil.class); private ResultCalculatorUtil() { throw new IllegalAccessError(); } public static EvaluationResults calculateAverageResults(ImmutableList> results) { - var calculator = ClassificationMetricsCalculator.getInstance(); - var classifications = results.stream().map(EvaluationResults::classificationResult).toList(); + var averages = getAverages(results); + if (averages == null) + return null; - var averages = calculator.calculateAverages(classifications, null); var macroAverage = averages.stream().filter(it -> it.getType() == AggregationType.MACRO_AVERAGE).findFirst().orElseThrow(); - - var macroAverageAsSingle = new SingleClassificationResult(Sets.mutable.empty(), Sets.mutable.empty(), Sets.mutable.empty(), null, macroAverage - .getPrecision(), macroAverage.getRecall(), macroAverage.getF1(), macroAverage.getAccuracy(), macroAverage.getSpecificity(), macroAverage - .getPhiCoefficient(), macroAverage.getPhiCoefficientMax(), macroAverage.getPhiOverPhiMax()); - - return new EvaluationResults<>(macroAverageAsSingle); + return evaluationResults(macroAverage); } public static EvaluationResults calculateWeightedAverageResults(ImmutableList> results) { - var calculator = ClassificationMetricsCalculator.getInstance(); - var classifications = results.stream().map(EvaluationResults::classificationResult).toList(); + var averages = getAverages(results); + if (averages == null) + return null; - var averages = calculator.calculateAverages(classifications, null); var macroAverage = averages.stream().filter(it -> it.getType() == AggregationType.WEIGHTED_AVERAGE).findFirst().orElseThrow(); + return evaluationResults(macroAverage); + } - var weightedAverageAsSingle = new SingleClassificationResult(Sets.mutable.empty(), Sets.mutable.empty(), Sets.mutable.empty(), null, macroAverage - .getPrecision(), macroAverage.getRecall(), macroAverage.getF1(), macroAverage.getAccuracy(), macroAverage.getSpecificity(), macroAverage - .getPhiCoefficient(), macroAverage.getPhiCoefficientMax(), macroAverage.getPhiOverPhiMax()); + private static EvaluationResults evaluationResults(AggregatedClassificationResult average) { + var weightedAverageAsSingle = new SingleClassificationResult(Sets.mutable.empty(), Sets.mutable.empty(), Sets.mutable.empty(), null, average + .getPrecision(), average.getRecall(), average.getF1(), average.getAccuracy(), average.getSpecificity(), average.getPhiCoefficient(), average + .getPhiCoefficientMax(), average.getPhiOverPhiMax()); return new EvaluationResults<>(weightedAverageAsSingle); + } + + private static List getAverages(ImmutableList> results) { + if (results.isEmpty()) { + logger.warn("No results to calculate average from"); + return null; + } + + var calculator = ClassificationMetricsCalculator.getInstance(); + var classifications = results.stream().map(EvaluationResults::classificationResult).toList(); + return calculator.calculateAverages(classifications, null); } } diff --git a/tests/integration-tests/tests-inconsistency/src/test/java/edu/kit/kastel/mcse/ardoco/id/tests/integration/InconsistencyDetectionEvaluationIT.java b/tests/integration-tests/tests-inconsistency/src/test/java/edu/kit/kastel/mcse/ardoco/id/tests/integration/InconsistencyDetectionEvaluationIT.java index 9665389..1c120f2 100644 --- a/tests/integration-tests/tests-inconsistency/src/test/java/edu/kit/kastel/mcse/ardoco/id/tests/integration/InconsistencyDetectionEvaluationIT.java +++ b/tests/integration-tests/tests-inconsistency/src/test/java/edu/kit/kastel/mcse/ardoco/id/tests/integration/InconsistencyDetectionEvaluationIT.java @@ -528,13 +528,13 @@ private static Pair>, StringBuilder> inspe private static void inspectRun(StringBuilder outputBuilder, StringBuilder detailedOutputBuilder, MutableList> allResults, ArDoCoResult arDoCoResult, EvaluationResults result) { - var truePositives = result.truePositives().toList(); + var truePositives = result.truePositives(); appendResults(truePositives, detailedOutputBuilder, "True Positives", arDoCoResult, outputBuilder); - var falsePositives = result.falsePositives().toList(); + var falsePositives = result.falsePositives(); appendResults(falsePositives, detailedOutputBuilder, "False Positives", arDoCoResult, outputBuilder); - var falseNegatives = result.falseNegatives().toList(); + var falseNegatives = result.falseNegatives(); appendResults(falseNegatives, detailedOutputBuilder, "False Negatives", arDoCoResult, outputBuilder); allResults.add(result); }