diff --git a/stages-tlr/text-extraction/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/textextraction/TextExtractionTest.java b/stages-tlr/text-extraction/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/textextraction/TextExtractionTest.java index c91ebde..764e953 100644 --- a/stages-tlr/text-extraction/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/textextraction/TextExtractionTest.java +++ b/stages-tlr/text-extraction/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/textextraction/TextExtractionTest.java @@ -24,6 +24,7 @@ import edu.kit.kastel.mcse.ardoco.core.execution.runner.AnonymousRunner; import edu.kit.kastel.mcse.ardoco.core.pipeline.AbstractPipelineStep; import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject; +import edu.kit.kastel.mcse.ardoco.core.tests.eval.HistoricProject; import edu.kit.kastel.mcse.ardoco.core.tests.eval.Project; import edu.kit.kastel.mcse.ardoco.core.tests.eval.helper.StageTest; import edu.kit.kastel.mcse.ardoco.tlr.text.providers.TextPreprocessingAgent; @@ -40,9 +41,7 @@ protected TextExtractionResult runComparable(TextProject project, SortedMap disambiguations; - TextProject(Project project, List disambiguations) { + TextProject(GoldStandardProject project, List disambiguations) { this.project = project; this.disambiguations = Lists.immutable.ofAll(disambiguations); } diff --git a/tests/integration-tests/tests-base/pom.xml b/tests/integration-tests/tests-base/pom.xml index ebad13e..279977a 100644 --- a/tests/integration-tests/tests-base/pom.xml +++ b/tests/integration-tests/tests-base/pom.xml @@ -17,6 +17,12 @@ com.tngtech.archunit archunit-junit5 + + io.github.ardoco + metrics + 0.1.1-SNAPSHOT + compile + io.github.ardoco.core common diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/TestUtil.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/TestUtil.java index ba2a76c..7adba91 100644 --- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/TestUtil.java +++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/TestUtil.java @@ -3,17 +3,14 @@ import java.util.Locale; import java.util.Set; -import java.util.stream.Collectors; import org.eclipse.collections.api.collection.ImmutableCollection; -import org.eclipse.collections.api.factory.Lists; -import org.eclipse.collections.api.list.ImmutableList; import org.slf4j.Logger; import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult; import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults; import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ExpectedResults; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ResultMatrix; +import edu.kit.kastel.mcse.ardoco.metrics.ClassificationMetricsCalculator; /** * This utility class provides methods for running the tests, especially regarding the evaluations. @@ -37,20 +34,18 @@ public static EvaluationResults compareTLR(ArDoCoResult arDoCoResult, Imm Set distinctTraceLinks = new java.util.LinkedHashSet<>(results.castToCollection()); Set distinctGoldStandard = new java.util.LinkedHashSet<>(goldStandard.castToCollection()); - // True Positives are the trace links that are contained on both lists - Set truePositives = distinctTraceLinks.stream().filter(distinctGoldStandard::contains).collect(Collectors.toSet()); - ImmutableList truePositivesList = Lists.immutable.ofAll(truePositives); + int sentences = arDoCoResult.getText().getSentences().size(); + int modelElements = 0; + for (var model : arDoCoResult.getModelIds()) { + modelElements += arDoCoResult.getModelState(model).getInstances().size(); + } - // False Positives are the trace links that are only contained in the result set - Set falsePositives = distinctTraceLinks.stream().filter(tl -> !distinctGoldStandard.contains(tl)).collect(Collectors.toSet()); - ImmutableList falsePositivesList = Lists.immutable.ofAll(falsePositives); + int confusionMatrixSum = sentences * modelElements; - // False Negatives are the trace links that are only contained in the gold standard - Set falseNegatives = distinctGoldStandard.stream().filter(tl -> !distinctTraceLinks.contains(tl)).collect(Collectors.toSet()); - ImmutableList falseNegativesList = Lists.immutable.ofAll(falseNegatives); + var calculator = ClassificationMetricsCalculator.getInstance(); - int trueNegatives = TestUtil.calculateTrueNegativesForTLR(arDoCoResult, truePositives.size(), falsePositives.size(), falseNegatives.size()); - return EvaluationResults.createEvaluationResults(new ResultMatrix<>(truePositivesList, trueNegatives, falsePositivesList, falseNegativesList)); + var classification = calculator.calculateMetrics(distinctTraceLinks, distinctGoldStandard, confusionMatrixSum); + return new EvaluationResults<>(classification); } /** @@ -67,57 +62,11 @@ public static EvaluationResults compareInconsistencies(ArDoCoResult arDoC Set distinctTraceLinks = new java.util.LinkedHashSet<>(results.castToCollection()); Set distinctGoldStandard = new java.util.LinkedHashSet<>(goldStandard.castToCollection()); - // True Positives are the trace links that are contained on both lists - Set truePositives = distinctTraceLinks.stream().filter(distinctGoldStandard::contains).collect(Collectors.toSet()); - ImmutableList truePositivesList = Lists.immutable.ofAll(truePositives); - - // False Positives are the trace links that are only contained in the result set - Set falsePositives = distinctTraceLinks.stream().filter(tl -> !distinctGoldStandard.contains(tl)).collect(Collectors.toSet()); - ImmutableList falsePositivesList = Lists.immutable.ofAll(falsePositives); - - // False Negatives are the trace links that are only contained in the gold standard - Set falseNegatives = distinctGoldStandard.stream().filter(tl -> !distinctTraceLinks.contains(tl)).collect(Collectors.toSet()); - ImmutableList falseNegativesList = Lists.immutable.ofAll(falseNegatives); - - int trueNegatives = TestUtil.calculateTrueNegativesForInconsistencies(arDoCoResult, truePositives.size(), falsePositives.size(), falseNegatives.size()); - return EvaluationResults.createEvaluationResults(new ResultMatrix<>(truePositivesList, trueNegatives, falsePositivesList, falseNegativesList)); - } - - /** - * Calculates the number of true negatives based on the given {@link ArDoCoResult} and the calculated {@link EvaluationResults evaluation results}. Uses the - * total sum of all entries in the confusion matrix and then substracts the true positives, false positives, and false negatives. - * - * @param arDoCoResult the output of ArDoCo - * @param truePositives nr of true positives - * @param falsePositives nr of false positives - * @param falseNegatives nr of false negatives - * @return the number of true negatives - */ - public static int calculateTrueNegativesForTLR(ArDoCoResult arDoCoResult, int truePositives, int falsePositives, int falseNegatives) { - int sentences = arDoCoResult.getText().getSentences().size(); - int modelElements = 0; - for (var model : arDoCoResult.getModelIds()) { - modelElements += arDoCoResult.getModelState(model).getInstances().size(); - } - - int confusionMatrixSum = sentences * modelElements; - return confusionMatrixSum - (truePositives + falsePositives + falseNegatives); - } - - /** - * Calculates the number of true negatives based on the given {@link ArDoCoResult} and the calculated {@link EvaluationResults evaluation results}. Uses the - * total sum of all sentences in the {@link ArDoCoResult} and then substracts the true positives, false positives, and false negatives. - * - * @param arDoCoResult the output of ArDoCo - * @param truePositives nr of true positives - * @param falsePositives nr of false positives - * @param falseNegatives nr of false negatives - * @return the number of true negatives - */ - public static int calculateTrueNegativesForInconsistencies(ArDoCoResult arDoCoResult, int truePositives, int falsePositives, int falseNegatives) { - int numberOfSentences = arDoCoResult.getText().getSentences().size(); - return numberOfSentences - (truePositives + falsePositives + falseNegatives); + int confusionMatrixSum = arDoCoResult.getText().getSentences().size(); + var calculator = ClassificationMetricsCalculator.getInstance(); + var classification = calculator.calculateMetrics(distinctTraceLinks, distinctGoldStandard, confusionMatrixSum); + return new EvaluationResults<>(classification); } /** @@ -168,19 +117,6 @@ public static void logExplicitResults(Logger logger, String name, EvaluationResu logger.info(logString); } - /** - * Log the provided {@link EvaluationResults} using the provided logger and name. Additionally, provided the expected results. - * - * @param logger Logger to use - * @param name Name to show in the output - * @param results the results - * @param expectedResults the expected results - */ - public static void logResultsWithExpected(Logger logger, String name, EvaluationResults results, ExpectedResults expectedResults) { - var infoString = String.format(Locale.ENGLISH, "%n%s:%n%s", name, results.getResultStringWithExpected(expectedResults)); - logger.info(infoString); - } - public static void logExtendedResultsWithExpected(Logger logger, Object testClass, String name, EvaluationResults results, ExpectedResults expectedResults) { var infoString = String.format(Locale.ENGLISH, """ diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/architecture/DeterministicArDoCoTest.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/architecture/DeterministicArDoCoTest.java index ac86729..6dfab30 100644 --- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/architecture/DeterministicArDoCoTest.java +++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/architecture/DeterministicArDoCoTest.java @@ -50,7 +50,7 @@ public class DeterministicArDoCoTest { @ArchTest public static final ArchRule forbidUnorderedSetsAndMaps = noClasses().that() - .resideOutsideOfPackages("..tests..") + .resideOutsideOfPackages("..tests..", "..metrics..") .and(areNotDirectlyAnnotatedWith(Deterministic.class)) .should() .accessClassesThat(areForbiddenClasses()) diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/CodeProject.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/CodeProject.java index 31255a6..bab51bb 100644 --- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/CodeProject.java +++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/CodeProject.java @@ -5,6 +5,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.UncheckedIOException; import java.nio.file.Files; import java.nio.file.Paths; import java.util.List; @@ -159,8 +160,7 @@ public String getCodeModelDirectory() { loadCodeModelFromResourcesIfNeeded(); return getTemporaryCodeLocation().getAbsolutePath(); } catch (IOException e) { - logger.error(e.getMessage(), e); - return null; + throw new UncheckedIOException(e); } } diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/EvaluationMetrics.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/EvaluationMetrics.java deleted file mode 100644 index cf9c0a2..0000000 --- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/EvaluationMetrics.java +++ /dev/null @@ -1,212 +0,0 @@ -/* Licensed under MIT 2022-2024. */ -package edu.kit.kastel.mcse.ardoco.core.tests.eval; - -import java.math.BigDecimal; -import java.math.MathContext; - -public class EvaluationMetrics { - private EvaluationMetrics() throws IllegalAccessException { - throw new IllegalAccessException(); - } - - /** - * Checks the provided recall. Returns 1.0 if it is NaN, because this means that there was no missing - * classification. - * - * @param recall the precision - * @return 1.0 if recall is NaN, else the original value - */ - public static double checkAndRepairRecall(double recall) { - if (Double.isNaN(recall)) { - return 1.0; - } - return recall; - } - - /** - * Calculates the recall for the given True Positives (TPs) and False Negatives (FNs). If TP+NP=0, then returns 1.0 - * because there was no missing element. - * - * @param truePositives number of TPs - * @param falseNegatives number of FNs - * @return the Recall; 1.0 iff TP+NP=0 - */ - public static double calculateRecall(int truePositives, int falseNegatives) { - double denominator = (truePositives + falseNegatives); - var recall = 1.0 * truePositives / denominator; - return checkAndRepairRecall(recall); - } - - /** - * Checks the provided precision. Returns 1.0 if it is NaN, because this means that there was no wrong - * classification. - * - * @param precision the precision - * @return 1.0 if precision is NaN, else the original value - */ - public static double checkAndRepairPrecision(double precision) { - if (Double.isNaN(precision)) { - return 1.0; - } - return precision; - } - - /** - * Calculates the precision for the given True Positives (TPs) and False Positives (FPs). If TP+FP=0, then returns - * 1.0 because there was no wrong classification. - * - * @param truePositives number of TPs - * @param falsePositives number of FPs - * @return the Precision; 1.0 iff TP+FP=0 - */ - public static double calculatePrecision(int truePositives, int falsePositives) { - double denominator = (truePositives + falsePositives); - var precision = 1.0 * truePositives / denominator; - return checkAndRepairPrecision(precision); - } - - /** - * Checks the provided F1-score. Iff it is NaN, returns 0.0, otherwise returns the original value - * - * @param f1 the f1-score to check - * @return Iff score is NaN, returns 0.0, otherwise returns the original value - */ - public static double checkAndRepairF1(double f1) { - if (Double.isNaN(f1)) { - return 0.0; - } - return f1; - } - - /** - * Calculates the F1-score using the provided precision and recall. If precision+recall=0, returns 0.0. - * - * @param precision the precision - * @param recall the recall - * @return the F1-Score; 0.0 iff precision+recall=0 - */ - public static double calculateF1(double precision, double recall) { - var f1 = 2 * precision * recall / (precision + recall); - return checkAndRepairF1(f1); - } - - /** - * Calculates the F1-score using the provided True Positives (TPs), False Positives (FPs), and False Negatives - * (FNs). If intermediate calculation shows that precision+recall=0, returns 0.0. - * - * @param truePositives number of TPs - * @param falsePositives number of FPs - * @param falseNegatives number of FNs - * @return the F1-score. See also {@link #calculateF1(double, double)} - */ - public static double calculateF1(int truePositives, int falsePositives, int falseNegatives) { - var precision = calculatePrecision(truePositives, falsePositives); - var recall = calculateRecall(truePositives, falseNegatives); - return calculateF1(precision, recall); - } - - /** - * Calculates the accuracy based on the true positives, false positives, false negatives, and true negatives. - * - * @see Wikipedia: Accuracy and Precision - * @return the accuracy - */ - public static double calculateAccuracy(int truePositives, int falsePositives, int falseNegatives, int trueNegatives) { - double numerator = truePositives + trueNegatives; - double denominator = truePositives + falsePositives + falseNegatives + trueNegatives; - return numerator / denominator; - } - - /** - * Returns the Phi Coefficient (also known as mean square contingency coefficient (MCC)) based on the true positives, false positives, false negatives, and - * true negatives. - * The return value lies between -1 and +1. -1 show perfect disagreement, +1 shows perfect agreement and 0 indicates no relationship. - * Therefore, good values should be close to +1. - * - * @see Wikipedia: Phi coefficient - * - * @return the value for Phi Coefficient (or MCC) - */ - public static double calculatePhiCoefficient(int truePositives, int falsePositives, int falseNegatives, int trueNegatives) { - var tp = BigDecimal.valueOf(truePositives); - var fp = BigDecimal.valueOf(falsePositives); - var fn = BigDecimal.valueOf(falseNegatives); - var tn = BigDecimal.valueOf(trueNegatives); - - var num = tp.multiply(tn).subtract((fp.multiply(fn))); - - var a = tp.add(fp); - var b = tp.add(fn); - var c = tn.add(fp); - var d = tn.add(fn); - if (a.equals(BigDecimal.ZERO) || b.equals(BigDecimal.ZERO) || c.equals(BigDecimal.ZERO) || d.equals(BigDecimal.ZERO)) { - return 0d; - } - - var productOfSumsInDenominator = a.multiply(b).multiply(c).multiply(d); - var denominator = productOfSumsInDenominator.sqrt(MathContext.DECIMAL128); - - return num.divide(denominator, MathContext.DECIMAL128).doubleValue(); - } - - /** - * Calculates the maximum possible value of the phi coefficient given the four values of the confusion matrix (TP, FP, FN, TN). - * - * @see Paper about PhiMax by Ferguson (1941) - * @see Paper about Phi/PhiMax by Davenport et al. (1991) - * @param truePositives number of true positives - * @param falsePositives number of false positives - * @param falseNegatives number of false negatives - * @param trueNegatives number of true negatives - * @return The maximum possible value of phi. - */ - public static double calculatePhiCoefficientMax(int truePositives, int falsePositives, int falseNegatives, int trueNegatives) { - var tp = BigDecimal.valueOf(truePositives); - var fp = BigDecimal.valueOf(falsePositives); - var fn = BigDecimal.valueOf(falseNegatives); - var tn = BigDecimal.valueOf(trueNegatives); - - var test = fn.add(tp).compareTo(fp.add(tp)) >= 0; - var nominator = (fp.add(tn)).multiply(tp.add(fp)).sqrt(MathContext.DECIMAL128); - var denominator = (fn.add(tn)).multiply(tp.add(fn)).sqrt(MathContext.DECIMAL128); - if (test) { - // standard case - return nominator.divide(denominator, MathContext.DECIMAL128).doubleValue(); - } else { - // if test is not true, you have to swap nominator and denominator as then you have to mirror the confusion matrix (,i.e., swap TP and TN) - return denominator.divide(nominator, MathContext.DECIMAL128).doubleValue(); - } - } - - /** - * Calculates the normalized phi correlation coefficient value that is phi divided by its maximum possible value. - * - * @see Paper about Phi/PhiMax - * @param truePositives number of true positives - * @param falsePositives number of false positives - * @param falseNegatives number of false negatives - * @param trueNegatives number of true negatives - * @return The value of Phi/PhiMax - */ - public static double calculatePhiOverPhiMax(int truePositives, int falsePositives, int falseNegatives, int trueNegatives) { - var phi = calculatePhiCoefficient(truePositives, falsePositives, falseNegatives, trueNegatives); - var phiMax = calculatePhiCoefficientMax(truePositives, falsePositives, falseNegatives, trueNegatives); - return phi / phiMax; - } - - /** - * Calculates the specificity, also known as selectivity or true negative rate, based on the number of true negatives and false positives. - * - * @see Wikipedia: Sensitivity and specificity - * @param trueNegatives the number of true negatives - * @param falsePositives the number of false positives - * @return the specificity - */ - public static double calculateSpecificity(int trueNegatives, int falsePositives) { - double specificity = trueNegatives / ((double) trueNegatives + falsePositives); - if (Double.isNaN(specificity)) { - return 1.0; - } - return specificity; - } -} diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/HistoricProject.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/HistoricProject.java new file mode 100644 index 0000000..b28cc51 --- /dev/null +++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/HistoricProject.java @@ -0,0 +1,222 @@ +/* Licensed under MIT 2021-2024. */ +package edu.kit.kastel.mcse.ardoco.core.tests.eval; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.List; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.eclipse.collections.api.factory.Lists; +import org.eclipse.collections.api.list.ImmutableList; +import org.eclipse.collections.api.list.MutableList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import edu.kit.kastel.mcse.ardoco.core.api.models.ArchitectureModelType; +import edu.kit.kastel.mcse.ardoco.core.api.models.arcotl.ArchitectureModel; +import edu.kit.kastel.mcse.ardoco.core.execution.ConfigurationHelper; +import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ExpectedResults; + +/** + * This enum captures the different case studies that are used for evaluation in the integration tests (only historic versions). + */ +public enum HistoricProject implements GoldStandardProject { + TEASTORE_HISTORICAL( // + "TS-H", // + "/benchmark/teastore/model_2020/pcm/teastore.repository", // + "/benchmark/teastore/text_2018/teastore_2018_AB.txt", // + "/benchmark/teastore/goldstandards/goldstandard_sad_2018-sam_2020_AB.csv", // + "/configurations/ts/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt + "/benchmark/teastore/goldstandards/goldstandard_sad_2018-sam_2020_AB_UME.csv", // + new ExpectedResults(.999, .740, .850, .984, .853, .999), // + new ExpectedResults(.082, .983, .153, .332, .147, .291) // + ), // + TEAMMATES_HISTORICAL( // + "TM-H", // + "/benchmark/teammates/model_2021/pcm/teammates.repository", // + "/benchmark/teammates/text_2015/teammates_2015.txt", // + "/benchmark/teammates/goldstandards/goldstandard_sad_2015-sam_2021.csv", // + "/configurations/tm/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt + "/benchmark/teammates/goldstandards/goldstandard_sad_2015-sam_2021_UME.csv", // + new ExpectedResults(.524, .695, .597, .970, .589, .979), // + new ExpectedResults(.161, .695, .262, .878, .295, .884) // + ), // + BIGBLUEBUTTON_HISTORICAL( // + "BBB-H", "/benchmark/bigbluebutton/model_2021/pcm/bbb.repository", // + "/benchmark/bigbluebutton/text_2015/bigbluebutton_2015.txt", // + "/benchmark/bigbluebutton/goldstandards/goldstandard_sad_2015-sam_2021.csv", // + "/configurations/bbb/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt + "/benchmark/bigbluebutton/goldstandards/goldstandard_sad_2015-sam_2021_UME.csv", // + new ExpectedResults(.807, .617, .699, .978, .695, .993), // + new ExpectedResults(.048, .176, .076, .829, .018, .857) // + ), // + + JABREF_HISTORICAL( // + "JR-H", "/benchmark/jabref/model_2021/pcm/jabref.repository", // + "/benchmark/jabref/text_2016/jabref_2016.txt", // + "/benchmark/jabref/goldstandards/goldstandard_sad_2016-sam_2021.csv", // + "/configurations/jabref/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt + "/benchmark/jabref/goldstandards/goldstandard_sad_2016-sam_2021_UME.csv", // + new ExpectedResults(.817, .999, .899, .966, .886, .960), // + new ExpectedResults(.083, .111, .095, .683, -0.094, .784) // + ); + + private static final Logger logger = LoggerFactory.getLogger(HistoricProject.class); + + private final String alias; + private final String model; + private final String textFile; + private final String configurationsFile; + private final String goldStandardTraceabilityLinkRecovery; + private final String goldStandardMissingTextForModelElement; + private final ExpectedResults expectedTraceLinkResults; + private final ExpectedResults expectedInconsistencyResults; + private final SortedSet resourceNames; + + HistoricProject(String alias, String model, String textFile, String goldStandardTraceabilityLinkRecovery, String configurationsFile, + String goldStandardMissingTextForModelElement, ExpectedResults expectedTraceLinkResults, ExpectedResults expectedInconsistencyResults) { + this.alias = alias; + this.model = model; + this.textFile = textFile; + this.configurationsFile = configurationsFile; + this.goldStandardTraceabilityLinkRecovery = goldStandardTraceabilityLinkRecovery; + this.goldStandardMissingTextForModelElement = goldStandardMissingTextForModelElement; + this.expectedTraceLinkResults = expectedTraceLinkResults; + this.expectedInconsistencyResults = expectedInconsistencyResults; + resourceNames = new TreeSet<>(List.of(model, textFile, goldStandardTraceabilityLinkRecovery, configurationsFile, + goldStandardMissingTextForModelElement)); + } + + @Override + public String getAlias() { + return alias; + } + + @Override + public File getModelFile() { + return ProjectHelper.loadFileFromResources(model); + } + + @Override + public String getModelResourceName() { + return model; + } + + @Override + public File getModelFile(ArchitectureModelType modelType) { + return switch (modelType) { + case PCM -> getModelFile(); + case UML -> ProjectHelper.loadFileFromResources(model.replace("/pcm/", "/uml/").replace(".repository", ".uml")); + }; + } + + @Override + public String getModelResourceName(ArchitectureModelType modelType) { + return switch (modelType) { + case PCM -> model; + case UML -> model.replace("/pcm/", "/uml/").replace(".repository", ".uml"); + }; + } + + @Override + public File getTextFile() { + return ProjectHelper.loadFileFromResources(textFile); + } + + @Override + public String getTextResourceName() { + return textFile; + } + + @Override + public SortedMap getAdditionalConfigurations() { + return ConfigurationHelper.loadAdditionalConfigs(getAdditionalConfigurationsFile()); + } + + @Override + public File getAdditionalConfigurationsFile() { + return ProjectHelper.loadFileFromResources(this.configurationsFile); + } + + @Override + public String getAdditionalConfigurationsResourceName() { + return configurationsFile; + } + + @Override + public File getTlrGoldStandardFile() { + return ProjectHelper.loadFileFromResources(goldStandardTraceabilityLinkRecovery); + } + + @Override + public String getTlrGoldStandardResourceName() { + return goldStandardTraceabilityLinkRecovery; + } + + @Override + public ImmutableList getTlrGoldStandard() { + var path = Paths.get(this.getTlrGoldStandardFile().toURI()); + List goldLinks = Lists.mutable.empty(); + try { + goldLinks = Files.readAllLines(path); + } catch (IOException e) { + logger.error(e.getMessage(), e); + } + goldLinks.removeFirst(); + goldLinks.removeIf(String::isBlank); + return Lists.immutable.ofAll(goldLinks); + } + + @Override + public GoldStandard getTlrGoldStandard(ArchitectureModel architectureModel) { + return new GoldStandard(getTlrGoldStandardFile(), architectureModel); + } + + @Override + public MutableList getMissingTextForModelElementGoldStandard() { + var path = Paths.get(this.getMissingTextForModelElementGoldStandardFile().toURI()); + List goldLinks = Lists.mutable.empty(); + try { + goldLinks = Files.readAllLines(path); + } catch (IOException e) { + logger.error(e.getMessage(), e); + } + goldLinks.remove("missingModelElementID"); + goldLinks.removeIf(String::isBlank); + return Lists.mutable.ofAll(goldLinks); + } + + @Override + public File getMissingTextForModelElementGoldStandardFile() { + return ProjectHelper.loadFileFromResources(goldStandardMissingTextForModelElement); + } + + @Override + public String getMissingTextForModelElementGoldStandardResourceName() { + return goldStandardMissingTextForModelElement; + } + + @Override + public ExpectedResults getExpectedTraceLinkResults() { + return expectedTraceLinkResults; + } + + @Override + public ExpectedResults getExpectedInconsistencyResults() { + return expectedInconsistencyResults; + } + + @Override + public String getProjectName() { + return this.name(); + } + + @Override + public SortedSet getResourceNames() { + return new TreeSet<>(resourceNames); + } +} diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/Project.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/Project.java index b2929bd..d6d6b27 100644 --- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/Project.java +++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/Project.java @@ -33,7 +33,7 @@ public enum Project implements GoldStandardProject { "/configurations/ms/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt "/benchmark/mediastore/goldstandards/goldstandard_sad_2016-sam_2016_UME.csv", // new ExpectedResults(.999, .620, .765, .978, .778, .999), // - new ExpectedResults(.212, .792, .328, .702, .227, .690) // + new ExpectedResults(.127, .793, .220, .685, .227, .679) // ), // TEASTORE( // "TS", // @@ -43,17 +43,7 @@ public enum Project implements GoldStandardProject { "/configurations/ts/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt "/benchmark/teastore/goldstandards/goldstandard_sad_2020-sam_2020_UME.csv", // new ExpectedResults(.999, .740, .850, .984, .853, .999), // - new ExpectedResults(.962, .703, .784, .957, .808, .994) // - ), // - TEASTORE_HISTORICAL( // - "TS-H", // - "/benchmark/teastore/model_2020/pcm/teastore.repository", // - "/benchmark/teastore/text_2018/teastore_2018_AB.txt", // - "/benchmark/teastore/goldstandards/goldstandard_sad_2018-sam_2020_AB.csv", // - "/configurations/ts/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt - "/benchmark/teastore/goldstandards/goldstandard_sad_2018-sam_2020_AB_UME.csv", // - new ExpectedResults(.999, .740, .850, .984, .853, .999), // - new ExpectedResults(.163, .982, .278, .376, .146, .289) // + new ExpectedResults(.950, .703, .808, .980, .808, .998) // ), // TEAMMATES( // "TM", // @@ -63,17 +53,7 @@ public enum Project implements GoldStandardProject { "/configurations/tm/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt "/benchmark/teammates/goldstandards/goldstandard_sad_2021-sam_2021_UME.csv", // new ExpectedResults(.555, .882, .681, .965, .688, .975), // - new ExpectedResults(.175, .745, .279, .851, .287, .851) // - ), // - TEAMMATES_HISTORICAL( // - "TM-H", // - "/benchmark/teammates/model_2021/pcm/teammates.repository", // - "/benchmark/teammates/text_2015/teammates_2015.txt", // - "/benchmark/teammates/goldstandards/goldstandard_sad_2015-sam_2021.csv", // - "/configurations/tm/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt - "/benchmark/teammates/goldstandards/goldstandard_sad_2015-sam_2021_UME.csv", // - new ExpectedResults(.524, .695, .597, .970, .589, .979), // - new ExpectedResults(.168, .629, .263, .863, .260, .870) // + new ExpectedResults(.147, .745, .245, .852, .287, .856) // ), // BIGBLUEBUTTON( // "BBB", "/benchmark/bigbluebutton/model_2021/pcm/bbb.repository", // @@ -82,16 +62,7 @@ public enum Project implements GoldStandardProject { "/configurations/bbb/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt "/benchmark/bigbluebutton/goldstandards/goldstandard_sad_2021-sam_2021_UME.csv", // new ExpectedResults(.875, .826, .850, .985, .835, .985), // - new ExpectedResults(.887, .461, .429, .956, .534, .984) // - ), // - BIGBLUEBUTTON_HISTORICAL( // - "BBB-H", "/benchmark/bigbluebutton/model_2021/pcm/bbb.repository", // - "/benchmark/bigbluebutton/text_2015/bigbluebutton_2015.txt", // - "/benchmark/bigbluebutton/goldstandards/goldstandard_sad_2015-sam_2021.csv", // - "/configurations/bbb/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt - "/benchmark/bigbluebutton/goldstandards/goldstandard_sad_2015-sam_2021_UME.csv", // - new ExpectedResults(.807, .617, .699, .978, .695, .993), // - new ExpectedResults(.085, .175, .111, .813, .018, .869) // + new ExpectedResults(.666, .461, .545, .960, .535, .988) // ), // JABREF( // "JR", "/benchmark/jabref/model_2021/pcm/jabref.repository", // @@ -100,16 +71,7 @@ public enum Project implements GoldStandardProject { "/configurations/jabref/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt "/benchmark/jabref/goldstandards/goldstandard_sad_2021-sam_2021_UME.csv", // new ExpectedResults(.899, .999, .946, .973, .932, .966), // - new ExpectedResults(1.0, .443, .443, .845, .616, 1.0) // - ), // - JABREF_HISTORICAL( // - "JR-H", "/benchmark/jabref/model_2021/pcm/jabref.repository", // - "/benchmark/jabref/text_2016/jabref_2016.txt", // - "/benchmark/jabref/goldstandards/goldstandard_sad_2016-sam_2021.csv", // - "/configurations/jabref/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt - "/benchmark/jabref/goldstandards/goldstandard_sad_2016-sam_2021_UME.csv", // - new ExpectedResults(.817, .999, .899, .966, .886, .960), // - new ExpectedResults(.110, .110, .110, .366, -.249, .475) // + new ExpectedResults(1.0, .444, .615, .871, .617, 1.0) // ); private static final Logger logger = LoggerFactory.getLogger(Project.class); @@ -179,11 +141,7 @@ public String getTextResourceName() { return textFile; } - /** - * Return the map of additional configuration options - * - * @return the map of additional configuration options - */ + @Override public SortedMap getAdditionalConfigurations() { return ConfigurationHelper.loadAdditionalConfigs(getAdditionalConfigurationsFile()); } @@ -217,7 +175,7 @@ public ImmutableList getTlrGoldStandard() { } catch (IOException e) { logger.error(e.getMessage(), e); } - goldLinks.remove(0); + goldLinks.removeFirst(); goldLinks.removeIf(String::isBlank); return Lists.immutable.ofAll(goldLinks); } diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/ProjectHelper.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/ProjectHelper.java index d2e23c5..45a5465 100644 --- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/ProjectHelper.java +++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/ProjectHelper.java @@ -5,11 +5,9 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.UncheckedIOException; import java.util.concurrent.atomic.AtomicBoolean; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - /** * Helper class for {@link GoldStandardProject} implementations. */ @@ -18,7 +16,6 @@ public class ProjectHelper { * If set to false. The CodeProject will place the codeModel.acm file from the benchmark to the project directory. */ public static final AtomicBoolean ANALYZE_CODE_DIRECTLY = new AtomicBoolean(false); - private static final Logger logger = LoggerFactory.getLogger(ProjectHelper.class); private ProjectHelper() { throw new IllegalAccessError(); @@ -33,7 +30,7 @@ private ProjectHelper() { public static File loadFileFromResources(String resource) { InputStream is = ProjectHelper.class.getResourceAsStream(resource); if (is == null) - return null; + throw new IllegalArgumentException("Resource not found: " + resource); try { File temporaryFile = File.createTempFile("ArDoCo", ".tmp"); temporaryFile.deleteOnExit(); @@ -44,8 +41,7 @@ public static File loadFileFromResources(String resource) { } return temporaryFile; } catch (IOException e) { - logger.error(e.getMessage(), e); - return null; + throw new UncheckedIOException(e); } } } diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResultVector.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResultVector.java deleted file mode 100644 index 39a360b..0000000 --- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResultVector.java +++ /dev/null @@ -1,58 +0,0 @@ -/* Licensed under MIT 2023-2024. */ -package edu.kit.kastel.mcse.ardoco.core.tests.eval.results; - -import org.eclipse.collections.api.factory.Lists; - -/** - * used to form the average of several {@link EvaluationResults} - * - * @param type of the {@link EvaluationResults} - */ -public class EvaluationResultVector { - private double precision = 0.0; - private double recall = 0.0; - private double f1 = 0.0; - private double accuracy = 0.0; - private double phiCoefficient = 0.0; - private double specificity = 0.0; - private double phiCoefficientMax = 0.0; - private double phiOverPhiMax = 0.0; - - public void add(EvaluationResults results) { - precision += results.precision(); - recall += results.recall(); - f1 += results.f1(); - accuracy += results.accuracy(); - specificity += results.specificity(); - phiCoefficient += results.phiCoefficient(); - phiCoefficientMax += results.phiCoefficientMax(); - phiOverPhiMax += results.phiOverPhiMax(); - } - - public void scale(double scale) { - precision /= scale; - recall /= scale; - f1 /= scale; - accuracy /= scale; - specificity /= scale; - phiCoefficient /= scale; - phiCoefficientMax /= scale; - phiOverPhiMax /= scale; - } - - public void addWeighted(EvaluationResults results, int weight) { - precision += results.precision() * weight; - recall += results.recall() * weight; - f1 += results.f1() * weight; - accuracy += results.accuracy() * weight; - specificity += results.specificity() * weight; - phiCoefficient += results.phiCoefficient() * weight; - phiCoefficientMax += results.phiCoefficientMax() * weight; - phiOverPhiMax += results.phiOverPhiMax() * weight; - } - - public EvaluationResults toEvaluationResults() { - return new EvaluationResults<>(precision, recall, f1, Lists.immutable.empty(), 0, Lists.immutable.empty(), Lists.immutable.empty(), accuracy, - phiCoefficient, specificity, phiCoefficientMax, phiOverPhiMax); - } -} diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResults.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResults.java index 9332bb8..0db56b3 100644 --- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResults.java +++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResults.java @@ -1,30 +1,29 @@ /* Licensed under MIT 2023-2024. */ package edu.kit.kastel.mcse.ardoco.core.tests.eval.results; +import java.util.List; import java.util.Locale; import org.eclipse.collections.api.factory.Lists; import org.eclipse.collections.api.list.ImmutableList; import org.eclipse.collections.api.list.MutableList; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.EvaluationMetrics; +import edu.kit.kastel.mcse.ardoco.metrics.result.SingleClassificationResult; -public record EvaluationResults(double precision, double recall, double f1, ImmutableList truePositives, int trueNegatives, - ImmutableList falseNegatives, ImmutableList falsePositives, double accuracy, double phiCoefficient, double specificity, - double phiCoefficientMax, double phiOverPhiMax) { +public record EvaluationResults(SingleClassificationResult classificationResult) { public String toRow() { return String.format(Locale.ENGLISH, """ %4s & %4s & %4s & %4s & %4s & %4s & %4s - %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f""", "P", "R", "F1", "Acc", "Spec", "Phi", "PhiN", precision, recall, f1, accuracy, - specificity, phiCoefficient, phiOverPhiMax); + %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f""", "P", "R", "F1", "Acc", "Spec", "Phi", "PhiN", precision(), recall(), f1(), accuracy(), + specificity(), phiCoefficient(), phiOverPhiMax()); } public String toRow(String headerKey, String headerVal) { return String.format(Locale.ENGLISH, """ %10s & %4s & %4s & %4s & %4s & %4s & %4s & %4s - %10s & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f""", headerKey, "P", "R", "F1", "Acc", "Spec", "Phi", "PhiN", headerVal, precision, - recall, f1, accuracy, specificity, phiCoefficient, phiOverPhiMax); + %10s & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f""", headerKey, "P", "R", "F1", "Acc", "Spec", "Phi", "PhiN", headerVal, + precision(), recall(), f1(), accuracy(), specificity(), phiCoefficient(), phiOverPhiMax()); } @Override @@ -37,7 +36,7 @@ public String toString() { \tSpecificity:%6.2f \tPhi Coef.:%8.2f \tPhi/PhiMax:%7.2f (Phi Max: %.2f) - %s""", precision, recall, f1, accuracy, specificity, phiCoefficient, phiOverPhiMax, phiCoefficientMax, toRow()); + %s""", precision(), recall(), f1(), accuracy(), specificity(), phiCoefficient(), phiOverPhiMax(), phiCoefficientMax(), toRow()); } public String getResultStringWithExpected(ExpectedResults expectedResults) { @@ -45,7 +44,7 @@ public String getResultStringWithExpected(ExpectedResults expectedResults) { \tPrecision:%8.2f (min. expected: %.2f) \tRecall:%11.2f (min. expected: %.2f) \tF1:%15.2f (min. expected: %.2f) - %s""", precision, expectedResults.precision(), recall, expectedResults.recall(), f1, expectedResults.f1(), toRow()); + %s""", precision(), expectedResults.precision(), recall(), expectedResults.recall(), f1(), expectedResults.f1(), toRow()); } public String getExtendedResultStringWithExpected(ExpectedResults expectedResults) { @@ -53,16 +52,17 @@ public String getExtendedResultStringWithExpected(ExpectedResults expectedResult outputBuilder.append(String.format(Locale.ENGLISH, """ \tPrecision:%8.2f (min. expected: %.2f) \tRecall:%11.2f (min. expected: %.2f) - \tF1:%15.2f (min. expected: %.2f)""", precision, expectedResults.precision(), recall, expectedResults.recall(), f1, expectedResults.f1())); + \tF1:%15.2f (min. expected: %.2f)""", precision(), expectedResults.precision(), recall(), expectedResults.recall(), f1(), expectedResults + .f1())); outputBuilder.append(String.format(Locale.ENGLISH, """ \tAccuracy:%9.2f (min. expected: %.2f) - \tSpecificity:%6.2f (min. expected: %.2f)""", accuracy, expectedResults.accuracy(), specificity, expectedResults.specificity())); + \tSpecificity:%6.2f (min. expected: %.2f)""", accuracy(), expectedResults.accuracy(), specificity(), expectedResults.specificity())); outputBuilder.append(String.format(Locale.ENGLISH, """ \tPhi Coef.:%8.2f (min. expected: %.2f) \tPhi/PhiMax:%7.2f (Phi Max: %.2f) - %s""", phiCoefficient, expectedResults.phiCoefficient(), phiOverPhiMax, phiCoefficientMax, toRow())); + %s""", phiCoefficient(), expectedResults.phiCoefficient(), phiOverPhiMax(), phiCoefficientMax(), toRow())); return outputBuilder.toString(); } @@ -73,64 +73,62 @@ public String getExplicitResultString() { \tTN:%15d \tFN:%15d \tP:%16d - \tN:%16d""", truePositives.size(), falsePositives.size(), trueNegatives, falseNegatives.size(), truePositives.size() + falseNegatives.size(), - trueNegatives + falsePositives.size()); - } - - /** - * returns the weight (truePos + falseNeg) - * - * @return the weight - */ - public int getWeight() { - return this.truePositives().size() + this.falseNegatives().size(); + \tN:%16d""", truePositives().size(), falsePositives().size(), trueNegatives(), falseNegatives().size(), truePositives() + .size() + falseNegatives().size(), trueNegatives() + falsePositives().size()); } public ImmutableList getFound() { MutableList found = Lists.mutable.empty(); - found.addAll(truePositives.castToCollection()); - found.addAll(falsePositives.castToCollection()); + found.addAll(classificationResult.getTruePositives()); + found.addAll(classificationResult.getFalsePositives()); return found.toImmutable(); } - /** - * creates new {@link EvaluationResults} from a {@link ResultMatrix} - * - * @param matrix the {@link ResultMatrix} - * @return new {@link EvaluationResults} - */ - public static EvaluationResults createEvaluationResults(ResultMatrix matrix) { - int nrTruePos = matrix.truePositives().size(); - int nrTrueNeg = matrix.trueNegatives(); - int nrFalsePos = matrix.falsePositives().size(); - int nrFalseNeg = matrix.falseNegatives().size(); - - double precision = EvaluationMetrics.calculatePrecision(nrTruePos, nrFalsePos); - double recall = EvaluationMetrics.calculateRecall(nrTruePos, nrFalseNeg); - double f1 = EvaluationMetrics.calculateF1(precision, recall); - - double accuracy = 0; - double phiCoefficient = 0; - double specificity = 0; - double phiCoefficientMax = 0; - double phiOverPhiMax = 0; - - if (nrTruePos + nrFalsePos + nrFalseNeg + nrTrueNeg != 0) { - accuracy = EvaluationMetrics.calculateAccuracy(nrTruePos, nrFalsePos, nrFalseNeg, nrTrueNeg); - } - phiCoefficient = EvaluationMetrics.calculatePhiCoefficient(nrTruePos, nrFalsePos, nrFalseNeg, nrTrueNeg); - if (nrTrueNeg + nrFalsePos != 0) { - specificity = EvaluationMetrics.calculateSpecificity(nrTrueNeg, nrFalsePos); - } - if ((nrFalseNeg + nrTrueNeg) * (nrTruePos + nrFalseNeg) != 0) { - phiCoefficientMax = EvaluationMetrics.calculatePhiCoefficientMax(nrTruePos, nrFalsePos, nrFalseNeg, nrTrueNeg); - } - if (phiCoefficientMax != 0) { - phiOverPhiMax = EvaluationMetrics.calculatePhiOverPhiMax(nrTruePos, nrFalsePos, nrFalseNeg, nrTrueNeg); - } - - return new EvaluationResults<>(precision, recall, f1, matrix.truePositives(), matrix.trueNegatives(), matrix.falseNegatives(), matrix.falsePositives(), - accuracy, phiCoefficient, specificity, phiCoefficientMax, phiOverPhiMax); + public double precision() { + return classificationResult.getPrecision(); + } + + public double recall() { + return classificationResult.getRecall(); + } + + public double f1() { + return classificationResult.getF1(); + } + + public double accuracy() { + return classificationResult.getAccuracy(); + } + + public double specificity() { + return classificationResult.getSpecificity(); + } + + public double phiCoefficient() { + return classificationResult.getPhiCoefficient(); } + public double phiOverPhiMax() { + return classificationResult.getPhiOverPhiMax(); + } + + public double phiCoefficientMax() { + return classificationResult.getPhiCoefficientMax(); + } + + public List truePositives() { + return classificationResult.getTruePositives().stream().toList(); + } + + public List falsePositives() { + return classificationResult.getFalsePositives().stream().toList(); + } + + public List falseNegatives() { + return classificationResult.getFalseNegatives().stream().toList(); + } + + public int trueNegatives() { + return classificationResult.getTrueNegatives(); + } } diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/ResultMatrix.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/ResultMatrix.java deleted file mode 100644 index 70dcab7..0000000 --- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/ResultMatrix.java +++ /dev/null @@ -1,15 +0,0 @@ -/* Licensed under MIT 2023-2024. */ -package edu.kit.kastel.mcse.ardoco.core.tests.eval.results; - -import org.eclipse.collections.api.list.ImmutableList; - -/** - * reprensents the results in the form of a matrix - * - * @param truePositives the true positives - * @param trueNegatives the true negatives - * @param falsePositives the false positives - * @param falseNegatives the false negatives - */ -public record ResultMatrix(ImmutableList truePositives, int trueNegatives, ImmutableList falsePositives, ImmutableList falseNegatives) { -} diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/calculator/ResultCalculatorUtil.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/calculator/ResultCalculatorUtil.java index 6ba099e..91dc84d 100644 --- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/calculator/ResultCalculatorUtil.java +++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/calculator/ResultCalculatorUtil.java @@ -1,95 +1,73 @@ /* Licensed under MIT 2023-2024. */ package edu.kit.kastel.mcse.ardoco.core.tests.eval.results.calculator; -import org.eclipse.collections.api.factory.Lists; +import java.util.List; + +import org.eclipse.collections.api.factory.Sets; import org.eclipse.collections.api.list.ImmutableList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.EvaluationMetrics; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResultVector; import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults; +import edu.kit.kastel.mcse.ardoco.metrics.ClassificationMetricsCalculator; +import edu.kit.kastel.mcse.ardoco.metrics.result.AggregatedClassificationResult; +import edu.kit.kastel.mcse.ardoco.metrics.result.AggregationType; +import edu.kit.kastel.mcse.ardoco.metrics.result.SingleClassificationResult; /** * This utility class provides methods to form the average of several {@link EvaluationResults} */ public final class ResultCalculatorUtil { + private static final Logger logger = LoggerFactory.getLogger(ResultCalculatorUtil.class); private ResultCalculatorUtil() { throw new IllegalAccessError(); } - public static EvaluationResults calculateAverageResults(ImmutableList> results) { - int norm = results.size(); - EvaluationResultVector vector = new EvaluationResultVector<>(); - - for (var result : results) { - var weight = result.getWeight(); - if (weight <= 0) { - norm--; - continue; - } - vector.add(result); - } + public static EvaluationResults calculateMacroAverageResults(ImmutableList> results) { + var averages = getAverages(results); + if (averages == null) + return null; - vector.scale(norm); - return vector.toEvaluationResults(); + var macroAverage = averages.stream().filter(it -> it.getType() == AggregationType.MACRO_AVERAGE).findFirst().orElseThrow(); + return evaluationResults(macroAverage); } public static EvaluationResults calculateWeightedAverageResults(ImmutableList> results) { - double weight = 0.0; - double precision = .0; - double recall = 0.0; - double f1 = 0.0; - double accuracy = 0.0; - double specificity = 0.0; - double phi = 0.0; - double phiMax = 0.0; - double phiOverPhiMax = 0.0; - int truePositives = 0; - int trueNegatives = 0; - int falsePositives = 0; - int falseNegatives = 0; - - for (var result : results) { - double localWeight = result.getWeight(); - weight += localWeight; - - precision += localWeight * result.precision(); - recall += localWeight * result.recall(); - f1 += localWeight * result.f1(); - - accuracy += localWeight * result.accuracy(); - specificity += localWeight * result.specificity(); - phi += localWeight * result.phiCoefficient(); - phiMax += localWeight * result.phiCoefficientMax(); - phiOverPhiMax += localWeight * result.phiOverPhiMax(); - - truePositives += result.truePositives().size(); - falseNegatives += result.falseNegatives().size(); - falsePositives += result.falsePositives().size(); - trueNegatives += result.trueNegatives(); + var averages = getAverages(results); + if (averages == null) + return null; - } + var macroAverage = averages.stream().filter(it -> it.getType() == AggregationType.WEIGHTED_AVERAGE).findFirst().orElseThrow(); + return evaluationResults(macroAverage); + } - precision = precision / weight; - recall = recall / weight; - f1 = f1 / weight; - accuracy = accuracy / weight; - specificity = specificity / weight; + public static EvaluationResults calculateMicroAverageResults(ImmutableList> results) { + var averages = getAverages(results); + if (averages == null) + return null; - if (truePositives > 0) { - phi = EvaluationMetrics.calculatePhiCoefficient(truePositives, falsePositives, falseNegatives, trueNegatives); - phiMax = EvaluationMetrics.calculatePhiCoefficientMax(truePositives, falsePositives, falseNegatives, trueNegatives); - phiOverPhiMax = EvaluationMetrics.calculatePhiOverPhiMax(truePositives, falsePositives, falseNegatives, trueNegatives); + var microAverage = averages.stream().filter(it -> it.getType() == AggregationType.MICRO_AVERAGE).findFirst().orElseThrow(); + return evaluationResults(microAverage); + } - return new EvaluationResults<>(precision, recall, f1, Lists.immutable.empty(), 0, Lists.immutable.empty(), Lists.immutable.empty(), accuracy, phi, - specificity, phiMax, phiOverPhiMax); + private static EvaluationResults evaluationResults(AggregatedClassificationResult average) { + var weightedAverageAsSingle = new SingleClassificationResult(Sets.mutable.empty(), Sets.mutable.empty(), Sets.mutable.empty(), null, average + .getPrecision(), average.getRecall(), average.getF1(), average.getAccuracy(), average.getSpecificity(), average.getPhiCoefficient(), average + .getPhiCoefficientMax(), average.getPhiOverPhiMax()); + + return new EvaluationResults<>(weightedAverageAsSingle); + } + + private static List getAverages(ImmutableList> results) { + if (results.isEmpty()) { + throw new IllegalArgumentException("No results to calculate average from"); } - phi = phi / weight; - phiMax /= weight; - phiOverPhiMax /= weight; - return new EvaluationResults<>(precision, recall, f1, Lists.immutable.empty(), 0, Lists.immutable.empty(), Lists.immutable.empty(), accuracy, phi, - specificity, phiMax, phiOverPhiMax); + var calculator = ClassificationMetricsCalculator.getInstance(); + var classifications = results.stream().map(EvaluationResults::classificationResult).toList(); + return calculator.calculateAverages(classifications, null); } + } diff --git a/tests/integration-tests/tests-base/src/test/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/EvaluationMetricsTest.java b/tests/integration-tests/tests-base/src/test/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/EvaluationMetricsTest.java deleted file mode 100644 index bd522c8..0000000 --- a/tests/integration-tests/tests-base/src/test/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/EvaluationMetricsTest.java +++ /dev/null @@ -1,87 +0,0 @@ -/* Licensed under MIT 2022-2024. */ -package edu.kit.kastel.mcse.ardoco.core.tests.eval; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -class EvaluationMetricsTest { - - @Test - void calculatePrecisionTest() { - Assertions.assertAll( // - () -> Assertions.assertEquals(.5, EvaluationMetrics.calculatePrecision(10, 10), 1e-3), // - () -> Assertions.assertEquals(.857, EvaluationMetrics.calculatePrecision(6, 1), 1e-3), // - () -> Assertions.assertEquals(.154, EvaluationMetrics.calculatePrecision(10, 55), 1e-3), // - () -> Assertions.assertEquals(.905, EvaluationMetrics.calculatePrecision(210, 22), 1e-3) // - ); - } - - @Test - void calculateRecallTest() { - Assertions.assertAll( // - () -> Assertions.assertEquals(.5, EvaluationMetrics.calculateRecall(10, 10), 1e-3), // - () -> Assertions.assertEquals(.75, EvaluationMetrics.calculateRecall(6, 2), 1e-3), // - () -> Assertions.assertEquals(.154, EvaluationMetrics.calculateRecall(10, 55), 1e-3), // - () -> Assertions.assertEquals(.871, EvaluationMetrics.calculateRecall(210, 31), 1e-3) // - ); - } - - @Test - void calculateF1FromPrecisionRecallTest() { - Assertions.assertAll( // - () -> Assertions.assertEquals(1.0, EvaluationMetrics.calculateF1(1., 1.), 1e-2), // - () -> Assertions.assertEquals(0.0, EvaluationMetrics.calculateF1(0., 1.), 1e-2), // - () -> Assertions.assertEquals(0.0, EvaluationMetrics.calculateF1(1., 0.), 1e-2), // - () -> Assertions.assertEquals(0.18, EvaluationMetrics.calculateF1(.9, .1), 1e-2), // - () -> Assertions.assertEquals(0.48, EvaluationMetrics.calculateF1(.6, .4), 1e-2), // - () -> Assertions.assertEquals(0.42, EvaluationMetrics.calculateF1(.3, .7), 1e-2), // - () -> Assertions.assertEquals(0.9, EvaluationMetrics.calculateF1(.9, .9), 1e-2), // - () -> Assertions.assertEquals(0.48, EvaluationMetrics.calculateF1(.4, .6), 1e-2) // - ); - } - - @Test - void calculateF1Test() { - Assertions.assertAll( // - () -> Assertions.assertEquals(.5, EvaluationMetrics.calculateF1(10, 10, 10), 1e-3), // - () -> Assertions.assertEquals(.8, EvaluationMetrics.calculateF1(6, 1, 2), 1e-3), // - () -> Assertions.assertEquals(.154, EvaluationMetrics.calculateF1(10, 55, 55), 1e-3), // - () -> Assertions.assertEquals(.888, EvaluationMetrics.calculateF1(210, 22, 31), 1e-3) // - ); - } - - @Test - void calculateAccuracyTest() { - Assertions.assertAll( // - () -> Assertions.assertEquals(.5, EvaluationMetrics.calculateAccuracy(10, 10, 10, 10), 1e-3), // - () -> Assertions.assertEquals(.75, EvaluationMetrics.calculateAccuracy(6, 1, 2, 3), 1e-3), // - () -> Assertions.assertEquals(.214, EvaluationMetrics.calculateAccuracy(10, 55, 55, 20), 1e-3), // - () -> Assertions.assertEquals(.967, EvaluationMetrics.calculateAccuracy(210, 22, 31, 1337), 1e-3) // - ); - } - - @Test - void calculatePhiCoefficientTest() { - Assertions.assertAll( // - () -> Assertions.assertEquals(.0, EvaluationMetrics.calculatePhiCoefficient(10, 10, 10, 10), 1e-3), // - () -> Assertions.assertEquals(.478, EvaluationMetrics.calculatePhiCoefficient(6, 1, 2, 3), 1e-3), // - () -> Assertions.assertEquals(-.579, EvaluationMetrics.calculatePhiCoefficient(10, 55, 55, 20), 1e-3), // - () -> Assertions.assertEquals(.869, EvaluationMetrics.calculatePhiCoefficient(210, 22, 31, 1337), 1e-3), // - () -> Assertions.assertEquals(.0, EvaluationMetrics.calculatePhiCoefficient(0, 0, 11, 11), 1e-3), // - () -> Assertions.assertEquals(.0, EvaluationMetrics.calculatePhiCoefficient(11, 0, 11, 0), 1e-3) // - ); - } - - @Test - void calculateSpecificityTest() { - Assertions.assertAll( // - () -> Assertions.assertEquals(.5, EvaluationMetrics.calculateSpecificity(1, 1), 1e-3), // - () -> Assertions.assertEquals(.76, EvaluationMetrics.calculateSpecificity(1337, 420), 1e-3), // - () -> Assertions.assertEquals(.0, EvaluationMetrics.calculateSpecificity(0, 20), 1e-3), // - () -> Assertions.assertEquals(1., EvaluationMetrics.calculateSpecificity(20, 0), 1e-3), // - () -> Assertions.assertEquals(1., EvaluationMetrics.calculateSpecificity(0, 0), 1e-3), // - () -> Assertions.assertEquals(.375, EvaluationMetrics.calculateSpecificity(3, 5), 1e-3) // - ); - } - -} diff --git a/tests/integration-tests/tests-inconsistency/src/test/java/edu/kit/kastel/mcse/ardoco/id/tests/integration/InconsistencyDetectionEvaluationIT.java b/tests/integration-tests/tests-inconsistency/src/test/java/edu/kit/kastel/mcse/ardoco/id/tests/integration/InconsistencyDetectionEvaluationIT.java index 3295007..c3f57b8 100644 --- a/tests/integration-tests/tests-inconsistency/src/test/java/edu/kit/kastel/mcse/ardoco/id/tests/integration/InconsistencyDetectionEvaluationIT.java +++ b/tests/integration-tests/tests-inconsistency/src/test/java/edu/kit/kastel/mcse/ardoco/id/tests/integration/InconsistencyDetectionEvaluationIT.java @@ -2,10 +2,8 @@ package edu.kit.kastel.mcse.ardoco.id.tests.integration; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.StandardOpenOption; import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; @@ -23,7 +21,6 @@ import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.MethodOrderer; import org.junit.jupiter.api.Order; -import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestMethodOrder; import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; import org.junit.jupiter.params.ParameterizedTest; @@ -31,7 +28,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import edu.kit.kastel.mcse.ardoco.core.api.inconsistency.InconsistentSentence; import edu.kit.kastel.mcse.ardoco.core.api.inconsistency.ModelInconsistency; import edu.kit.kastel.mcse.ardoco.core.api.models.ModelElement; import edu.kit.kastel.mcse.ardoco.core.api.models.arcotl.ArchitectureModel; @@ -43,7 +39,6 @@ import edu.kit.kastel.mcse.ardoco.core.tests.eval.Project; import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults; import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ExpectedResults; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ResultMatrix; import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.calculator.ResultCalculatorUtil; import edu.kit.kastel.mcse.ardoco.id.tests.integration.inconsistencyhelper.HoldBackRunResultsProducer; import edu.kit.kastel.mcse.ardoco.id.types.MissingModelInstanceInconsistency; @@ -57,30 +52,13 @@ * are the spots of inconsistency then. We run this multiple times so each element was held back once. */ @TestMethodOrder(MethodOrderer.OrderAnnotation.class) -public class InconsistencyDetectionEvaluationIT { +class InconsistencyDetectionEvaluationIT { private static final Logger logger = LoggerFactory.getLogger(InconsistencyDetectionEvaluationIT.class); private static final String OUTPUT = "target/testout"; public static final String DIRECTORY_NAME = "ardoco_eval_id"; - /** - * missing models in model - */ - private static final MutableList> OVERALL_MME_RESULTS = Lists.mutable.empty(); // - private static final MutableList> OVERALL_MME_RESULTS_BASELINE = Lists.mutable.empty(); - - /** - * undocumented models - */ - private static final MutableList> OVERALL_UME_RESULTS = Lists.mutable.empty(); - - private static final Map, ExpectedResults>> MME_RESULTS = new LinkedHashMap<>(); - private static final Map> MME_RESULTS_BASELINE = new LinkedHashMap<>(); - private static final Map> UME_RESULTS = new LinkedHashMap<>(); - private static final String LINE_SEPARATOR = System.lineSeparator(); - private static boolean ranBaseline = false; - private static final Map> inconsistentSentencesPerProject = new LinkedHashMap<>(); private static final Map arDoCoResults = new LinkedHashMap<>(); /** @@ -94,18 +72,9 @@ public class InconsistencyDetectionEvaluationIT { */ @DisplayName("Evaluating MME-Inconsistency Detection") @ParameterizedTest(name = "Evaluating MME-Inconsistency for {0}") - @EnumSource(value = Project.class, mode = EnumSource.Mode.MATCH_NONE, names = "^.*HISTORICAL$") + @EnumSource(Project.class) @Order(1) - protected void missingModelElementInconsistencyIT(GoldStandardProject goldStandardProject) { - runMissingModelElementInconsistencyEval(goldStandardProject, goldStandardProject.getExpectedInconsistencyResults()); - } - - @EnabledIfEnvironmentVariable(named = "testHistoric", matches = ".*") - @DisplayName("Evaluating MME-Inconsistency Detection (Historic)") - @ParameterizedTest(name = "Evaluating MME-Inconsistency for {0}") - @EnumSource(value = Project.class, mode = EnumSource.Mode.MATCH_ALL, names = "^.*HISTORICAL$") - @Order(2) - protected void missingModelElementInconsistencyHistoricIT(GoldStandardProject goldStandardProject) { + void missingModelElementInconsistencyIT(GoldStandardProject goldStandardProject) { runMissingModelElementInconsistencyEval(goldStandardProject, goldStandardProject.getExpectedInconsistencyResults()); } @@ -115,11 +84,8 @@ protected void runMissingModelElementInconsistencyEval(GoldStandardProject goldS var results = calculateEvaluationResults(goldStandardProject, runs); - OVERALL_MME_RESULTS.addAll(results); - - EvaluationResults weightedResults = ResultCalculatorUtil.calculateWeightedAverageResults(results.toImmutable()); + EvaluationResults weightedResults = ResultCalculatorUtil.calculateMicroAverageResults(results.toImmutable()); - MME_RESULTS.put(goldStandardProject, Tuples.pair(weightedResults, expectedInconsistencyResults)); logResultsMissingModelInconsistency(goldStandardProject, weightedResults, expectedInconsistencyResults); checkResults(weightedResults, expectedInconsistencyResults); @@ -135,24 +101,14 @@ protected void runMissingModelElementInconsistencyEval(GoldStandardProject goldS @EnabledIfEnvironmentVariable(named = "testBaseline", matches = ".*") @DisplayName("Evaluating MME-Inconsistency Detection Baseline") @ParameterizedTest(name = "Evaluating Baseline for {0}") - @EnumSource(value = Project.class, mode = EnumSource.Mode.MATCH_NONE, names = "^.*HISTORICAL$") + @EnumSource(Project.class) @Order(5) - protected void missingModelElementInconsistencyBaselineIT(GoldStandardProject goldStandardProject) { - runMissingModelElementInconsistencyBaselineEval(goldStandardProject); - } - - @EnabledIfEnvironmentVariable(named = "testBaseline", matches = ".*") - @DisplayName("Evaluating MME-Inconsistency Detection Baseline (Historical)") - @ParameterizedTest(name = "Evaluating Baseline for {0}") - @EnumSource(value = Project.class, mode = EnumSource.Mode.MATCH_ALL, names = "^.*HISTORICAL$") - @Order(6) - protected void missingModelElementInconsistencyBaselineHistoricIT(GoldStandardProject goldStandardProject) { + void missingModelElementInconsistencyBaselineIT(GoldStandardProject goldStandardProject) { runMissingModelElementInconsistencyBaselineEval(goldStandardProject); } protected void runMissingModelElementInconsistencyBaselineEval(GoldStandardProject goldStandardProject) { logger.info("Start evaluation of MME-inconsistency baseline for {}", goldStandardProject.getProjectName()); - ranBaseline = true; HoldBackRunResultsProducer holdBackRunResultsProducer = new HoldBackRunResultsProducer(); Map runs = holdBackRunResultsProducer.produceHoldBackRunResults(goldStandardProject, true); @@ -160,10 +116,8 @@ protected void runMissingModelElementInconsistencyBaselineEval(GoldStandardProje Assertions.assertTrue(runs != null && !runs.isEmpty()); var results = calculateEvaluationResults(goldStandardProject, runs); - OVERALL_MME_RESULTS_BASELINE.addAll(results); var weightedResults = ResultCalculatorUtil.calculateWeightedAverageResults(results.toImmutable()); - MME_RESULTS_BASELINE.put(goldStandardProject, weightedResults); if (logger.isInfoEnabled()) { String name = goldStandardProject.getProjectName() + " missing model inconsistency"; @@ -178,18 +132,9 @@ protected void runMissingModelElementInconsistencyBaselineEval(GoldStandardProje */ @DisplayName("Evaluate Inconsistency Analyses For MissingTextForModelElementInconsistencies") @ParameterizedTest(name = "Evaluating UME-inconsistency for {0}") - @EnumSource(value = Project.class, mode = EnumSource.Mode.MATCH_NONE, names = "^.*HISTORICAL$") + @EnumSource(Project.class) @Order(10) - protected void missingTextInconsistencyIT(GoldStandardProject goldStandardProject) { - runMissingTextInconsistencyEval(goldStandardProject); - } - - @EnabledIfEnvironmentVariable(named = "testHistoric", matches = ".*") - @DisplayName("Evaluate Inconsistency Analyses For MissingTextForModelElementInconsistencies " + "(Historical)") - @ParameterizedTest(name = "Evaluating UME-inconsistency for {0}") - @EnumSource(value = Project.class, mode = EnumSource.Mode.MATCH_ALL, names = "^.*HISTORICAL$") - @Order(11) - protected void missingTextInconsistencyHistoricIT(GoldStandardProject goldStandardProject) { + void missingTextInconsistencyIT(GoldStandardProject goldStandardProject) { runMissingTextInconsistencyEval(goldStandardProject); } @@ -205,9 +150,6 @@ private void runMissingTextInconsistencyEval(GoldStandardProject goldStandardPro var inconsistentModelElements = projectResults.getAllModelInconsistencies().collect(ModelInconsistency::getModelInstanceUid).toList(); var results = TestUtil.compareInconsistencies(projectResults, inconsistentModelElements.toImmutable(), expectedInconsistentModelElements.toImmutable()); - OVERALL_UME_RESULTS.add(results); - UME_RESULTS.put(goldStandardProject, results); - String name = goldStandardProject.getProjectName() + " missing text inconsistency"; TestUtil.logExplicitResults(logger, name, results); writeOutResults(goldStandardProject, results); @@ -228,77 +170,6 @@ protected HoldBackRunResultsProducer getHoldBackRunResultsProducer() { return new HoldBackRunResultsProducer(); } - @EnabledIfEnvironmentVariable(named = "overallResults", matches = ".*") - @Test - @Order(999) - void overAllResultsIT() { - var weightedResults = ResultCalculatorUtil.calculateWeightedAverageResults(OVERALL_MME_RESULTS.toImmutable()); - var macroResults = ResultCalculatorUtil.calculateAverageResults(OVERALL_MME_RESULTS.toImmutable()); - - Assertions.assertNotNull(weightedResults); - Assertions.assertNotNull(macroResults); - - var weightedUMEResults = ResultCalculatorUtil.calculateWeightedAverageResults(OVERALL_UME_RESULTS.toImmutable()); - var macroUMEResults = ResultCalculatorUtil.calculateAverageResults(OVERALL_UME_RESULTS.toImmutable()); - - Assertions.assertNotNull(weightedUMEResults); - Assertions.assertNotNull(macroUMEResults); - - if (logger.isInfoEnabled()) { - var mmeBaselineMacro = ResultCalculatorUtil.calculateAverageResults(OVERALL_MME_RESULTS_BASELINE.toImmutable()); - var mmeBaselineWeighted = ResultCalculatorUtil.calculateWeightedAverageResults(OVERALL_MME_RESULTS_BASELINE.toImmutable()); - ; - - var mmeOverallWeightedName = "MME Overall Weighted"; - TestUtil.logResults(logger, mmeOverallWeightedName, weightedResults); - - var mmeOverallMacroName = "MME Overall Macro"; - TestUtil.logResults(logger, mmeOverallMacroName, macroResults); - - var mmeBaselineOverallWeightedName = "MME BASELINE Overall Weighted"; - var mmeBaselineOverallMacroName = "MME BASELINE Overall Macro"; - if (ranBaseline) { - TestUtil.logResults(logger, mmeBaselineOverallWeightedName, mmeBaselineWeighted); - TestUtil.logResults(logger, mmeBaselineOverallMacroName, mmeBaselineMacro); - } - - var umeOverallWeightedName = "Undoc. Model Element Overall Weighted"; - TestUtil.logResults(logger, umeOverallWeightedName, weightedUMEResults); - var umeOverallMacroName = "Undoc. Model Element Overall Macro"; - TestUtil.logResults(logger, umeOverallMacroName, macroUMEResults); - - logger.info("MME"); - for (var entry : MME_RESULTS.entrySet()) { - TestUtil.logExtendedResultsAsRow(logger, "Proj", entry.getKey().getAlias(), entry.getValue().getOne()); - } - TestUtil.logExtendedResultsAsRow(logger, "-", "Macro", macroResults); - TestUtil.logExtendedResultsAsRow(logger, "-", "Weighted", weightedResults); - - if (ranBaseline) { - logger.info("MME Baseline"); - for (var entry : MME_RESULTS_BASELINE.entrySet()) { - TestUtil.logExtendedResultsAsRow(logger, "Proj", entry.getKey().getAlias(), entry.getValue()); - } - TestUtil.logExtendedResultsAsRow(logger, "-", "Macro", mmeBaselineMacro); - TestUtil.logExtendedResultsAsRow(logger, "-", "Weighted", mmeBaselineWeighted); - } - - logger.info("UME"); - for (var entry : UME_RESULTS.entrySet()) { - TestUtil.logExtendedResultsAsRow(logger, "Proj", entry.getKey().getAlias(), entry.getValue()); - } - TestUtil.logExtendedResultsAsRow(logger, "-", "Macro", macroUMEResults); - TestUtil.logExtendedResultsAsRow(logger, "-", "Weighted", weightedUMEResults); - } - - try { - writeOutput(weightedResults, macroResults); - writeOverallOutputMissingTextInconsistency(weightedUMEResults, macroUMEResults); - } catch (IOException e) { - logger.error(e.getMessage(), e.getCause()); - } - } - private MutableList> calculateEvaluationResults(GoldStandardProject goldStandardProject, Map runs) { Map> results = Maps.mutable.empty(); @@ -310,15 +181,14 @@ private MutableList> calculateEvaluationResults(GoldSt if (runEvalResults != null) { results.put(modelInstance, runEvalResults); } else { - // for the base case, instead of calculating results, save the found inconsistencies. - inconsistentSentencesPerProject.put(goldStandardProject, arDoCoResult.getInconsistentSentences()); + logger.error("Evaluation results for {} are null.", modelInstance); } } return Lists.mutable.ofAll(results.values()); } private EvaluationResults evaluateRun(GoldStandardProject goldStandardProject, ModelElement removedElement, ArDoCoResult arDoCoResult) { - var modelId = arDoCoResult.getModelIds().get(0); + var modelId = arDoCoResult.getModelIds().getFirst(); ImmutableList inconsistencies = arDoCoResult.getInconsistenciesOfTypeForModel(modelId, MissingModelInstanceInconsistency.class); @@ -436,49 +306,6 @@ private static Pair createOutput(GoldStandardProje return Tuples.pair(outputBuilder, detailedOutputBuilder); } - private static void writeOutput(EvaluationResults weightedResults, EvaluationResults macroResults) throws IOException { - var evalDir = Path.of(OUTPUT).resolve(DIRECTORY_NAME); - Files.createDirectories(evalDir); - var outputFile = evalDir.resolve("base_results.md"); - - var outputBuilder = new StringBuilder("# Inconsistency Detection").append(LINE_SEPARATOR); - - var resultString = TestUtil.createResultLogString("Overall Weighted", weightedResults); - outputBuilder.append(resultString).append(LINE_SEPARATOR); - resultString = TestUtil.createResultLogString("Overall Macro", macroResults); - outputBuilder.append(resultString).append(LINE_SEPARATOR); - outputBuilder.append(LINE_SEPARATOR); - - for (var entry : inconsistentSentencesPerProject.entrySet()) { - var goldStandardProject = entry.getKey(); - outputBuilder.append("## ").append(goldStandardProject.getProjectName()); - outputBuilder.append(LINE_SEPARATOR); - var inconsistentSentences = entry.getValue(); - for (var inconsistentSentence : inconsistentSentences) { - outputBuilder.append(inconsistentSentence.getInfoString()); - outputBuilder.append(LINE_SEPARATOR); - } - } - - Files.writeString(outputFile, outputBuilder.toString(), StandardCharsets.UTF_8, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); - } - - //FIXME Something is wrong with this. - private static void writeOverallOutputMissingTextInconsistency(EvaluationResults weightedResults, EvaluationResults macroResults) - throws IOException { - var evalDir = Path.of(OUTPUT).resolve(DIRECTORY_NAME); - Files.createDirectories(evalDir); - var outputFile = evalDir.resolve("_MissingTextInconsistency_Overall_Results.md"); - - var outputBuilder = new StringBuilder("# Inconsistency Detection - Missing Text For Model " + "Element").append(LINE_SEPARATOR); - - var resultString = TestUtil.createResultLogString("Overall Weighted", weightedResults); - outputBuilder.append(resultString).append(LINE_SEPARATOR); - resultString = TestUtil.createResultLogString("Overall Macro", macroResults); - outputBuilder.append(resultString).append(LINE_SEPARATOR); - outputBuilder.append(LINE_SEPARATOR); - } - private static String getOverallResultsString(MutableList> results) { StringBuilder outputBuilder = new StringBuilder(); outputBuilder.append("###").append(LINE_SEPARATOR); @@ -529,18 +356,15 @@ private static Pair>, StringBuilder> inspe private static void inspectRun(StringBuilder outputBuilder, StringBuilder detailedOutputBuilder, MutableList> allResults, ArDoCoResult arDoCoResult, EvaluationResults result) { - var truePositives = result.truePositives().toList(); + var truePositives = result.truePositives(); appendResults(truePositives, detailedOutputBuilder, "True Positives", arDoCoResult, outputBuilder); - var falsePositives = result.falsePositives().toList(); + var falsePositives = result.falsePositives(); appendResults(falsePositives, detailedOutputBuilder, "False Positives", arDoCoResult, outputBuilder); - var falseNegatives = result.falseNegatives().toList(); + var falseNegatives = result.falseNegatives(); appendResults(falseNegatives, detailedOutputBuilder, "False Negatives", arDoCoResult, outputBuilder); - - var results = EvaluationResults.createEvaluationResults(new ResultMatrix<>(truePositives.toImmutable(), 0, falsePositives.toImmutable(), falseNegatives - .toImmutable())); - allResults.add(results); + allResults.add(result); } private static void appendResults(List resultList, StringBuilder detailedOutputBuilder, String type, ArDoCoResult arDoCoResult, @@ -588,7 +412,7 @@ private static String listToString(List truePositives) { } private static ImmutableList getInitialInconsistencies(ArDoCoResult arDoCoResult) { - var id = arDoCoResult.getModelIds().get(0); + var id = arDoCoResult.getModelIds().getFirst(); return arDoCoResult.getInconsistenciesOfTypeForModel(id, MissingModelInstanceInconsistency.class); } } diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SadSamTraceabilityLinkRecoveryEvaluation.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SadSamTraceabilityLinkRecoveryEvaluation.java index a18a52b..6af59c3 100644 --- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SadSamTraceabilityLinkRecoveryEvaluation.java +++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SadSamTraceabilityLinkRecoveryEvaluation.java @@ -1,63 +1,36 @@ /* Licensed under MIT 2021-2024. */ package edu.kit.kastel.mcse.ardoco.tlr.tests.integration; -import static edu.kit.kastel.mcse.ardoco.tlr.tests.integration.TraceLinkEvaluationIT.DATA_MAP; import static edu.kit.kastel.mcse.ardoco.tlr.tests.integration.TraceLinkEvaluationIT.OUTPUT; -import static edu.kit.kastel.mcse.ardoco.tlr.tests.integration.TraceLinkEvaluationIT.PROJECT_RESULTS; -import static edu.kit.kastel.mcse.ardoco.tlr.tests.integration.TraceLinkEvaluationIT.RESULTS; import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.stream.Stream; -import org.eclipse.collections.api.collection.ImmutableCollection; import org.eclipse.collections.api.factory.Lists; import org.eclipse.collections.api.list.ImmutableList; -import org.eclipse.collections.api.list.MutableList; -import org.eclipse.collections.impl.tuple.Tuples; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.MethodOrderer; import org.junit.jupiter.api.TestMethodOrder; -import edu.kit.kastel.mcse.ardoco.core.api.PreprocessingData; import edu.kit.kastel.mcse.ardoco.core.api.models.ArchitectureModelType; -import edu.kit.kastel.mcse.ardoco.core.api.models.ModelInstance; -import edu.kit.kastel.mcse.ardoco.core.api.models.ModelStates; import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult; -import edu.kit.kastel.mcse.ardoco.core.api.text.Sentence; -import edu.kit.kastel.mcse.ardoco.core.common.util.FilePrinter; import edu.kit.kastel.mcse.ardoco.core.common.util.TraceLinkUtilities; -import edu.kit.kastel.mcse.ardoco.core.data.DataRepository; import edu.kit.kastel.mcse.ardoco.core.execution.ConfigurationHelper; import edu.kit.kastel.mcse.ardoco.core.execution.runner.ArDoCoRunner; -import edu.kit.kastel.mcse.ardoco.core.tests.TestUtil; import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults; import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ExpectedResults; import edu.kit.kastel.mcse.ardoco.tlr.execution.ArDoCoForSadSamTraceabilityLinkRecovery; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TLRUtil; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLGoldStandardFile; /** * Integration test that evaluates the traceability link recovery capabilities of ArDoCo. */ @TestMethodOrder(MethodOrderer.OrderAnnotation.class) public class SadSamTraceabilityLinkRecoveryEvaluation extends TraceabilityLinkRecoveryEvaluation { + @Override protected boolean resultHasRequiredData(ArDoCoResult arDoCoResult) { var traceLinks = arDoCoResult.getAllTraceLinks(); return !traceLinks.isEmpty(); } - @Override - protected ArDoCoResult runTraceLinkEvaluation(T project) { - var result = super.runTraceLinkEvaluation(project); - DATA_MAP.put(project, result); - return result; - } - @Override protected ArDoCoRunner getAndSetupRunner(T project) { var additionalConfigsMap = ConfigurationHelper.loadAdditionalConfigs(project.getAdditionalConfigurationsFile()); @@ -104,27 +77,6 @@ protected int getConfusionMatrixSum(ArDoCoResult arDoCoResult) { return sentences * modelElements; } - @Override - protected EvaluationResults calculateEvaluationResults(ArDoCoResult arDoCoResult, ImmutableCollection goldStandard) { - var results = super.calculateEvaluationResults(arDoCoResult, goldStandard); - PROJECT_RESULTS.add(results); - return results; - } - - public ArDoCoResult getArDoCoResult(T project) { - String name = project.getProjectName(); - var inputModel = project.getModelFile(); - var inputText = project.getTextFile(); - - var arDoCoResult = DATA_MAP.get(project); - if (arDoCoResult == null) { - File additionalConfigurations = project.getAdditionalConfigurationsFile(); - arDoCoResult = getArDoCoResult(name, inputText, inputModel, ArchitectureModelType.PCM, additionalConfigurations); - DATA_MAP.put(project, arDoCoResult); - } - return arDoCoResult; - } - protected ArDoCoResult getArDoCoResult(String name, File inputText, File inputModel, ArchitectureModelType architectureModelType, File additionalConfigurations) { var additionalConfigsMap = ConfigurationHelper.loadAdditionalConfigs(additionalConfigurations); @@ -134,132 +86,4 @@ protected ArDoCoResult getArDoCoResult(String name, File inputText, File inputMo runner.setUp(inputText, inputModel, architectureModelType, additionalConfigsMap, outputDir); return runner.run(); } - - /** - * calculate {@link EvaluationResults} and compare to {@link ExpectedResults} - * - * @param project the result's project - * @param arDoCoResult the result - */ - public static void checkResults(GoldStandardProject project, ArDoCoResult arDoCoResult) { - - var modelIds = arDoCoResult.getModelIds(); - var modelId = modelIds.stream().findFirst().orElseThrow(); - - var goldStandard = project.getTlrGoldStandard(); - EvaluationResults results = calculateResults(goldStandard, arDoCoResult, modelId); - - ExpectedResults expectedResults = project.getExpectedTraceLinkResults(); - - logAndSaveProjectResult(project, arDoCoResult, results, expectedResults); - - compareResultWithExpected(results, expectedResults); - - } - - private static void logAndSaveProjectResult(GoldStandardProject project, ArDoCoResult arDoCoResult, EvaluationResults results, - ExpectedResults expectedResults) { - if (logger.isInfoEnabled()) { - String projectName = project.getProjectName(); - TestUtil.logExtendedResultsWithExpected(logger, SadSamTraceabilityLinkRecoveryEvaluation.class, projectName, results, expectedResults); - - var data = arDoCoResult.dataRepository(); - printDetailedDebug(results, data); - try { - RESULTS.add(Tuples.pair(project, TestUtil.compareTLR(DATA_MAP.get(project), TLRUtil.getTraceLinks(data), TLGoldStandardFile.loadLinks(project) - .toImmutable()))); - DATA_MAP.put(project, arDoCoResult); - PROJECT_RESULTS.add(results); - } catch (IOException e) { - // failing to save project results is irrelevant for test success - logger.warn("Failed to load file for gold standard", e); - } - } - } - - private static void compareResultWithExpected(EvaluationResults results, ExpectedResults expectedResults) { - Assertions.assertAll(// - () -> Assertions.assertTrue(results.precision() >= expectedResults.precision(), "Precision " + results - .precision() + " is below the expected minimum value " + expectedResults.precision()), // - () -> Assertions.assertTrue(results.recall() >= expectedResults.recall(), "Recall " + results - .recall() + " is below the expected minimum value " + expectedResults.recall()), // - () -> Assertions.assertTrue(results.f1() >= expectedResults.f1(), "F1 " + results - .f1() + " is below the expected minimum value " + expectedResults.f1())); - Assertions.assertAll(// - () -> Assertions.assertTrue(results.accuracy() >= expectedResults.accuracy(), "Accuracy " + results - .accuracy() + " is below the expected minimum value " + expectedResults.accuracy()), // - () -> Assertions.assertTrue(results.phiCoefficient() >= expectedResults.phiCoefficient(), "Phi coefficient " + results - .phiCoefficient() + " is below the expected minimum value " + expectedResults.phiCoefficient())); - } - - public static void writeDetailedOutput(GoldStandardProject project, ArDoCoResult arDoCoResult) { - String name = project.getProjectName(); - var path = Path.of(OUTPUT).resolve(name); - try { - Files.createDirectories(path); - } catch (IOException e) { - logger.warn("Could not create directories.", e); - } - FilePrinter.printResultsInFiles(path, name, arDoCoResult); - } - - private static EvaluationResults calculateResults(ImmutableList goldStandard, ArDoCoResult arDoCoResult, String modelId) { - var traceLinks = arDoCoResult.getTraceLinksForModelAsStrings(modelId); - logger.info("Found {} trace links", traceLinks.size()); - - return TestUtil.compareTLR(arDoCoResult, traceLinks, goldStandard); - } - - private static void printDetailedDebug(EvaluationResults results, DataRepository data) { - var falseNegatives = results.falseNegatives().stream().map(Object::toString); - var falsePositives = results.falsePositives().stream().map(Object::toString); - - var sentences = data.getData(PreprocessingData.ID, PreprocessingData.class).orElseThrow().getText().getSentences(); - var modelStates = data.getData(ModelStates.ID, ModelStates.class).orElseThrow(); - - for (String modelId : modelStates.modelIds()) { - var instances = modelStates.getModelExtractionState(modelId).getInstances(); - - var falseNegativeOutput = createOutputStrings(falseNegatives, sentences, instances); - var falsePositivesOutput = createOutputStrings(falsePositives, sentences, instances); - - logger.debug("Model: \n{}", modelId); - if (!falseNegativeOutput.isEmpty()) { - logger.debug("False negatives:\n{}", String.join("\n", falseNegativeOutput)); - } - if (!falsePositivesOutput.isEmpty()) { - logger.debug("False positives:\n{}", String.join("\n", falsePositivesOutput)); - } - } - - } - - private static MutableList createOutputStrings(Stream traceLinkStrings, ImmutableList sentences, - ImmutableList instances) { - var outputList = Lists.mutable.empty(); - for (var traceLinkString : traceLinkStrings.toList()) { - var parts = traceLinkString.split(",", -1); - if (parts.length < 2) { - continue; - } - var id = parts[0]; - - var modelElement = instances.detect(instance -> instance.getUid().equals(id)); - - var sentence = parts[1]; - - var sentenceNo = -1; - try { - sentenceNo = Integer.parseInt(sentence); - } catch (NumberFormatException e) { - logger.debug("Having problems retrieving sentence, so skipping line: {}", traceLinkString); - continue; - } - var sentenceText = sentences.get(sentenceNo - 1); - - outputList.add(String.format("%-20s - %s (%s)", modelElement.getFullName(), sentenceText.getText(), traceLinkString)); - } - return outputList; - } - } diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SamCodeTraceabilityLinkRecoveryEvaluation.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SamCodeTraceabilityLinkRecoveryEvaluation.java index 07093be..ae136f1 100644 --- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SamCodeTraceabilityLinkRecoveryEvaluation.java +++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SamCodeTraceabilityLinkRecoveryEvaluation.java @@ -51,8 +51,7 @@ protected ImmutableList createTraceLinkStringList(ArDoCoResult arDoCoRes @Override protected ImmutableList getGoldStandard(CodeProject codeProject) { - ImmutableList samCodeGoldStandard = codeProject.getSamCodeGoldStandard(); - return samCodeGoldStandard; + return codeProject.getSamCodeGoldStandard(); } @Override diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationIT.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationIT.java index 244e246..25c165b 100644 --- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationIT.java +++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationIT.java @@ -3,63 +3,30 @@ import static edu.kit.kastel.mcse.ardoco.core.tests.eval.ProjectHelper.ANALYZE_CODE_DIRECTLY; -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Collection; -import java.util.LinkedHashMap; +import java.util.Arrays; import java.util.List; -import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.function.Predicate; -import org.eclipse.collections.api.factory.Lists; -import org.eclipse.collections.api.list.MutableList; -import org.eclipse.collections.api.tuple.Pair; import org.junit.jupiter.api.*; import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.EnumSource; import org.junit.jupiter.params.provider.MethodSource; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import edu.kit.kastel.mcse.ardoco.core.api.models.ArchitectureModelType; -import edu.kit.kastel.mcse.ardoco.core.api.models.tracelinks.SadSamTraceLink; import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult; import edu.kit.kastel.mcse.ardoco.core.common.RepositoryHandler; -import edu.kit.kastel.mcse.ardoco.core.common.util.DataRepositoryHelper; -import edu.kit.kastel.mcse.ardoco.core.execution.ArDoCo; -import edu.kit.kastel.mcse.ardoco.core.tests.TestUtil; import edu.kit.kastel.mcse.ardoco.core.tests.eval.CodeProject; import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject; import edu.kit.kastel.mcse.ardoco.core.tests.eval.Project; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.calculator.ResultCalculatorUtil; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLDiffFile; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLLogFile; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLModelFile; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLPreviousFile; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLSentenceFile; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLSummaryFile; @TestMethodOrder(MethodOrderer.OrderAnnotation.class) -public class TraceLinkEvaluationIT { - - protected static final Logger logger = LoggerFactory.getLogger(TraceLinkEvaluationIT.class); +class TraceLinkEvaluationIT { protected static final String OUTPUT = "target/testout-tlr-it"; protected static final String LOGGING_ARDOCO_CORE = "org.slf4j.simpleLogger.log.edu.kit.kastel.mcse.ardoco.core"; protected static AtomicBoolean analyzeCodeDirectly = ANALYZE_CODE_DIRECTLY; - protected static final List>> RESULTS = new ArrayList<>(); - protected static final MutableList> PROJECT_RESULTS = Lists.mutable.empty(); - protected static final Map DATA_MAP = new LinkedHashMap<>(); - @BeforeAll static void beforeAll() { System.setProperty(LOGGING_ARDOCO_CORE, "info"); @@ -67,9 +34,6 @@ static void beforeAll() { @AfterAll static void afterAll() { - logOverallResultsForSadSamTlr(); - writeOutputForSadSamTlr(); - System.setProperty(LOGGING_ARDOCO_CORE, "error"); } @@ -77,66 +41,12 @@ private static void cleanUpCodeRepository(CodeProject codeProject) { RepositoryHandler.removeRepository(codeProject.getCodeLocation()); } - private static void logOverallResultsForSadSamTlr() { - if (logger.isInfoEnabled()) { - var name = "Overall Weighted"; - var results = ResultCalculatorUtil.calculateWeightedAverageResults(PROJECT_RESULTS.toImmutable()); - TestUtil.logResults(logger, name, results); - - name = "Overall Macro"; - results = ResultCalculatorUtil.calculateAverageResults(PROJECT_RESULTS.toImmutable()); - TestUtil.logResults(logger, name, results); - } - } - - private static void writeOutputForSadSamTlr() { - var evalDir = Path.of(OUTPUT).resolve("ardoco_eval_tl"); - try { - Files.createDirectories(evalDir); - - TLSummaryFile.save(evalDir.resolve("summary.txt"), RESULTS, DATA_MAP); - TLModelFile.save(evalDir.resolve("models.txt"), DATA_MAP); - TLSentenceFile.save(evalDir.resolve("sentences.txt"), DATA_MAP); - TLLogFile.append(evalDir.resolve("log.txt"), RESULTS); - TLPreviousFile.save(evalDir.resolve("previous.csv"), RESULTS, logger); // save before loading - TLDiffFile.save(evalDir.resolve("diff.txt"), RESULTS, TLPreviousFile.load(evalDir.resolve("previous.csv"), DATA_MAP), DATA_MAP); - } catch (IOException e) { - logger.error("Failed to write output.", e); - } - } - - private static List getHistoricalProjects() { - return filterForHistoricalProjects(List.of(Project.values())); - } - - private static List getNonHistoricalCodeProjects() { - return filterForNonHistoricalProjects(List.of(CodeProject.values())); - } - - private static > List filterForHistoricalProjects(Collection unfilteredProjects) { - return filterForProjects(unfilteredProjects, p -> p.name().endsWith("HISTORICAL")); - } - - private static > List filterForNonHistoricalProjects(Collection unfilteredProjects) { - return filterForProjects(unfilteredProjects, p -> !p.name().endsWith("HISTORICAL")); - } - - private static > List filterForProjects(Collection unfilteredProjects, Predicate filter) { - List projects = new ArrayList<>(); - for (var project : unfilteredProjects) { - if (filter.test(project)) { - projects.add(project); - } - } - return projects; - } - @EnabledIfEnvironmentVariable(named = "testCodeFull", matches = ".*") @DisplayName("Evaluate SAD-SAM-Code TLR (Full)") @ParameterizedTest(name = "{0}") - @MethodSource("getNonHistoricalCodeProjects") + @EnumSource(CodeProject.class) @Order(1) - protected void evaluateSadSamCodeTlrFullIT(CodeProject project) { + void evaluateSadSamCodeTlrFullIT(CodeProject project) { analyzeCodeDirectly.set(true); if (analyzeCodeDirectly.get()) cleanUpCodeRepository(project); @@ -149,9 +59,9 @@ protected void evaluateSadSamCodeTlrFullIT(CodeProject project) { @EnabledIfEnvironmentVariable(named = "testCodeFull", matches = ".*") @DisplayName("Evaluate SAM-Code TLR (Full)") @ParameterizedTest(name = "{0}") - @EnumSource(value = CodeProject.class, mode = EnumSource.Mode.MATCH_NONE, names = "^.*HISTORICAL$") + @EnumSource(value = CodeProject.class) @Order(2) - protected void evaluateSamCodeTlrFullIT(CodeProject project) { + void evaluateSamCodeTlrFullIT(CodeProject project) { analyzeCodeDirectly.set(true); if (analyzeCodeDirectly.get()) cleanUpCodeRepository(project); @@ -163,9 +73,9 @@ protected void evaluateSamCodeTlrFullIT(CodeProject project) { @DisplayName("Evaluate SAD-SAM-Code TLR") @ParameterizedTest(name = "{0}") - @MethodSource("getNonHistoricalCodeProjects") + @EnumSource(CodeProject.class) @Order(9) - protected void evaluateSadSamCodeTlrIT(CodeProject codeProject) { + void evaluateSadSamCodeTlrIT(CodeProject codeProject) { analyzeCodeDirectly.set(false); if (analyzeCodeDirectly.get()) cleanUpCodeRepository(codeProject); @@ -179,9 +89,9 @@ protected void evaluateSadSamCodeTlrIT(CodeProject codeProject) { @DisplayName("Evaluate SAM-Code TLR") @ParameterizedTest(name = "{0}") - @MethodSource("getNonHistoricalCodeProjects") + @EnumSource(CodeProject.class) @Order(10) - protected void evaluateSamCodeTlrIT(CodeProject project) { + void evaluateSamCodeTlrIT(CodeProject project) { analyzeCodeDirectly.set(false); if (analyzeCodeDirectly.get()) cleanUpCodeRepository(project); @@ -193,68 +103,15 @@ protected void evaluateSamCodeTlrIT(CodeProject project) { @DisplayName("Evaluate SAD-SAM TLR") @ParameterizedTest(name = "{0}") - @MethodSource("getNonHistoricalCodeProjects") + @MethodSource("getProjects") @Order(20) - protected void evaluateSadSamTlrIT(T project) { + void evaluateSadSamTlrIT(T project) { var evaluation = new SadSamTraceabilityLinkRecoveryEvaluation<>(); - var results = evaluation.runTraceLinkEvaluation(project); - Assertions.assertNotNull(results); - } - - @EnabledIfEnvironmentVariable(named = "testHistoric", matches = ".*") - @DisplayName("Evaluate TLR (Historical)") - @ParameterizedTest(name = "{0}") - @MethodSource("getHistoricalProjects") - @Order(21) - protected void evaluateSadSamTlrHistoricalIT(T project) { - var evaluation = new SadSamTraceabilityLinkRecoveryEvaluation<>(); - ArDoCoResult arDoCoResult = evaluation.getArDoCoResult(project); + var arDoCoResult = evaluation.runTraceLinkEvaluation(project); Assertions.assertNotNull(arDoCoResult); - - SadSamTraceabilityLinkRecoveryEvaluation.checkResults(project, arDoCoResult); - SadSamTraceabilityLinkRecoveryEvaluation.writeDetailedOutput(project, arDoCoResult); } - /** - * Test if the results from executing ArDoCo with UML are the same as with PCM - * - * @param project the project, provided by the EnumSource - */ - @Disabled("Only enable this for local tests.") - @DisplayName("Compare TLR for UML/PCM") - @ParameterizedTest(name = "{0}") - @EnumSource(value = Project.class) - @Order(29) - protected void compareSadSamTlRForPcmAndUmlIT(Project project) { - String name = project.name(); - var inputText = project.getTextFile(); - - var evaluation = new SadSamTraceabilityLinkRecoveryEvaluation<>(); - - var ardocoRunForPCM = evaluation.getArDoCoResult(project); - Assertions.assertNotNull(ardocoRunForPCM); - - var arDoCo = ArDoCo.getInstance(name); - var preprocessingData = ardocoRunForPCM.getPreprocessingData(); - DataRepositoryHelper.putPreprocessingData(arDoCo.getDataRepository(), preprocessingData); - - File umlModelFile = project.getModelFile(ArchitectureModelType.UML); - File additionalConfigurations = project.getAdditionalConfigurationsFile(); - var ardocoRunForUML = evaluation.getArDoCoResult(name, inputText, umlModelFile, ArchitectureModelType.UML, additionalConfigurations); - Assertions.assertNotNull(ardocoRunForUML); - - var pcmTLs = ardocoRunForPCM.getAllTraceLinks() - .toList() - .sortThisBy(SadSamTraceLink::getModelElementUid) - .sortThisByInt(SadSamTraceLink::getSentenceNumber); - var umlTLs = ardocoRunForUML.getAllTraceLinks() - .toList() - .sortThisBy(SadSamTraceLink::getModelElementUid) - .sortThisByInt(SadSamTraceLink::getSentenceNumber); - - Assertions.assertAll( // - () -> Assertions.assertEquals(pcmTLs.size(), umlTLs.size()), // - () -> Assertions.assertIterableEquals(pcmTLs, umlTLs) // - ); + private static List getProjects() { + return Arrays.asList(Project.values()); } } diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationSadCodeDirectIT.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationSadCodeDirectIT.java index 6944b61..522c963 100644 --- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationSadCodeDirectIT.java +++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationSadCodeDirectIT.java @@ -3,51 +3,29 @@ import static edu.kit.kastel.mcse.ardoco.core.tests.eval.ProjectHelper.ANALYZE_CODE_DIRECTLY; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; import java.util.ArrayList; -import java.util.Collection; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.function.Predicate; -import org.eclipse.collections.api.factory.Lists; -import org.eclipse.collections.api.list.MutableList; import org.eclipse.collections.api.tuple.Pair; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.MethodSource; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.junit.jupiter.params.provider.EnumSource; import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult; -import edu.kit.kastel.mcse.ardoco.core.tests.TestUtil; import edu.kit.kastel.mcse.ardoco.core.tests.eval.CodeProject; import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject; import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.calculator.ResultCalculatorUtil; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLDiffFile; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLLogFile; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLModelFile; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLPreviousFile; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLSentenceFile; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLSummaryFile; +import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.ModelElementSentenceLink; class TraceLinkEvaluationSadCodeDirectIT { - protected static final Logger logger = LoggerFactory.getLogger(TraceLinkEvaluationIT.class); - - protected static final String OUTPUT = "target/testout"; - protected static final String LOGGING_ARDOCO_CORE = "org.slf4j.simpleLogger.log.edu.kit.kastel.mcse.ardoco.core"; - protected static final List>> RESULTS = new ArrayList<>(); - protected static final MutableList> PROJECT_RESULTS = Lists.mutable.empty(); + protected static final List>> RESULTS = new ArrayList<>(); protected static final Map DATA_MAP = new LinkedHashMap<>(); @BeforeAll @@ -57,64 +35,16 @@ static void beforeAll() { @AfterAll static void afterAll() { - logOverallResultsForSadSamTlr(); - writeOutputForSadSamTlr(); System.setProperty(LOGGING_ARDOCO_CORE, "error"); } @DisplayName("Evaluate SAD-Code TLR") @ParameterizedTest(name = "{0}") - @MethodSource("getNonHistoricalCodeProjects") + @EnumSource(CodeProject.class) void evaluateSadCodeTlrIT(CodeProject project) { ANALYZE_CODE_DIRECTLY.set(false); var evaluation = new SadCodeTraceabilityLinkRecoveryEvaluation(); ArDoCoResult results = evaluation.runTraceLinkEvaluation(project); Assertions.assertNotNull(results); } - - private static List getNonHistoricalCodeProjects() { - return filterForNonHistoricalProjects(List.of(CodeProject.values())); - } - - private static > List filterForNonHistoricalProjects(Collection unfilteredProjects) { - return filterForProjects(unfilteredProjects, p -> !p.name().endsWith("HISTORICAL")); - } - - private static > List filterForProjects(Collection unfilteredProjects, Predicate filter) { - List projects = new ArrayList<>(); - for (var project : unfilteredProjects) { - if (filter.test(project)) { - projects.add(project); - } - } - return projects; - } - - private static void logOverallResultsForSadSamTlr() { - if (logger.isInfoEnabled()) { - var name = "Overall Weighted"; - var results = ResultCalculatorUtil.calculateWeightedAverageResults(PROJECT_RESULTS.toImmutable()); - TestUtil.logResults(logger, name, results); - - name = "Overall Macro"; - results = ResultCalculatorUtil.calculateAverageResults(PROJECT_RESULTS.toImmutable()); - TestUtil.logResults(logger, name, results); - } - } - - private static void writeOutputForSadSamTlr() { - var evalDir = Path.of(OUTPUT).resolve("ardoco_eval_tl"); - try { - Files.createDirectories(evalDir); - - TLSummaryFile.save(evalDir.resolve("summary.txt"), RESULTS, DATA_MAP); - TLModelFile.save(evalDir.resolve("models.txt"), DATA_MAP); - TLSentenceFile.save(evalDir.resolve("sentences.txt"), DATA_MAP); - TLLogFile.append(evalDir.resolve("log.txt"), RESULTS); - TLPreviousFile.save(evalDir.resolve("previous.csv"), RESULTS, logger); // save before loading - TLDiffFile.save(evalDir.resolve("diff.txt"), RESULTS, TLPreviousFile.load(evalDir.resolve("previous.csv"), DATA_MAP), DATA_MAP); - } catch (IOException e) { - logger.error("Failed to write output.", e); - } - } } diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceabilityLinkRecoveryEvaluation.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceabilityLinkRecoveryEvaluation.java index b9e36f7..81658a0 100644 --- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceabilityLinkRecoveryEvaluation.java +++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceabilityLinkRecoveryEvaluation.java @@ -9,7 +9,6 @@ import java.util.NoSuchElementException; import java.util.Objects; import java.util.Set; -import java.util.stream.Collectors; import org.eclipse.collections.api.collection.ImmutableCollection; import org.eclipse.collections.api.factory.Lists; @@ -31,7 +30,7 @@ import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject; import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults; import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ExpectedResults; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ResultMatrix; +import edu.kit.kastel.mcse.ardoco.metrics.ClassificationMetricsCalculator; public abstract class TraceabilityLinkRecoveryEvaluation { protected static final Logger logger = LoggerFactory.getLogger(TraceabilityLinkRecoveryEvaluation.class); @@ -40,9 +39,11 @@ public abstract class TraceabilityLinkRecoveryEvaluation resultMap = new LinkedHashMap<>(); - protected ArDoCoResult runTraceLinkEvaluation(T project) { + protected final ArDoCoResult runTraceLinkEvaluation(T project) { ArDoCoResult result = resultMap.get(project); if (result == null || !resultHasRequiredData(result)) { ArDoCoRunner runner = getAndSetupRunner(project); @@ -160,44 +161,16 @@ protected EvaluationResults calculateEvaluationResults(ArDoCoResult arDo Set distinctTraceLinks = new LinkedHashSet<>(results.castToCollection()); Set distinctGoldStandard = new LinkedHashSet<>(goldStandard.castToCollection()); + int confusionMatrixSum = getConfusionMatrixSum(arDoCoResult); + + var calculator = ClassificationMetricsCalculator.getInstance(); + var classification = calculator.calculateMetrics(distinctTraceLinks, distinctGoldStandard, confusionMatrixSum); + return new EvaluationResults<>(classification); - // True Positives are the trace links that are contained on both lists - Set truePositives = distinctTraceLinks.stream() - .filter(tl -> isTraceLinkContainedInGoldStandard(tl, distinctGoldStandard)) - .collect(Collectors.toSet()); - ImmutableList truePositivesList = Lists.immutable.ofAll(truePositives); - - // False Positives are the trace links that are only contained in the result set - Set falsePositives = distinctTraceLinks.stream() - .filter(tl -> !isTraceLinkContainedInGoldStandard(tl, distinctGoldStandard)) - .collect(Collectors.toSet()); - ImmutableList falsePositivesList = Lists.immutable.ofAll(falsePositives); - - // False Negatives are the trace links that are only contained in the gold standard - Set falseNegatives = distinctGoldStandard.stream() - .filter(gstl -> !isGoldStandardTraceLinkContainedInTraceLinks(gstl, distinctTraceLinks)) - .collect(Collectors.toSet()); - ImmutableList falseNegativesList = Lists.immutable.ofAll(falseNegatives); - - int trueNegatives = getConfusionMatrixSum(arDoCoResult) - truePositives.size() - falsePositives.size() - falseNegatives.size(); - return EvaluationResults.createEvaluationResults(new ResultMatrix<>(truePositivesList, trueNegatives, falsePositivesList, falseNegativesList)); } protected abstract ImmutableList createTraceLinkStringList(ArDoCoResult arDoCoResult); protected abstract int getConfusionMatrixSum(ArDoCoResult arDoCoResult); - private static boolean areTraceLinksMatching(String goldStandardTraceLink, String traceLink) { - traceLink = traceLink.strip(); - goldStandardTraceLink = goldStandardTraceLink.strip(); - return (goldStandardTraceLink.equals(traceLink)); - } - - private static boolean isTraceLinkContainedInGoldStandard(String traceLink, Set goldStandard) { - return goldStandard.stream().anyMatch(goldStandardTraceLink -> areTraceLinksMatching(goldStandardTraceLink, traceLink)); - } - - private static boolean isGoldStandardTraceLinkContainedInTraceLinks(String goldStandardTraceLink, Set traceLinks) { - return traceLinks.stream().anyMatch(traceLink -> areTraceLinksMatching(goldStandardTraceLink, traceLink)); - } } diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TestLink.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/ModelElementSentenceLink.java similarity index 51% rename from tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TestLink.java rename to tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/ModelElementSentenceLink.java index 42bb6c3..04ed6d4 100644 --- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TestLink.java +++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/ModelElementSentenceLink.java @@ -8,15 +8,15 @@ /** * Represents a simple trace link by the id of the model and number of the sentence involved. */ -public record TestLink(String modelId, int sentenceNr) implements Comparable { +public record ModelElementSentenceLink(String modelElementId, int sentenceNumber) implements Comparable { - public TestLink(SadSamTraceLink traceLink) { + public ModelElementSentenceLink(SadSamTraceLink traceLink) { this(traceLink.getModelElementUid(), traceLink.getSentenceNumber()); } @Override - public int compareTo(TestLink o) { - return Comparator.comparing(TestLink::modelId).thenComparing(TestLink::sentenceNr).compare(this, o); + public int compareTo(ModelElementSentenceLink o) { + return Comparator.comparing(ModelElementSentenceLink::modelElementId).thenComparing(ModelElementSentenceLink::sentenceNumber).compare(this, o); } } diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TLRUtil.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TLRUtil.java index cf5ef6b..e21bf0d 100644 --- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TLRUtil.java +++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TLRUtil.java @@ -28,8 +28,8 @@ private TLRUtil() { * @param data the {@link EvaluationResults} * @return the trace links */ - public static ImmutableList getTraceLinks(DataRepository data) { - var traceLinks = Lists.mutable.empty(); + public static ImmutableList getTraceLinks(DataRepository data) { + var traceLinks = Lists.mutable.empty(); var connectionStates = data.getData(ConnectionStates.ID, ConnectionStates.class).orElseThrow(); var modelStates = data.getData(ModelStates.ID, ModelStates.class).orElseThrow(); @@ -40,7 +40,7 @@ public static ImmutableList getTraceLinks(DataRepository data) { .map(connectionStates::getConnectionState) .toList(); for (var connectionState : connectionStatesList) { - traceLinks.addAll(connectionState.getTraceLinks().stream().map(TestLink::new).toList()); + traceLinks.addAll(connectionState.getTraceLinks().stream().map(ModelElementSentenceLink::new).toList()); } return traceLinks.toImmutable(); } diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLDiffFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLDiffFile.java deleted file mode 100644 index 5489385..0000000 --- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLDiffFile.java +++ /dev/null @@ -1,140 +0,0 @@ -/* Licensed under MIT 2022-2024. */ -package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardOpenOption; -import java.text.DecimalFormat; -import java.util.Collection; -import java.util.Comparator; -import java.util.List; -import java.util.Map; - -import org.eclipse.collections.api.tuple.Pair; - -import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult; -import edu.kit.kastel.mcse.ardoco.core.common.util.CommonUtilities; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink; - -/** - * This is a helper class to write out a diff-file for the evaluation results of TLR. - */ -public class TLDiffFile { - - private static final DecimalFormat NUMBER_FORMAT = new DecimalFormat("+##0.00%;-##0.00%"); - private static final String LINE_SEPARATOR = System.lineSeparator(); - - private TLDiffFile() { - throw new IllegalAccessError("This constructor should not be called!"); - } - - /** - * Writes out the differences of new and old results. - * - * @param targetFile file to write into - * @param newProjectResults new results - * @param oldProjectResults old results - * @param dataMap the mapping of Project to ArDoCoResult of the new run - * @throws IOException if writing fails - */ - public static void save(Path targetFile, Collection>> newProjectResults, - Collection>> oldProjectResults, Map dataMap) - throws IOException { - // Assumption: Both collections contain the same projects - - newProjectResults = newProjectResults.stream().sorted(Comparator.comparing(x -> x.getOne().getProjectName())).toList(); - oldProjectResults = oldProjectResults.stream().sorted(Comparator.comparing(x -> x.getOne().getProjectName())).toList(); - - var builder = new StringBuilder(); - - builder.append("Time of evaluation: `").append(CommonUtilities.getCurrentTimeAsString()).append("`"); - builder.append(LINE_SEPARATOR); - - var newResults = newProjectResults.stream().map(Pair::getTwo).toList(); - var oldResults = newProjectResults.stream().map(Pair::getTwo).toList(); - - // Append average differences in precision, recall, f1 - var oldAvgPrecision = oldResults.stream().mapToDouble(EvaluationResults::precision).average().orElse(Double.NaN); - var oldAvgRecall = oldResults.stream().mapToDouble(EvaluationResults::recall).average().orElse(Double.NaN); - var oldAvgF1 = oldResults.stream().mapToDouble(EvaluationResults::f1).average().orElse(Double.NaN); - var newAvgPrecision = newResults.stream().mapToDouble(EvaluationResults::precision).average().orElse(Double.NaN); - var newAvgRecall = newResults.stream().mapToDouble(EvaluationResults::recall).average().orElse(Double.NaN); - var newAvgF1 = newResults.stream().mapToDouble(EvaluationResults::f1).average().orElse(Double.NaN); - - builder.append("Ø "); - builder.append(NUMBER_FORMAT.format(newAvgPrecision - oldAvgPrecision)).append(" Precision, "); - builder.append(NUMBER_FORMAT.format(newAvgRecall - oldAvgRecall)).append(" Recall, "); - builder.append(NUMBER_FORMAT.format(newAvgF1 - oldAvgF1)).append(" F1"); - builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR); - - // Append project specific details - for (Pair> oldProjectResult : oldProjectResults) { - var project = oldProjectResult.getOne(); - var newResultOptional = newProjectResults.stream().filter(r -> r.getOne().equals(project)).findAny(); - if (newResultOptional.isEmpty()) { - continue; - } - var newResult = newResultOptional.get().getTwo(); - var data = dataMap.get(project); - - builder.append("# ").append(project.getProjectName()); - builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR); - - var oldResult = oldProjectResult.getTwo(); - builder.append(NUMBER_FORMAT.format(newResult.precision() - oldResult.precision())).append(" Precision, "); - builder.append(NUMBER_FORMAT.format(newResult.recall() - oldResult.recall())).append(" Recall, "); - builder.append(NUMBER_FORMAT.format(newResult.f1() - oldResult.f1())).append(" F1"); - builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR); - - var newTruePositives = findNewLinks(oldResult.truePositives().castToList(), newResult.truePositives().castToList()); - appendList(builder, "New true positives", newTruePositives, data); - - var newFalsePositives = findNewLinks(oldResult.falsePositives().castToList(), newResult.falsePositives().castToList()); - appendList(builder, "New false positives", newFalsePositives, data); - - var newFalseNegatives = findNewLinks(oldResult.falseNegatives().castToList(), newResult.falseNegatives().castToList()); - appendList(builder, "New false negatives", newFalseNegatives, data); - - var lostFalsePositives = findMissingLinks(oldResult.falsePositives().castToList(), newResult.falsePositives().castToList()); - appendList(builder, "False positives that are now true negatives", lostFalsePositives, data); - - builder.append(LINE_SEPARATOR); - } - - Files.writeString(targetFile, builder.toString(), StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); - } - - private static List findNewLinks(List oldLinks, List newLinks) { - return newLinks.stream().filter(link -> !oldLinks.contains(link)).toList(); - } - - private static List findMissingLinks(List oldLinks, List newLinks) { - return oldLinks.stream().filter(link -> !newLinks.contains(link)).toList(); - } - - private static void appendList(StringBuilder builder, String description, List links, ArDoCoResult arDoCoResult) { - var text = arDoCoResult.getText(); - if (links.isEmpty()) { - return; - } - - builder.append(description).append(":"); - builder.append(LINE_SEPARATOR); - - for (TestLink link : links) { - for (var modelId : arDoCoResult.getModelIds()) { - var dataModel = arDoCoResult.getModelState(modelId); - var line = TLSummaryFile.format(link, text, dataModel); - if (line != null && !line.isBlank()) { - builder.append("- ").append(line).append(LINE_SEPARATOR); - } - } - } - - builder.append(LINE_SEPARATOR); - } - -} diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLGoldStandardFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLGoldStandardFile.java deleted file mode 100644 index 626fd7b..0000000 --- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLGoldStandardFile.java +++ /dev/null @@ -1,35 +0,0 @@ -/* Licensed under MIT 2022-2024. */ -package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.List; - -import org.eclipse.collections.api.factory.Lists; -import org.eclipse.collections.api.list.MutableList; - -import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink; - -public class TLGoldStandardFile { - - private TLGoldStandardFile() { - // no instantiation - throw new IllegalAccessError("No instantiation allowed"); - } - - public static MutableList loadLinks(GoldStandardProject goldStandardProject) throws IOException { - Path path = goldStandardProject.getTlrGoldStandardFile().toPath(); - List lines = Files.readAllLines(path); - - return Lists.mutable.ofAll(lines.stream() - .skip(1) // skip csv header - .map(line -> line.split(",")) // modelElementId,sentenceNr - .map(array -> new TestLink(array[0], Integer.parseInt(array[1]))) - .map(link -> new TestLink(link.modelId(), link.sentenceNr() - 1)) - // ^ goldstandard sentences start with 1 while ISentences are zero indexed - .toList()); - } - -} diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLLogFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLLogFile.java deleted file mode 100644 index 7ef9dd0..0000000 --- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLLogFile.java +++ /dev/null @@ -1,68 +0,0 @@ -/* Licensed under MIT 2022-2024. */ -package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardOpenOption; -import java.text.DecimalFormat; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; - -import org.eclipse.collections.api.tuple.Pair; - -import edu.kit.kastel.mcse.ardoco.core.common.util.CommonUtilities; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink; - -/** - * This helper-class offer functionality to write out a log of the results for TLR. - */ -public class TLLogFile { - private static final String LINE_SEPARATOR = System.lineSeparator(); - private static final DecimalFormat NUMBER_FORMAT = new DecimalFormat("##0.00%"); - - private TLLogFile() { - throw new IllegalAccessError("This constructor should not be called!"); - } - - /** - * Appends the given results to the given file. - * - * @param targetFile file to append to - * @param projectResults the results to write out - * @throws IOException if writing to file system fails - */ - public static void append(Path targetFile, List>> projectResults) throws IOException { - List> results = projectResults.stream().map(Pair::getTwo).toList(); - var builder = new StringBuilder(); - - builder.append("- `").append(CommonUtilities.getCurrentTimeAsString()).append("` "); - - // calc average - double avgPrecision = results.stream().mapToDouble(EvaluationResults::precision).average().orElse(Double.NaN); - double avgRecall = results.stream().mapToDouble(EvaluationResults::recall).average().orElse(Double.NaN); - double avgF1 = results.stream().mapToDouble(EvaluationResults::f1).average().orElse(Double.NaN); - - builder.append(String.format("[`Ø` %s %s %s]", NUMBER_FORMAT.format(avgPrecision), NUMBER_FORMAT.format(avgRecall), NUMBER_FORMAT.format(avgF1))); - - var sortedResults = new ArrayList<>(projectResults); - sortedResults.sort(Comparator.comparing(x -> x.getOne().getProjectName())); - for (Pair> projectResult : sortedResults) { - String alias = projectResult.getOne().getAlias(); - EvaluationResults result = projectResult.getTwo(); - String precision = NUMBER_FORMAT.format(result.precision()); - String recall = NUMBER_FORMAT.format(result.recall()); - String F1 = NUMBER_FORMAT.format(result.f1()); - - builder.append(String.format(" [`%s` %s %s %s]", alias, precision, recall, F1)); - } - - builder.append(LINE_SEPARATOR); - - Files.writeString(targetFile, builder.toString(), StandardOpenOption.CREATE, StandardOpenOption.APPEND); - } - -} diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLModelFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLModelFile.java deleted file mode 100644 index 0afdc95..0000000 --- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLModelFile.java +++ /dev/null @@ -1,68 +0,0 @@ -/* Licensed under MIT 2022-2024. */ -package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardOpenOption; -import java.util.Map; - -import edu.kit.kastel.mcse.ardoco.core.api.models.ModelInstance; -import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject; - -/** - * This helper-class offers functionality to write out information about the models as seen by ArDoCo after evaluation of TLR. - */ -public class TLModelFile { - - private static final String LINE_SEPARATOR = System.lineSeparator(); - - private TLModelFile() { - throw new IllegalAccessError("This constructor should not be called!"); - } - - /** - * Writes out information about models to the target file. - * - * @param targetFile the file to write to - * @param dataMap the data map to extract model information for each project - * @throws IOException if writing to file system fails - */ - public static void save(Path targetFile, Map dataMap) throws IOException { - var projects = dataMap.keySet().stream().sorted().toList(); - var builder = new StringBuilder(); - - for (GoldStandardProject project : projects) { - var projectData = dataMap.get(project); - - builder.append("# ").append(project.getProjectName()); - builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR); - - for (var modelId : projectData.getModelIds()) { - var models = projectData.getModelState(modelId).getInstances(); - builder.append("## ModelId: ").append(modelId); - builder.append(LINE_SEPARATOR); - for (ModelInstance model : models) { - builder.append("- [") - .append(model.getUid()) - .append("]: \"") - .append(model.getFullName()) - .append("\" (") - .append(model.getFullType()) - .append(") (") - .append(String.join(", ", model.getNameParts())) - .append(") (") - .append(String.join(", ", model.getTypeParts())) - .append(")") - .append(LINE_SEPARATOR); - } - } - - builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR); - } - - Files.writeString(targetFile, builder.toString(), StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); - } - -} diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLPreviousFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLPreviousFile.java deleted file mode 100644 index 52c2802..0000000 --- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLPreviousFile.java +++ /dev/null @@ -1,112 +0,0 @@ -/* Licensed under MIT 2022-2024. */ -package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardOpenOption; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Comparator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; - -import org.eclipse.collections.api.factory.Lists; -import org.eclipse.collections.api.tuple.Pair; -import org.eclipse.collections.impl.tuple.Tuples; -import org.slf4j.Logger; - -import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult; -import edu.kit.kastel.mcse.ardoco.core.tests.TestUtil; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.Project; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink; - -/** - * This is a helper class to load and write out the results of the previous evaluation run for TLR results. - */ -public class TLPreviousFile { - private static final String LINE_SEPARATOR = System.lineSeparator(); - - private TLPreviousFile() { - throw new IllegalAccessError("This constructor should not be called!"); - } - - /** - * Loads the previous results - * - * @param sourceFile file to load from - * @return the previous results - * @throws IOException if file access fails - */ - public static Collection>> load(Path sourceFile, - final Map DATA_MAP) throws IOException { - List lines = Files.readAllLines(sourceFile); - Map> foundLinkMap = new LinkedHashMap<>(); - List>> results = new ArrayList<>(); - - for (String line : lines) { - var parts = line.split(",", -1); - Project project = Project.valueOf(parts[0]); - String modelId = parts[1]; - int sentenceNr = Integer.parseInt(parts[2]); - - var testLink = new TestLink(modelId, sentenceNr); - - if (!foundLinkMap.containsKey(project)) { - foundLinkMap.put(project, new ArrayList<>()); - } - - foundLinkMap.get(project).add(testLink); - } - - for (Project project : foundLinkMap.keySet()) { - var correctLinks = TLGoldStandardFile.loadLinks(project); - var foundLinks = foundLinkMap.get(project); - - ArDoCoResult arDoCoResult = DATA_MAP.get(project); - if (arDoCoResult != null) { - results.add(Tuples.pair(project, TestUtil.compareTLR(arDoCoResult, Lists.immutable.ofAll(foundLinks), correctLinks.toImmutable()))); - } - } - - return results; - } - - /** - * Saves the given results to the given file. - * - * @param targetFile file to save to - * @param projectResults results to save - * @throws IOException if writing to file system fails - */ - public static void save(Path targetFile, Collection>> projectResults, Logger logger) - throws IOException { - if (Files.exists(targetFile)) { - logger.warn("File with the results of the previous evaluation run already exists."); - return; // do not overwrite - } - - var sortedResults = new ArrayList<>(projectResults); - sortedResults.sort(Comparator.comparing(x -> x.getOne().getProjectName())); - - var builder = new StringBuilder(); - - for (Pair> projectResult : sortedResults) { - EvaluationResults result = projectResult.getTwo(); - for (TestLink foundLink : result.getFound()) { - builder.append(projectResult.getOne().getProjectName()); - builder.append(','); - builder.append(foundLink.modelId()); - builder.append(','); - builder.append(foundLink.sentenceNr()); - builder.append(LINE_SEPARATOR); - } - } - - Files.writeString(targetFile, builder.toString(), StandardOpenOption.CREATE); - } - -} diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLSentenceFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLSentenceFile.java deleted file mode 100644 index 0e24ada..0000000 --- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLSentenceFile.java +++ /dev/null @@ -1,53 +0,0 @@ -/* Licensed under MIT 2022-2024. */ -package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardOpenOption; -import java.util.Map; - -import org.eclipse.collections.api.list.ImmutableList; - -import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult; -import edu.kit.kastel.mcse.ardoco.core.api.text.Sentence; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject; - -/** - * This helper-class offers functionality to write out the sentences as seen by ArDoCo after the evaluation runs for TLR are done. - */ -public class TLSentenceFile { - private static final String LINE_SEPARATOR = System.lineSeparator(); - - private TLSentenceFile() { - throw new IllegalAccessError("This constructor should not be called!"); - } - - /** - * Write out the sentences from the given data map to the target file - * - * @param targetFile file to write to - * @param dataMap data to extract the sentences from - * @throws IOException if writing to file system fails - */ - public static void save(Path targetFile, Map dataMap) throws IOException { - var projects = dataMap.keySet().stream().sorted().toList(); - var builder = new StringBuilder(); - - for (GoldStandardProject project : projects) { - ImmutableList sentences = dataMap.get(project).getText().getSentences(); - - builder.append("# ").append(project.getProjectName()); - builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR); - - for (Sentence sentence : sentences) { - builder.append("- [").append(sentence.getSentenceNumber()).append("]: ").append(sentence.getText()).append(LINE_SEPARATOR); - } - - builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR); - } - - Files.writeString(targetFile, builder.toString(), StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); - } - -} diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLSummaryFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLSummaryFile.java deleted file mode 100644 index 7d9e650..0000000 --- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLSummaryFile.java +++ /dev/null @@ -1,147 +0,0 @@ -/* Licensed under MIT 2022-2024. */ -package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardOpenOption; -import java.text.DecimalFormat; -import java.util.Collection; -import java.util.List; -import java.util.Map; - -import org.eclipse.collections.api.factory.Lists; -import org.eclipse.collections.api.tuple.Pair; - -import edu.kit.kastel.mcse.ardoco.core.api.models.LegacyModelExtractionState; -import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult; -import edu.kit.kastel.mcse.ardoco.core.api.text.Text; -import edu.kit.kastel.mcse.ardoco.core.common.util.CommonUtilities; -import edu.kit.kastel.mcse.ardoco.core.tests.TestUtil; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults; -import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.calculator.ResultCalculatorUtil; -import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink; - -/** - * This helper class offers functionality to write out a summary of the TLR evaluation runs for all projects. - */ -public class TLSummaryFile { - private static final DecimalFormat NUMBER_FORMAT = new DecimalFormat("##0.00%"); - private static final String LINE_SEPARATOR = System.lineSeparator(); - - private TLSummaryFile() { - throw new IllegalAccessError("This constructor should not be called!"); - } - - /** - * Writes a summary of the given results, data etc. to the given file. - * - * @param targetFile file to write to - * @param results results of the runs - * @param dataMap the outcomes (data) of the runs - * @throws IOException if writing to file system fails - */ - public static void save(Path targetFile, Collection>> results, - Map dataMap) throws IOException { - var sortedResults = results.stream().sorted().toList(); - var builder = new StringBuilder(); - - builder.append("Time of evaluation: `").append(CommonUtilities.getCurrentTimeAsString()).append("`"); - builder.append(LINE_SEPARATOR); - - appendOverallResults(sortedResults, builder); - - for (var result : sortedResults) { - appendProjectResultSummary(dataMap, builder, result); - builder.append(LINE_SEPARATOR); - } - - Files.writeString(targetFile, builder.toString(), StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); - } - - private static void appendProjectResultSummary(Map dataMap, StringBuilder builder, - Pair> projectResult) { - var data = dataMap.get(projectResult.getOne()); - var text = data.getText(); - - var result = projectResult.getTwo(); - - var precision = NUMBER_FORMAT.format(result.precision()); - var recall = NUMBER_FORMAT.format(result.recall()); - var f1Score = NUMBER_FORMAT.format(result.f1()); - var truePosCount = result.truePositives().size(); - var falsePositives = result.falsePositives(); - var falsePosCount = falsePositives.size(); - var falseNegatives = result.falseNegatives(); - var falseNegCount = falseNegatives.size(); - - builder.append("# ").append(projectResult.getOne().getProjectName()); - builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR); - - builder.append("Summary:").append(LINE_SEPARATOR); - builder.append(String.format("- %s Precision / %s Recall / %s F1", precision, recall, f1Score)); - builder.append(LINE_SEPARATOR); - builder.append(String.format("- %s True Positives / %s False Positives / %s False Negatives", truePosCount, falsePosCount, falseNegCount)); - builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR); - - if (!falsePositives.isEmpty()) { - var falsePositivesOutput = createFalseLinksOutput("False Positives", falsePositives.castToList(), data, text); - builder.append(falsePositivesOutput); - } - - if (!falseNegatives.isEmpty()) { - var falseNegativesOutput = createFalseLinksOutput("False Negatives", falseNegatives.castToList(), data, text); - builder.append(falseNegativesOutput); - } - } - - private static void appendOverallResults(List>> projectResults, StringBuilder builder) { - var results = Lists.mutable.ofAll(projectResults.stream().map(Pair::getTwo).toList()); - var weightedResults = ResultCalculatorUtil.calculateWeightedAverageResults(results.toImmutable()); - var macroResults = ResultCalculatorUtil.calculateAverageResults(results.toImmutable()); - var resultString = TestUtil.createResultLogString("Overall Weighted", weightedResults); - builder.append(resultString).append(LINE_SEPARATOR); - resultString = TestUtil.createResultLogString("Overall Macro", macroResults); - builder.append(resultString).append(LINE_SEPARATOR).append(LINE_SEPARATOR); - } - - private static String createFalseLinksOutput(String type, List falseLinks, ArDoCoResult data, Text text) { - var builder = new StringBuilder(); - builder.append(type).append(":").append(LINE_SEPARATOR); - - for (TestLink falseLink : falseLinks) { - builder.append(createFalseLinkOutput(data, text, falseLink)); - } - - builder.append(LINE_SEPARATOR); - return builder.toString(); - } - - private static String createFalseLinkOutput(ArDoCoResult data, Text text, TestLink falseLink) { - var builder = new StringBuilder(); - for (var modelId : data.getModelIds()) { - var dataModel = data.getModelState(modelId); - var line = format(falseLink, text, dataModel); - if (line != null && !line.isBlank()) { - builder.append("- ").append(line).append(LINE_SEPARATOR); - } - } - return builder.toString(); - } - - static String format(TestLink link, Text text, LegacyModelExtractionState modelState) { - var model = modelState.getInstances().stream().filter(m -> m.getUid().equals(link.modelId())).findAny().orElse(null); - var sentence = text.getSentences().stream().filter(s -> s.getSentenceNumber() == link.sentenceNr()).findAny().orElse(null); - - if (model == null && sentence == null) { - return null; - } - - var modelStr = model == null ? link.modelId() : "\"" + model.getFullName() + "\""; - var sentenceStr = sentence == null ? String.valueOf(link.sentenceNr()) : "\"" + sentence.getText() + "\""; - - return String.format("%s ⇔ %s [%s,%s]", modelStr, sentenceStr, link.modelId(), link.sentenceNr()); - } - -}