Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Metrics project #2

Merged
merged 14 commits into from
Aug 23, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import edu.kit.kastel.mcse.ardoco.core.execution.runner.AnonymousRunner;
import edu.kit.kastel.mcse.ardoco.core.pipeline.AbstractPipelineStep;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.HistoricProject;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.Project;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.helper.StageTest;
import edu.kit.kastel.mcse.ardoco.tlr.text.providers.TextPreprocessingAgent;
Expand All @@ -40,9 +41,7 @@ protected TextExtractionResult runComparable(TextProject project, SortedMap<Stri
var wordAbbreviations = DataRepositoryHelper.getTextState(dataRepository).getWordAbbreviations();
var phraseAbbreviations = DataRepositoryHelper.getTextState(dataRepository).getPhraseAbbreviations();

var result = new TextExtractionResult(wordAbbreviations, phraseAbbreviations);

return result;
return new TextExtractionResult(wordAbbreviations, phraseAbbreviations);
}

@Override
Expand Down Expand Up @@ -87,14 +86,6 @@ void evaluateNonHistoricalDiagramRecognition(TextProject project) {
runComparable(project);
}

/**
 * Parameterized evaluation that is run once per {@link TextProject} constant whose name matches the pattern {@code ^.*HISTORICAL$} (see the
 * {@link EnumSource} configuration below). Each invocation simply delegates to {@code runComparable(project)}.
 * <p>
 * NOTE(review): the method name refers to "DiagramRecognition" while the display name says "Text Extraction" - confirm which one is intended.
 *
 * @param project the historical text project to evaluate
 */
@DisplayName("Evaluate Text Extraction (Historical)")
@ParameterizedTest(name = "{0}")
@EnumSource(value = TextProject.class, mode = EnumSource.Mode.MATCH_ALL, names = "^.*HISTORICAL$")
@Order(2)
void evaluateHistoricalDiagramRecognition(TextProject project) {
runComparable(project);
}

public enum TextProject implements GoldStandardProject {
MEDIASTORE(//
Project.MEDIASTORE, //
Expand All @@ -117,26 +108,26 @@ public enum TextProject implements GoldStandardProject {
"FreeSWITCH Event Socket Layer" }), new Disambiguation("SVG", new String[] { "scalable vector graphics" })) //
), //
TEASTORE_HISTORICAL( //
Project.TEASTORE_HISTORICAL, //
HistoricProject.TEASTORE_HISTORICAL, //
List.of(new Disambiguation("REST", new String[] { "representational state transfer" }), new Disambiguation("JSP", new String[] {
"Java Server Page" }), new Disambiguation("JSPs", new String[] { "Java Server Pages" }), new Disambiguation("OPEN.xtrace",
new String[] { "Open Execution Trace " + "Exchange" })) //
), //
TEAMMATES_HISTORICAL( //
Project.TEAMMATES_HISTORICAL, //
HistoricProject.TEAMMATES_HISTORICAL, //
List.of(new Disambiguation("GAE", new String[] { "Google App Engine" }), new Disambiguation("JSP", new String[] { "Java Server Pages" }),
new Disambiguation("POJOs", new String[] { "Plain Old Java Objects" }), new Disambiguation("CRUD", new String[] {
"Create Read Update Delete" })) //
), //
BIGBLUEBUTTON_HISTORICAL( //
Project.BIGBLUEBUTTON_HISTORICAL, //
HistoricProject.BIGBLUEBUTTON_HISTORICAL, //
List.of(new Disambiguation("LMS", new String[] { "learning management system" })) //
);

private final Project project;
private final GoldStandardProject project;
private final ImmutableList<Disambiguation> disambiguations;

TextProject(Project project, List<Disambiguation> disambiguations) {
TextProject(GoldStandardProject project, List<Disambiguation> disambiguations) {
this.project = project;
this.disambiguations = Lists.immutable.ofAll(disambiguations);
}
Expand Down
6 changes: 6 additions & 0 deletions tests/integration-tests/tests-base/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@
<groupId>com.tngtech.archunit</groupId>
<artifactId>archunit-junit5</artifactId>
</dependency>
<dependency>
<groupId>io.github.ardoco</groupId>
<artifactId>metrics</artifactId>
<version>0.1.1-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>io.github.ardoco.core</groupId>
<artifactId>common</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,14 @@

import java.util.Locale;
import java.util.Set;
import java.util.stream.Collectors;

import org.eclipse.collections.api.collection.ImmutableCollection;
import org.eclipse.collections.api.factory.Lists;
import org.eclipse.collections.api.list.ImmutableList;
import org.slf4j.Logger;

import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ExpectedResults;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ResultMatrix;
import edu.kit.kastel.mcse.ardoco.metrics.ClassificationMetricsCalculator;

/**
* This utility class provides methods for running the tests, especially regarding the evaluations.
Expand All @@ -37,20 +34,18 @@ public static <T> EvaluationResults<T> compareTLR(ArDoCoResult arDoCoResult, Imm
Set<T> distinctTraceLinks = new java.util.LinkedHashSet<>(results.castToCollection());
Set<T> distinctGoldStandard = new java.util.LinkedHashSet<>(goldStandard.castToCollection());

// True Positives are the trace links that are contained on both lists
Set<T> truePositives = distinctTraceLinks.stream().filter(distinctGoldStandard::contains).collect(Collectors.toSet());
ImmutableList<T> truePositivesList = Lists.immutable.ofAll(truePositives);
int sentences = arDoCoResult.getText().getSentences().size();
int modelElements = 0;
for (var model : arDoCoResult.getModelIds()) {
modelElements += arDoCoResult.getModelState(model).getInstances().size();
}

// False Positives are the trace links that are only contained in the result set
Set<T> falsePositives = distinctTraceLinks.stream().filter(tl -> !distinctGoldStandard.contains(tl)).collect(Collectors.toSet());
ImmutableList<T> falsePositivesList = Lists.immutable.ofAll(falsePositives);
int confusionMatrixSum = sentences * modelElements;
dfuchss marked this conversation as resolved.
Show resolved Hide resolved

// False Negatives are the trace links that are only contained in the gold standard
Set<T> falseNegatives = distinctGoldStandard.stream().filter(tl -> !distinctTraceLinks.contains(tl)).collect(Collectors.toSet());
ImmutableList<T> falseNegativesList = Lists.immutable.ofAll(falseNegatives);
var calculator = ClassificationMetricsCalculator.getInstance();

int trueNegatives = TestUtil.calculateTrueNegativesForTLR(arDoCoResult, truePositives.size(), falsePositives.size(), falseNegatives.size());
return EvaluationResults.createEvaluationResults(new ResultMatrix<>(truePositivesList, trueNegatives, falsePositivesList, falseNegativesList));
var classification = calculator.calculateMetrics(distinctTraceLinks, distinctGoldStandard, confusionMatrixSum);
return new EvaluationResults<>(classification);
}

/**
Expand All @@ -67,57 +62,11 @@ public static <T> EvaluationResults<T> compareInconsistencies(ArDoCoResult arDoC
Set<T> distinctTraceLinks = new java.util.LinkedHashSet<>(results.castToCollection());
Set<T> distinctGoldStandard = new java.util.LinkedHashSet<>(goldStandard.castToCollection());

// True Positives are the trace links that are contained on both lists
Set<T> truePositives = distinctTraceLinks.stream().filter(distinctGoldStandard::contains).collect(Collectors.toSet());
ImmutableList<T> truePositivesList = Lists.immutable.ofAll(truePositives);

// False Positives are the trace links that are only contained in the result set
Set<T> falsePositives = distinctTraceLinks.stream().filter(tl -> !distinctGoldStandard.contains(tl)).collect(Collectors.toSet());
ImmutableList<T> falsePositivesList = Lists.immutable.ofAll(falsePositives);

// False Negatives are the trace links that are only contained in the gold standard
Set<T> falseNegatives = distinctGoldStandard.stream().filter(tl -> !distinctTraceLinks.contains(tl)).collect(Collectors.toSet());
ImmutableList<T> falseNegativesList = Lists.immutable.ofAll(falseNegatives);

int trueNegatives = TestUtil.calculateTrueNegativesForInconsistencies(arDoCoResult, truePositives.size(), falsePositives.size(), falseNegatives.size());
return EvaluationResults.createEvaluationResults(new ResultMatrix<>(truePositivesList, trueNegatives, falsePositivesList, falseNegativesList));
}

/**
 * Derives the number of true negatives for trace link recovery from the given {@link ArDoCoResult}. The total number of entries in the confusion
 * matrix is taken to be the number of sentences multiplied by the number of model instances over all models; the true positives, false positives,
 * and false negatives are then subtracted from that total.
 *
 * @param arDoCoResult   the output of ArDoCo
 * @param truePositives  number of true positives
 * @param falsePositives number of false positives
 * @param falseNegatives number of false negatives
 * @return the number of true negatives
 */
public static int calculateTrueNegativesForTLR(ArDoCoResult arDoCoResult, int truePositives, int falsePositives, int falseNegatives) {
    final int sentenceCount = arDoCoResult.getText().getSentences().size();

    // Accumulate the instance count across every model known to the result.
    int instanceCount = 0;
    for (var modelId : arDoCoResult.getModelIds()) {
        var instances = arDoCoResult.getModelState(modelId).getInstances();
        instanceCount += instances.size();
    }

    // Confusion matrix total = sentences x model instances.
    final int matrixTotal = sentenceCount * instanceCount;
    final int nonTrueNegatives = truePositives + falsePositives + falseNegatives;
    return matrixTotal - nonTrueNegatives;
}

/**
* Calculates the number of true negatives based on the given {@link ArDoCoResult} and the calculated {@link EvaluationResults evaluation results}. Uses the
* total sum of all sentences in the {@link ArDoCoResult} and then subtracts the true positives, false positives, and false negatives.
*
* @param arDoCoResult the output of ArDoCo
* @param truePositives nr of true positives
* @param falsePositives nr of false positives
* @param falseNegatives nr of false negatives
* @return the number of true negatives
*/
public static int calculateTrueNegativesForInconsistencies(ArDoCoResult arDoCoResult, int truePositives, int falsePositives, int falseNegatives) {
int numberOfSentences = arDoCoResult.getText().getSentences().size();
return numberOfSentences - (truePositives + falsePositives + falseNegatives);
int confusionMatrixSum = arDoCoResult.getText().getSentences().size();
dfuchss marked this conversation as resolved.
Show resolved Hide resolved

var calculator = ClassificationMetricsCalculator.getInstance();
var classification = calculator.calculateMetrics(distinctTraceLinks, distinctGoldStandard, confusionMatrixSum);
return new EvaluationResults<>(classification);
}

/**
Expand Down Expand Up @@ -168,19 +117,6 @@ public static void logExplicitResults(Logger logger, String name, EvaluationResu
logger.info(logString);
}

/**
 * Logs the provided {@link EvaluationResults} at info level under the given name, alongside the expected results. The logged message consists of a
 * line break, the name followed by a colon, another line break, and the string returned by
 * {@link EvaluationResults#getResultStringWithExpected(ExpectedResults)}.
 *
 * @param logger          logger to write to
 * @param name            name to show in the output
 * @param results         the results to log
 * @param expectedResults the expected results to show next to the actual ones
 */
public static void logResultsWithExpected(Logger logger, String name, EvaluationResults<?> results, ExpectedResults expectedResults) {
    String comparison = results.getResultStringWithExpected(expectedResults);
    logger.info(String.format(Locale.ENGLISH, "%n%s:%n%s", name, comparison));
}

public static void logExtendedResultsWithExpected(Logger logger, Object testClass, String name, EvaluationResults<?> results,
ExpectedResults expectedResults) {
var infoString = String.format(Locale.ENGLISH, """
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public class DeterministicArDoCoTest {

@ArchTest
public static final ArchRule forbidUnorderedSetsAndMaps = noClasses().that()
.resideOutsideOfPackages("..tests..")
.resideOutsideOfPackages("..tests..", "..metrics..")
.and(areNotDirectlyAnnotatedWith(Deterministic.class))
.should()
.accessClassesThat(areForbiddenClasses())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
Expand Down Expand Up @@ -159,8 +160,7 @@ public String getCodeModelDirectory() {
loadCodeModelFromResourcesIfNeeded();
return getTemporaryCodeLocation().getAbsolutePath();
} catch (IOException e) {
logger.error(e.getMessage(), e);
return null;
throw new UncheckedIOException(e);
}
}

Expand Down
Loading
Loading