Skip to content

Commit

Permalink
Merge pull request #2 from ArDoCo/feature/metrics
Browse files Browse the repository at this point in the history
Use Metrics project
  • Loading branch information
dfuchss authored Aug 23, 2024
2 parents 00a2101 + c2fa58d commit 158cc58
Show file tree
Hide file tree
Showing 29 changed files with 420 additions and 1,923 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import edu.kit.kastel.mcse.ardoco.core.execution.runner.AnonymousRunner;
import edu.kit.kastel.mcse.ardoco.core.pipeline.AbstractPipelineStep;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.HistoricProject;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.Project;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.helper.StageTest;
import edu.kit.kastel.mcse.ardoco.tlr.text.providers.TextPreprocessingAgent;
Expand All @@ -40,9 +41,7 @@ protected TextExtractionResult runComparable(TextProject project, SortedMap<Stri
var wordAbbreviations = DataRepositoryHelper.getTextState(dataRepository).getWordAbbreviations();
var phraseAbbreviations = DataRepositoryHelper.getTextState(dataRepository).getPhraseAbbreviations();

var result = new TextExtractionResult(wordAbbreviations, phraseAbbreviations);

return result;
return new TextExtractionResult(wordAbbreviations, phraseAbbreviations);
}

@Override
Expand Down Expand Up @@ -87,14 +86,6 @@ void evaluateNonHistoricalDiagramRecognition(TextProject project) {
runComparable(project);
}

/**
 * Parameterized test that invokes {@code runComparable} once for each {@link TextProject} constant whose name matches the pattern
 * {@code ^.*HISTORICAL$}.
 * NOTE(review): the method name mentions diagram recognition while the display name says "Text Extraction" - confirm which is intended.
 *
 * @param project the historical project handed in by the enum source
 */
@DisplayName("Evaluate Text Extraction (Historical)")
@ParameterizedTest(name = "{0}")
@EnumSource(value = TextProject.class, mode = EnumSource.Mode.MATCH_ALL, names = "^.*HISTORICAL$")
@Order(2)
void evaluateHistoricalDiagramRecognition(TextProject project) {
runComparable(project);
}

public enum TextProject implements GoldStandardProject {
MEDIASTORE(//
Project.MEDIASTORE, //
Expand All @@ -117,26 +108,26 @@ public enum TextProject implements GoldStandardProject {
"FreeSWITCH Event Socket Layer" }), new Disambiguation("SVG", new String[] { "scalable vector graphics" })) //
), //
TEASTORE_HISTORICAL( //
Project.TEASTORE_HISTORICAL, //
HistoricProject.TEASTORE_HISTORICAL, //
List.of(new Disambiguation("REST", new String[] { "representational state transfer" }), new Disambiguation("JSP", new String[] {
"Java Server Page" }), new Disambiguation("JSPs", new String[] { "Java Server Pages" }), new Disambiguation("OPEN.xtrace",
new String[] { "Open Execution Trace " + "Exchange" })) //
), //
TEAMMATES_HISTORICAL( //
Project.TEAMMATES_HISTORICAL, //
HistoricProject.TEAMMATES_HISTORICAL, //
List.of(new Disambiguation("GAE", new String[] { "Google App Engine" }), new Disambiguation("JSP", new String[] { "Java Server Pages" }),
new Disambiguation("POJOs", new String[] { "Plain Old Java Objects" }), new Disambiguation("CRUD", new String[] {
"Create Read Update Delete" })) //
), //
BIGBLUEBUTTON_HISTORICAL( //
Project.BIGBLUEBUTTON_HISTORICAL, //
HistoricProject.BIGBLUEBUTTON_HISTORICAL, //
List.of(new Disambiguation("LMS", new String[] { "learning management system" })) //
);

private final Project project;
private final GoldStandardProject project;
private final ImmutableList<Disambiguation> disambiguations;

TextProject(Project project, List<Disambiguation> disambiguations) {
TextProject(GoldStandardProject project, List<Disambiguation> disambiguations) {
this.project = project;
this.disambiguations = Lists.immutable.ofAll(disambiguations);
}
Expand Down
6 changes: 6 additions & 0 deletions tests/integration-tests/tests-base/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@
<groupId>com.tngtech.archunit</groupId>
<artifactId>archunit-junit5</artifactId>
</dependency>
<dependency>
<groupId>io.github.ardoco</groupId>
<artifactId>metrics</artifactId>
<version>0.1.1-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>io.github.ardoco.core</groupId>
<artifactId>common</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,14 @@

import java.util.Locale;
import java.util.Set;
import java.util.stream.Collectors;

import org.eclipse.collections.api.collection.ImmutableCollection;
import org.eclipse.collections.api.factory.Lists;
import org.eclipse.collections.api.list.ImmutableList;
import org.slf4j.Logger;

import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ExpectedResults;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ResultMatrix;
import edu.kit.kastel.mcse.ardoco.metrics.ClassificationMetricsCalculator;

/**
* This utility class provides methods for running the tests, especially regarding the evaluations.
Expand All @@ -37,20 +34,18 @@ public static <T> EvaluationResults<T> compareTLR(ArDoCoResult arDoCoResult, Imm
Set<T> distinctTraceLinks = new java.util.LinkedHashSet<>(results.castToCollection());
Set<T> distinctGoldStandard = new java.util.LinkedHashSet<>(goldStandard.castToCollection());

// True Positives are the trace links that are contained on both lists
Set<T> truePositives = distinctTraceLinks.stream().filter(distinctGoldStandard::contains).collect(Collectors.toSet());
ImmutableList<T> truePositivesList = Lists.immutable.ofAll(truePositives);
int sentences = arDoCoResult.getText().getSentences().size();
int modelElements = 0;
for (var model : arDoCoResult.getModelIds()) {
modelElements += arDoCoResult.getModelState(model).getInstances().size();
}

// False Positives are the trace links that are only contained in the result set
Set<T> falsePositives = distinctTraceLinks.stream().filter(tl -> !distinctGoldStandard.contains(tl)).collect(Collectors.toSet());
ImmutableList<T> falsePositivesList = Lists.immutable.ofAll(falsePositives);
int confusionMatrixSum = sentences * modelElements;

// False Negatives are the trace links that are only contained in the gold standard
Set<T> falseNegatives = distinctGoldStandard.stream().filter(tl -> !distinctTraceLinks.contains(tl)).collect(Collectors.toSet());
ImmutableList<T> falseNegativesList = Lists.immutable.ofAll(falseNegatives);
var calculator = ClassificationMetricsCalculator.getInstance();

int trueNegatives = TestUtil.calculateTrueNegativesForTLR(arDoCoResult, truePositives.size(), falsePositives.size(), falseNegatives.size());
return EvaluationResults.createEvaluationResults(new ResultMatrix<>(truePositivesList, trueNegatives, falsePositivesList, falseNegativesList));
var classification = calculator.calculateMetrics(distinctTraceLinks, distinctGoldStandard, confusionMatrixSum);
return new EvaluationResults<>(classification);
}

/**
Expand All @@ -67,57 +62,11 @@ public static <T> EvaluationResults<T> compareInconsistencies(ArDoCoResult arDoC
Set<T> distinctTraceLinks = new java.util.LinkedHashSet<>(results.castToCollection());
Set<T> distinctGoldStandard = new java.util.LinkedHashSet<>(goldStandard.castToCollection());

// True Positives are the trace links that are contained on both lists
Set<T> truePositives = distinctTraceLinks.stream().filter(distinctGoldStandard::contains).collect(Collectors.toSet());
ImmutableList<T> truePositivesList = Lists.immutable.ofAll(truePositives);

// False Positives are the trace links that are only contained in the result set
Set<T> falsePositives = distinctTraceLinks.stream().filter(tl -> !distinctGoldStandard.contains(tl)).collect(Collectors.toSet());
ImmutableList<T> falsePositivesList = Lists.immutable.ofAll(falsePositives);

// False Negatives are the trace links that are only contained in the gold standard
Set<T> falseNegatives = distinctGoldStandard.stream().filter(tl -> !distinctTraceLinks.contains(tl)).collect(Collectors.toSet());
ImmutableList<T> falseNegativesList = Lists.immutable.ofAll(falseNegatives);

int trueNegatives = TestUtil.calculateTrueNegativesForInconsistencies(arDoCoResult, truePositives.size(), falsePositives.size(), falseNegatives.size());
return EvaluationResults.createEvaluationResults(new ResultMatrix<>(truePositivesList, trueNegatives, falsePositivesList, falseNegativesList));
}

/**
 * Derives the number of true negatives for trace link recovery from the given {@link ArDoCoResult}. The total size of the confusion matrix is taken to
 * be the number of sentences multiplied by the number of model instances summed over all models; the true positives, false positives, and false
 * negatives are then subtracted from that total.
 *
 * @param arDoCoResult   the output of ArDoCo
 * @param truePositives  number of true positives
 * @param falsePositives number of false positives
 * @param falseNegatives number of false negatives
 * @return the number of true negatives
 */
public static int calculateTrueNegativesForTLR(ArDoCoResult arDoCoResult, int truePositives, int falsePositives, int falseNegatives) {
    final int sentenceCount = arDoCoResult.getText().getSentences().size();

    // Accumulate the instance count of every model known to the result.
    int instanceCount = 0;
    for (var modelId : arDoCoResult.getModelIds()) {
        var instances = arDoCoResult.getModelState(modelId).getInstances();
        instanceCount += instances.size();
    }

    // Everything that is not a TP, FP, or FN in the sentence x instance matrix counts as a true negative.
    final int classifiedPairs = truePositives + falsePositives + falseNegatives;
    return sentenceCount * instanceCount - classifiedPairs;
}

/**
* Calculates the number of true negatives based on the given {@link ArDoCoResult} and the calculated {@link EvaluationResults evaluation results}. Uses the
* total sum of all sentences in the {@link ArDoCoResult} and then subtracts the true positives, false positives, and false negatives.
*
* @param arDoCoResult the output of ArDoCo
* @param truePositives nr of true positives
* @param falsePositives nr of false positives
* @param falseNegatives nr of false negatives
* @return the number of true negatives
*/
public static int calculateTrueNegativesForInconsistencies(ArDoCoResult arDoCoResult, int truePositives, int falsePositives, int falseNegatives) {
int numberOfSentences = arDoCoResult.getText().getSentences().size();
return numberOfSentences - (truePositives + falsePositives + falseNegatives);
int confusionMatrixSum = arDoCoResult.getText().getSentences().size();

var calculator = ClassificationMetricsCalculator.getInstance();
var classification = calculator.calculateMetrics(distinctTraceLinks, distinctGoldStandard, confusionMatrixSum);
return new EvaluationResults<>(classification);
}

/**
Expand Down Expand Up @@ -168,19 +117,6 @@ public static void logExplicitResults(Logger logger, String name, EvaluationResu
logger.info(logString);
}

/**
 * Logs the provided {@link EvaluationResults} together with the expected results at info level, using the given logger and name.
 *
 * @param logger          the logger to write to
 * @param name            the name shown as heading of the output
 * @param results         the results to log
 * @param expectedResults the expected results the output is rendered against
 */
public static void logResultsWithExpected(Logger logger, String name, EvaluationResults<?> results, ExpectedResults expectedResults) {
    var comparison = results.getResultStringWithExpected(expectedResults);
    // Locale.ENGLISH keeps the formatting independent of the platform default locale.
    logger.info(String.format(Locale.ENGLISH, "%n%s:%n%s", name, comparison));
}

public static void logExtendedResultsWithExpected(Logger logger, Object testClass, String name, EvaluationResults<?> results,
ExpectedResults expectedResults) {
var infoString = String.format(Locale.ENGLISH, """
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public class DeterministicArDoCoTest {

@ArchTest
public static final ArchRule forbidUnorderedSetsAndMaps = noClasses().that()
.resideOutsideOfPackages("..tests..")
.resideOutsideOfPackages("..tests..", "..metrics..")
.and(areNotDirectlyAnnotatedWith(Deterministic.class))
.should()
.accessClassesThat(areForbiddenClasses())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
Expand Down Expand Up @@ -159,8 +160,7 @@ public String getCodeModelDirectory() {
loadCodeModelFromResourcesIfNeeded();
return getTemporaryCodeLocation().getAbsolutePath();
} catch (IOException e) {
logger.error(e.getMessage(), e);
return null;
throw new UncheckedIOException(e);
}
}

Expand Down
Loading

0 comments on commit 158cc58

Please sign in to comment.