Skip to content

Commit

Permalink
Added neighbor record, applied naming convention, changed CLI method …
Browse files Browse the repository at this point in the history
…for MergingParamters
  • Loading branch information
uuqjz committed Aug 10, 2023
1 parent 0293908 commit 9457adb
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 56 deletions.
3 changes: 1 addition & 2 deletions cli/src/main/java/de/jplag/cli/CLI.java
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,7 @@ private static ClusteringOptions getClusteringOptions(CliOptions options) {
}

private static MergingParameters getMergingParameters(CliOptions options) {
return new MergingParameters().withEnable(options.merging.enable).withMergeBuffer(options.merging.mergeBuffer)
.withSeperatingThreshold(options.merging.seperatingThreshold);
return new MergingParameters(options.merging.enable, options.merging.mergeBuffer, options.merging.seperatingThreshold);
}

private String generateDescription() {
Expand Down
2 changes: 1 addition & 1 deletion cli/src/main/java/de/jplag/cli/CliOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public static class Advanced {
}

public static class Clustering {
@Option(names = {"--cluster-skip"}, description = "Skips the clustering (default: false)\n")
@Option(names = {"--cluster-skip"}, description = "Skips the clustering (default: false)%n")
public boolean disable;

@ArgGroup
Expand Down
108 changes: 55 additions & 53 deletions core/src/main/java/de/jplag/merging/MatchMerging.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package de.jplag.merging;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

Expand All @@ -14,11 +13,12 @@

/**
* This class implements a match merging algorithm which serves as defense mechanism against obfuscation attacks. Based
* on configurable parameters MergeBuffer and SeperatingThreshold, it alters prior results and merges all neighboring
* matches that fit the specified thresholds. When neighboring matches get merged they become one and the tokens
* separating them get removed from the submission clone. MergeBuffer describes how shorter a match can be than the
* Minimum Token Match. SeperatingThreshold describes how many tokens can be between two neighboring matches. Both are
* set in {@link JPlagOptions} as {@link MergingParameters} and default to 0 (which deactivates merging).
* on configurable parameters MergeBuffer and SeperatingThreshold, it alters prior results from pairwise submission
* comparisons and merges all neighboring matches that fit the specified thresholds. Submissions are referred to as left
* and right and neighboring matches as upper and lower. When neighboring matches get merged they become one and the
* tokens separating them get removed from the submission clone. MergeBuffer describes how much shorter a match can be than
* the Minimum Token Match. SeperatingThreshold describes how many tokens can be between two neighboring matches. Both
* are set in {@link JPlagOptions} as {@link MergingParameters} and default to 0 (which deactivates merging).
*/
public class MatchMerging {
private JPlagOptions options;
Expand All @@ -44,33 +44,33 @@ public JPlagResult mergeMatchesOf(JPlagResult result) {
List<JPlagComparison> comparisonsMerged = new ArrayList<>();

for (JPlagComparison comparison : comparisons) {
Submission firstSubmission = comparison.firstSubmission().copy();
Submission secondSubmission = comparison.secondSubmission().copy();
Submission leftSubmission = comparison.firstSubmission().copy();
Submission rightSubmission = comparison.secondSubmission().copy();
List<Match> globalMatches = new ArrayList<>(comparison.matches());
globalMatches.addAll(comparison.ignoredMatches());
globalMatches = removeTooShortMatches(mergeNeighbors(globalMatches, firstSubmission, secondSubmission));
comparisonsMerged.add(new JPlagComparison(firstSubmission, secondSubmission, globalMatches, new ArrayList<>()));
globalMatches = removeTooShortMatches(mergeNeighbors(globalMatches, leftSubmission, rightSubmission));
comparisonsMerged.add(new JPlagComparison(leftSubmission, rightSubmission, globalMatches, new ArrayList<>()));
}

long durationInMillis = System.currentTimeMillis() - timeBeforeStartInMillis;
return new JPlagResult(comparisonsMerged, result.getSubmissions(), result.getDuration() + durationInMillis, options);
}

/**
* Computes neighbors by sorting based on order of matches in the first and the second submission and then checking
* which are next to each other in both.
* Computes neighbors by sorting based on order of matches in the left and right submissions and then checking which are
* next to each other in both.
* @param globalMatches
* @return neighbors containing a list of pairs of neighboring matches
*/
private List<List<Match>> computeNeighbors(List<Match> globalMatches) {
List<List<Match>> neighbors = new ArrayList<>();
List<Match> sortedByFirst = new ArrayList<>(globalMatches);
Collections.sort(sortedByFirst, (match1, match2) -> match1.startOfFirst() - match2.startOfFirst());
List<Match> sortedBySecond = new ArrayList<>(globalMatches);
Collections.sort(sortedBySecond, (match1, match2) -> match1.startOfSecond() - match2.startOfSecond());
for (int i = 0; i < sortedByFirst.size() - 1; i++) {
if (sortedBySecond.indexOf(sortedByFirst.get(i)) == (sortedBySecond.indexOf(sortedByFirst.get(i + 1)) - 1)) {
neighbors.add(Arrays.asList(sortedByFirst.get(i), sortedByFirst.get(i + 1)));
private List<Neighbor> computeNeighbors(List<Match> globalMatches) {
List<Neighbor> neighbors = new ArrayList<>();
List<Match> sortedByLeft = new ArrayList<>(globalMatches);
Collections.sort(sortedByLeft, (match1, match2) -> match1.startOfFirst() - match2.startOfFirst());
List<Match> sortedByRight = new ArrayList<>(globalMatches);
Collections.sort(sortedByRight, (match1, match2) -> match1.startOfSecond() - match2.startOfSecond());
for (int i = 0; i < sortedByLeft.size() - 1; i++) {
if (sortedByRight.indexOf(sortedByLeft.get(i)) == (sortedByRight.indexOf(sortedByLeft.get(i + 1)) - 1)) {
neighbors.add(new Neighbor(sortedByLeft.get(i), sortedByLeft.get(i + 1)));
}
}
return neighbors;
Expand All @@ -82,25 +82,25 @@ private List<List<Match>> computeNeighbors(List<Match> globalMatches) {
* criteria
* @return globalMatches containing merged matches.
*/
private List<Match> mergeNeighbors(List<Match> globalMatches, Submission firstSubmission, Submission secondSubmission) {
private List<Match> mergeNeighbors(List<Match> globalMatches, Submission leftSubmission, Submission rightSubmission) {
int i = 0;
List<List<Match>> neighbors = computeNeighbors(globalMatches);
List<Neighbor> neighbors = computeNeighbors(globalMatches);

while (i < neighbors.size()) {
Match firstNeighbor = neighbors.get(i).get(0);
Match secondNeighbor = neighbors.get(i).get(1);

int lengthUpper = firstNeighbor.length();
int lengthLower = secondNeighbor.length();
int tokenBetweenFirst = secondNeighbor.startOfFirst() - firstNeighbor.endOfFirst() - 1;
int tokensBetweenSecond = secondNeighbor.startOfSecond() - firstNeighbor.endOfSecond() - 1;
double averageTokensBetweenMatches = (tokenBetweenFirst + tokensBetweenSecond) / 2.0;
Match upperNeighbor = neighbors.get(i).upperMatch();
Match lowerNeighbor = neighbors.get(i).lowerMatch();

int lengthUpper = upperNeighbor.length();
int lengthLower = lowerNeighbor.length();
int tokenBetweenLeft = lowerNeighbor.startOfFirst() - upperNeighbor.endOfFirst() - 1;
int tokensBetweenRight = lowerNeighbor.startOfSecond() - upperNeighbor.endOfSecond() - 1;
double averageTokensBetweenMatches = (tokenBetweenLeft + tokensBetweenRight) / 2.0;
// Checking length is not necessary as GST already checked length while computing matches
if (averageTokensBetweenMatches <= options.mergingParameters().seperatingThreshold()) {
globalMatches.removeAll(neighbors.get(i));
globalMatches.add(new Match(firstNeighbor.startOfFirst(), firstNeighbor.startOfSecond(), lengthUpper + lengthLower));
globalMatches = removeToken(globalMatches, firstSubmission, secondSubmission, firstNeighbor.startOfFirst(),
firstNeighbor.startOfSecond(), lengthUpper, tokenBetweenFirst, tokensBetweenSecond);
globalMatches.remove(upperNeighbor);
globalMatches.remove(lowerNeighbor);
globalMatches.add(new Match(upperNeighbor.startOfFirst(), upperNeighbor.startOfSecond(), lengthUpper + lengthLower));
globalMatches = removeToken(globalMatches, leftSubmission, rightSubmission, upperNeighbor, tokenBetweenLeft, tokensBetweenRight);
neighbors = computeNeighbors(globalMatches);
i = 0;
} else {
Expand All @@ -114,30 +114,32 @@ private List<Match> mergeNeighbors(List<Match> globalMatches, Submission firstSu
* This function removes tokens from both submissions after a merge has been performed. Additionally it moves the
* starting positions of matches that occur after the merged neighboring matches by the number of removed tokens.
* @param globalMatches
* @param firstSubmission is the first submission
* @param secondSubmission is the second submission
* @param startFirst begin of the upper neighbor in the first submission
* @param startSecond begin of the upper neighbor in the second submission
* @param lengthUpper length of the upper neighbor
* @param tokensBetweenFirst amount of token that separate the neighboring matches in the first submission and need to
* be removed
* @param tokensBetweenSecond amount token that separate the neighboring matches in the send submission and need to be
* @param leftSubmission is the left submission
* @param rightSubmission is the right submission
* @param upperNeighbor is the upper neighboring match
* @param tokensBetweenLeft number of tokens that separate the neighboring matches in the left submission and need to be
* removed
* @param tokensBetweenRight number of tokens that separate the neighboring matches in the right submission and need to be
* removed
* @return shiftedMatches with the mentioned changes.
*/
private List<Match> removeToken(List<Match> globalMatches, Submission firstSubmission, Submission secondSubmission, int startFirst,
int startSecond, int lengthUpper, int tokensBetweenFirst, int tokensBetweenSecond) {
List<Token> tokenFirst = new ArrayList<>(firstSubmission.getTokenList());
List<Token> tokenSecond = new ArrayList<>(secondSubmission.getTokenList());
tokenFirst.subList(startFirst + lengthUpper, startFirst + lengthUpper + tokensBetweenFirst).clear();
tokenSecond.subList(startSecond + lengthUpper, startSecond + lengthUpper + tokensBetweenSecond).clear();
firstSubmission.setTokenList(tokenFirst);
secondSubmission.setTokenList(tokenSecond);
private List<Match> removeToken(List<Match> globalMatches, Submission leftSubmission, Submission rightSubmission, Match upperNeighbor,
int tokensBetweenLeft, int tokensBetweenRight) {
int startLeft = upperNeighbor.startOfFirst();
int startRight = upperNeighbor.startOfSecond();
int lengthUpper = upperNeighbor.length();

List<Token> tokenLeft = new ArrayList<>(leftSubmission.getTokenList());
List<Token> tokenRight = new ArrayList<>(rightSubmission.getTokenList());
tokenLeft.subList(startLeft + lengthUpper, startLeft + lengthUpper + tokensBetweenLeft).clear();
tokenRight.subList(startRight + lengthUpper, startRight + lengthUpper + tokensBetweenRight).clear();
leftSubmission.setTokenList(tokenLeft);
rightSubmission.setTokenList(tokenRight);

List<Match> shiftedMatches = new ArrayList<>();
for (Match match : globalMatches) {
int leftShift = match.startOfFirst() > startFirst ? tokensBetweenFirst : 0;
int rightShift = match.startOfSecond() > startSecond ? tokensBetweenSecond : 0;
int leftShift = match.startOfFirst() > startLeft ? tokensBetweenLeft : 0;
int rightShift = match.startOfSecond() > startRight ? tokensBetweenRight : 0;
Match alteredMatch = new Match(match.startOfFirst() - leftShift, match.startOfSecond() - rightShift, match.length());
shiftedMatches.add(alteredMatch);
}
Expand Down
9 changes: 9 additions & 0 deletions core/src/main/java/de/jplag/merging/Neighbor.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package de.jplag.merging;

import de.jplag.Match;

/**
 * This record represents a pair of neighboring matches, named upperMatch and lowerMatch.
 * @param upperMatch is the first match of the pair
 * @param lowerMatch is the second match of the pair
 */
public record Neighbor(Match upperMatch, Match lowerMatch) {
}

0 comments on commit 9457adb

Please sign in to comment.