Skip to content

Commit

Permalink
[#215] Extracting info from git diff is not working as expected (#216)
Browse files Browse the repository at this point in the history
FileInfoExtractor#getFilePathFromDiffPattern(String) checks
for the group pattern '+++ b/path/to/file'
within the git diff result in order to get the file path.

However, when a new and also empty file is added in between the
commit range to analysis, the file will appear in the git diff result,
but since there are no lines to display, the above group does not
get printed, thus 
FileInfoExtractor#getFilePathFromDiffPattern(String) is unable
to match the pattern, causing the program to crash.

Let's add a check to skip these new and empty files in 
the git diff result, since an empty file has no lines to 
attribute any contribution.
  • Loading branch information
yamidark authored and eugenepeh committed Jul 21, 2018
1 parent a3c6bcc commit a76a1b4
Showing 1 changed file with 12 additions and 3 deletions.
15 changes: 12 additions & 3 deletions src/main/java/reposense/authorship/FileInfoExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.function.Predicate;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
Expand All @@ -31,18 +32,23 @@ public class FileInfoExtractor {

private static final String DIFF_FILE_CHUNK_SEPARATOR = "\ndiff --git a/.*\n";
private static final String BINARY_FILE_SYMBOL = "\nBinary files ";
private static final String FILE_DELETED_METADATA = "deleted file mode 100644\n";
private static final String LINE_CHUNKS_SEPARATOR = "\n@@ ";
private static final String LINE_INSERTED_SYMBOL = "+";
private static final String STARTING_LINE_NUMBER_GROUP_NAME = "startingLineNumber";
private static final String FILE_CHANGED_GROUP_NAME = "filePath";
private static final String MATCH_GROUP_FAIL_MESSAGE_FORMAT = "Failed to match the %s group for:\n%s";

private static final int LINE_CHANGED_HEADER_INDEX = 0;

private static final Pattern STARTING_LINE_NUMBER_PATTERN = Pattern.compile(
"-(\\d)+(,)?(\\d)* \\+(?<startingLineNumber>\\d+)(,)?(\\d)* @@");
private static final Pattern FILE_CHANGED_PATTERN = Pattern.compile("\n(\\+){3} b/(?<filePath>.*)\n");

private static final Predicate<String> FILE_DELETED_PREDICATE = Pattern.compile(
"^deleted file mode [\\d]{6}\n").asPredicate();
private static final Predicate<String> NEW_EMPTY_FILE_PREDICATE = Pattern.compile(
"^new file mode [a-zA-Z0-9\n. ]*$").asPredicate();

/**
* Extracts a list of relevant files given in {@code config}.
*/
Expand Down Expand Up @@ -84,8 +90,9 @@ public static List<FileInfo> getEditedFileInfos(RepoConfiguration config, String
String[] fileDiffResultList = fullDiffResult.split(DIFF_FILE_CHUNK_SEPARATOR);

for (String fileDiffResult : fileDiffResultList) {
// file deleted or is a binary file, skip it
if (fileDiffResult.startsWith(FILE_DELETED_METADATA) || fileDiffResult.contains(BINARY_FILE_SYMBOL)) {
// file deleted, is binary file or is new and empty file, skip it
if (FILE_DELETED_PREDICATE.test(fileDiffResult) || fileDiffResult.contains(BINARY_FILE_SYMBOL)
|| NEW_EMPTY_FILE_PREDICATE.test(fileDiffResult)) {
continue;
}

Expand Down Expand Up @@ -202,6 +209,7 @@ private static String getFilePathFromDiffPattern(String fileDiffResult) {
Matcher filePathMatcher = FILE_CHANGED_PATTERN.matcher(fileDiffResult);

if (!filePathMatcher.find()) {
logger.severe(String.format(MATCH_GROUP_FAIL_MESSAGE_FORMAT, "file path", fileDiffResult));
throw new AssertionError("Should not have error matching file path pattern inside file diff result!");
}

Expand All @@ -216,6 +224,7 @@ private static int getStartingLineNumber(String linesChanged) {
Matcher chunkHeaderMatcher = STARTING_LINE_NUMBER_PATTERN.matcher(linesChanged);

if (!chunkHeaderMatcher.find()) {
logger.severe(String.format(MATCH_GROUP_FAIL_MESSAGE_FORMAT, "line changed", linesChanged));
throw new AssertionError("Should not have error matching line number pattern inside chunk header!");
}

Expand Down

0 comments on commit a76a1b4

Please sign in to comment.