Skip to content

Commit

Permalink
Merge master into feature/emr
Browse files Browse the repository at this point in the history
  • Loading branch information
aws-toolkit-automation authored Dec 23, 2024
2 parents 1dda2fd + 745306d commit e4cb209
Show file tree
Hide file tree
Showing 3 changed files with 467 additions and 38 deletions.
279 changes: 279 additions & 0 deletions .github/workflows/filterDuplicates.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
/**
* Filters the report produced by jscpd to only include clones that involve changes from the given git diff.
* If the filtered report is non-empty, i.e. there exists a clone in the changes,
* the program exits with an error and logs the filtered report to console.
*
* Usage:
* node filterDuplicates.js run [path_to_git_diff] [path_to_jscpd_report]
*
* Tests:
* node filterDuplicates.js test
*/

const fs = require('fs/promises')
const path = require('path')

/**
 * Extracts the repository-relative file path from a `diff --git` header line.
 *
 * The third space-separated token of the line (the `a/...` path) is taken and
 * its first path segment (the `a` prefix) is dropped.
 *
 * NOTE: the line is split on single spaces, so a path that itself contains a
 * space is cut off at that space.
 *
 * @param {string} filePathLine - A header line such as `diff --git a/src/x.js b/src/x.js`.
 * @returns {string} The path without its leading prefix segment, e.g. `src/x.js`.
 */
function parseDiffFilePath(filePathLine) {
    const tokens = filePathLine.split(' ')
    const prefixedPath = tokens[2]
    const [, ...pathSegments] = prefixedPath.split('/')
    return pathSegments.join('/')
}

/**
 * Parses a unified-diff hunk header and returns the line range it covers on
 * the new-file ("+") side.
 *
 * @param {string} rangeLine - A hunk header such as `@@ -10,4 +10,5 @@`. Anything
 *   after the closing `@@` (e.g. function context) is ignored.
 * @returns {number[]} A two-element array `[start, start + count]`.
 */
function parseDiffRange(rangeLine) {
    // Token 0 is '@@', token 1 is the old-file range, token 2 is the new-file range.
    const toRange = rangeLine.split(' ')[2]
    // Strip the leading '+'. The count is omitted from the header when the hunk
    // covers exactly one line (e.g. '+7'), so it defaults to 1 rather than
    // becoming NaN and silently making the range match nothing.
    const [startLine, numLines = 1] = toRange.slice(1).split(',').map(Number)
    return [startLine, startLine + numLines]
}

/**
 * Reads a git diff from disk and collects, for every file it mentions, the
 * hunk ranges that were changed.
 *
 * Lines starting with `diff` open a new file section (path parsed by
 * parseDiffFilePath); lines starting with `@@` add a range (parsed by
 * parseDiffRange) to the file section currently open.
 *
 * @param {string} diffPath - Path to a file containing the output of `git diff`.
 * @returns {Promise} Resolves to a Map keyed by file path whose values are
 *   arrays of `[start, end]` ranges. A diff without any file header yields an
 *   empty Map.
 */
async function parseDiff(diffPath) {
    const diff = await fs.readFile(diffPath, 'utf8')
    const fileChanges = new Map()
    // Ranges of the file section currently being read; null until the first header.
    let currentFileChanges = null

    for (const line of diff.split('\n')) {
        if (line.startsWith('diff')) {
            // Register the file as soon as its header is seen so that an empty
            // diff never produces a bogus `null` entry.
            currentFileChanges = []
            fileChanges.set(parseDiffFilePath(line), currentFileChanges)
        }
        // A hunk header that appears before any file header has no file to belong to.
        if (line.startsWith('@@') && currentFileChanges !== null) {
            currentFileChanges.push(parseDiffRange(line))
        }
    }

    return fileChanges
}

/**
 * Tells whether two line ranges share at least one line, treating both ends of
 * each range as inclusive.
 *
 * @param {number[]} range1 - `[start, end]` of the first range.
 * @param {number[]} range2 - `[start, end]` of the second range.
 * @returns {boolean} true when the ranges overlap.
 */
function doesOverlap(range1, range2) {
    const [firstStart, firstEnd] = range1
    const [secondStart, secondEnd] = range2

    // The first range begins inside the second one.
    if (firstStart >= secondStart && firstStart <= secondEnd) {
        return true
    }
    // The first range finishes inside the second one.
    if (firstEnd >= secondStart && firstEnd <= secondEnd) {
        return true
    }
    // Otherwise they only overlap if the second range sits entirely within the first.
    return secondStart >= firstStart && secondEnd <= firstEnd
}

/**
 * Checks whether one side of a reported clone touches any changed range of
 * its file.
 *
 * @param {Map} changes - Map from file path to an array of `[start, end]` changed ranges.
 * @param {object} cloneInstance - Clone location with `name` (file path), `start` and `end` lines.
 * @returns {boolean} true when the clone's file is in `changes` and its lines
 *   overlap at least one changed range; false otherwise.
 */
function isCloneInChanges(changes, cloneInstance) {
    const { name, start, end } = cloneInstance
    const changedRanges = changes.get(name)

    // The file is not part of the diff at all.
    if (!changedRanges) {
        return false
    }

    for (const changedRange of changedRanges) {
        if (doesOverlap([start, end], changedRange)) {
            return true
        }
    }
    return false
}

/**
 * Checks whether either side of a duplicate pair falls inside the changed lines.
 *
 * @param {Map} changes - Map from file path to an array of `[start, end]` changed ranges.
 * @param {object} dupe - Duplicate entry with `firstFile` and `secondFile` clone locations.
 * @returns {boolean} true when at least one of the two clone locations overlaps a change.
 */
function isInChanges(changes, dupe) {
    if (isCloneInChanges(changes, dupe.firstFile)) {
        return true
    }
    return isCloneInChanges(changes, dupe.secondFile)
}

/**
 * Keeps only the duplicates of a jscpd report that involve changed lines.
 *
 * @param {object} report - Parsed jscpd report; its `duplicates` array is read.
 * @param {Map} changes - Map from file path to an array of `[start, end]` changed ranges.
 * @returns {object[]} A new array with the duplicates for which isInChanges is true,
 *   in their original order.
 */
function filterDuplicates(report, changes) {
    // Built with filter() so no accumulator variable is needed; the previous
    // undeclared `duplicates` assignment leaked a global and throws in strict mode.
    return report.duplicates.filter((dupe) => isInChanges(changes, dupe))
}

/**
 * `run` mode: reads the diff path and the jscpd report path from the command
 * line (process.argv[3] and process.argv[4]), filters the report down to the
 * duplicates that involve changed lines, and logs a summary.
 *
 * When at least one such duplicate exists, the filtered list is logged and the
 * process exits with status 1.
 *
 * @returns {Promise} Resolves once the summary is logged and no duplicates were found.
 */
async function run() {
    const [rawDiffPath, jscpdReportPath] = process.argv.slice(3, 5)

    const changes = await parseDiff(rawDiffPath)
    const reportText = await fs.readFile(jscpdReportPath, 'utf8')
    const filteredDuplicates = filterDuplicates(JSON.parse(reportText), changes)

    console.log('%s files changes', changes.size)
    console.log('%s duplicates found', filteredDuplicates.length)

    if (filteredDuplicates.length === 0) {
        return
    }
    console.log(filteredDuplicates)
    process.exit(1)
}

/**
* Mini-test Suite
*/
// NOTE(review): this runs at module load, so the directory is printed in every
// mode (including `run`), not only during tests. Looks like leftover debug
// output; confirm nothing relies on it before removing.
console.log(__dirname)
// Fixture diff read by the parseDiff-based tests; resolved relative to this script's directory.
const testDiffFile = path.resolve(__dirname, 'test/test_diff.txt')
// Count of assertions that have passed; incremented by assertEqual and printed by test().
let testCounter = 0
/**
 * Minimal assertion helper: compares two values with strict equality.
 *
 * On success the module-level `testCounter` is incremented; on mismatch an
 * Error describing both values is thrown.
 *
 * @param {*} actual - Value produced by the code under test.
 * @param {*} expected - Value it must be strictly equal to.
 * @throws {Error} When `actual !== expected`.
 */
function assertEqual(actual, expected) {
    const isMatch = actual === expected
    if (isMatch) {
        testCounter += 1
        return
    }
    throw new Error(`Expected ${expected} but got ${actual}`)
}

/**
 * `test` mode: runs every mini-test in a fixed order (synchronous ones first,
 * then the ones that read the fixture diff) and logs how many assertions passed.
 *
 * The first failing assertion throws and aborts the remaining tests.
 *
 * @returns {Promise} Resolves after the summary line is logged.
 */
async function test() {
    const syncTests = [test_parseDiffFilePath, test_parseDiffRange, test_doesOverlap]
    const asyncTests = [test_parseDiff, test_isCloneInChanges, test_isInChanges, test_filterDuplicates]

    for (const runTest of syncTests) {
        runTest()
    }
    // Awaited one at a time so the tests never run concurrently.
    for (const runTest of asyncTests) {
        await runTest()
    }

    console.log('All tests passed (%s)', testCounter)
}

/**
 * Verifies that parseDiffFilePath returns the repository-relative path for
 * `diff --git` header lines.
 */
function test_parseDiffFilePath() {
    // Each case is [header line, expected path].
    const cases = [
        [
            'diff --git a/.github/workflows/copyPasteDetection.yml b/.github/workflows/copyPasteDetection.yml',
            '.github/workflows/copyPasteDetection.yml',
        ],
        [
            'diff --git a/.github/workflows/filterDuplicates.js b/.github/workflows/filterDuplicates.js',
            '.github/workflows/filterDuplicates.js',
        ],
    ]

    for (const [headerLine, expectedPath] of cases) {
        assertEqual(parseDiffFilePath(headerLine), expectedPath)
    }
}

/**
 * Verifies that parseDiffRange turns a hunk header into `[start, start + count]`
 * for the new-file side. Ranges are compared as comma-joined strings because
 * assertEqual uses strict equality.
 */
function test_parseDiffRange() {
    // Each case is [hunk header, expected range joined with ','].
    const cases = [
        ['@@ -1,4 +1,4 @@', '1,5'],
        ['@@ -10,4 +10,4 @@', '10,14'],
        ['@@ -10,4 +10,5 @@', '10,15'],
    ]

    for (const [hunkHeader, expectedRange] of cases) {
        assertEqual(parseDiffRange(hunkHeader).join(','), expectedRange)
    }
}

/**
 * Verifies doesOverlap for contained, touching and disjoint ranges.
 */
function test_doesOverlap() {
    // Each case is [first range, second range, expected result].
    const cases = [
        [[1, 5], [2, 4], true],
        [[2, 3], [2, 4], true],
        [[2, 3], [1, 4], true],
        [[1, 5], [5, 6], true],
        [[1, 5], [6, 7], false],
        [[6, 7], [1, 5], false],
        [[2, 5], [4, 5], true],
    ]

    for (const [firstRange, secondRange, expected] of cases) {
        assertEqual(doesOverlap(firstRange, secondRange), expected)
    }
}

/**
 * Verifies that parseDiff reads the fixture diff into two file entries, each
 * holding a single changed range with the expected bounds.
 */
async function test_parseDiff() {
    const workflowPath = '.github/workflows/copyPasteDetection.yml'
    const scriptPath = '.github/workflows/filterDuplicates.js'

    const changes = await parseDiff(testDiffFile)

    assertEqual(changes.size, 2)
    assertEqual(changes.get(workflowPath).length, 1)
    assertEqual(changes.get(scriptPath).length, 1)

    const [scriptRange] = changes.get(scriptPath)
    assertEqual(scriptRange.join(','), '1,86')
    const [workflowRange] = changes.get(workflowPath)
    assertEqual(workflowRange.join(','), '26,73')
}

/**
 * Verifies isCloneInChanges against the fixture diff: clones inside or
 * straddling the changed range match, clones past it or in an unknown file do not.
 */
async function test_isCloneInChanges() {
    const scriptPath = '.github/workflows/filterDuplicates.js'
    const changes = await parseDiff(testDiffFile)

    const cases = [
        { clone: { name: scriptPath, start: 1, end: 86 }, expected: true },
        { clone: { name: scriptPath, start: 80, end: 95 }, expected: true },
        { clone: { name: scriptPath, start: 87, end: 95 }, expected: false },
        { clone: { name: 'some-fake-file', start: 1, end: 100 }, expected: false },
    ]

    for (const { clone, expected } of cases) {
        assertEqual(isCloneInChanges(changes, clone), expected)
    }
}

/**
 * Verifies isInChanges: a duplicate matches while at least one of its two
 * sides overlaps the fixture's changed range, and stops matching once neither does.
 */
async function test_isInChanges() {
    const scriptPath = '.github/workflows/filterDuplicates.js'
    const changes = await parseDiff(testDiffFile)

    // Builds a duplicate whose first side spans lines 1-86 of `firstName` and
    // whose second side spans `secondStart`-95 of the script file.
    const makeDupe = (firstName, secondStart) => ({
        firstFile: { name: firstName, start: 1, end: 86 },
        secondFile: { name: scriptPath, start: secondStart, end: 95 },
    })

    // Both sides overlap the change.
    assertEqual(isInChanges(changes, makeDupe(scriptPath, 80)), true)
    // Only the first side overlaps the change.
    assertEqual(isInChanges(changes, makeDupe(scriptPath, 87)), true)
    // Neither side overlaps: unknown file on one side, out-of-range lines on the other.
    assertEqual(isInChanges(changes, makeDupe('some-fake-file', 87)), false)
}

/**
 * Verifies filterDuplicates against the fixture diff: a duplicate touching the
 * changed range is kept, one that touches no change is dropped.
 */
async function test_filterDuplicates() {
    const scriptPath = '.github/workflows/filterDuplicates.js'

    const cases = [
        {
            firstFile: { name: scriptPath, start: 1, end: 86 },
            secondFile: { name: scriptPath, start: 80, end: 95 },
            expectedCount: 1,
        },
        {
            firstFile: { name: 'some-other-file', start: 1, end: 86 },
            secondFile: { name: scriptPath, start: 90, end: 95 },
            expectedCount: 0,
        },
    ]

    for (const { firstFile, secondFile, expectedCount } of cases) {
        const report = { duplicates: [{ firstFile, secondFile }] }
        const changes = await parseDiff(testDiffFile)
        assertEqual(filterDuplicates(report, changes).length, expectedCount)
    }
}

/**
 * Dispatches on the mode given as the first command-line argument
 * (process.argv[2]): `run` filters a jscpd report, `test` runs the mini-test suite.
 *
 * @returns {Promise} Resolves when the selected mode has finished.
 * @throws {Error} 'Invalid mode' when the argument is neither `run` nor `test`.
 */
async function main() {
    switch (process.argv[2]) {
        case 'run':
            await run()
            return
        case 'test':
            await test()
            return
        default:
            throw new Error('Invalid mode')
    }
}

// Script entry point. `void` explicitly discards the promise returned by main();
// a rejection (e.g. the 'Invalid mode' error) is not caught here and is left to
// the runtime's unhandled-rejection handling.
void main()
41 changes: 3 additions & 38 deletions .github/workflows/node.js.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ jobs:
env:
CURRENT_BRANCH: ${{ github.head_ref }}
TARGET_BRANCH: ${{ github.event.pull_request.base.ref }}
run: git diff --name-only origin/$TARGET_BRANCH forkUpstream/$CURRENT_BRANCH > diff_output.txt
run: git diff origin/$TARGET_BRANCH forkUpstream/$CURRENT_BRANCH > diff_output.txt

- run: npm install -g jscpd

Expand All @@ -100,43 +100,8 @@ jobs:
name: unfiltered-jscpd-report
path: ./jscpd-report.json

- name: Filter jscpd report for changed files
run: |
if [ ! -f ./jscpd-report.json ]; then
echo "jscpd-report.json not found"
exit 1
fi
echo "Filtering jscpd report for changed files..."
CHANGED_FILES=$(jq -R -s -c 'split("\n")[:-1]' diff_output.txt)
echo "Changed files: $CHANGED_FILES"
jq --argjson changed_files "$CHANGED_FILES" '
.duplicates | map(select(
(.firstFile?.name as $fname | $changed_files | any(. == $fname)) or
(.secondFile?.name as $sname | $changed_files | any(. == $sname))
))
' ./jscpd-report.json > filtered-jscpd-report.json
cat filtered-jscpd-report.json
- name: Check for duplicates
run: |
if [ $(wc -l < ./filtered-jscpd-report.json) -gt 1 ]; then
echo "filtered_report_exists=true" >> $GITHUB_ENV
else
echo "filtered_report_exists=false" >> $GITHUB_ENV
fi
- name: upload filtered report (if applicable)
if: env.filtered_report_exists == 'true'
uses: actions/upload-artifact@v4
with:
name: filtered-jscpd-report
path: ./filtered-jscpd-report.json

- name: Fail and log found duplicates.
if: env.filtered_report_exists == 'true'
run: |
cat ./filtered-jscpd-report.json
echo "Duplications found, failing the check."
exit 1
- name: Check for Duplicates
run: node "$GITHUB_WORKSPACE/.github/workflows/filterDuplicates.js" run diff_output.txt jscpd-report.json

macos:
needs: lint-commits
Expand Down
Loading

0 comments on commit e4cb209

Please sign in to comment.