diff --git a/.github/workflows/filterDuplicates.js b/.github/workflows/filterDuplicates.js new file mode 100644 index 00000000000..0284ea20654 --- /dev/null +++ b/.github/workflows/filterDuplicates.js @@ -0,0 +1,279 @@ +/** + * Filters the report produced by jscpd to only include clones that involve changes from the given git diff. + * If the filtered report is non-empty, i.e. there exists a clone in the changes, + * the program exits with an error and logs the filtered report to console. + * + * Usage: + * node filterDuplicates.js run [path_to_git_diff] [path_to_jscpd_report] + * + * Tests: + * node filterDuplicates.js test + */ + +const fs = require('fs/promises') +const path = require('path') + +function parseDiffFilePath(filePathLine) { + return filePathLine.split(' ')[2].split('/').slice(1).join('/') +} + +function parseDiffRange(rangeLine) { + const [_fromRange, toRange] = rangeLine.split(' ').slice(1, 3) + const [startLine, numLines] = toRange.slice(1).split(',').map(Number) + const range = [startLine, startLine + numLines] + return range +} + +async function parseDiff(diffPath) { + const diff = await fs.readFile(diffPath, 'utf8') + const lines = diff.split('\n') + let currentFile = null + let currentFileChanges = [] + const fileChanges = new Map() + + for (const line of lines) { + if (line.startsWith('diff')) { + if (currentFile) { + fileChanges.set(currentFile, currentFileChanges) + } + currentFile = parseDiffFilePath(line) + currentFileChanges = [] + } + if (line.startsWith('@@')) { + currentFileChanges.push(parseDiffRange(line)) + } + } + + fileChanges.set(currentFile, currentFileChanges) + + return fileChanges +} + +function doesOverlap(range1, range2) { + const [start1, end1] = range1 + const [start2, end2] = range2 + return ( + (start1 >= start2 && start1 <= end2) || (end1 >= start2 && end1 <= end2) || (start2 >= start1 && end2 <= end1) + ) +} + +function isCloneInChanges(changes, cloneInstance) { + const fileName = cloneInstance.name + const cloneStart = cloneInstance.start + const cloneEnd = cloneInstance.end + const lineChangeRanges = changes.get(fileName) + + if (!lineChangeRanges) { + return false + } + + return lineChangeRanges.some((range) => doesOverlap([cloneStart, cloneEnd], range)) +} + +function isInChanges(changes, dupe) { + return isCloneInChanges(changes, dupe.firstFile) || isCloneInChanges(changes, dupe.secondFile) +} + +function filterDuplicates(report, changes) { + duplicates = [] + for (const dupe of report.duplicates) { + if (isInChanges(changes, dupe)) { + duplicates.push(dupe) + } + } + return duplicates +} + +async function run() { + const rawDiffPath = process.argv[3] + const jscpdReportPath = process.argv[4] + const changes = await parseDiff(rawDiffPath) + const jscpdReport = JSON.parse(await fs.readFile(jscpdReportPath, 'utf8')) + const filteredDuplicates = filterDuplicates(jscpdReport, changes) + + console.log('%s files changes', changes.size) + console.log('%s duplicates found', filteredDuplicates.length) + if (filteredDuplicates.length > 0) { + console.log(filteredDuplicates) + process.exit(1) + } +} + +/** + * Mini-test Suite + */ +console.log(__dirname) +const testDiffFile = path.resolve(__dirname, 'test/test_diff.txt') +let testCounter = 0 +function assertEqual(actual, expected) { + if (actual !== expected) { + throw new Error(`Expected ${expected} but got ${actual}`) + } + testCounter += 1 +} + +async function test() { + test_parseDiffFilePath() + test_parseDiffRange() + test_doesOverlap() + await test_parseDiff() + await test_isCloneInChanges() + await test_isInChanges() + await test_filterDuplicates() + console.log('All tests passed (%s)', testCounter) +} + +function test_parseDiffFilePath() { + assertEqual( + parseDiffFilePath( + 'diff --git a/.github/workflows/copyPasteDetection.yml b/.github/workflows/copyPasteDetection.yml' + ), + '.github/workflows/copyPasteDetection.yml' + ) + assertEqual( + parseDiffFilePath('diff --git a/.github/workflows/filterDuplicates.js b/.github/workflows/filterDuplicates.js'), + '.github/workflows/filterDuplicates.js' + ) +} + +function test_parseDiffRange() { + assertEqual(parseDiffRange('@@ -1,4 +1,4 @@').join(','), '1,5') + assertEqual(parseDiffRange('@@ -10,4 +10,4 @@').join(','), '10,14') + assertEqual(parseDiffRange('@@ -10,4 +10,5 @@').join(','), '10,15') +} + +function test_doesOverlap() { + assertEqual(doesOverlap([1, 5], [2, 4]), true) + assertEqual(doesOverlap([2, 3], [2, 4]), true) + assertEqual(doesOverlap([2, 3], [1, 4]), true) + assertEqual(doesOverlap([1, 5], [5, 6]), true) + assertEqual(doesOverlap([1, 5], [6, 7]), false) + assertEqual(doesOverlap([6, 7], [1, 5]), false) + assertEqual(doesOverlap([2, 5], [4, 5]), true) +} + +async function test_parseDiff() { + const changes = await parseDiff(testDiffFile) + assertEqual(changes.size, 2) + assertEqual(changes.get('.github/workflows/copyPasteDetection.yml').length, 1) + assertEqual(changes.get('.github/workflows/filterDuplicates.js').length, 1) + assertEqual(changes.get('.github/workflows/filterDuplicates.js')[0].join(','), '1,86') + assertEqual(changes.get('.github/workflows/copyPasteDetection.yml')[0].join(','), '26,73') +} + +async function test_isCloneInChanges() { + const changes = await parseDiff(testDiffFile) + assertEqual( + isCloneInChanges(changes, { + name: '.github/workflows/filterDuplicates.js', + start: 1, + end: 86, + }), + true + ) + assertEqual( + isCloneInChanges(changes, { + name: '.github/workflows/filterDuplicates.js', + start: 80, + end: 95, + }), + true + ) + assertEqual( + isCloneInChanges(changes, { + name: '.github/workflows/filterDuplicates.js', + start: 87, + end: 95, + }), + false + ) + assertEqual( + isCloneInChanges(changes, { + name: 'some-fake-file', + start: 1, + end: 100, + }), + false + ) +} + +async function test_isInChanges() { + const changes = await parseDiff(testDiffFile) + const dupe = { + firstFile: { + name: '.github/workflows/filterDuplicates.js', + start: 1, + end: 86, + }, + secondFile: { + name: '.github/workflows/filterDuplicates.js', + start: 80, + end: 95, + }, + } + assertEqual(isInChanges(changes, dupe), true) + dupe.secondFile.start = 87 + assertEqual(isInChanges(changes, dupe), true) + dupe.firstFile.name = 'some-fake-file' + assertEqual(isInChanges(changes, dupe), false) +} + +async function test_filterDuplicates() { + assertEqual( + filterDuplicates( + { + duplicates: [ + { + firstFile: { + name: '.github/workflows/filterDuplicates.js', + start: 1, + end: 86, + }, + secondFile: { + name: '.github/workflows/filterDuplicates.js', + start: 80, + end: 95, + }, + }, + ], + }, + await parseDiff(testDiffFile) + ).length, + 1 + ) + assertEqual( + filterDuplicates( + { + duplicates: [ + { + firstFile: { + name: 'some-other-file', + start: 1, + end: 86, + }, + secondFile: { + name: '.github/workflows/filterDuplicates.js', + start: 90, + end: 95, + }, + }, + ], + }, + await parseDiff(testDiffFile) + ).length, + 0 + ) +} + +async function main() { + const mode = process.argv[2] + if (mode === 'run') { + await run() + } else if (mode === 'test') { + await test() + } else { + throw new Error('Invalid mode') + } +} + +void main() diff --git a/.github/workflows/node.js.yml b/.github/workflows/node.js.yml index 1c5299c8a2e..04a289eded9 100644 --- a/.github/workflows/node.js.yml +++ b/.github/workflows/node.js.yml @@ -88,7 +88,7 @@ jobs: env: CURRENT_BRANCH: ${{ github.head_ref }} TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} - run: git diff --name-only origin/$TARGET_BRANCH forkUpstream/$CURRENT_BRANCH > diff_output.txt + run: git diff origin/$TARGET_BRANCH forkUpstream/$CURRENT_BRANCH > diff_output.txt - run: npm install -g jscpd @@ -100,43 +100,8 @@ jobs: name: unfiltered-jscpd-report path: ./jscpd-report.json - - name: Filter jscpd report for changed files - run: | - if [ ! -f ./jscpd-report.json ]; then - echo "jscpd-report.json not found" - exit 1 - fi - echo "Filtering jscpd report for changed files..." - CHANGED_FILES=$(jq -R -s -c 'split("\n")[:-1]' diff_output.txt) - echo "Changed files: $CHANGED_FILES" - jq --argjson changed_files "$CHANGED_FILES" ' - .duplicates | map(select( - (.firstFile?.name as $fname | $changed_files | any(. == $fname)) or - (.secondFile?.name as $sname | $changed_files | any(. == $sname)) - )) - ' ./jscpd-report.json > filtered-jscpd-report.json - cat filtered-jscpd-report.json - - - name: Check for duplicates - run: | - if [ $(wc -l < ./filtered-jscpd-report.json) -gt 1 ]; then - echo "filtered_report_exists=true" >> $GITHUB_ENV - else - echo "filtered_report_exists=false" >> $GITHUB_ENV - fi - - name: upload filtered report (if applicable) - if: env.filtered_report_exists == 'true' - uses: actions/upload-artifact@v4 - with: - name: filtered-jscpd-report - path: ./filtered-jscpd-report.json - - - name: Fail and log found duplicates. - if: env.filtered_report_exists == 'true' - run: | - cat ./filtered-jscpd-report.json - echo "Duplications found, failing the check." - exit 1 + - name: Check for Duplicates + run: node "$GITHUB_WORKSPACE/.github/workflows/filterDuplicates.js" run diff_output.txt jscpd-report.json macos: needs: lint-commits diff --git a/.github/workflows/test/test_diff.txt b/.github/workflows/test/test_diff.txt new file mode 100644 index 00000000000..9614e902a5e --- /dev/null +++ b/.github/workflows/test/test_diff.txt @@ -0,0 +1,185 @@ +diff --git a/.github/workflows/copyPasteDetection.yml b/.github/workflows/copyPasteDetection.yml +index 793337de5..746b3cecd 100644 +--- a/.github/workflows/copyPasteDetection.yml ++++ b/.github/workflows/copyPasteDetection.yml +@@ -26,61 +26,47 @@ jobs: + with: + node-version: ${{ matrix.node-version }} + ++ - name: Determine if local ++ run: echo "IS_LOCAL=false" >> $GITHUB_ENV ++ + - name: Fetch fork upstream ++ if: ${{ env.IS_LOCAL == 'false' }} + run: | + git remote add forkUpstream https://github.com/${{ github.event.pull_request.head.repo.full_name }} # URL of the fork + git fetch forkUpstream # Fetch fork + + - name: Determine base and target branches for comparison. + run: | +- echo "CURRENT_BRANCH=${{ github.head_ref }}" >> $GITHUB_ENV +- echo "TARGET_BRANCH=${{ github.event.pull_request.base.ref }}" >> $GITHUB_ENV +- - run: git diff --name-only origin/$TARGET_BRANCH forkUpstream/$CURRENT_BRANCH > diff_output.txt +- - run: | +- npm install -g jscpd ++ if [[ $IS_LOCAL == 'false' ]]; then ++ echo "CURRENT_BRANCH=${{ github.head_ref }}" >> $GITHUB_ENV ++ echo "TARGET_BRANCH=${{ github.event.pull_request.base.ref }}" >> $GITHUB_ENV ++ else ++ echo "CURRENT_BRANCH=${{ github.ref_name }}" >> $GITHUB_ENV ++ echo "TARGET_BRANCH=master" >> $GITHUB_ENV ++ fi ++ ++ - name: Print base and target branches for comparison. ++ run: | ++ echo "CURRENT_BRANCH=$CURRENT_BRANCH" ++ echo "TARGET_BRANCH=$TARGET_BRANCH" ++ ++ - name: Compare target and current branches. ++ run: | ++ if [[ $IS_LOCAL == 'false' ]]; then ++ git diff origin/$TARGET_BRANCH forkUpstream/$CURRENT_BRANCH > diff_output.txt ++ else ++ git diff origin/$TARGET_BRANCH $CURRENT_BRANCH > diff_output.txt ++ fi ++ ++ - run: npm install -g jscpd + + - run: jscpd --config "$GITHUB_WORKSPACE/.github/workflows/jscpd.json" + +- - if: always() ++ - if: ${{ env.IS_LOCAL == 'false' }} + uses: actions/upload-artifact@v4 + with: + name: unfiltered-jscpd-report + path: ./jscpd-report.json + +- - name: Filter jscpd report for changed files +- run: | +- if [ ! -f ./jscpd-report.json ]; then +- echo "jscpd-report.json not found" +- exit 1 +- fi +- echo "Filtering jscpd report for changed files..." +- CHANGED_FILES=$(jq -R -s -c 'split("\n")[:-1]' diff_output.txt) +- echo "Changed files: $CHANGED_FILES" +- jq --argjson changed_files "$CHANGED_FILES" ' +- .duplicates | map(select( +- (.firstFile?.name as $fname | $changed_files | any(. == $fname)) or +- (.secondFile?.name as $sname | $changed_files | any(. == $sname)) +- )) +- ' ./jscpd-report.json > filtered-jscpd-report.json +- cat filtered-jscpd-report.json +- + - name: Check for duplicates +- run: | +- if [ $(wc -l < ./filtered-jscpd-report.json) -gt 1 ]; then +- echo "filtered_report_exists=true" >> $GITHUB_ENV +- else +- echo "filtered_report_exists=false" >> $GITHUB_ENV +- fi +- - name: upload filtered report (if applicable) +- if: env.filtered_report_exists == 'true' +- uses: actions/upload-artifact@v4 +- with: +- name: filtered-jscpd-report +- path: ./filtered-jscpd-report.json +- +- - name: Fail and log found duplicates. +- if: env.filtered_report_exists == 'true' +- run: | +- cat ./filtered-jscpd-report.json +- echo "Duplications found, failing the check." +- exit 1 ++ run: node "$GITHUB_WORKSPACE/.github/workflows/filterDuplicates.js" diff_output.txt jscpd-report.json +diff --git a/.github/workflows/filterDuplicates.js b/.github/workflows/filterDuplicates.js +new file mode 100644 +index 000000000..b2f1e913e +--- /dev/null ++++ b/.github/workflows/filterDuplicates.js +@@ -0,0 +1,85 @@ ++const fs = require('fs/promises') ++ ++function parseDiffFilePath(filePathLine) { ++ return filePathLine.split(' ')[2].split('/').slice(1).join('/') ++} ++ ++function parseDiffRange(rangeLine) { ++ const [_fromRange, toRange] = rangeLine.split(' ').slice(1, 3) ++ const [startLine, numLines] = toRange.slice(1).split(',').map(Number) ++ const range = [startLine, startLine + numLines] ++ return range ++} ++ ++async function parseDiff(diffPath) { ++ const diff = await fs.readFile(diffPath, 'utf8') ++ const lines = diff.split('\n') ++ let currentFile = null ++ let currentFileChanges = [] ++ const fileChanges = new Map() ++ ++ for (const line of lines) { ++ if (line.startsWith('diff')) { ++ if (currentFile) { ++ fileChanges.set(currentFile, currentFileChanges) ++ } ++ currentFile = parseDiffFilePath(line) ++ currentFileChanges = [] ++ } ++ if (line.startsWith('@@')) { ++ currentFileChanges.push(parseDiffRange(line)) ++ } ++ } ++ ++ return fileChanges ++} ++ ++function doesOverlap(range1, range2) { ++ const [start1, end1] = range1 ++ const [start2, end2] = range2 ++ return (start1 >= start2 && start1 <= end2) || (end1 >= start2 && end1 <= end2) ++} ++ ++function isCloneInChanges(changes, cloneInstance) { ++ const fileName = cloneInstance.name ++ const cloneStart = cloneInstance.start ++ const cloneEnd = cloneInstance.end ++ const lineChangeRanges = changes.get(fileName) ++ ++ if (!lineChangeRanges) { ++ return false ++ } ++ ++ return lineChangeRanges.some((range) => doesOverlap([cloneStart, cloneEnd], range)) ++} ++ ++function isInChanges(changes, dupe) { ++ return isCloneInChanges(changes, dupe.firstFile) || isCloneInChanges(changes, dupe.secondFile) ++} ++ ++function filterDuplicates(report, changes) { ++ duplicates = [] ++ for (const dupe of report.duplicates) { ++ if (isInChanges(changes, dupe)) { ++ duplicates.push(dupe) ++ } ++ } ++ return duplicates ++} ++ ++async function main() { ++ const rawDiffPath = process.argv[2] ++ const jscpdReportPath = process.argv[3] ++ const changes = await parseDiff(rawDiffPath) ++ const jscpdReport = JSON.parse(await fs.readFile(jscpdReportPath, 'utf8')) ++ const filteredDuplicates = filterDuplicates(jscpdReport, changes) ++ ++ console.log(filteredDuplicates) ++ console.log('%s files changes', changes.size) ++ console.log('%s duplicates found', filteredDuplicates.length) ++ if (filteredDuplicates.length > 0) { ++ process.exit(1) ++ } ++} ++ ++void main()