Skip to content

Commit

Permalink
refactor(codewhisperer): refactor UTG code path, add tests #5590
Browse files Browse the repository at this point in the history
## Problem
- complicated regex which is not scalable and hard to understand
- stale and complicated code which lacks readability
- test coverage
  • Loading branch information
Will-ShaoHua authored Sep 24, 2024
1 parent ae5288b commit 1db0f97
Show file tree
Hide file tree
Showing 4 changed files with 203 additions and 39 deletions.
104 changes: 103 additions & 1 deletion packages/amazonq/test/unit/codewhisperer/util/codeParsingUtil.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,17 @@
* SPDX-License-Identifier: Apache-2.0
*/

import { extractClasses, extractFunctions, isTestFile, utgLanguageConfigs } from 'aws-core-vscode/codewhisperer'
import * as vscode from 'vscode'
import {
PlatformLanguageId,
extractClasses,
extractFunctions,
isTestFile,
utgLanguageConfigs,
} from 'aws-core-vscode/codewhisperer'
import * as path from 'path'
import assert from 'assert'
import { createTestWorkspaceFolder, toFile } from 'aws-core-vscode/test'

describe('RegexValidationForPython', () => {
it('should extract all function names from a python file content', () => {
Expand Down Expand Up @@ -57,6 +66,99 @@ describe('RegexValidationForJava', () => {
})

describe('isTestFile', () => {
let testWsFolder: string
beforeEach(async function () {
testWsFolder = (await createTestWorkspaceFolder()).uri.fsPath
})

it('validate by file path', async function () {
const langs = new Map<string, string>([
['java', '.java'],
['python', '.py'],
['typescript', '.ts'],
['javascript', '.js'],
['typescriptreact', '.tsx'],
['javascriptreact', '.jsx'],
])
const testFilePathsWithoutExt = [
'/test/MyClass',
'/test/my_class',
'/tst/MyClass',
'/tst/my_class',
'/tests/MyClass',
'/tests/my_class',
]

const srcFilePathsWithoutExt = [
'/src/MyClass',
'MyClass',
'foo/bar/MyClass',
'foo/my_class',
'my_class',
'anyFolderOtherThanTest/foo/myClass',
]

for (const [languageId, ext] of langs) {
const testFilePaths = testFilePathsWithoutExt.map((it) => it + ext)
for (const testFilePath of testFilePaths) {
const actual = await isTestFile(testFilePath, { languageId: languageId })
assert.strictEqual(actual, true)
}

const srcFilePaths = srcFilePathsWithoutExt.map((it) => it + ext)
for (const srcFilePath of srcFilePaths) {
const actual = await isTestFile(srcFilePath, { languageId: languageId })
assert.strictEqual(actual, false)
}
}
})

async function assertIsTestFile(
fileNames: string[],
config: { languageId: PlatformLanguageId },
expected: boolean
) {
for (const fileName of fileNames) {
const p = path.join(testWsFolder, fileName)
await toFile('', p)
const document = await vscode.workspace.openTextDocument(p)
const actual = await isTestFile(document.uri.fsPath, { languageId: config.languageId })
assert.strictEqual(actual, expected)
}
}

it('validate by file name', async function () {
const camelCaseSrc = ['Foo.java', 'Bar.java', 'Baz.java']
await assertIsTestFile(camelCaseSrc, { languageId: 'java' }, false)

const camelCaseTst = ['FooTest.java', 'BarTests.java']
await assertIsTestFile(camelCaseTst, { languageId: 'java' }, true)

const snakeCaseSrc = ['foo.py', 'bar.py']
await assertIsTestFile(snakeCaseSrc, { languageId: 'python' }, false)

const snakeCaseTst = ['test_foo.py', 'bar_test.py']
await assertIsTestFile(snakeCaseTst, { languageId: 'python' }, true)

const javascriptSrc = ['Foo.js', 'bar.js']
await assertIsTestFile(javascriptSrc, { languageId: 'javascript' }, false)

const javascriptTst = ['Foo.test.js', 'Bar.spec.js']
await assertIsTestFile(javascriptTst, { languageId: 'javascript' }, true)

const typescriptSrc = ['Foo.ts', 'bar.ts']
await assertIsTestFile(typescriptSrc, { languageId: 'typescript' }, false)

const typescriptTst = ['Foo.test.ts', 'Bar.spec.ts']
await assertIsTestFile(typescriptTst, { languageId: 'typescript' }, true)

const jsxSrc = ['Foo.jsx', 'Bar.jsx']
await assertIsTestFile(jsxSrc, { languageId: 'javascriptreact' }, false)

const jsxTst = ['Foo.test.jsx', 'Bar.spec.jsx']
await assertIsTestFile(jsxTst, { languageId: 'javascriptreact' }, true)
})

it('should return true if the file name matches the test filename pattern - Java', async () => {
const filePaths = ['/path/to/MyClassTest.java', '/path/to/TestMyClass.java', '/path/to/MyClassTests.java']
const language = 'java'
Expand Down
28 changes: 28 additions & 0 deletions packages/amazonq/test/unit/codewhisperer/util/utgUtils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,31 @@ describe('shouldFetchUtgContext', () => {
assert.strictEqual(utgUtils.shouldFetchUtgContext('c', UserGroup.CrossFile), undefined)
})
})

describe('guessSrcFileName', function () {
it('should return undefined if no matching regex', function () {
assert.strictEqual(utgUtils.guessSrcFileName('Foo.java', 'java'), undefined)
assert.strictEqual(utgUtils.guessSrcFileName('folder1/foo.py', 'python'), undefined)
assert.strictEqual(utgUtils.guessSrcFileName('Bar.js', 'javascript'), undefined)
})

it('java', function () {
assert.strictEqual(utgUtils.guessSrcFileName('FooTest.java', 'java'), 'Foo.java')
assert.strictEqual(utgUtils.guessSrcFileName('FooTests.java', 'java'), 'Foo.java')
})

it('python', function () {
assert.strictEqual(utgUtils.guessSrcFileName('test_foo.py', 'python'), 'foo.py')
assert.strictEqual(utgUtils.guessSrcFileName('foo_test.py', 'python'), 'foo.py')
})

it('typescript', function () {
assert.strictEqual(utgUtils.guessSrcFileName('Foo.test.ts', 'typescript'), 'Foo.ts')
assert.strictEqual(utgUtils.guessSrcFileName('Foo.spec.ts', 'typescript'), 'Foo.ts')
})

it('javascript', function () {
assert.strictEqual(utgUtils.guessSrcFileName('Foo.test.js', 'javascript'), 'Foo.js')
assert.strictEqual(utgUtils.guessSrcFileName('Foo.spec.js', 'javascript'), 'Foo.js')
})
})
Original file line number Diff line number Diff line change
Expand Up @@ -7,34 +7,54 @@ import * as vscode from 'vscode'
import path = require('path')
import { normalize } from '../../../shared/utilities/pathUtils'

// TODO: functionExtractionPattern, classExtractionPattern, imposrtStatementRegex are not scalable and we will deprecate and remove the usage in the near future
export interface utgLanguageConfig {
extension: string
testFilenamePattern: RegExp
functionExtractionPattern: RegExp
classExtractionPattern: RegExp
importStatementRegExp: RegExp
testFilenamePattern: RegExp[]
functionExtractionPattern?: RegExp
classExtractionPattern?: RegExp
importStatementRegExp?: RegExp
}

export const utgLanguageConfigs: Record<string, utgLanguageConfig> = {
// Java regexes are not working efficiently for class or function extraction
java: {
extension: '.java',
testFilenamePattern: /(?:Test([^/\\]+)\.java|([^/\\]+)Test\.java|([^/\\]+)Tests\.java)$/,
testFilenamePattern: [/^(.+)Test(\.java)$/, /(.+)Tests(\.java)$/, /Test(.+)(\.java)$/],
functionExtractionPattern:
/(?:(?:public|private|protected)\s+)(?:static\s+)?(?:[\w<>]+\s+)?(\w+)\s*\([^)]*\)\s*(?:(?:throws\s+\w+)?\s*)[{;]/gm, // TODO: Doesn't work for generice <T> T functions.
classExtractionPattern: /(?<=^|\n)\s*public\s+class\s+(\w+)/gm, // TODO: Verify these.
importStatementRegExp: /import .*\.([a-zA-Z0-9]+);/,
},
python: {
extension: '.py',
testFilenamePattern: /(?:test_([^/\\]+)\.py|([^/\\]+)_test\.py)$/,
testFilenamePattern: [/^test_(.+)(\.py)$/, /^(.+)_test(\.py)$/],
functionExtractionPattern: /def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(/g, // Worked fine
classExtractionPattern: /^class\s+(\w+)\s*:/gm,
importStatementRegExp: /from (.*) import.*/,
},
typescript: {
extension: '.ts',
testFilenamePattern: [/^(.+)\.test(\.ts|\.tsx)$/, /^(.+)\.spec(\.ts|\.tsx)$/],
},
javascript: {
extension: '.js',
testFilenamePattern: [/^(.+)\.test(\.js|\.jsx)$/, /^(.+)\.spec(\.js|\.jsx)$/],
},
typescriptreact: {
extension: '.tsx',
testFilenamePattern: [/^(.+)\.test(\.ts|\.tsx)$/, /^(.+)\.spec(\.ts|\.tsx)$/],
},
javascriptreact: {
extension: '.jsx',
testFilenamePattern: [/^(.+)\.test(\.js|\.jsx)$/, /^(.+)\.spec(\.js|\.jsx)$/],
},
}

export function extractFunctions(fileContent: string, regex: RegExp) {
export function extractFunctions(fileContent: string, regex?: RegExp) {
if (!regex) {
return []
}
const functionNames: string[] = []
let match: RegExpExecArray | null

Expand All @@ -44,7 +64,10 @@ export function extractFunctions(fileContent: string, regex: RegExp) {
return functionNames
}

export function extractClasses(fileContent: string, regex: RegExp) {
export function extractClasses(fileContent: string, regex?: RegExp) {
if (!regex) {
return []
}
const classNames: string[] = []
let match: RegExpExecArray | null

Expand Down Expand Up @@ -97,6 +120,11 @@ function isTestFileByName(filePath: string, language: vscode.TextDocument['langu
const testFilenamePattern = languageConfig.testFilenamePattern

const filename = path.basename(filePath)
for (const pattern of testFilenamePattern) {
if (pattern.test(filename)) {
return true
}
}

return testFilenamePattern.test(filename)
return false
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@ import { getOpenFilesInWindow } from '../../../shared/utilities/editorUtilities'
import { getLogger } from '../../../shared/logger/logger'
import { CodeWhispererSupplementalContext, CodeWhispererSupplementalContextItem, UtgStrategy } from '../../models/model'

type UtgSupportedLanguage = keyof typeof utgLanguageConfigs
const utgSupportedLanguages: vscode.TextDocument['languageId'][] = ['java', 'python']

type UtgSupportedLanguage = (typeof utgSupportedLanguages)[number]

function isUtgSupportedLanguage(languageId: vscode.TextDocument['languageId']): languageId is UtgSupportedLanguage {
return languageId in utgLanguageConfigs
return utgSupportedLanguages.includes(languageId)
}

export function shouldFetchUtgContext(
Expand Down Expand Up @@ -184,41 +186,45 @@ async function getRelevantUtgFiles(editor: vscode.TextEditor): Promise<string[]>
})
}

export function guessSrcFileName(
testFileName: string,
languageId: vscode.TextDocument['languageId']
): string | undefined {
const languageConfig = utgLanguageConfigs[languageId]
if (!languageConfig) {
return undefined
}

for (const pattern of languageConfig.testFilenamePattern) {
try {
const match = testFileName.match(pattern)
if (match) {
return match[1] + match[2]
}
} catch (err) {
if (err instanceof Error) {
getLogger().error(
`codewhisperer: error while guessing source file name from file ${testFileName} and pattern ${pattern}: ${err.message}`
)
}
}
}

return undefined
}

async function findSourceFileByName(
editor: vscode.TextEditor,
languageConfig: utgLanguageConfig,
cancellationToken: vscode.CancellationToken
): Promise<string | undefined> {
const testFileName = path.basename(editor.document.fileName)

let basenameSuffix = testFileName
const match = testFileName.match(languageConfig.testFilenamePattern)
if (match) {
basenameSuffix = match[1] || match[2]
}

throwIfCancelled(cancellationToken)

// Assuming the convention of using similar path structure for test and src files.
const dirPath = path.dirname(editor.document.uri.fsPath)
let newPath = ''
const lastIndexTest = dirPath.lastIndexOf('/test/')
const lastIndexTst = dirPath.lastIndexOf('/tst/')
// This is a faster way on the assumption that source file and test file will follow similar path structure.
if (lastIndexTest > 0) {
newPath = dirPath.substring(0, lastIndexTest) + '/src/' + dirPath.substring(lastIndexTest + 5)
} else if (lastIndexTst > 0) {
newPath = dirPath.substring(0, lastIndexTst) + '/src/' + dirPath.substring(lastIndexTst + 4)
}
newPath = path.join(newPath, basenameSuffix + languageConfig.extension)
// TODO: Add metrics here, as we are not able to find the source file by name.
if (await fs.exists(newPath)) {
return newPath
const assumedSrcFileName = guessSrcFileName(testFileName, editor.document.languageId)
if (!assumedSrcFileName) {
return undefined
}

throwIfCancelled(cancellationToken)

const sourceFiles = await vscode.workspace.findFiles(`**/${basenameSuffix}${languageConfig.extension}`)
const sourceFiles = await vscode.workspace.findFiles(`**/${assumedSrcFileName}`)

throwIfCancelled(cancellationToken)

Expand Down

0 comments on commit 1db0f97

Please sign in to comment.