Skip to content

Commit

Permalink
improve baseline matching by using a diff algorithm and comparing the…
Browse files Browse the repository at this point in the history
… line count
  • Loading branch information
DetachHead committed Sep 29, 2024
1 parent 754a147 commit 1c0e960
Show file tree
Hide file tree
Showing 7 changed files with 146 additions and 82 deletions.
29 changes: 29 additions & 0 deletions packages/pyright-internal/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions packages/pyright-internal/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
"chalk": "^4.1.2",
"chokidar": "^3.6.0",
"command-line-args": "^5.2.1",
"diff": "^7.0.0",
"jsonc-parser": "^3.2.1",
"leven": "3.1.0",
"pyright-to-gitlab-ci": "^0.1.3",
Expand All @@ -40,6 +41,7 @@
},
"devDependencies": {
"@types/command-line-args": "^5.2.3",
"@types/diff": "^5.2.2",
"@types/fs-extra": "^11.0.4",
"@types/jest": "^29.5.12",
"@types/lodash": "^4.14.202",
Expand Down
4 changes: 2 additions & 2 deletions packages/pyright-internal/src/analyzer/sourceFile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ import { SourceMapper } from './sourceMapper';
import { SymbolTable } from './symbol';
import { TestWalker } from './testWalker';
import { TypeEvaluator } from './typeEvaluatorTypes';
import { filterOutBaselinedDiagnostics } from '../baseline';
import { sortDiagnosticsAndMatchBaseline } from '../baseline';

// Limit the number of import cycles tracked per source file.
const _maxImportCyclesPerFile = 4;
Expand Down Expand Up @@ -1247,7 +1247,7 @@ export class SourceFile {
// Now add in the "unnecessary type ignore" diagnostics.
diagList = diagList.concat(unnecessaryTypeIgnoreDiags);

filterOutBaselinedDiagnostics(this.fileSystem, configOptions.projectRoot, this._uri, diagList);
diagList = sortDiagnosticsAndMatchBaseline(this.fileSystem, configOptions.projectRoot, this._uri, diagList);

// If we're not returning any diagnostics, filter out all of
// the errors and warnings, leaving only the unreachable code
Expand Down
23 changes: 1 addition & 22 deletions packages/pyright-internal/src/backgroundAnalysisBase.ts
Original file line number Diff line number Diff line change
Expand Up @@ -789,28 +789,7 @@ function convertAnalysisResults(result: AnalysisResults): AnalysisResults {
}

function convertDiagnostics(diagnostics: Diagnostic[]) {
// Elements are typed as "any" since data crossing the process
// boundary loses type info.
return diagnostics.map<Diagnostic>((d: any) => {
const diag = new Diagnostic(d.category, d.message, d.range, d.priority, d.baselineStatus);
if (d._actions) {
for (const action of d._actions) {
diag.addAction(action);
}
}

if (d._rule) {
diag.setRule(d._rule);
}

if (d._relatedInfo) {
for (const info of d._relatedInfo) {
diag.addRelatedInfo(info.message, info.uri, info.range);
}
}

return diag;
});
return diagnostics.map((d) => d.copy());
}

export type BackgroundRequestKind =
Expand Down
114 changes: 66 additions & 48 deletions packages/pyright-internal/src/baseline.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,27 @@
import { DiagnosticRule } from './common/diagnosticRules';
import { FileDiagnostics } from './common/diagnosticSink';
import { Uri } from './common/uri/uri';
import { convertLevelToCategory, Diagnostic, DiagnosticCategory } from './common/diagnostic';
import { compareDiagnostics, convertLevelToCategory, Diagnostic, DiagnosticCategory } from './common/diagnostic';
import { extraOptionDiagnosticRules } from './common/configOptions';
import { fileExists } from './common/uri/uriUtils';
import { FileSystem, ReadOnlyFileSystem } from './common/fileSystem';
import { pluralize } from './common/stringUtils';
import { diffArrays } from 'diff';
import { assert } from './common/debug';
import { Range } from './common/textRange';

export interface BaselinedDiagnostic {
code: DiagnosticRule | undefined;
range: Range;
range: {
startColumn: number;
endColumn: number;
/**
* only in baseline files generated with version 1.18.1 or above. we don't store line numbers
* to reduce the diff when the baseline is regenerated and to prevent baselined errors from
* incorredtly resurfacing when lines of code are added or removed.
*/
lineCount?: number;
};
}

interface BaselineFile {
Expand All @@ -19,6 +30,8 @@ interface BaselineFile {
};
}

const lineCount = (range: Range) => range.end.line - range.start.line + 1;

export const baselineFilePath = (rootDir: Uri) => rootDir.combinePaths('.basedpyright/baseline.json');

const diagnosticsToBaseline = (rootDir: Uri, filesWithDiagnostics: readonly FileDiagnostics[]): BaselineFile => {
Expand All @@ -44,7 +57,11 @@ const diagnosticsToBaseline = (rootDir: Uri, filesWithDiagnostics: readonly File
baselineData.files[filePath].push(
...errorDiagnostics.map((diagnostic) => ({
code: diagnostic.getRule() as DiagnosticRule | undefined,
range: diagnostic.range,
range: {
startColumn: diagnostic.range.start.character,
endColumn: diagnostic.range.end.character,
lineCount: lineCount(diagnostic.range),
},
}))
);
}
Expand Down Expand Up @@ -131,60 +148,61 @@ export const getBaselinedErrorsForFile = (fs: ReadOnlyFileSystem, rootDir: Uri,
return result ?? [];
};

export const filterOutBaselinedDiagnostics = (
export const sortDiagnosticsAndMatchBaseline = (
fs: ReadOnlyFileSystem,
rootDir: Uri,
file: Uri,
diagnostics: Diagnostic[]
) => {
let baselinedErrorsForFile = getBaselinedErrorsForFile(fs, rootDir, file);
for (const index in diagnostics) {
const diagnostic = diagnostics[index];
const diagnosticRule = diagnostic.getRule() as DiagnosticRule | undefined;
const matchedError = baselinedErrorsForFile
// first find all matching diagnostics with the same diagnostic code, column numbers
// and number of lines
.filter(
(baselinedError) =>
baselinedError.code === diagnosticRule &&
baselinedError.range.start.character === diagnostic.range.start.character &&
baselinedError.range.end.character === diagnostic.range.end.character &&
baselinedError.range.end.line - baselinedError.range.start.line ===
diagnostic.range.end.line - diagnostic.range.start.line
)
// then find the one with the closest line number
.sort((prev, next) =>
Math.abs(
diagnostic.range.start.line -
prev.range.start.line -
(diagnostic.range.start.line - next.range.start.line)
)
)[0];
if (matchedError) {
baselinedErrorsForFile = baselinedErrorsForFile.filter((baselinedError) => baselinedError !== matchedError);
// if the baselined error can be reported as a hint (eg. unreachable/deprecated), keep it and change its diagnostic level to that instead
): Diagnostic[] => {
diagnostics.sort(compareDiagnostics);
const diff = diffArrays(getBaselinedErrorsForFile(fs, rootDir, file), diagnostics, {
comparator: (baselinedDiagnostic, diagnostic) =>
baselinedDiagnostic.code === diagnostic.getRule() &&
baselinedDiagnostic.range.startColumn === diagnostic.range.start.character &&
baselinedDiagnostic.range.endColumn === diagnostic.range.end.character &&
//for backwards compatibility with old baseline files, only check this if it's present
(baselinedDiagnostic.range.lineCount === undefined ||
baselinedDiagnostic.range.lineCount === lineCount(diagnostic.range)),
});
const result = [];
for (const change of diff) {
if (change.removed) {
continue;
}
if (change.added) {
assert(change.value[0] instanceof Diagnostic, "change object wasn't a Diagnostic");
result.push(...(change.value as Diagnostic[]));
} else {
// if not added and not removed
// if the baselined error can be reported as a hint (eg. unreachable/deprecated), keep it and change its diagnostic
// level to that instead
// TODO: should we only baseline errors and not warnings/notes?
if (diagnosticRule) {
for (const { name, get } of extraOptionDiagnosticRules) {
if (get().includes(diagnosticRule)) {
const newDiagnostic = new Diagnostic(
convertLevelToCategory(name),
diagnostic.message,
diagnostic.range,
diagnostic.priority,
'baselined with hint'
);
const rule = diagnostic.getRule();
if (rule) {
newDiagnostic.setRule(rule);
for (const diagnostic of change.value) {
assert(
diagnostic instanceof Diagnostic,
'diff thingy returned the old value instead of the new one???'
);
let newDiagnostic;
const diagnosticRule = diagnostic.getRule() as DiagnosticRule | undefined;
if (diagnosticRule) {
for (const { name, get } of extraOptionDiagnosticRules) {
if (get().includes(diagnosticRule)) {
newDiagnostic = diagnostic.copy({
category: convertLevelToCategory(name),
baselineStatus: 'baselined with hint',
});
newDiagnostic.setRule(diagnosticRule);
// none of these rules should have multiple extra diagnostic levels so we break after the first match
break;
}
diagnostics[index] = newDiagnostic;
// none of these rules should have multiple extra diagnostic levels so we break after the first match
break;
}
}
if (!newDiagnostic) {
newDiagnostic = diagnostic.copy({ baselineStatus: 'baselined' });
}
result.push(newDiagnostic);
}
diagnostic.baselineStatus = 'baselined';
}
}
return result;
};
40 changes: 39 additions & 1 deletion packages/pyright-internal/src/common/diagnostic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,47 @@ export class Diagnostic {
readonly message: string,
readonly range: Range,
readonly priority: TaskListPriority = TaskListPriority.Normal,
public baselineStatus: BaselineStatus | undefined = undefined
readonly baselineStatus: BaselineStatus | undefined = undefined
) {}

/**
* creates a copy of the diagnostic, optionally with different values for the readonly properties
*/
copy = (
args: {
category?: DiagnosticCategory;
message?: string;
range?: Range;
priority?: TaskListPriority;
baselineStatus?: BaselineStatus | undefined;
} = {}
) => {
const diag = new Diagnostic(
args.category ?? this.category,
args.message ?? this.message,
args.range ?? this.range,
args.priority ?? this.priority,
args.baselineStatus ?? this.baselineStatus
);
if (this._actions) {
for (const action of this._actions) {
diag.addAction(action);
}
}

if (this._rule) {
diag.setRule(this._rule);
}

if (this._relatedInfo) {
for (const info of this._relatedInfo) {
diag.addRelatedInfo(info.message, info.uri, info.range);
}
}

return diag;
};

toJsonObj() {
return {
category: this.category,
Expand Down
16 changes: 7 additions & 9 deletions packages/pyright-internal/src/pyright.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import { CommandLineOptions as PyrightCommandLineOptions } from './common/comman
import { ConsoleInterface, LogLevel, StandardConsole, StderrConsole } from './common/console';
import { fail } from './common/debug';
import { createDeferred } from './common/deferred';
import { Diagnostic, DiagnosticCategory, compareDiagnostics } from './common/diagnostic';
import { Diagnostic, DiagnosticCategory } from './common/diagnostic';
import { FileDiagnostics } from './common/diagnosticSink';
import { FullAccessHost } from './common/fullAccessHost';
import { combinePaths, normalizePath } from './common/pathUtils';
Expand Down Expand Up @@ -1199,7 +1199,7 @@ function reportDiagnosticsAsJsonWithoutLogging(
};

fileDiagnostics.forEach((fileDiag) => {
fileDiag.diagnostics.sort(compareDiagnostics).forEach((diag) => {
fileDiag.diagnostics.forEach((diag) => {
if (
diag.category === DiagnosticCategory.Error ||
diag.category === DiagnosticCategory.Warning ||
Expand Down Expand Up @@ -1308,13 +1308,11 @@ function reportDiagnosticsAsText(

fileDiagnostics.forEach((fileDiagnostics) => {
// Don't report unused code or deprecated diagnostics.
const fileErrorsAndWarnings = fileDiagnostics.diagnostics
.filter(
(diag) =>
!isHintDiagnostic(diag) &&
isDiagnosticIncluded(convertDiagnosticCategoryToSeverity(diag.category), minSeverityLevel)
)
.sort(compareDiagnostics);
const fileErrorsAndWarnings = fileDiagnostics.diagnostics.filter(
(diag) =>
!isHintDiagnostic(diag) &&
isDiagnosticIncluded(convertDiagnosticCategoryToSeverity(diag.category), minSeverityLevel)
);

if (fileErrorsAndWarnings.length > 0) {
console.info(`${fileDiagnostics.fileUri.toUserVisibleString()}`);
Expand Down

0 comments on commit 1c0e960

Please sign in to comment.