From b45d5ea1ed61e3325e77e709e8873c88d89fbf4c Mon Sep 17 00:00:00 2001 From: Gerrit Birkeland Date: Sat, 14 Dec 2024 07:41:08 -0700 Subject: [PATCH] Fix multiple issues with markdown parsing --- .config/typedoc.json | 1 + CHANGELOG.md | 2 + example/typedoc.json | 1 + site/options/input.md | 2 +- site/typedoc.config.jsonc | 6 +- src/lib/converter/comments/blockLexer.ts | 38 +++++++--- src/lib/converter/comments/lineLexer.ts | 30 ++++++-- src/lib/converter/comments/rawLexer.ts | 45 +++++++----- src/lib/converter/comments/textParser.ts | 1 + src/test/comments.test.ts | 92 ++++++++++++++++++++++++ 10 files changed, 181 insertions(+), 37 deletions(-) diff --git a/.config/typedoc.json b/.config/typedoc.json index bff3e89f3..82510943b 100644 --- a/.config/typedoc.json +++ b/.config/typedoc.json @@ -36,6 +36,7 @@ "categorizeByGroup": false, "categoryOrder": ["Reflections", "Types", "Comments", "*"], "groupOrder": ["Common", "Namespaces", "*"], + "hostedBaseUrl": "https://typedoc.org/example/", "navigationLinks": { "Docs": "https://typedoc.org", "Example": "https://typedoc.org/example/index.html", diff --git a/CHANGELOG.md b/CHANGELOG.md index e749e9c9f..4592091ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ title: Changelog ### Bug Fixes - Fix restoration of groups/categories including documents, #2801. +- Fixed missed relative paths within markdown link references in documents. +- Improved handling of incomplete inline code blocks within markdown. ### Thanks! diff --git a/example/typedoc.json b/example/typedoc.json index 4d87a3a2c..1aaa2d34f 100644 --- a/example/typedoc.json +++ b/example/typedoc.json @@ -11,6 +11,7 @@ "searchGroupBoosts": { "Classes": 1.5 }, + "hostedBaseUrl": "https://typedoc.org/example/", "navigationLinks": { "Docs": "https://typedoc.org", "API": "https://typedoc.org/api/index.html", diff --git a/site/options/input.md b/site/options/input.md index 156e8b157..8f1ec5550 100644 --- a/site/options/input.md +++ b/site/options/input.md @@ -90,7 +90,7 @@ Expects all entry points to be `.json` files generated with a previous run of Ty Options to set be set within each package when entryPointStrategy is set to packages. Unlike most options in TypeDoc, paths within this object are interpreted relative to the package directory. This option has no effect if -[entryPointStrategy](#entrypointstrategy) is not set to `packages. +[entryPointStrategy](#entrypointstrategy) is not set to `packages`. ## alwaysCreateEntryPointModule diff --git a/site/typedoc.config.jsonc b/site/typedoc.config.jsonc index 192364a1a..f684eef29 100644 --- a/site/typedoc.config.jsonc +++ b/site/typedoc.config.jsonc @@ -47,7 +47,9 @@ "notExported": false, }, + "hostedBaseUrl": "https://typedoc.org/", "redirects": { + "guides/": "documents/Overview.html", "guides/overview/": "documents/Overview.html", "guides/installation/": "index.html", "guides/options/": "documents/Options.html", @@ -65,7 +67,7 @@ "guides/themes/": "documents/Themes.html", "guides/plugins/": "documents/Plugins.html", "guides/declaration-references/": "documents/Declaration_References.html", - "guides/development": "documents/Development.html", + "guides/development/": "documents/Development.html", "guides/changelog/": "documents/Changelog.html", // Tags @@ -106,7 +108,7 @@ "tags/readonly/": "documents/Tags._readonly.html", "tags/remarks/": "documents/Tags._remarks.html", "tags/returns/": "documents/Tags._returns.html", - "tags/satisfies/": "documents/Tags._satisfies.html", + "tags/satisfies/": "documents/Tags.TypeScript_Tags.html", "tags/sealed/": "documents/Tags._sealed.html", "tags/see/": "documents/Tags._see.html", "tags/template/": "documents/Tags._template.html", diff --git a/src/lib/converter/comments/blockLexer.ts b/src/lib/converter/comments/blockLexer.ts index 4c7b0a095..63f13d655 100644 --- a/src/lib/converter/comments/blockLexer.ts +++ b/src/lib/converter/comments/blockLexer.ts @@ -147,18 +147,31 @@ function* lexBlockComment2( case "`": { // Markdown's code rules are a royal pain. This could be one of several things. - // 1. Inline code: <1-n ticks> - // 2. Code block: <3 ticks>\n\n<3 ticks>\n + // 1. Inline code: <1-n ticks> + // 2. Code block: <3+ ticks>\n\n<3 ticks>\n // 3. Unmatched tick(s), not code, but part of some text. // We don't quite handle #2 correctly yet. PR welcome! braceStartsType = false; let tickCount = 1; - let lookahead = pos; + + let lookahead = pos - 1; + let atNewline = true; + while (lookahead > 0 && file[lookahead] !== "\n") { + if (/\S/.test(file[lookahead])) { + if (!commentHasStars || file[lookahead] !== "*") { + atNewline = false; + break; + } + } + --lookahead; + } + lookahead = pos; while (lookahead + 1 < end && file[lookahead + 1] === "`") { tickCount++; lookahead++; } + const isCodeBlock = atNewline && tickCount >= 3; let lookaheadStart = pos; const codeText: string[] = []; @@ -169,12 +182,17 @@ function* lexBlockComment2( codeText.push( file.substring(lookaheadStart, lookahead), ); - yield { - kind: TokenSyntaxKind.Code, - text: codeText.join(""), - pos, - }; - pos = lookahead; + const codeTextStr = codeText.join(""); + if (isCodeBlock || !/\n\s*\n/.test(codeTextStr)) { + yield { + kind: TokenSyntaxKind.Code, + text: codeTextStr, + pos, + }; + pos = lookahead; + } else { + yield makeToken(TokenSyntaxKind.Text, tickCount); + } break; } else if (file[lookahead] === "`") { while (lookahead < end && file[lookahead] === "`") { @@ -216,7 +234,7 @@ function* lexBlockComment2( if (lookahead >= end && pos !== lookahead) { if ( - tickCount === 3 && + isCodeBlock && file.substring(pos, end).includes("\n") ) { codeText.push(file.substring(lookaheadStart, end)); diff --git a/src/lib/converter/comments/lineLexer.ts b/src/lib/converter/comments/lineLexer.ts index feb1dfbc8..ec838956f 100644 --- a/src/lib/converter/comments/lineLexer.ts +++ b/src/lib/converter/comments/lineLexer.ts @@ -87,12 +87,23 @@ function* lexLineComments2( // We don't quite handle #2 correctly yet. PR welcome! braceStartsType = false; let tickCount = 1; - let lookahead = pos; + + let lookahead = pos - 1; + let atNewline = true; + while (lookahead > 0 && file[lookahead] !== "\n") { + if (/\S/.test(file[lookahead])) { + atNewline = false; + break; + } + --lookahead; + } + lookahead = pos; while (lookahead + 1 < end && file[lookahead + 1] === "`") { tickCount++; lookahead++; } + const isCodeBlock = atNewline && tickCount >= 3; let lookaheadStart = pos; const codeText: string[] = []; @@ -103,12 +114,17 @@ function* lexLineComments2( codeText.push( file.substring(lookaheadStart, lookahead), ); - yield { - kind: TokenSyntaxKind.Code, - text: codeText.join(""), - pos, - }; - pos = lookahead; + const codeTextStr = codeText.join(""); + if (isCodeBlock || !/\n\s*\n/.test(codeTextStr)) { + yield { + kind: TokenSyntaxKind.Code, + text: codeTextStr, + pos, + }; + pos = lookahead; + } else { + yield makeToken(TokenSyntaxKind.Text, tickCount); + } break; } else if (file[lookahead] === "`") { while (lookahead < end && file[lookahead] === "`") { diff --git a/src/lib/converter/comments/rawLexer.ts b/src/lib/converter/comments/rawLexer.ts index 1332de480..e8bc8d6bc 100644 --- a/src/lib/converter/comments/rawLexer.ts +++ b/src/lib/converter/comments/rawLexer.ts @@ -53,7 +53,6 @@ function* lexCommentString2( end--; } - let lineStart = true; let expectingTag = false; for (;;) { @@ -61,14 +60,9 @@ function* lexCommentString2( return; } - if (lineStart) { - lineStart = false; - } - switch (file[pos]) { case "\n": yield makeToken(TokenSyntaxKind.NewLine, 1); - lineStart = true; expectingTag = false; break; @@ -84,17 +78,28 @@ function* lexCommentString2( case "`": { // Markdown's code rules are a royal pain. This could be one of several things. - // 1. Inline code: <1-n ticks> - // 2. Code block: <3 ticks>\n\n<3 ticks>\n + // 1. Inline code: <1-n ticks> + // 2. Code block: <3+ ticks>\n\n<3 ticks>\n // 3. Unmatched tick(s), not code, but part of some text. // We don't quite handle #2 correctly yet. PR welcome! let tickCount = 1; - let lookahead = pos; + + let lookahead = pos - 1; + let atNewline = true; + while (lookahead > 0 && file[lookahead] !== "\n") { + if (/\S/.test(file[lookahead])) { + atNewline = false; + break; + } + --lookahead; + } + lookahead = pos; while (lookahead + 1 < end && file[lookahead + 1] === "`") { tickCount++; lookahead++; } + const isCodeBlock = atNewline && tickCount >= 3; let lookaheadStart = pos; const codeText: string[] = []; @@ -105,13 +110,19 @@ function* lexCommentString2( codeText.push( file.substring(lookaheadStart, lookahead), ); - yield { - kind: TokenSyntaxKind.Code, - text: codeText.join(""), - pos, - }; - expectingTag = false; - pos = lookahead; + const codeTextStr = codeText.join(""); + if (isCodeBlock || !/\n\s*\n/.test(codeTextStr)) { + yield { + kind: TokenSyntaxKind.Code, + text: codeTextStr, + pos, + }; + expectingTag = false; + pos = lookahead; + } else { + yield makeToken(TokenSyntaxKind.Text, tickCount); + expectingTag = false; + } break; } else if (file[lookahead] === "`") { while (lookahead < end && file[lookahead] === "`") { @@ -136,7 +147,7 @@ function* lexCommentString2( if (lookahead >= end && pos !== lookahead) { if ( - tickCount === 3 && + isCodeBlock && file.substring(pos, end).includes("\n") ) { codeText.push(file.substring(lookaheadStart, end)); diff --git a/src/lib/converter/comments/textParser.ts b/src/lib/converter/comments/textParser.ts index c379e24bd..eee4d535d 100644 --- a/src/lib/converter/comments/textParser.ts +++ b/src/lib/converter/comments/textParser.ts @@ -139,6 +139,7 @@ export function textContent( continue; } + data.atNewLine = token.text[data.pos] === "\n"; ++data.pos; } diff --git a/src/test/comments.test.ts b/src/test/comments.test.ts index 4faa3dcf3..cc48af427 100644 --- a/src/test/comments.test.ts +++ b/src/test/comments.test.ts @@ -50,6 +50,12 @@ describe("Block Comment Lexer", () => { function lex(text: string): Token[] { return Array.from(lexBlockComment(text)); } + function lexNoPos(text: string): Omit[] { + return Array.from(lexBlockComment(text), (t) => { + const { pos: _, ...noPos } = t; + return noPos; + }); + } it("Should handle an empty comment", () => { const tokens = lex("/**/"); @@ -566,6 +572,35 @@ describe("Block Comment Lexer", () => { { kind: TokenSyntaxKind.CloseBrace, text: "}", pos: 12 }, ]); }); + + it("Should detect unmatched code ticks within a line", () => { + const tokens = lexNoPos( + "/** non-code `tick\n\nstill non-code, `code` */", + ); + + equal(tokens, [ + { + kind: TokenSyntaxKind.Text, + text: "non-code `tick", + }, + { + kind: TokenSyntaxKind.NewLine, + text: "\n", + }, + { + kind: TokenSyntaxKind.NewLine, + text: "\n", + }, + { + kind: TokenSyntaxKind.Text, + text: "still non-code, ", + }, + { + kind: TokenSyntaxKind.Code, + text: "`code`", + }, + ]); + }); }); describe("Line Comment Lexer", () => { @@ -581,6 +616,13 @@ describe("Line Comment Lexer", () => { ); } + function lexNoPos(text: string): Omit[] { + return lex(text).map((t) => { + const { pos: _, ...noPos } = t; + return noPos; + }); + } + it("Should handle an empty string", () => { equal(lex("//"), []); @@ -893,12 +935,47 @@ describe("Line Comment Lexer", () => { { kind: TokenSyntaxKind.CloseBrace, text: "}", pos: 11 }, ]); }); + + it("Should detect unmatched code ticks within a line", () => { + const tokens = lexNoPos( + "// non-code `tick\n//\n//still non-code, `code`", + ); + + equal(tokens, [ + { + kind: TokenSyntaxKind.Text, + text: "non-code `tick", + }, + { + kind: TokenSyntaxKind.NewLine, + text: "\n", + }, + { + kind: TokenSyntaxKind.NewLine, + text: "\n", + }, + { + kind: TokenSyntaxKind.Text, + text: "still non-code, ", + }, + { + kind: TokenSyntaxKind.Code, + text: "`code`", + }, + ]); + }); }); describe("Raw Lexer", () => { function lex(text: string): Token[] { return Array.from(lexCommentString(text)); } + function lexNoPos(text: string): Omit[] { + return Array.from(lexCommentString(text), (t) => { + const { pos: _, ...noPos } = t; + return noPos; + }); + } it("Should handle an empty string", () => { equal(lex(""), []); @@ -1057,6 +1134,21 @@ describe("Raw Lexer", () => { { kind: TokenSyntaxKind.CloseBrace, text: "}", pos: 36 }, ]); }); + + it("Should detect unmatched code ticks within a line", () => { + const tokens = lexNoPos("non-code `tick\n\nstill non-code, `code`"); + + equal(tokens, [ + { + kind: TokenSyntaxKind.Text, + text: "non-code `tick\n\nstill non-code, ", + }, + { + kind: TokenSyntaxKind.Code, + text: "`code`", + }, + ]); + }); }); describe("Comment Parser", () => {