From a062138ce90d180f2f51d4c6b4ce3ae17c8bd12b Mon Sep 17 00:00:00 2001
From: IgnatBeresnev
Date: Thu, 26 Oct 2023 21:28:25 +0200
Subject: [PATCH] Trim trailing whitespace in headers

The header content handler used to drop only the first WHITE_SPACE token of
ATX_CONTENT. It now drops every leading and trailing WHITE_SPACE token, while
whitespace between other tokens is kept. Parser tests cover trailing
whitespace and whitespace in the middle of the header text.

---
 .../src/test/kotlin/markdown/ParserTest.kt    | 41 ++++++++++++++++++-
 .../analysis/markdown/jb/MarkdownParser.kt    | 24 ++++++++---
 2 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/plugins/base/src/test/kotlin/markdown/ParserTest.kt b/plugins/base/src/test/kotlin/markdown/ParserTest.kt
index abbed881a0..bcca27c43e 100644
--- a/plugins/base/src/test/kotlin/markdown/ParserTest.kt
+++ b/plugins/base/src/test/kotlin/markdown/ParserTest.kt
@@ -1577,12 +1577,11 @@ class ParserTest : KDocTest() {
     }

     @Test // exists due to #3231
-    fun `should ignore the first whitespace in header in-between the hash symbol and header text`() {
+    fun `should ignore the leading whitespace in header in-between the hash symbol and header text`() {
         val markdown = """
             | # first header
             | ## second header
             | ### third header
-            |
         """.trimMargin()
         val actualDocumentationNode = parseMarkdownToDocNode(markdown).children
         val expectedDocumentationNode = listOf(
@@ -1592,5 +1591,43 @@ class ParserTest : KDocTest() {
         )
         assertEquals(actualDocumentationNode, expectedDocumentationNode)
     }
+
+    @Test // exists due to #3231
+    fun `should ignore trailing whitespace in header`() {
+        val markdown = """
+            | # first header${"   "}
+            | ## second header${"   "}
+            | ### third header${"   "}
+        """.trimMargin() // trailing spaces are spelled as templates so that editors cannot strip them
+        val actualDocumentationNode = parseMarkdownToDocNode(markdown).children
+        val expectedDocumentationNode = listOf(
+            H1(listOf(Text("first header"))),
+            H2(listOf(Text("second header"))),
+            H3(listOf(Text("third header"))),
+        )
+        assertEquals(expectedDocumentationNode, actualDocumentationNode)
+    }
+
+    @Test // exists due to #3231
+    fun `should ignore leading and trailing whitespace in header, but not whitespace in the middle`() {
+        val markdown = """
+            | # first header${"   "}
+            | ## second ~~header~~ in a **long** sentence ending with whitespaces${"   "}
+            | ### third header${"   "}
+        """.trimMargin() // trailing spaces are spelled as templates so that editors cannot strip them
+        val actualDocumentationNode = parseMarkdownToDocNode(markdown).children
+        val expectedDocumentationNode = listOf(
+            H1(listOf(Text("first header"))),
+            H2(listOf(
+                Text("second "),
+                Strikethrough(listOf(Text("header"))),
+                Text(" in a "),
+                B(listOf(Text("long"))),
+                Text(" sentence ending with whitespaces")
+            )),
+            H3(listOf(Text("third header"))),
+        )
+        assertEquals(expectedDocumentationNode, actualDocumentationNode)
+    }
 }

diff --git a/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownParser.kt b/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownParser.kt
index 675ca8c391..130c6def31 100644
--- a/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownParser.kt
+++ b/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownParser.kt
@@ -88,19 +88,33 @@ public open class MarkdownParser(
     private fun headerContentHandler(node: ASTNode): List<DocTag> {
         // ATX_CONTENT contains everything after the `#` symbol, so if there's a space
         // in-between the `#` symbol and the text (like `# header`), it will be present here too.
-        // However, we don't need the first space between the `#` symbol and the text,
+        // However, we don't need the leading space between the `#` symbol and the text, nor do we need trailing spaces,
         // so we just skip it (otherwise the header text will be parsed as ` header` instead of `header`).
         // If there's more space between `#` and text, like `#   header`, it will still be a single WHITE_SPACE
-        // element, but it will be wider, so the solution below should still hold.
-        val textStartsWithWhitespace = node.children.firstOrNull()?.type == MarkdownTokenTypes.WHITE_SPACE
-        val children = if (textStartsWithWhitespace) node.children.subList(1, node.children.size) else node.children
+        // element, but it will be wider, so the solution below should still hold. The same applies to trailing spaces.
+        val trimmedChildren = node.children.trimWhitespaceToken()
+        val children = trimmedChildren.evaluateChildren()

         return DocTagsFromIElementFactory.getInstance(
             MarkdownElementTypes.PARAGRAPH, // PARAGRAPH instead of TEXT to preserve compatibility with prev. versions
-            children = children.evaluateChildren()
+            children = children
         )
     }

+    /**
+     * @return a sublist of [this] list without its leading and trailing [MarkdownTokenTypes.WHITE_SPACE]
+     *         elements (whitespace between other elements is kept); empty if there is nothing but whitespace
+     */
+    private fun List<ASTNode>.trimWhitespaceToken(): List<ASTNode> {
+        val firstNonWhitespaceIndex = this.indexOfFirst { it.type != MarkdownTokenTypes.WHITE_SPACE }
+        if (firstNonWhitespaceIndex == -1) {
+            return emptyList() // only whitespace (or no elements at all): there is no content to keep
+        }
+        val lastNonWhitespaceIndex = this.indexOfLast { it.type != MarkdownTokenTypes.WHITE_SPACE }
+
+        return this.subList(firstNonWhitespaceIndex, lastNonWhitespaceIndex + 1)
+    }
+
     private fun horizontalRulesHandler() =
         DocTagsFromIElementFactory.getInstance(MarkdownTokenTypes.HORIZONTAL_RULE)
