Trim trailing whitespace in headers

Kotlin · Oct 26, 2023 · a062138 · a062138
1 parent 97b2db2
commit a062138
Show file tree

Hide file tree

Showing 2 changed files with 58 additions and 7 deletions.
diff --git a/plugins/base/src/test/kotlin/markdown/ParserTest.kt b/plugins/base/src/test/kotlin/markdown/ParserTest.kt
@@ -1577,12 +1577,11 @@ class ParserTest : KDocTest() {
     }
 
     @Test // exists due to #3231
-    fun `should ignore the first whitespace in header in-between the hash symbol and header text`() {
+    fun `should ignore the leading whitespace in header in-between the hash symbol and header text`() {
         val markdown = """
         | #   first header
         | ##     second header
         | ###                third header
-        | 
         """.trimMargin()
         val actualDocumentationNode = parseMarkdownToDocNode(markdown).children
         val expectedDocumentationNode = listOf(
@@ -1592,5 +1591,43 @@ class ParserTest : KDocTest() {
         )
         assertEquals(actualDocumentationNode, expectedDocumentationNode)
     }
+
+    @Test // exists due to #3231; the trailing spaces inside the raw string below are the test input - keep them
+    fun `should ignore trailing whitespace in header`() {
+        val markdown = """
+        | # first header     
+        | ## second header        
+        | ### third header                                          
+        """.trimMargin()
+        val actualDocumentationNode = parseMarkdownToDocNode(markdown).children
+        val expectedDocumentationNode = listOf( // header texts without the trailing spaces typed above
+            H1(listOf(Text("first header"))),
+            H2(listOf(Text("second header"))),
+            H3(listOf(Text("third header"))),
+        )
+        assertEquals(expectedDocumentationNode, actualDocumentationNode) // expected goes first so failures are labelled correctly
+    }
+
+    @Test // exists due to #3231; the spaces inside the raw string below are the test input - keep them
+    fun `should ignore leading and trailing whitespace in header, but not whitespace in the middle`() {
+        val markdown = """
+        | #          first header     
+        | ##     second ~~header~~   in a **long** sentence ending     with whitespaces   
+        | ###                third      header        
+        """.trimMargin()
+        val actualDocumentationNode = parseMarkdownToDocNode(markdown).children
+        val expectedDocumentationNode = listOf( // only the outer whitespace of each header is expected to vanish
+            H1(listOf(Text("first header"))),
+            H2(listOf(
+                Text("second "),
+                Strikethrough(listOf(Text("header"))),
+                Text("   in a "), // whitespace between inline elements is kept as typed
+                B(listOf(Text("long"))),
+                Text(" sentence ending     with whitespaces")
+            )),
+            H3(listOf(Text("third      header"))),
+        )
+        assertEquals(expectedDocumentationNode, actualDocumentationNode) // expected goes first so failures are labelled correctly
+    }
 }
 
diff --git a/...is-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownParser.kt b/...is-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownParser.kt
@@ -88,19 +88,33 @@ public open class MarkdownParser(
     private fun headerContentHandler(node: ASTNode): List<DocTag> {
         // ATX_CONTENT contains everything after the `#` symbol, so if there's a space
         // in-between the `#` symbol and the text (like `# header`), it will be present here too.
-        // However, we don't need the first space between the `#` symbol and the text,
+        // However, we don't need the leading space between the `#` symbol and the text, nor do we need trailing spaces,
         // so we just skip it (otherwise the header text will be parsed as `<whitespace>header` instead of `header`).
         // If there's more space between `#` and text, like `#     header`, it will still be a single WHITE_SPACE
-        // element, but it will be wider, so the solution below should still hold.
-        val textStartsWithWhitespace = node.children.firstOrNull()?.type == MarkdownTokenTypes.WHITE_SPACE
-        val children = if (textStartsWithWhitespace) node.children.subList(1, node.children.size) else node.children
+        // element, but it will be wider, so the solution below should still hold. The same applies to trailing spaces.
+        val trimmedChildren = node.children.trimWhitespaceToken() // only outer WHITE_SPACE tokens go; inner ones stay
 
+        val children = trimmedChildren.evaluateChildren()
         return DocTagsFromIElementFactory.getInstance(
             MarkdownElementTypes.PARAGRAPH, // PARAGRAPH instead of TEXT to preserve compatibility with prev. versions
-            children = children.evaluateChildren()
+            children = children
         )
     }
 
+    /**
+     * @return a sublist of [this] list that does not contain leading and trailing
+     *         [MarkdownTokenTypes.WHITE_SPACE] elements; an empty list if [this] holds nothing but such elements
+     */
+    private fun List<ASTNode>.trimWhitespaceToken(): List<ASTNode> {
+        val firstNonWhitespaceIndex = this.indexOfFirst { it.type != MarkdownTokenTypes.WHITE_SPACE }
+        if (firstNonWhitespaceIndex == -1) {
+            return emptyList() // no non-whitespace element at all (or no elements): everything is trimmed away
+        }
+        val lastNonWhitespaceIndex = this.indexOfLast { it.type != MarkdownTokenTypes.WHITE_SPACE }
+
+        return this.subList(firstNonWhitespaceIndex, lastNonWhitespaceIndex + 1)
+    }
+
     private fun horizontalRulesHandler() =
         DocTagsFromIElementFactory.getInstance(MarkdownTokenTypes.HORIZONTAL_RULE)