Skip to content

Commit

Permalink
Trim trailing whitespace in headers
Browse files Browse the repository at this point in the history
  • Loading branch information
IgnatBeresnev committed Oct 26, 2023
1 parent 97b2db2 commit a062138
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 7 deletions.
41 changes: 39 additions & 2 deletions plugins/base/src/test/kotlin/markdown/ParserTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -1577,12 +1577,11 @@ class ParserTest : KDocTest() {
}

@Test // exists due to #3231
fun `should ignore the first whitespace in header in-between the hash symbol and header text`() {
fun `should ignore the leading whitespace in header in-between the hash symbol and header text`() {
val markdown = """
| # first header
| ## second header
| ### third header
|
""".trimMargin()
val actualDocumentationNode = parseMarkdownToDocNode(markdown).children
val expectedDocumentationNode = listOf(
Expand All @@ -1592,5 +1591,43 @@ class ParserTest : KDocTest() {
)
assertEquals(actualDocumentationNode, expectedDocumentationNode)
}

/**
 * Verifies that whitespace following the header text does not end up in the parsed header:
 * every header is expected to hold a single [Text] child that contains the header text only.
 */
@Test // exists due to #3231
fun `should ignore trailing whitespace in header`() {
    // The trailing whitespace is injected through a template instead of being typed at the end
    // of the lines below, so that it stays visible and cannot be stripped by an editor or a formatter.
    val trailingWhitespace = "   "
    val markdown = """
        | # first header$trailingWhitespace
        | ## second header$trailingWhitespace
        | ### third header$trailingWhitespace
    """.trimMargin()
    val actualDocumentationNode = parseMarkdownToDocNode(markdown).children
    val expectedDocumentationNode = listOf(
        H1(listOf(Text("first header"))),
        H2(listOf(Text("second header"))),
        H3(listOf(Text("third header"))),
    )
    // The expected value goes first, so that a failure message labels both values correctly.
    assertEquals(expectedDocumentationNode, actualDocumentationNode)
}

/**
 * Verifies that only the whitespace surrounding the header content is dropped:
 * the whitespace next to inline elements in the middle of a header
 * (strikethrough and bold text here) has to be preserved in the resulting [Text] nodes.
 */
@Test // exists due to #3231
fun `should ignore leading and trailing whitespace in header, but not whitespace in the middle`() {
    // The trailing whitespace is injected through a template instead of being typed at the end
    // of the lines below, so that it stays visible and cannot be stripped by an editor or a formatter.
    val trailingWhitespace = "   "
    val markdown = """
        | # first header$trailingWhitespace
        | ## second ~~header~~ in a **long** sentence ending with whitespaces$trailingWhitespace
        | ### third header$trailingWhitespace
    """.trimMargin()
    val actualDocumentationNode = parseMarkdownToDocNode(markdown).children
    val expectedDocumentationNode = listOf(
        H1(listOf(Text("first header"))),
        H2(
            listOf(
                Text("second "),
                Strikethrough(listOf(Text("header"))),
                Text(" in a "),
                B(listOf(Text("long"))),
                Text(" sentence ending with whitespaces"),
            )
        ),
        H3(listOf(Text("third header"))),
    )
    // The expected value goes first, so that a failure message labels both values correctly.
    assertEquals(expectedDocumentationNode, actualDocumentationNode)
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -88,19 +88,33 @@ public open class MarkdownParser(
/**
 * Builds the doc tags for the content of an ATX header (`# header`).
 *
 * @param node the header content node; its leading and trailing
 * [MarkdownTokenTypes.WHITE_SPACE] children are skipped before the rest is evaluated
 * @return the tags produced by [DocTagsFromIElementFactory] for the remaining children
 */
private fun headerContentHandler(node: ASTNode): List<DocTag> {
    // ATX_CONTENT contains everything after the `#` symbol, so if there's a space
    // in-between the `#` symbol and the text (like `# header`), it will be present here too.
    // However, we need neither the leading space between the `#` symbol and the text
    // nor the trailing spaces, so we just skip them (otherwise the header text
    // will be parsed as `<whitespace>header` instead of `header`).
    // If there's more space between `#` and text, like `#   header`, it will still be a single
    // WHITE_SPACE element, but it will be wider, so the solution below should still hold.
    // The same applies to trailing spaces.
    val headerChildren = node.children.trimWhitespaceToken().evaluateChildren()

    return DocTagsFromIElementFactory.getInstance(
        MarkdownElementTypes.PARAGRAPH, // PARAGRAPH instead of TEXT to preserve compatibility with prev. versions
        children = headerChildren
    )
}

/**
 * Drops the [MarkdownTokenTypes.WHITE_SPACE] elements located at the very beginning
 * and at the very end of [this] list; whitespace elements in the middle are kept.
 *
 * If the list has no non-whitespace element at all (it is empty or consists
 * of whitespace elements only), the list itself is returned unchanged.
 *
 * @return a sublist of [this] list that spans from the first to the last non-whitespace element
 */
private fun List<ASTNode>.trimWhitespaceToken(): List<ASTNode> {
    val isNotWhitespace: (ASTNode) -> Boolean = { it.type != MarkdownTokenTypes.WHITE_SPACE }

    val start = indexOfFirst(isNotWhitespace)
    // A negative index means there is nothing but whitespace, so there are no bounds to trim to.
    return if (start < 0) {
        this
    } else {
        val endExclusive = indexOfLast(isNotWhitespace) + 1
        subList(start, endExclusive)
    }
}

private fun horizontalRulesHandler() =
DocTagsFromIElementFactory.getInstance(MarkdownTokenTypes.HORIZONTAL_RULE)

Expand Down

0 comments on commit a062138

Please sign in to comment.