Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into project-structure-r…
Browse files Browse the repository at this point in the history
…efactoring
  • Loading branch information
IgnatBeresnev committed Oct 31, 2023
2 parents d4c2058 + 7951aff commit dc02ecc
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,44 @@ public open class MarkdownParser(
).flatMap { it.children }
)

/**
 * Converts a [MarkdownTokenTypes.ATX_CONTENT] node, i.e. the content part of header elements
 * such as [MarkdownElementTypes.ATX_1], [MarkdownElementTypes.ATX_2] and so on, into doc tags.
 *
 * A header line like `# Header text` is expected to be parsed into:
 * - one [MarkdownTokenTypes.ATX_HEADER] with startOffset = 0, endOffset = 1 (only the `#` symbol);
 * - one composite [MarkdownTokenTypes.ATX_CONTENT] with four children: WHITE_SPACE, TEXT, WHITE_SPACE, TEXT.
 *
 * @param node the ATX_CONTENT node whose children make up the header text
 * @return the tags created by [DocTagsFromIElementFactory] for a paragraph that wraps the evaluated children
 */
private fun headerContentHandler(node: ASTNode): List<DocTag> {
    // Everything that follows the `#` symbol ends up in ATX_CONTENT, including the whitespace that
    // separates `#` from the text and any whitespace at the end of the line. Neither belongs to the
    // header text (keeping it would yield `<whitespace>header` instead of `header`), so the outer
    // whitespace tokens are dropped before evaluation. A run of several spaces is expected to be
    // represented by one wider WHITE_SPACE element, so dropping the outer tokens covers that case too.
    val headerChildren = node.children
        .trimWhitespaceToken()
        .evaluateChildren()

    // PARAGRAPH instead of TEXT to preserve compatibility with prev. versions
    return DocTagsFromIElementFactory.getInstance(MarkdownElementTypes.PARAGRAPH, children = headerChildren)
}

/**
 * Strips leading and trailing [MarkdownTokenTypes.WHITE_SPACE] elements from [this] list,
 * leaving whitespace elements located between other elements untouched.
 *
 * If the list has no non-whitespace element at all (which includes an empty list),
 * the list itself is returned unchanged.
 *
 * @return a sublist of [this] list that spans from the first to the last non-whitespace element
 */
private fun List<ASTNode>.trimWhitespaceToken(): List<ASTNode> {
    val start = indexOfFirst { it.type != MarkdownTokenTypes.WHITE_SPACE }
    // Nothing but whitespace (or nothing at all): there is no range to cut out.
    if (start < 0) return this

    // A non-whitespace element exists, so the backwards search is guaranteed to find one as well.
    val endExclusive = indexOfLast { it.type != MarkdownTokenTypes.WHITE_SPACE } + 1
    return subList(start, endExclusive)
}

/**
 * Handler for [MarkdownTokenTypes.HORIZONTAL_RULE].
 *
 * @return the tags created by [DocTagsFromIElementFactory] for the horizontal rule token type
 */
private fun horizontalRulesHandler(): List<DocTag> {
    return DocTagsFromIElementFactory.getInstance(MarkdownTokenTypes.HORIZONTAL_RULE)
}

Expand Down Expand Up @@ -365,6 +403,7 @@ public open class MarkdownParser(
MarkdownElementTypes.ATX_5,
MarkdownElementTypes.ATX_6,
-> headersHandler(node)
MarkdownTokenTypes.ATX_CONTENT -> headerContentHandler(node)
MarkdownTokenTypes.HORIZONTAL_RULE -> horizontalRulesHandler()
MarkdownElementTypes.STRONG -> strongHandler(node)
MarkdownElementTypes.EMPH -> emphasisHandler(node)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1573,7 +1573,60 @@ class ParserTest : KDocTest() {
P(listOf(Text(" sdsdsds sdd"))),
P(listOf(Text(" eweww ")))
)
print(expectedDocumentationNode)
assertEquals(actualDocumentationNode, expectedDocumentationNode)
}

/**
 * Regression test for #3231: the whitespace that separates the `#` symbols from the header
 * text must not become part of the parsed header text.
 */
@Test // exists due to #3231
fun `should ignore the leading whitespace in header in-between the hash symbol and header text`() {
    val markdown = """
        | # first header
        | ## second header
        | ### third header
    """.trimMargin()

    val expectedDocumentationNode = listOf(
        H1(listOf(Text("first header"))),
        H2(listOf(Text("second header"))),
        H3(listOf(Text("third header"))),
    )
    val actualDocumentationNode = parseMarkdownToDocNode(markdown).children

    // Expected value goes first so that a failure message labels both sides correctly.
    assertEquals(expectedDocumentationNode, actualDocumentationNode)
}

/**
 * Regression test for #3231: whitespace at the end of a header line must not become part
 * of the parsed header text.
 */
@Test // exists due to #3231
fun `should ignore trailing whitespace in header`() {
    // Interpolated instead of typed into the raw string so that the trailing whitespace is
    // visible to the reader and cannot be silently removed by an editor or formatter.
    val trailingWhitespace = "   "
    val markdown = """
        | # first header$trailingWhitespace
        | ## second header$trailingWhitespace
        | ### third header$trailingWhitespace
    """.trimMargin()

    val expectedDocumentationNode = listOf(
        H1(listOf(Text("first header"))),
        H2(listOf(Text("second header"))),
        H3(listOf(Text("third header"))),
    )
    val actualDocumentationNode = parseMarkdownToDocNode(markdown).children

    // Expected value goes first so that a failure message labels both sides correctly.
    assertEquals(expectedDocumentationNode, actualDocumentationNode)
}

/**
 * Regression test for #3231: only the whitespace around the header content is dropped;
 * whitespace between inline elements inside the header must be kept.
 */
@Test // exists due to #3231
fun `should ignore leading and trailing whitespace in header, but not whitespace in the middle`() {
    // Interpolated instead of typed into the raw string so that the trailing whitespace is
    // visible to the reader and cannot be silently removed by an editor or formatter.
    val trailingWhitespace = "   "
    val markdown = """
        | # first header
        | ## second ~~header~~ in a **long** sentence ending with whitespaces$trailingWhitespace
        | ### third header
    """.trimMargin()

    val expectedDocumentationNode = listOf(
        H1(listOf(Text("first header"))),
        H2(
            listOf(
                Text("second "),
                Strikethrough(listOf(Text("header"))),
                Text(" in a "),
                B(listOf(Text("long"))),
                Text(" sentence ending with whitespaces"),
            )
        ),
        H3(listOf(Text("third header"))),
    )
    val actualDocumentationNode = parseMarkdownToDocNode(markdown).children

    // Expected value goes first so that a failure message labels both sides correctly.
    assertEquals(expectedDocumentationNode, actualDocumentationNode)
}
}
Expand Down
2 changes: 1 addition & 1 deletion gradle/libs.versions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ korlibs-template = "4.0.10"
kotlinx-html = "0.9.1"

## Markdown
jetbrains-markdown = "0.3.1"
jetbrains-markdown = "0.5.2"

## JSON
jackson = "2.12.7" # jackson 2.13.X does not support kotlin language version 1.4, check before updating
Expand Down

0 comments on commit dc02ecc

Please sign in to comment.