diff --git a/h2m.go b/h2m.go index 614ef13f0d..4c97fe5246 100644 --- a/h2m.go +++ b/h2m.go @@ -578,7 +578,7 @@ func (lute *Lute) genASTByDOM(n *html.Node, tree *parse.Tree) { case atom.A: node.Type = ast.NodeLink text := strings.TrimSpace(util.DomText(n)) - if "" == text && nil != n.Parent && lute.parentIs(n, atom.H1, atom.H2, atom.H3, atom.H4, atom.H5, atom.H6, atom.Div, atom.Section) && nil == util.DomChildByType(n, atom.Img) { + if "" == text && nil != n.Parent && lute.parentIs(n, atom.H1, atom.H2, atom.H3, atom.H4, atom.H5, atom.H6, atom.Div, atom.Section) && nil == util.DomChildrenByType(n, atom.Img) { // 丢弃标题中文本为空的链接,这样的链接是没有锚文本的锚点 // https://github.com/Vanessa219/vditor/issues/359 // https://github.com/siyuan-note/siyuan/issues/11445 @@ -727,7 +727,7 @@ func (lute *Lute) genASTByDOM(n *html.Node, tree *parse.Tree) { table := n.Parent.Parent node.Type = ast.NodeTableRow - if nil == tree.Context.Tip.ChildByType(ast.NodeTableHead) && nil == util.DomChildByType(table, atom.Thead) { + if nil == tree.Context.Tip.ChildByType(ast.NodeTableHead) && 1 > len(util.DomChildrenByType(table, atom.Thead)) { // 补全 thread 节点 thead := &ast.Node{Type: ast.NodeTableHead} tree.Context.Tip.AppendChild(thead) @@ -804,7 +804,9 @@ func (lute *Lute) genASTByDOM(n *html.Node, tree *parse.Tree) { } } if strings.Contains(strings.ToLower(strings.TrimSpace(util.DomAttrValue(n, "class"))), "mathjax") { - if script := util.DomChildByType(n, atom.Script); nil != script { + scripts := util.DomChildrenByType(n, atom.Script) + if 0 < len(scripts) { + script := scripts[0] if tex := util.DomText(script.FirstChild); "" != tex { appendInlineMath(tree, tex) return @@ -848,16 +850,20 @@ func (lute *Lute) genASTByDOM(n *html.Node, tree *parse.Tree) { return } case atom.Figcaption: - node.Type = ast.NodeParagraph - node.AppendChild(&ast.Node{Type: ast.NodeHardBreak}) - node.AppendChild(&ast.Node{Type: ast.NodeText, Tokens: util.StrToBytes(util.DomText(n))}) - tree.Context.Tip.AppendChild(node) - return + if tree.Context.Tip.IsContainerBlock() { + node.Type = ast.NodeParagraph + node.AppendChild(&ast.Node{Type: ast.NodeHardBreak}) + node.AppendChild(&ast.Node{Type: ast.NodeText, Tokens: util.StrToBytes(util.DomText(n))}) + tree.Context.Tip.AppendChild(node) + return + } case atom.Figure: - node.Type = ast.NodeParagraph - tree.Context.Tip.AppendChild(node) - tree.Context.Tip = node - defer tree.Context.ParentTip() + if tree.Context.Tip.IsContainerBlock() { + node.Type = ast.NodeParagraph + tree.Context.Tip.AppendChild(node) + tree.Context.Tip = node + defer tree.Context.ParentTip() + } default: } diff --git a/test/h2m_test.go b/test/h2m_test.go index 8315890ff0..64f31ae4c2 100644 --- a/test/h2m_test.go +++ b/test/h2m_test.go @@ -18,6 +18,7 @@ import ( var html2MdTests = []parseTest{ + {"119", "
\"\"
Source: Nvidia, SemiAnalysis
", "[![](https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7e8ccaf-d891-4b77-b044-3ed0ff28b187_1775x881.png)](https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7e8ccaf-d891-4b77-b044-3ed0ff28b187_1775x881.png)Source: Nvidia, SemiAnalysis\n"}, {"118", "

foo

", "**foo**\n"}, {"117", "
1
2
3
4
5
6
7
8
9
import os
 
res = os.popen(\"find ./ -name *.lua\").readlines()
 
for i in range(0, len(res)) :
    path = res[i].strip(\"\\n\")
    cmd = \"java -jar /home/winmt/unluac_miwifi/build/unluac.jar \" + path + \" > \" + path + \".dis\"
    print(cmd)
    os.system(cmd)
", "```python\nimport os\n \nres = os.popen(\"find ./ -name *.lua\").readlines()\n \nfor i in range(0, len(res)) :\n path = res[i].strip(\"\\n\")\n cmd = \"java -jar /home/winmt/unluac_miwifi/build/unluac.jar \" + path + \" > \" + path + \".dis\"\n print(cmd)\n os.system(cmd)\n```\n"}, {"116", "

【Q22为什么22中助手说自己依然会存在于平行世界?         

", "\u200b**【Q22**\u200b**】为什么**\u200b**22话**\u200b**中助手说自己依然会存在于平行世界? \u200b**\n"}, diff --git a/util/html.go b/util/html.go index 2fb8e9b1cf..7f5436a937 100644 --- a/util/html.go +++ b/util/html.go @@ -72,15 +72,6 @@ func GetTextMarkFileAnnotationRefData(n *html.Node) (id string) { return } -func DomChildByType(n *html.Node, dataAtom atom.Atom) *html.Node { - for c := n.FirstChild; nil != c; c = c.NextSibling { - if c.DataAtom == dataAtom { - return c - } - } - return nil -} - func DomChildByTypeAndClass(n *html.Node, dataAtom atom.Atom, class ...string) *html.Node { if nil == n { return nil @@ -119,7 +110,7 @@ func DomExistChildByType(n *html.Node, dataAtom ...atom.Atom) bool { } for _, a := range dataAtom { - if nil != DomChildByType(n, a) { + if nil != domChildByType(n, a) { return true } } @@ -132,6 +123,15 @@ func DomExistChildByType(n *html.Node, dataAtom ...atom.Atom) bool { return false } +func domChildByType(n *html.Node, dataAtom atom.Atom) *html.Node { + for c := n.FirstChild; nil != c; c = c.NextSibling { + if c.DataAtom == dataAtom { + return c + } + } + return nil +} + func DomHTML(n *html.Node) []byte { if nil == n { return nil