Skip to content

Commit

Permalink
🎨 Improve HTML img clipping siyuan-note/siyuan#11595
Browse files Browse the repository at this point in the history
  • Loading branch information
88250 committed May 31, 2024
1 parent c2b54f4 commit 493de57
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 22 deletions.
30 changes: 18 additions & 12 deletions h2m.go
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ func (lute *Lute) genASTByDOM(n *html.Node, tree *parse.Tree) {
case atom.A:
node.Type = ast.NodeLink
text := strings.TrimSpace(util.DomText(n))
if "" == text && nil != n.Parent && lute.parentIs(n, atom.H1, atom.H2, atom.H3, atom.H4, atom.H5, atom.H6, atom.Div, atom.Section) && nil == util.DomChildByType(n, atom.Img) {
if "" == text && nil != n.Parent && lute.parentIs(n, atom.H1, atom.H2, atom.H3, atom.H4, atom.H5, atom.H6, atom.Div, atom.Section) && nil == util.DomChildrenByType(n, atom.Img) {
// 丢弃标题中文本为空的链接,这样的链接是没有锚文本的锚点
// https://github.com/Vanessa219/vditor/issues/359
// https://github.com/siyuan-note/siyuan/issues/11445
Expand Down Expand Up @@ -727,7 +727,7 @@ func (lute *Lute) genASTByDOM(n *html.Node, tree *parse.Tree) {
table := n.Parent.Parent
node.Type = ast.NodeTableRow

if nil == tree.Context.Tip.ChildByType(ast.NodeTableHead) && nil == util.DomChildByType(table, atom.Thead) {
if nil == tree.Context.Tip.ChildByType(ast.NodeTableHead) && 1 > len(util.DomChildrenByType(table, atom.Thead)) {
// 补全 thead 节点
thead := &ast.Node{Type: ast.NodeTableHead}
tree.Context.Tip.AppendChild(thead)
Expand Down Expand Up @@ -804,7 +804,9 @@ func (lute *Lute) genASTByDOM(n *html.Node, tree *parse.Tree) {
}
}
if strings.Contains(strings.ToLower(strings.TrimSpace(util.DomAttrValue(n, "class"))), "mathjax") {
if script := util.DomChildByType(n, atom.Script); nil != script {
scripts := util.DomChildrenByType(n, atom.Script)
if 0 < len(scripts) {
script := scripts[0]
if tex := util.DomText(script.FirstChild); "" != tex {
appendInlineMath(tree, tex)
return
Expand Down Expand Up @@ -848,16 +850,20 @@ func (lute *Lute) genASTByDOM(n *html.Node, tree *parse.Tree) {
return
}
case atom.Figcaption:
node.Type = ast.NodeParagraph
node.AppendChild(&ast.Node{Type: ast.NodeHardBreak})
node.AppendChild(&ast.Node{Type: ast.NodeText, Tokens: util.StrToBytes(util.DomText(n))})
tree.Context.Tip.AppendChild(node)
return
if tree.Context.Tip.IsContainerBlock() {
node.Type = ast.NodeParagraph
node.AppendChild(&ast.Node{Type: ast.NodeHardBreak})
node.AppendChild(&ast.Node{Type: ast.NodeText, Tokens: util.StrToBytes(util.DomText(n))})
tree.Context.Tip.AppendChild(node)
return
}
case atom.Figure:
node.Type = ast.NodeParagraph
tree.Context.Tip.AppendChild(node)
tree.Context.Tip = node
defer tree.Context.ParentTip()
if tree.Context.Tip.IsContainerBlock() {
node.Type = ast.NodeParagraph
tree.Context.Tip.AppendChild(node)
tree.Context.Tip = node
defer tree.Context.ParentTip()
}
default:
}

Expand Down
1 change: 1 addition & 0 deletions test/h2m_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (

var html2MdTests = []parseTest{

{"119", "<div class=\"captioned-image-container\"><figure><a class=\"image-link is-viewable-img image2\" target=\"_blank\" href=\"https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7e8ccaf-d891-4b77-b044-3ed0ff28b187_1775x881.png\" data-component-name=\"Image2ToDOM\" rel=\"\"><div class=\"image2-inset\"><picture><source type=\"image/webp\" srcset=\"https://substackcdn.com/image/fetch/w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7e8ccaf-d891-4b77-b044-3ed0ff28b187_1775x881.png 424w, https://substackcdn.com/image/fetch/w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7e8ccaf-d891-4b77-b044-3ed0ff28b187_1775x881.png 848w, https://substackcdn.com/image/fetch/w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7e8ccaf-d891-4b77-b044-3ed0ff28b187_1775x881.png 1272w, https://substackcdn.com/image/fetch/w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7e8ccaf-d891-4b77-b044-3ed0ff28b187_1775x881.png 1456w\" sizes=\"100vw\"><img src=\"https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7e8ccaf-d891-4b77-b044-3ed0ff28b187_1775x881.png\" width=\"1456\" height=\"723\" 
data-attrs=\"{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c7e8ccaf-d891-4b77-b044-3ed0ff28b187_1775x881.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:723,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1310063,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null}\" class=\"sizing-normal\" alt=\"\" srcset=\"https://substackcdn.com/image/fetch/w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7e8ccaf-d891-4b77-b044-3ed0ff28b187_1775x881.png 424w, https://substackcdn.com/image/fetch/w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7e8ccaf-d891-4b77-b044-3ed0ff28b187_1775x881.png 848w, https://substackcdn.com/image/fetch/w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7e8ccaf-d891-4b77-b044-3ed0ff28b187_1775x881.png 1272w, https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7e8ccaf-d891-4b77-b044-3ed0ff28b187_1775x881.png 1456w\" sizes=\"100vw\" fetchpriority=\"high\"></picture><div class=\"image-link-expand\"><svg xmlns=\"http://www.w3.org/2000/svg\" width=\"20\" height=\"20\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\" stroke-linecap=\"round\" stroke-linejoin=\"round\" class=\"lucide lucide-maximize2 \"><polyline points=\"15 3 21 3 21 9\"></polyline><polyline points=\"9 21 3 21 3 15\"></polyline><line x1=\"21\" x2=\"14\" y1=\"3\" y2=\"10\"></line><line x1=\"3\" x2=\"10\" y1=\"21\" 
y2=\"14\"></line></svg></div></div></a><figcaption class=\"image-caption\">Source: Nvidia, SemiAnalysis</figcaption></figure></div>", "[![](https://substackcdn.com/image/fetch/w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7e8ccaf-d891-4b77-b044-3ed0ff28b187_1775x881.png)](https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7e8ccaf-d891-4b77-b044-3ed0ff28b187_1775x881.png)Source: Nvidia, SemiAnalysis\n"},
{"118", "<p><span><strong data-brushtype=\"text\"><strong><span>foo</span></strong></strong></span></p>", "**foo**\n"},
{"117", "<table border=\"0\" cellpadding=\"0\" cellspacing=\"0\" class=\"syntaxhighlighter python\"><tbody><tr><td class=\"gutter\"><div class=\"line number1 index0 alt2\">1</div><div class=\"line number2 index1 alt1\">2</div><div class=\"line number3 index2 alt2\">3</div><div class=\"line number4 index3 alt1\">4</div><div class=\"line number5 index4 alt2\">5</div><div class=\"line number6 index5 alt1\">6</div><div class=\"line number7 index6 alt2\">7</div><div class=\"line number8 index7 alt1\">8</div><div class=\"line number9 index8 alt2\">9</div></td><td class=\"code\"><div class=\"container\"><div class=\"line number1 index0 alt2\"><code class=\"python keyword\">import</code> <code class=\"python plain\">os</code></div><div class=\"line number2 index1 alt1\">&nbsp;</div><div class=\"line number3 index2 alt2\"><code class=\"python plain\">res </code><code class=\"python keyword\">=</code> <code class=\"python plain\">os.popen(</code><code class=\"python string\">\"find ./ -name *.lua\"</code><code class=\"python plain\">).readlines()</code></div><div class=\"line number4 index3 alt1\">&nbsp;</div><div class=\"line number5 index4 alt2\"><code class=\"python keyword\">for</code> <code class=\"python plain\">i </code><code class=\"python keyword\">in</code> <code class=\"python functions\">range</code><code class=\"python plain\">(</code><code class=\"python value\">0</code><code class=\"python plain\">, </code><code class=\"python functions\">len</code><code class=\"python plain\">(res)) :</code></div><div class=\"line number6 index5 alt1\"><code class=\"python spaces\">&nbsp;&nbsp;&nbsp;&nbsp;</code><code class=\"python plain\">path </code><code class=\"python keyword\">=</code> <code class=\"python plain\">res[i].strip(</code><code class=\"python string\">\"\\n\"</code><code class=\"python plain\">)</code></div><div class=\"line number7 index6 alt2\"><code class=\"python spaces\">&nbsp;&nbsp;&nbsp;&nbsp;</code><code class=\"python plain\">cmd </code><code 
class=\"python keyword\">=</code> <code class=\"python string\">\"java -jar /home/winmt/unluac_miwifi/build/unluac.jar \"</code> <code class=\"python keyword\">+</code> <code class=\"python plain\">path </code><code class=\"python keyword\">+</code> <code class=\"python string\">\" &gt; \"</code> <code class=\"python keyword\">+</code> <code class=\"python plain\">path </code><code class=\"python keyword\">+</code> <code class=\"python string\">\".dis\"</code></div><div class=\"line number8 index7 alt1\"><code class=\"python spaces\">&nbsp;&nbsp;&nbsp;&nbsp;</code><code class=\"python functions\">print</code><code class=\"python plain\">(cmd)</code></div><div class=\"line number9 index8 alt2\"><code class=\"python spaces\">&nbsp;&nbsp;&nbsp;&nbsp;</code><code class=\"python plain\">os.system(cmd)</code></div></div></td></tr></tbody></table>", "```python\nimport os\n \nres = os.popen(\"find ./ -name *.lua\").readlines()\n \nfor i in range(0, len(res)) :\n path = res[i].strip(\"\\n\")\n cmd = \"java -jar /home/winmt/unluac_miwifi/build/unluac.jar \" + path + \" > \" + path + \".dis\"\n print(cmd)\n os.system(cmd)\n```\n"},
{"116", "<p><span class=\"color-blue-03\"><strong><span class=\"font-size-16\">【Q</span></strong><strong><span class=\"font-size-16\">22</span></strong><strong><span class=\"font-size-16\">】</span></strong><strong><span class=\"font-size-16\">为什么</span></strong><strong><span class=\"font-size-16\">22</span></strong><strong><span class=\"font-size-16\">话</span></strong><strong><span class=\"font-size-16\">中助手说自己依然会存在于平行世界?&nbsp;&nbsp;&nbsp;&nbsp;</span></strong></span><strong><span class=\"font-size-16\">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span></strong></p>", "\u200b**【Q22**\u200b**】为什么**\u200b**22话**\u200b**中助手说自己依然会存在于平行世界? \u200b**\n"},
Expand Down
20 changes: 10 additions & 10 deletions util/html.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,6 @@ func GetTextMarkFileAnnotationRefData(n *html.Node) (id string) {
return
}

// DomChildByType returns the first direct child of n whose DataAtom equals
// dataAtom. Only immediate children are examined (no recursion into
// grandchildren). It returns nil when n is nil or when no direct child matches.
func DomChildByType(n *html.Node, dataAtom atom.Atom) *html.Node {
	if nil == n {
		// Return nil rather than panic on a nil node, mirroring the nil
		// handling of the neighbouring Dom* helpers.
		return nil
	}

	for c := n.FirstChild; nil != c; c = c.NextSibling {
		if c.DataAtom == dataAtom {
			return c
		}
	}
	return nil
}

func DomChildByTypeAndClass(n *html.Node, dataAtom atom.Atom, class ...string) *html.Node {
if nil == n {
return nil
Expand Down Expand Up @@ -119,7 +110,7 @@ func DomExistChildByType(n *html.Node, dataAtom ...atom.Atom) bool {
}

for _, a := range dataAtom {
if nil != DomChildByType(n, a) {
if nil != domChildByType(n, a) {
return true
}
}
Expand All @@ -132,6 +123,15 @@ func DomExistChildByType(n *html.Node, dataAtom ...atom.Atom) bool {
return false
}

// domChildByType scans the direct children of n in document order and returns
// the first one whose DataAtom equals dataAtom, or nil when none matches.
// n is dereferenced without a check, so callers must pass a non-nil node.
func domChildByType(n *html.Node, dataAtom atom.Atom) *html.Node {
	child := n.FirstChild
	// Advance until we either run out of siblings (child becomes nil) or land
	// on a matching element; in both cases child is the value to return.
	for nil != child && child.DataAtom != dataAtom {
		child = child.NextSibling
	}
	return child
}

func DomHTML(n *html.Node) []byte {
if nil == n {
return nil
Expand Down

0 comments on commit 493de57

Please sign in to comment.