From 9f893802c79a30b1e774c15064f7bc513253f1be Mon Sep 17 00:00:00 2001 From: Alex Martsinovich Date: Fri, 24 Feb 2023 10:41:48 -0800 Subject: [PATCH] Support splitting nodes while traversing (#447) * Support splitting nodes while traversing * Update lib/floki/traversal.ex * Fix formatting --------- Co-authored-by: Philip Sampaio --- lib/floki.ex | 10 +++---- lib/floki/traversal.ex | 10 ++++++- test/floki/traversal_test.exs | 56 +++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 6 deletions(-) diff --git a/lib/floki.ex b/lib/floki.ex index 10bd4905..bbaaabf1 100644 --- a/lib/floki.ex +++ b/lib/floki.ex @@ -372,9 +372,9 @@ defmodule Floki do The tree is traversed in a post-walk fashion, where the children are traversed before the parent. - When the function `fun` encounters HTML tag, it receives a tuple with - `{name, attributes, children}`, and should either return a similar tuple or - `nil` to delete the current node. + When the function `fun` encounters HTML tag, it receives a tuple with `{name, + attributes, children}`, and should either return a similar tuple, a list of + tuples to split current node or `nil` to delete it. The function `fun` can also encounter HTML doctype, comment or declaration and will receive, and should return, different tuple for these types. See the @@ -404,7 +404,7 @@ defmodule Floki do @spec traverse_and_update( html_node() | html_tree(), - (html_node() -> html_node() | nil) + (html_node() -> html_node() | [html_node()] | nil) ) :: html_node() | html_tree() defdelegate traverse_and_update(html_tree, fun), to: Floki.Traversal @@ -458,7 +458,7 @@ defmodule Floki do html_node() | html_tree(), traverse_acc, (html_node(), traverse_acc -> - {html_node() | nil, traverse_acc}) + {html_node() | [html_node()] | nil, traverse_acc}) ) :: {html_node() | html_tree(), traverse_acc} when traverse_acc: any() defdelegate traverse_and_update(html_tree, acc, fun), to: Floki.Traversal diff --git a/lib/floki/traversal.ex b/lib/floki/traversal.ex index ae5d6bf0..7d386bcc 100644 --- a/lib/floki/traversal.ex +++ b/lib/floki/traversal.ex @@ -21,7 +21,15 @@ defmodule Floki.Traversal do {mapped_head, new_acc} -> {mapped_tail, new_acc2} = traverse_and_update(tail, new_acc, fun) - {[mapped_head | mapped_tail], new_acc2} + + mapped = + if is_list(mapped_head) do + mapped_head ++ mapped_tail + else + [mapped_head | mapped_tail] + end + + {mapped, new_acc2} end end diff --git a/test/floki/traversal_test.exs b/test/floki/traversal_test.exs index 118bb2f7..81cbae60 100644 --- a/test/floki/traversal_test.exs +++ b/test/floki/traversal_test.exs @@ -75,6 +75,39 @@ defmodule Floki.TraversalTest do ]} ] end + + test "splits a node" do + html = [ + {"div", [], + [ + {"p", [], ["foo"]} + ]}, + {"div", [], + [ + {"p", [], ["hello world"]} + ]} + ] + + assert Floki.traverse_and_update(html, fn + {"p", attrs, [text]} -> + for word <- String.split(text) do + {"p", attrs, [word]} + end + + tag -> + tag + end) == [ + {"div", [], + [ + {"p", [], ["foo"]} + ]}, + {"div", [], + [ + {"p", [], ["hello"]}, + {"p", [], ["world"]} + ]} + ] + end end describe "traverse_and_update/3" do @@ -158,5 +191,28 @@ defmodule Floki.TraversalTest do ]} ], 5} end + + test "splits a node" do + html = [ + {"p", [], ["hello world"]} + ] + + assert Floki.traverse_and_update(html, 0, fn + {"p", attrs, [text]}, acc -> + nodes = + for word <- String.split(text) do + {"p", attrs, [word]} + end + + {nodes, acc + 1} + + tag, acc -> + {tag, acc + 1} + end) == + {[ + {"p", [], ["hello"]}, + {"p", [], ["world"]} + ], 1} + end end end