From 9b41c0bf0c098fdc89c16cbcf5ad9c038fae2103 Mon Sep 17 00:00:00 2001 From: Yuri Pereira Constante Date: Wed, 20 Dec 2023 21:55:33 -0300 Subject: [PATCH 1/2] Use Enum.reduce instead of Enum.flat_map on find_selectors --- lib/floki/finder.ex | 122 +++++++++++++++++++------------------------- 1 file changed, 52 insertions(+), 70 deletions(-) diff --git a/lib/floki/finder.ex b/lib/floki/finder.ex index 3c893019..d1606bfb 100644 --- a/lib/floki/finder.ex +++ b/lib/floki/finder.ex @@ -45,8 +45,11 @@ defmodule Floki.Finder do results = tree.node_ids |> Enum.reverse() - |> get_nodes(tree) - |> Enum.flat_map(fn html_node -> get_matches_for_selectors(tree, html_node, selectors) end) + |> Enum.reduce([], fn node_id, acc -> + html_node = get_node(node_id, tree) + get_matches_for_selectors(tree, html_node, selectors, acc) + end) + |> Enum.reverse() |> Enum.uniq() {tree, results} @@ -58,23 +61,17 @@ defmodule Floki.Finder do |> Selector.Parser.parse() end - defp get_matches_for_selectors(tree, html_node, selectors) do - Enum.flat_map(selectors, fn selector -> get_matches(tree, html_node, selector) end) - end - - defp get_matches(tree, html_node, selector = %Selector{combinator: nil}) do - if selector_match?(tree, html_node, selector) do - [html_node] - else - [] - end + defp get_matches_for_selectors(tree, html_node, selectors, acc) do + Enum.reduce(selectors, acc, fn selector, acc -> + get_matches(tree, html_node, selector, acc) + end) end - defp get_matches(tree, html_node, selector = %Selector{combinator: combinator}) do + defp get_matches(tree, html_node, selector = %Selector{combinator: combinator}, acc) do if selector_match?(tree, html_node, selector) do - traverse_with(combinator, tree, [html_node]) + traverse_with(combinator, tree, html_node, acc) else - [] + acc end end @@ -84,69 +81,54 @@ defmodule Floki.Finder do # The stack serves as accumulator when there is another combinator to traverse. # So the scope of one combinator is the stack (or acc) or the parent one. - defp traverse_with(_, _, []), do: [] - defp traverse_with(nil, _, results), do: results - - defp traverse_with(%Selector.Combinator{match_type: :child, selector: s}, tree, stack) do - results = - Enum.flat_map(stack, fn html_node -> - nodes = - html_node.children_nodes_ids - |> Enum.reverse() - |> get_nodes(tree) - - Enum.filter(nodes, fn html_node -> selector_match?(tree, html_node, s) end) - end) - - traverse_with(s.combinator, tree, results) - end - - defp traverse_with(%Selector.Combinator{match_type: :sibling, selector: s}, tree, stack) do - results = - Enum.flat_map(stack, fn html_node -> - # It treats sibling as list to easily ignores those that didn't match - sibling_id = - html_node - |> get_siblings(tree) - |> Enum.take(1) + defp traverse_with(_, _, [], acc), do: acc + defp traverse_with(nil, _, html_node, acc), do: [html_node | acc] - nodes = get_nodes(sibling_id, tree) - - # Finally, try to match those siblings with the selector - Enum.filter(nodes, fn html_node -> selector_match?(tree, html_node, s) end) - end) - - traverse_with(s.combinator, tree, results) + defp traverse_with(%Selector.Combinator{match_type: :child, selector: s}, tree, html_node, acc) do + Enum.reduce(Enum.reverse(html_node.children_nodes_ids), acc, fn node_id, acc -> + html_node = get_node(node_id, tree) + get_matches(tree, html_node, s, acc) + end) end - defp traverse_with(%Selector.Combinator{match_type: :general_sibling, selector: s}, tree, stack) do - results = - Enum.flat_map(stack, fn html_node -> - sibling_ids = get_siblings(html_node, tree) - - nodes = get_nodes(sibling_ids, tree) - - # Finally, try to match those siblings with the selector - Enum.filter(nodes, fn html_node -> selector_match?(tree, html_node, s) end) - end) + defp traverse_with( + %Selector.Combinator{match_type: :sibling, selector: s}, + tree, + html_node, + acc + ) do + case get_siblings(html_node, tree) do + [sibling_id | _] -> + html_node = get_node(sibling_id, tree) + get_matches(tree, html_node, s, acc) - traverse_with(s.combinator, tree, results) + _ -> + acc + end end - defp traverse_with(%Selector.Combinator{match_type: :descendant, selector: s}, tree, stack) do - results = - Enum.flat_map(stack, fn html_node -> - ids_to_match = get_descendant_ids(html_node.node_id, tree) - nodes = get_nodes(ids_to_match, tree) - - Enum.filter(nodes, fn html_node -> selector_match?(tree, html_node, s) end) - end) - - traverse_with(s.combinator, tree, results) + defp traverse_with( + %Selector.Combinator{match_type: :general_sibling, selector: s}, + tree, + html_node, + acc + ) do + Enum.reduce(get_siblings(html_node, tree), acc, fn node_id, acc -> + html_node = get_node(node_id, tree) + get_matches(tree, html_node, s, acc) + end) end - defp get_nodes(ids, tree) do - Enum.map(ids, fn id -> Map.get(tree.nodes, id) end) + defp traverse_with( + %Selector.Combinator{match_type: :descendant, selector: s}, + tree, + html_node, + acc + ) do + Enum.reduce(get_descendant_ids(html_node.node_id, tree), acc, fn node_id, acc -> + html_node = get_node(node_id, tree) + get_matches(tree, html_node, s, acc) + end) end defp get_node(id, tree) do From 4d35f3d973b0d18441bdca3dc114b69fcd0f4360 Mon Sep 17 00:00:00 2001 From: Yuri Pereira Constante Date: Thu, 21 Dec 2023 23:00:47 -0300 Subject: [PATCH 2/2] Use recursion instead of Enum.reduce --- lib/floki/finder.ex | 66 ++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 34 deletions(-) diff --git a/lib/floki/finder.ex b/lib/floki/finder.ex index d1606bfb..39df3871 100644 --- a/lib/floki/finder.ex +++ b/lib/floki/finder.ex @@ -46,8 +46,7 @@ defmodule Floki.Finder do tree.node_ids |> Enum.reverse() |> Enum.reduce([], fn node_id, acc -> - html_node = get_node(node_id, tree) - get_matches_for_selectors(tree, html_node, selectors, acc) + get_matches_for_selectors(tree, node_id, selectors, acc) end) |> Enum.reverse() |> Enum.uniq() @@ -61,34 +60,43 @@ defmodule Floki.Finder do |> Selector.Parser.parse() end - defp get_matches_for_selectors(tree, html_node, selectors, acc) do + defp get_matches_for_selectors(tree, node_id, selectors, acc) do Enum.reduce(selectors, acc, fn selector, acc -> - get_matches(tree, html_node, selector, acc) + traverse_with(selector, tree, node_id, acc) end) end - defp get_matches(tree, html_node, selector = %Selector{combinator: combinator}, acc) do - if selector_match?(tree, html_node, selector) do + # The stack serves as accumulator when there is another combinator to traverse. + # So the scope of one combinator is the stack (or acc) or the parent one. + defp traverse_with(nil, _, html_node, acc), do: [html_node | acc] + defp traverse_with(_, _, [], acc), do: acc + + defp traverse_with(selector, tree, [node_id | rest], acc) do + traverse_with( + selector, + tree, + rest, + traverse_with(selector, tree, node_id, acc) + ) + end + + defp traverse_with(%Selector{combinator: combinator} = selector, tree, node_id, acc) do + html_node = get_node(node_id, tree) + + if Selector.match?(html_node, selector, tree) do traverse_with(combinator, tree, html_node, acc) else acc end end - defp selector_match?(tree, html_node, selector) do - Selector.match?(html_node, selector, tree) - end - - # The stack serves as accumulator when there is another combinator to traverse. - # So the scope of one combinator is the stack (or acc) or the parent one. - defp traverse_with(_, _, [], acc), do: acc - defp traverse_with(nil, _, html_node, acc), do: [html_node | acc] - - defp traverse_with(%Selector.Combinator{match_type: :child, selector: s}, tree, html_node, acc) do - Enum.reduce(Enum.reverse(html_node.children_nodes_ids), acc, fn node_id, acc -> - html_node = get_node(node_id, tree) - get_matches(tree, html_node, s, acc) - end) + defp traverse_with( + %Selector.Combinator{match_type: :child, selector: s}, + tree, + html_node, + acc + ) do + traverse_with(s, tree, Enum.reverse(html_node.children_nodes_ids), acc) end defp traverse_with( @@ -98,12 +106,8 @@ defmodule Floki.Finder do acc ) do case get_siblings(html_node, tree) do - [sibling_id | _] -> - html_node = get_node(sibling_id, tree) - get_matches(tree, html_node, s, acc) - - _ -> - acc + [sibling_id | _] -> traverse_with(s, tree, sibling_id, acc) + _ -> acc end end @@ -113,10 +117,7 @@ defmodule Floki.Finder do html_node, acc ) do - Enum.reduce(get_siblings(html_node, tree), acc, fn node_id, acc -> - html_node = get_node(node_id, tree) - get_matches(tree, html_node, s, acc) - end) + traverse_with(s, tree, get_siblings(html_node, tree), acc) end defp traverse_with( @@ -125,10 +126,7 @@ defmodule Floki.Finder do html_node, acc ) do - Enum.reduce(get_descendant_ids(html_node.node_id, tree), acc, fn node_id, acc -> - html_node = get_node(node_id, tree) - get_matches(tree, html_node, s, acc) - end) + traverse_with(s, tree, get_descendant_ids(html_node.node_id, tree), acc) end defp get_node(id, tree) do