Skip to content

Commit

Permalink
Optimize HTMLTree.build (#511)
Browse files Browse the repository at this point in the history
* Avoid updating existing entries on tree.nodes

* Don't use HTMLTree on build_tree
  • Loading branch information
ypconstante authored Dec 28, 2023
1 parent ff1d247 commit deb5807
Showing 1 changed file with 129 additions and 90 deletions.
219 changes: 129 additions & 90 deletions lib/floki/html_tree.ex
Original file line number Diff line number Diff line change
Expand Up @@ -31,75 +31,92 @@ defmodule Floki.HTMLTree do
root_id = IDSeeder.seed([])
root_node = %HTMLNode{type: tag, attributes: attrs, node_id: root_id}

build_tree(
%HTMLTree{root_nodes_ids: [root_id], node_ids: [root_id], nodes: %{root_id => root_node}},
children,
root_id,
[]
)
{node_ids, nodes} =
build_tree(
[root_id],
[],
children,
root_node,
[]
)

%HTMLTree{
root_nodes_ids: [root_id],
node_ids: node_ids,
nodes: Map.new(nodes)
}
end

def build(html_tuples) when is_list(html_tuples) do
reducer = fn
{:pi, _, _}, tree ->
tree

{tag, attrs, children}, tree ->
root_id = IDSeeder.seed(tree.node_ids)
{:pi, _, _}, state ->
state

{tag, attrs, children}, {root_nodes_ids, node_ids, nodes} ->
root_id = IDSeeder.seed(node_ids)
root_node = %HTMLNode{type: tag, attributes: attrs, node_id: root_id}

build_tree(
%{
tree
| nodes: Map.put(tree.nodes, root_id, root_node),
node_ids: [root_id | tree.node_ids],
root_nodes_ids: [root_id | tree.root_nodes_ids]
},
children,
root_id,
[]
)

text, tree when is_binary(text) ->
root_id = IDSeeder.seed(tree.node_ids)
root_nodes_ids = [root_id | root_nodes_ids]
node_ids = [root_id | node_ids]

{node_ids, nodes} =
build_tree(
node_ids,
nodes,
children,
root_node,
[]
)

{root_nodes_ids, node_ids, nodes}

text, {root_nodes_ids, node_ids, nodes} when is_binary(text) ->
root_id = IDSeeder.seed(node_ids)
root_node = %Text{content: text, node_id: root_id}

build_tree(
%{
tree
| nodes: Map.put(tree.nodes, root_id, root_node),
node_ids: [root_id | tree.node_ids],
root_nodes_ids: [root_id | tree.root_nodes_ids]
},
[],
root_id,
[]
)

{:comment, comment}, tree ->
root_id = IDSeeder.seed(tree.node_ids)
root_nodes_ids = [root_id | root_nodes_ids]
node_ids = [root_id | node_ids]

{node_ids, nodes} =
build_tree(
node_ids,
nodes,
[],
root_node,
[]
)

{root_nodes_ids, node_ids, nodes}

{:comment, comment}, {root_nodes_ids, node_ids, nodes} ->
root_id = IDSeeder.seed(node_ids)
root_node = %Comment{content: comment, node_id: root_id}

build_tree(
%{
tree
| nodes: Map.put(tree.nodes, root_id, root_node),
node_ids: [root_id | tree.node_ids],
root_nodes_ids: [root_id | tree.root_nodes_ids]
},
[],
root_id,
[]
)

_, tree ->
tree
root_nodes_ids = [root_id | root_nodes_ids]
node_ids = [root_id | node_ids]

{node_ids, nodes} =
build_tree(
node_ids,
nodes,
[],
root_node,
[]
)

{root_nodes_ids, node_ids, nodes}

_, state ->
state
end

Enum.reduce(html_tuples, %HTMLTree{}, reducer)
{root_nodes_ids, node_ids, nodes} = Enum.reduce(html_tuples, {[], [], []}, reducer)

%HTMLTree{
root_nodes_ids: root_nodes_ids,
node_ids: node_ids,
nodes: Map.new(nodes)
}
end

def build(_), do: %HTMLTree{}
Expand Down Expand Up @@ -174,70 +191,92 @@ defmodule Floki.HTMLTree do
defp get_ids_for_delete_stack(%HTMLNode{children_nodes_ids: ids}), do: ids
defp get_ids_for_delete_stack(_), do: []

defp build_tree(tree, [], _, []), do: tree
defp build_tree(node_ids, nodes, [], parent_node, []) do
{node_ids, [{parent_node.node_id, parent_node} | nodes]}
end

defp build_tree(tree, [{:pi, _, _} | children], parent_id, stack),
do: build_tree(tree, children, parent_id, stack)
defp build_tree(node_ids, nodes, [{:pi, _, _} | children], parent_node, stack) do
build_tree(node_ids, nodes, children, parent_node, stack)
end

defp build_tree(tree, [{tag, attrs, child_children} | children], parent_id, stack) do
new_id = IDSeeder.seed(tree.node_ids)
defp build_tree(node_ids, nodes, [{tag, attrs, child_children} | children], parent_node, stack) do
new_id = IDSeeder.seed(node_ids)

new_node = %HTMLNode{type: tag, attributes: attrs, node_id: new_id, parent_node_id: parent_id}
new_node = %HTMLNode{
type: tag,
attributes: attrs,
node_id: new_id,
parent_node_id: parent_node.node_id
}

nodes = put_new_node(tree.nodes, new_node)
parent_node = %{
parent_node
| children_nodes_ids: [new_id | parent_node.children_nodes_ids]
}

build_tree(
%{tree | nodes: nodes, node_ids: [new_id | tree.node_ids]},
[new_id | node_ids],
nodes,
child_children,
new_id,
[{parent_id, children} | stack]
new_node,
[{parent_node, children} | stack]
)
end

defp build_tree(tree, [{:comment, comment} | children], parent_id, stack) do
new_id = IDSeeder.seed(tree.node_ids)
new_node = %Comment{content: comment, node_id: new_id, parent_node_id: parent_id}
defp build_tree(node_ids, nodes, [{:comment, comment} | children], parent_node, stack) do
new_id = IDSeeder.seed(node_ids)
new_node = %Comment{content: comment, node_id: new_id, parent_node_id: parent_node.node_id}

nodes = put_new_node(tree.nodes, new_node)
parent_node = %{
parent_node
| children_nodes_ids: [new_id | parent_node.children_nodes_ids]
}

build_tree(
%{tree | nodes: nodes, node_ids: [new_id | tree.node_ids]},
[new_id | node_ids],
[{new_id, new_node} | nodes],
children,
parent_id,
parent_node,
stack
)
end

defp build_tree(tree, [text | children], parent_id, stack) when is_binary(text) do
new_id = IDSeeder.seed(tree.node_ids)
new_node = %Text{content: text, node_id: new_id, parent_node_id: parent_id}
defp build_tree(node_ids, nodes, [text | children], parent_node, stack) when is_binary(text) do
new_id = IDSeeder.seed(node_ids)
new_node = %Text{content: text, node_id: new_id, parent_node_id: parent_node.node_id}

nodes = put_new_node(tree.nodes, new_node)
parent_node = %{
parent_node
| children_nodes_ids: [new_id | parent_node.children_nodes_ids]
}

build_tree(
%{tree | nodes: nodes, node_ids: [new_id | tree.node_ids]},
[new_id | node_ids],
[{new_id, new_node} | nodes],
children,
parent_id,
parent_node,
stack
)
end

defp build_tree(tree, [_other | children], parent_id, stack) do
build_tree(tree, children, parent_id, stack)
end

defp build_tree(tree, [], _, [{parent_node_id, children} | stack]) do
build_tree(tree, children, parent_node_id, stack)
defp build_tree(node_ids, nodes, [_other | children], parent_node, stack) do
build_tree(node_ids, nodes, children, parent_node, stack)
end

defp put_new_node(nodes, new_node) do
parent_node = Map.get(nodes, new_node.parent_node_id)
children_ids = parent_node.children_nodes_ids
updated_parent = %{parent_node | children_nodes_ids: [new_node.node_id | children_ids]}

nodes
|> Map.put(new_node.node_id, new_node)
|> Map.put(new_node.parent_node_id, updated_parent)
defp build_tree(
node_ids,
nodes,
[],
parent_node,
[{next_parent_node, next_parent_children} | stack]
) do
build_tree(
node_ids,
[{parent_node.node_id, parent_node} | nodes],
next_parent_children,
next_parent_node,
stack
)
end

def patch_nodes(html_tree, operation_with_nodes) do
Expand Down

0 comments on commit deb5807

Please sign in to comment.