Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use common tool in treemap view #7471

Merged
merged 5 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
208 changes: 107 additions & 101 deletions ydb/ci/build_bloat/main.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
#!/usr/bin/env python3

import argparse
import copy
import json
from functools import partial
import os
from concurrent.futures import ProcessPoolExecutor

from jinja2 import Environment, FileSystemLoader, StrictUndefined
import tree_map

HEADER_COMPILE_TIME_TO_SHOW = 0.5 # sec

Expand Down Expand Up @@ -57,43 +58,6 @@ def get_compile_duration_and_cpp_path(time_trace_path: str) -> tuple[float, str,
return duration_us / 1e6, cpp_file, time_trace_path


def add_to_tree(chunks: list[tuple[str, str]], value: int, tree: dict) -> None:
    """Insert one path into a nested ``name``/``type``/``children`` tree.

    ``chunks`` is the path from the root down to the leaf; each element is
    indexed as ``(name, type)``. Every node along the path gets its
    ``"name"`` and ``"type"`` (re)written, missing intermediate nodes are
    appended to the parent's ``"children"`` list, and the final node gets
    ``"size"`` set to ``value``.

    The tree is modified in place; nothing is returned. An empty ``chunks``
    raises ``IndexError``.
    """
    node = tree
    depth = 0
    while True:
        node["name"] = chunks[depth][0]
        node["type"] = chunks[depth][1]

        if depth + 1 == len(chunks):
            # Reached the leaf of this path: record its value and stop.
            node["size"] = value
            return

        siblings = node.setdefault("children", [])
        wanted = chunks[depth + 1][0]
        # Reuse the first existing child with the same name, if any.
        successor = next((entry for entry in siblings if entry["name"] == wanted), None)
        if successor is None:
            successor = {"name": wanted}
            siblings.append(successor)

        node = successor
        depth += 1


def propogate_area(tree):
    """Fill in missing ``"size"`` values bottom-up.

    Each child listed under ``"children"`` is processed first; a node that
    has no ``"size"`` of its own then receives the sum of its children's
    sizes (0 when it has no children). A node that already has a ``"size"``
    keeps it. The tree is modified in place.
    """
    nested = tree.get("children", [])
    for node in nested:
        propogate_area(node)

    # setdefault leaves an explicitly provided size untouched.
    tree.setdefault("size", sum(node["size"] for node in nested))


def enrich_names_with_sec(tree):
    """Append each node's size to its display name, recursively.

    Every node under ``tree`` (children first, then the node itself) has its
    ``"name"`` rewritten to ``"<name> <size> ms"``, where the size is
    formatted with ``_`` as the thousands separator. Note that, despite the
    function's name, the suffix written is ``ms``. The tree is modified in
    place and every node must already carry ``"name"`` and ``"size"``.
    """
    for node in tree.get("children", []):
        enrich_names_with_sec(node)

    size = tree["size"]
    tree["name"] = tree["name"] + " " + f"{size:_} ms"


def build_include_tree(path: str, build_output_dir: str, base_src_dir: str) -> list:
with open(path) as f:
obj = json.load(f)
Expand All @@ -110,26 +74,74 @@ def build_include_tree(path: str, build_output_dir: str, base_src_dir: str) -> l

include_events.sort(key=lambda event: (event[0], -event[1]))

path_to_time = {}
current_includes_stack = [] # stack
last_time_stamp = None

result = []
tree_path_to_sum_duration = {}
current_includes_stack = []

for time_stamp, ev, path, duration in include_events:
if current_includes_stack:
last_path = current_includes_stack[-1]
prev = path_to_time.get(last_path, 0)
path_to_time[last_path] = prev + (time_stamp - last_time_stamp) / 1000 / 1000

if ev == 1:
current_includes_stack.append(sanitize_path(path, base_src_dir))
if duration > HEADER_COMPILE_TIME_TO_SHOW * 1000 * 1000:
result.append((current_includes_stack[:], duration))
tree_path = tuple(current_includes_stack)
prev = tree_path_to_sum_duration.get(tree_path, 0)
tree_path_to_sum_duration[tree_path] = prev + duration
else:
assert current_includes_stack[-1] == sanitize_path(path, base_src_dir)
current_includes_stack.pop()
last_time_stamp = time_stamp

# filter small entities
tree_paths_to_include = set()
result = []
for tree_path, duration in tree_path_to_sum_duration.items():
if duration > HEADER_COMPILE_TIME_TO_SHOW * 1000 * 1000:
for i in range(1, len(tree_path) + 1):
tree_paths_to_include.add(tree_path[:i])

def add_to_tree(tree, tree_path, duration):
    """Add ``duration`` to the node reached by following ``tree_path``.

    Nodes are dicts of the form ``{"duration": ..., "children": {...}}``
    with children keyed by path component. Missing nodes along the path are
    created with a zero duration. An empty path adds to ``tree`` itself.
    """
    node = tree
    for component in tree_path:
        branches = node["children"]
        if component not in branches:
            branches[component] = {
                "duration": 0,
                "children": {},
            }
        node = branches[component]

    node["duration"] += duration

tree = {"children": {}, "duration": 0}
for tree_path in tree_paths_to_include:
add_to_tree(tree, tree_path, tree_path_to_sum_duration[tree_path])

def print_tree(tree, padding):
    """Print every descendant of ``tree`` as ``<padding><name> <duration>``.

    Nodes are visited depth-first in dict order, parents before their
    children; each nesting level extends the padding by one space. The root
    node itself is not printed.
    """
    # Stack of (indent, iterator over that level's remaining children).
    pending = [(padding, iter(tree["children"].items()))]
    while pending:
        indent, entries = pending[-1]
        entry = next(entries, None)
        if entry is None:
            pending.pop()
            continue

        name, subtree = entry
        print(indent + name, subtree["duration"])
        pending.append((indent + " ", iter(subtree["children"].items())))

# handy for debug
# print_tree(tree,"")

# subtract children
def subtract_duration(tree):
    """Turn inclusive durations into exclusive ones, in place.

    For every node that has children, the sum of the children's original
    (inclusive) durations is subtracted from the node's own ``"duration"``.
    Leaves are left untouched.

    Returns the node's duration as it was before the subtraction, so the
    parent can subtract it in turn.
    """
    subtrees = tree["children"].values()
    if not subtrees:
        return tree["duration"]

    nested_total = sum(subtract_duration(subtree) for subtree in subtrees)
    tree["duration"] -= nested_total
    return tree["duration"] + nested_total

subtract_duration(tree)

# collect result
result = []

def collect(tree, current_tree_path):
    """Flatten the tree into ``result`` as ``(path, duration)`` pairs.

    Walks the tree depth-first in dict order. For every node reached through
    a non-empty path, a copy of that path (a list of child keys) and the
    node's ``"duration"`` are appended to the enclosing ``result`` list; the
    root, whose path is empty, is not recorded.
    """
    if current_tree_path:
        # Store a copy so later path extensions cannot alias this entry.
        entry = (list(current_tree_path), tree["duration"])
        result.append(entry)

    for name, subtree in tree["children"].items():
        collect(subtree, [*current_tree_path, name])

collect(tree, [])

return result

Expand Down Expand Up @@ -163,14 +175,27 @@ def generate_cpp_bloat(build_output_dir: str, result_dir: str, base_src_dir: str
cpp_compilation_times = []
total_compilation_time = 0.0

tree_paths = []

for duration, path, time_trace_path in result:
splitted = path.split(os.sep)
chunks = list(zip(splitted, (len(splitted) - 1) * ["dir"] + ["cpp"]))
add_to_tree(chunks, int(duration * 1000), tree)
chunks = ["/"] + chunks
cpp_tree_path = [[chunk, "dir", 0] for chunk in splitted]
cpp_tree_path[-1][1] = "cpp"

cpp_tree_path_fixed_duration = copy.deepcopy(cpp_tree_path)
cpp_tree_path_fixed_duration[-1][2] = duration * 1000

include_tree = build_include_tree(time_trace_path, build_output_dir, base_src_dir)

for inc_path, inc_duration in include_tree:
additional_chunks = list(zip(inc_path, "h" * len(inc_path)))
add_to_tree(chunks + additional_chunks, inc_duration / 1000, tree)
include_tree_path = [[chunk, "h", 0] for chunk in inc_path]
include_tree_path[-1][2] = inc_duration / 1000
cpp_tree_path_fixed_duration[-1][2] -= include_tree_path[-1][2]
tree_paths.append(cpp_tree_path + include_tree_path)

tree_paths.append(cpp_tree_path_fixed_duration)
print("{} -> {:.2f}s".format(path, duration))
cpp_compilation_times.append(
{
Expand All @@ -179,6 +204,12 @@ def generate_cpp_bloat(build_output_dir: str, result_dir: str, base_src_dir: str
}
)
total_compilation_time += duration
types = [
("h", "Header", "#66C2A5"),
("cpp", "Cpp", "#FC8D62"),
("dir", "Dir", "#8DA0CB"),
]
tree_map.generate_tree_map_html(result_dir, tree_paths, unit_name="ms", factor=1, types=types)

os.makedirs(result_dir, exist_ok=True)

Expand All @@ -190,11 +221,6 @@ def generate_cpp_bloat(build_output_dir: str, result_dir: str, base_src_dir: str
with open(os.path.join(result_dir, "output.json"), "w") as f:
json.dump(human_readable_output, f, indent=4)

propogate_area(tree)
enrich_names_with_sec(tree)

return tree


def parse_includes(trace_path: str, base_src_dir: str) -> tuple[list[tuple[int, str]], dict]:
print("Processing includes in {}".format(trace_path))
Expand Down Expand Up @@ -310,14 +336,16 @@ def generate_header_bloat(build_output_dir: str, result_dir: str, base_src_dir:
tree = {}

headers_compile_duration = []

tree_paths = []
for duration, cnt, path in result:
path_chunks = path.split(os.sep)
path_chunks[-1] = path_chunks[-1] + " (total {} times)".format(cnt)
path_chunks_count = len(path_chunks)
chunks = list(zip(path_chunks, (path_chunks_count - 1) * ["dir"] + ["h"]))
add_to_tree(chunks, int(duration * 1000), tree)
tree_path = [[chunk, "dir", 0] for chunk in path_chunks]
tree_path[-1][1] = "h"
tree_path[-1][2] = duration * 1000
print("{} -> {:.2f}s (aggregated {} times)".format(path, duration, cnt))
if duration > HEADER_COMPILE_TIME_TO_SHOW:
tree_paths.append(tree_path)
headers_compile_duration.append(
{
"path": path,
Expand All @@ -326,6 +354,13 @@ def generate_header_bloat(build_output_dir: str, result_dir: str, base_src_dir:
}
)

types = [
("h", "Header", "#66C2A5"),
("cpp", "Cpp", "#FC8D62"),
("dir", "Dir", "#8DA0CB"),
]
tree_map.generate_tree_map_html(result_dir, tree_paths, unit_name="ms", factor=1, types=types)

time_breakdown = {}

for path in total_time_breakdown:
Expand All @@ -352,10 +387,6 @@ def generate_header_bloat(build_output_dir: str, result_dir: str, base_src_dir:
with open(os.path.join(result_dir, "output.json"), "w") as f:
json.dump(human_readable_output, f, indent=4)

propogate_area(tree)
enrich_names_with_sec(tree)

return tree


def parse_args():
Expand Down Expand Up @@ -391,44 +422,19 @@ def parse_args():
def main():
args = parse_args()

actions = []

if args.html_dir_cpp:
actions.append(("cpp build time impact", generate_cpp_bloat, args.html_dir_cpp))

if args.html_dir_cpp:
actions.append(("header build time impact", generate_header_bloat, args.html_dir_headers))

current_script_dir = os.path.dirname(os.path.realpath(__file__))
base_src_dir = os.path.normpath(os.path.join(current_script_dir, "../../.."))
# check that we are in the root of the source tree
assert os.path.isfile(os.path.join(base_src_dir, "AUTHORS"))
html_dir = os.path.join(current_script_dir, "html")

for description, fn, output_path in actions:
print("Performing '{}'".format(description))
tree = fn(args.build_dir, output_path, base_src_dir)

env = Environment(loader=FileSystemLoader(html_dir), undefined=StrictUndefined)
types = [
("h", "Header", "#66C2A5"),
("cpp", "Cpp", "#FC8D62"),
("dir", "Dir", "#8DA0CB"),
]
file_names = os.listdir(html_dir)
os.makedirs(output_path, exist_ok=True)
for file_name in file_names:
data = env.get_template(file_name).render(types=types)

dst_path = os.path.join(output_path, file_name)
with open(dst_path, "w") as f:
f.write(data)

with open(os.path.join(output_path, "bloat.json"), "w") as f:
f.write("var kTree = ")
json.dump(tree, f, indent=4)

print("Done '{}'".format(description))


if args.html_dir_cpp:
generate_cpp_bloat(args.build_dir, args.html_dir_cpp, base_src_dir)
print("Done '{}'".format("cpp build time impact"))
if args.html_dir_headers:
generate_header_bloat(args.build_dir, args.html_dir_headers, base_src_dir)
print("Done '{}'".format("header build time impact"))



if __name__ == "__main__":
Expand Down
Loading
Loading