diff --git a/ydb/ci/build_bloat/main.py b/ydb/ci/build_bloat/main.py index a7134bf2efd3..58183ea5ecff 100755 --- a/ydb/ci/build_bloat/main.py +++ b/ydb/ci/build_bloat/main.py @@ -1,12 +1,13 @@ #!/usr/bin/env python3 import argparse +import copy import json from functools import partial import os from concurrent.futures import ProcessPoolExecutor -from jinja2 import Environment, FileSystemLoader, StrictUndefined +import tree_map HEADER_COMPILE_TIME_TO_SHOW = 0.5 # sec @@ -57,43 +58,6 @@ def get_compile_duration_and_cpp_path(time_trace_path: str) -> tuple[float, str, return duration_us / 1e6, cpp_file, time_trace_path -def add_to_tree(chunks: list[tuple[str, str]], value: int, tree: dict) -> None: - tree["name"] = chunks[0][0] - tree["type"] = chunks[0][1] - if len(chunks) == 1: - tree["size"] = value - else: - if "children" not in tree: - tree["children"] = [] - for child_ in tree["children"]: - if child_["name"] == chunks[1][0]: - child = child_ - break - - else: - child = {"name": chunks[1][0]} - tree["children"].append(child) - add_to_tree(chunks[1:], value, child) - - -def propogate_area(tree): - area = 0 - for child_ in tree.get("children", []): - propogate_area(child_) - area += child_["size"] - - if "size" not in tree: - tree["size"] = area - - -def enrich_names_with_sec(tree): - area = 0 - for child_ in tree.get("children", []): - enrich_names_with_sec(child_) - - tree["name"] = tree["name"] + " " + "{:_} ms".format(tree["size"]) - - def build_include_tree(path: str, build_output_dir: str, base_src_dir: str) -> list: with open(path) as f: obj = json.load(f) @@ -110,26 +74,74 @@ def build_include_tree(path: str, build_output_dir: str, base_src_dir: str) -> l include_events.sort(key=lambda event: (event[0], -event[1])) - path_to_time = {} - current_includes_stack = [] # stack - last_time_stamp = None - - result = [] + tree_path_to_sum_duration = {} + current_includes_stack = [] for time_stamp, ev, path, duration in include_events: - if current_includes_stack: 
- last_path = current_includes_stack[-1] - prev = path_to_time.get(last_path, 0) - path_to_time[last_path] = prev + (time_stamp - last_time_stamp) / 1000 / 1000 - if ev == 1: current_includes_stack.append(sanitize_path(path, base_src_dir)) - if duration > HEADER_COMPILE_TIME_TO_SHOW * 1000 * 1000: - result.append((current_includes_stack[:], duration)) + tree_path = tuple(current_includes_stack) + prev = tree_path_to_sum_duration.get(tree_path, 0) + tree_path_to_sum_duration[tree_path] = prev + duration else: assert current_includes_stack[-1] == sanitize_path(path, base_src_dir) current_includes_stack.pop() - last_time_stamp = time_stamp + + # filter small entities + tree_paths_to_include = set() + result = [] + for tree_path, duration in tree_path_to_sum_duration.items(): + if duration > HEADER_COMPILE_TIME_TO_SHOW * 1000 * 1000: + for i in range(1, len(tree_path) + 1): + tree_paths_to_include.add(tree_path[:i]) + + def add_to_tree(tree, tree_path, duration): + if len(tree_path) == 0: + tree["duration"] += duration + else: + if tree_path[0] not in tree["children"]: + tree["children"][tree_path[0]] = { + "duration": 0, + "children": {}, + } + add_to_tree(tree["children"][tree_path[0]], tree_path[1:], duration) + + tree = {"children": {}, "duration": 0} + for tree_path in tree_paths_to_include: + add_to_tree(tree, tree_path, tree_path_to_sum_duration[tree_path]) + + def print_tree(tree, padding): + for child, child_tree in tree["children"].items(): + print(padding + child, child_tree["duration"]) + print_tree(child_tree, padding + " ") + + # handy for debug + # print_tree(tree,"") + + # subtract children + def subtract_duration(tree): + if len(tree["children"]) == 0: + return tree["duration"] + else: + children_duration = 0 + for child, child_tree in tree["children"].items(): + children_duration += subtract_duration(child_tree) + + tree["duration"] -= children_duration + return tree["duration"] + children_duration + + subtract_duration(tree) + + # collect result + 
result = [] + + def collect(tree, current_tree_path): + if current_tree_path: + result.append((current_tree_path[:], tree["duration"])) + for child, child_tree in tree["children"].items(): + collect(child_tree, current_tree_path + [child]) + + collect(tree, []) return result @@ -163,14 +175,27 @@ def generate_cpp_bloat(build_output_dir: str, result_dir: str, base_src_dir: str cpp_compilation_times = [] total_compilation_time = 0.0 + tree_paths = [] + for duration, path, time_trace_path in result: splitted = path.split(os.sep) chunks = list(zip(splitted, (len(splitted) - 1) * ["dir"] + ["cpp"])) - add_to_tree(chunks, int(duration * 1000), tree) + chunks = ["/"] + chunks + cpp_tree_path = [[chunk, "dir", 0] for chunk in splitted] + cpp_tree_path[-1][1] = "cpp" + + cpp_tree_path_fixed_duration = copy.deepcopy(cpp_tree_path) + cpp_tree_path_fixed_duration[-1][2] = duration * 1000 + include_tree = build_include_tree(time_trace_path, build_output_dir, base_src_dir) + for inc_path, inc_duration in include_tree: - additional_chunks = list(zip(inc_path, "h" * len(inc_path))) - add_to_tree(chunks + additional_chunks, inc_duration / 1000, tree) + include_tree_path = [[chunk, "h", 0] for chunk in inc_path] + include_tree_path[-1][2] = inc_duration / 1000 + cpp_tree_path_fixed_duration[-1][2] -= include_tree_path[-1][2] + tree_paths.append(cpp_tree_path + include_tree_path) + + tree_paths.append(cpp_tree_path_fixed_duration) print("{} -> {:.2f}s".format(path, duration)) cpp_compilation_times.append( { @@ -179,6 +204,12 @@ def generate_cpp_bloat(build_output_dir: str, result_dir: str, base_src_dir: str } ) total_compilation_time += duration + types = [ + ("h", "Header", "#66C2A5"), + ("cpp", "Cpp", "#FC8D62"), + ("dir", "Dir", "#8DA0CB"), + ] + tree_map.generate_tree_map_html(result_dir, tree_paths, unit_name="ms", factor=1, types=types) os.makedirs(result_dir, exist_ok=True) @@ -190,11 +221,6 @@ def generate_cpp_bloat(build_output_dir: str, result_dir: str, base_src_dir: str 
with open(os.path.join(result_dir, "output.json"), "w") as f: json.dump(human_readable_output, f, indent=4) - propogate_area(tree) - enrich_names_with_sec(tree) - - return tree - def parse_includes(trace_path: str, base_src_dir: str) -> tuple[list[tuple[int, str]], dict]: print("Processing includes in {}".format(trace_path)) @@ -310,14 +336,16 @@ def generate_header_bloat(build_output_dir: str, result_dir: str, base_src_dir: tree = {} headers_compile_duration = [] - + tree_paths = [] for duration, cnt, path in result: path_chunks = path.split(os.sep) path_chunks[-1] = path_chunks[-1] + " (total {} times)".format(cnt) - path_chunks_count = len(path_chunks) - chunks = list(zip(path_chunks, (path_chunks_count - 1) * ["dir"] + ["h"])) - add_to_tree(chunks, int(duration * 1000), tree) + tree_path = [[chunk, "dir", 0] for chunk in path_chunks] + tree_path[-1][1] = "h" + tree_path[-1][2] = duration * 1000 print("{} -> {:.2f}s (aggregated {} times)".format(path, duration, cnt)) + if duration > HEADER_COMPILE_TIME_TO_SHOW: + tree_paths.append(tree_path) headers_compile_duration.append( { "path": path, @@ -326,6 +354,13 @@ def generate_header_bloat(build_output_dir: str, result_dir: str, base_src_dir: } ) + types = [ + ("h", "Header", "#66C2A5"), + ("cpp", "Cpp", "#FC8D62"), + ("dir", "Dir", "#8DA0CB"), + ] + tree_map.generate_tree_map_html(result_dir, tree_paths, unit_name="ms", factor=1, types=types) + time_breakdown = {} for path in total_time_breakdown: @@ -352,10 +387,6 @@ def generate_header_bloat(build_output_dir: str, result_dir: str, base_src_dir: with open(os.path.join(result_dir, "output.json"), "w") as f: json.dump(human_readable_output, f, indent=4) - propogate_area(tree) - enrich_names_with_sec(tree) - - return tree def parse_args(): @@ -391,44 +422,19 @@ def parse_args(): def main(): args = parse_args() - actions = [] - - if args.html_dir_cpp: - actions.append(("cpp build time impact", generate_cpp_bloat, args.html_dir_cpp)) - - if args.html_dir_cpp: - 
actions.append(("header build time impact", generate_header_bloat, args.html_dir_headers)) - current_script_dir = os.path.dirname(os.path.realpath(__file__)) base_src_dir = os.path.normpath(os.path.join(current_script_dir, "../../..")) # check we a in root of source tree assert os.path.isfile(os.path.join(base_src_dir, "AUTHORS")) - html_dir = os.path.join(current_script_dir, "html") - - for description, fn, output_path in actions: - print("Performing '{}'".format(description)) - tree = fn(args.build_dir, output_path, base_src_dir) - - env = Environment(loader=FileSystemLoader(html_dir), undefined=StrictUndefined) - types = [ - ("h", "Header", "#66C2A5"), - ("cpp", "Cpp", "#FC8D62"), - ("dir", "Dir", "#8DA0CB"), - ] - file_names = os.listdir(html_dir) - os.makedirs(output_path, exist_ok=True) - for file_name in file_names: - data = env.get_template(file_name).render(types=types) - - dst_path = os.path.join(output_path, file_name) - with open(dst_path, "w") as f: - f.write(data) - - with open(os.path.join(output_path, "bloat.json"), "w") as f: - f.write("var kTree = ") - json.dump(tree, f, indent=4) - - print("Done '{}'".format(description)) + + + if args.html_dir_cpp: + generate_cpp_bloat(args.build_dir, args.html_dir_cpp, base_src_dir) + print("Done '{}'".format("cpp build time impact")) + if args.html_dir_headers: + generate_header_bloat(args.build_dir, args.html_dir_headers, base_src_dir) + print("Done '{}'".format("header build time impact")) + + if __name__ == "__main__": diff --git a/ydb/ci/build_bloat/template_bloat.py b/ydb/ci/build_bloat/template_bloat.py index 19892a07395a..eb47e8583e7f 100755 --- a/ydb/ci/build_bloat/template_bloat.py +++ b/ydb/ci/build_bloat/template_bloat.py @@ -1,30 +1,58 @@ #!/usr/bin/env python3 import argparse import json -import os import sys -from jinja2 import Environment, FileSystemLoader, StrictUndefined +import tree_map THRESHHOLD_TO_SHOW_ON_TREE_VIEW = 1024*10 def remove_brackets(name, b1, b2):
+    """Collapse each b1...b2 group of name down to the bare bracket pair.
+
+    Everything nested inside brackets is dropped, the outermost b1 and b2
+    themselves are kept: "foo<bar<int>>::baz" -> "foo<>::baz" for "<", ">".
+    b1 and b2 are compared against single characters of name.
+    """
     inside_template = 0
-    final_name = ""
-    for c in name:
+    final_name_builder = []
+    pos = 0
+    while pos != len(name):
+        pos_next_b1 = name.find(b1, pos)
+        pos_next_b2 = name.find(b2, pos)
+
+        pos_next = pos_next_b1
+        if pos_next == -1:
+            pos_next = pos_next_b2
+        elif pos_next_b2 != -1 and pos_next_b2 < pos_next:
+            pos_next = pos_next_b2
+
+        if pos_next == -1:
+            # name.find() returned -1 for both brackets, so there is nothing to
+            # index: keep the whole tail, but only when outside of any brackets.
+            if inside_template == 0:
+                final_name_builder.append(name[pos:])
+            break
+
+        # Plain text in front of the bracket survives only at depth zero.
+        if inside_template == 0:
+            final_name_builder.append(name[pos:pos_next])
+
+        c = name[pos_next]
+
         if c == b1:
             inside_template += 1
             if inside_template == 1:
-                final_name += c
+                final_name_builder.append(c)
+
         elif c == b2:
             inside_template -= 1
             if inside_template == 0:
-                final_name += c
+                final_name_builder.append(c)
-        else:
-            if inside_template:
-                continue
-            final_name += c
-    return final_name
+
+        pos = pos_next + 1
+
+    return "".join(final_name_builder)
 def get_aggregation_key(name): final_name = name @@ -89,51 +105,9 @@ def print_stat(f, d): for s in sorted(p[2]): print(" " + s, file=f) - -def add_to_tree(tree, path, value, count): - tree["name"] = path[0] - if "children" not in tree: - tree["children"] = {} - if len(path) == 1: - # paths can be the same, but return value differs - # assert "size" not in tree - if "size" not in tree: - tree["size"] = 0 - tree["size"] += value - tree["type"] = "function" - tree["count"] = count - else: - tree["type"] = "namespace" - if path[1] not in tree["children"]: - tree["children"][path[1]] = {} - add_to_tree(tree["children"][path[1]], path[1:], value, count) - -def children_to_list(tree): - if "children" not in tree: - return - tree["children"] = list(tree["children"].values()) - for child in tree["children"]: - children_to_list(child) - -def propogate_size(tree): - if "size" not in tree: - tree["size"] = 0 - for child in tree.get("children", []): - tree["size"] += propogate_size(child) - return tree["size"] - -def enrich_names_with_sec(tree): - area = 0 - for child_ in tree.get("children", []): - enrich_names_with_sec(child_) - - tree["name"] = tree["name"] + " " + "{:_} 
KiB".format(int(tree["size"]/1024)) - if "count" in tree: - tree["name"] += ", {} times".format(tree["count"]) - -def build_tree(items): - tree = {} +def get_tree_paths(items): total_size = 0 + paths_to_add = [] for name, (size, count, obj_files, avg, min, max) in items: # we skip small entities to order to make html view usable if size < THRESHHOLD_TO_SHOW_ON_TREE_VIEW: @@ -161,13 +135,12 @@ def build_tree(items): root_name = "root (all function less than {} KiB are ommited)".format(THRESHHOLD_TO_SHOW_ON_TREE_VIEW // 1024) path = [root_name] + path - - add_to_tree(tree, path, size, count) - children_to_list(tree) - propogate_size(tree) - enrich_names_with_sec(tree) - print("Total size =", total_size) - return tree + path_with_info = [[chunk, "namespace", 0] for chunk in path] + path_with_info[-1][1] = "function" + path_with_info[-1][2] = size + path_with_info[-1][0] += ", {} times".format(count) + paths_to_add.append(path_with_info) + return paths_to_add def parse_args(): @@ -194,7 +167,6 @@ def parse_args(): ) return parser.parse_args() - def main(): options = parse_args() json_path = options.bloat_json @@ -214,28 +186,12 @@ def main(): if options.html_template_bloat: output_dir = options.html_template_bloat - current_script_dir = os.path.dirname(os.path.realpath(__file__)) - html_dir = os.path.join(current_script_dir, "html") - - tree = build_tree(items) - - env = Environment(loader=FileSystemLoader(html_dir), undefined=StrictUndefined) + tree_paths = get_tree_paths(items) types = [ ("namespace", "Namespace", "#66C2A5"), ("function", "Function", "#FC8D62"), ] - file_names = os.listdir(html_dir) - os.makedirs(output_dir, exist_ok=True) - for file_name in file_names: - data = env.get_template(file_name).render(types=types) - - dst_path = os.path.join(output_dir, file_name) - with open(dst_path, "w") as f: - f.write(data) - - with open(os.path.join(output_dir, "bloat.json"), "w") as f: - f.write("kTree = ") - json.dump(tree, f, indent=4) + 
tree_map.generate_tree_map_html(output_dir, tree_paths, unit_name="KiB", factor=1.0/1024, types=types) return 0 diff --git a/ydb/ci/build_bloat/tree_map.py b/ydb/ci/build_bloat/tree_map.py new file mode 100755 index 000000000000..fc02b1ca6d92 --- /dev/null +++ b/ydb/ci/build_bloat/tree_map.py @@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+"""Build a tree-map JSON from weighted paths and render the HTML viewer for it."""
+
+import json
+import os
+
+from jinja2 import Environment, FileSystemLoader, StrictUndefined
+
+
+def _add_to_tree(tree, path):
+    """Merge one path into tree, creating the nodes it passes through.
+
+    path is a non-empty sequence of (name, type, size) entries, root first.
+    """
+    current_name, current_type, current_size = path[0]
+    tree["name"] = current_name
+    tree["type"] = current_type
+    tree.setdefault("children", {})
+    # Sizes accumulate: the same path may legitimately be added more than once.
+    tree["size"] = tree.get("size", 0) + current_size
+
+    if len(path) > 1:
+        child = tree["children"].setdefault(path[1][0], {})
+        _add_to_tree(child, path[1:])
+
+
+def _children_to_list(tree):
+    """Recursively replace the name -> node dicts in "children" with plain lists."""
+    if "children" not in tree:
+        return
+    tree["children"] = list(tree["children"].values())
+    for child in tree["children"]:
+        _children_to_list(child)
+
+
+def _propagate_size(tree):
+    """Add every subtree's total size to its parent and return the total of tree."""
+    for child in tree.get("children", []):
+        tree["size"] += _propagate_size(child)
+    return tree["size"]
+
+
+def _intify_size(tree):
+    """Truncate the accumulated size of every node to an int."""
+    for child in tree.get("children", []):
+        _intify_size(child)
+    tree["size"] = int(tree["size"])
+
+
+def _enrich_names_with_units(tree, unit_name, factor):
+    """Append ", <size * factor> <unit_name>" to the name of every node."""
+    for child in tree.get("children", []):
+        _enrich_names_with_units(child, unit_name, factor)
+
+    tree["name"] = tree["name"] + ", {:_} {}".format(int(tree["size"] * factor), unit_name)
+
+
+def _build_tree_map(paths_to_add, unit_name, factor):
+    """Fold paths_to_add into one nested dict ready to be dumped as kTree."""
+    tree = {}
+    for path in paths_to_add:
+        _add_to_tree(tree, path)
+    if not tree:
+        # With no paths the root would lack "name"/"size" and the passes below
+        # would raise KeyError, so fall back to an explicit empty root.
+        # NOTE(review): how the HTML viewer renders an empty "type" is unverified.
+        tree = {"name": "empty", "type": "", "size": 0, "children": {}}
+    _children_to_list(tree)
+    _propagate_size(tree)
+    _intify_size(tree)
+    _enrich_names_with_units(tree, unit_name, factor)
+    return tree
+
+
+def generate_tree_map_html(output_dir: str, tree_paths: list[list[list]], unit_name: str, factor: float, types: list[tuple[str, str, str]]) -> None:
+    """Render the templates from the sibling "html" directory and bloat.json into output_dir.
+
+    tree_paths is a list of paths; each path is a list of
+    [name, type, size] entries, root first. Node names get the size multiplied
+    by factor followed by unit_name appended; types is passed to every template.
+    """
+    current_script_dir = os.path.dirname(os.path.realpath(__file__))
+    html_dir = os.path.join(current_script_dir, "html")
+
+    tree = _build_tree_map(tree_paths, unit_name, factor)
+
+    env = Environment(loader=FileSystemLoader(html_dir), undefined=StrictUndefined)
+    file_names = os.listdir(html_dir)
+    os.makedirs(output_dir, exist_ok=True)
+    for file_name in file_names:
+        data = env.get_template(file_name).render(types=types)
+
+        dst_path = os.path.join(output_dir, file_name)
+        with open(dst_path, "w") as f:
+            f.write(data)
+
+    with open(os.path.join(output_dir, "bloat.json"), "w") as f:
+        f.write("kTree = ")
+        json.dump(tree, f, indent=4)