Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BenchGC] add tuner tools for benchgc #358

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
7 changes: 7 additions & 0 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,13 @@ declare_mlir_python_extension(GcPythonSources.CpuInfoExtension
CPUInfo.cpp
)

# Native extension module `_tools` (built from Tools.cpp); it is imported by
# the gc_mlir.tools Python package to expose tuning helpers such as
# validate_matmul_config.
declare_mlir_python_extension(GcPythonSources.ToolsExtension
MODULE_NAME _tools
ADD_TO_PARENT GcPythonSources
SOURCES
Tools.cpp
)

################################################################################
# Common CAPI
################################################################################
Expand Down
40 changes: 40 additions & 0 deletions python/Tools.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright (C) 2024 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
*/

#include "gc/Analysis/MatmulConfigAnalysis.h"
#include "mlir/Bindings/Python/PybindAdaptors.h"

// Python bindings for the BenchGC tuning helpers.
PYBIND11_MODULE(_tools, m) {
  // Validate a candidate matmul configuration against the given shape.
  auto validate = [](const std::vector<uint32_t> &cfgList,
                     std::vector<uint32_t> &shape,
                     bool allowIndivisibleInnerblock, bool isVnniMm2d) {
    // MatmulConfig is aggregate-initialized from exactly nine scalars.
    if (cfgList.size() != 9) {
      throw std::invalid_argument("cfg_list must have exactly 9 elements");
    }
    mlir::gc::MatmulConfig config{cfgList[0], cfgList[1], cfgList[2],
                                  cfgList[3], cfgList[4], cfgList[5],
                                  cfgList[6], cfgList[7], cfgList[8]};
    return mlir::gc::validateConfig(config, shape, allowIndivisibleInnerblock,
                                    isVnniMm2d);
  };

  m.def("validate_matmul_config", validate, py::arg("cfg_list"),
        py::arg("shape"), py::arg("allow_indivisible_innerblock"),
        py::arg("is_vnni_mm2d"), "Validate the matmul configuration");
}
2 changes: 2 additions & 0 deletions python/gc_mlir/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===-----------------------------------------------------------------------===#

from .._mlir_libs._tools import validate_matmul_config
160 changes: 80 additions & 80 deletions scripts/correctness.sh

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions test/benchgc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,4 @@ add_subdirectory("src/benchgc/tensor")
add_subdirectory("src/benchgc/arith")
add_subdirectory("src/benchgc/pattern")
add_subdirectory("src/benchgc/math")
add_subdirectory("src/benchgc/tuner")
4 changes: 4 additions & 0 deletions test/benchgc/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ python -m benchgc [OPTIONS] --mode [MODE] --driver [DRIVER] --case [CASE]
### --mode [str]
* C : correctness testing (by default)
* P : performance testing
* T : performance tuning, see tuner [`README.md`](src/benchgc/tuner/README.md)

### --driver [str]
* linalg: test the single op in linalg dialect
Expand Down Expand Up @@ -138,12 +139,15 @@ module {
### --bench_kind [str]
* py : use the MLIR Python API to invoke the kernel and use Python to calculate the time cost
* wrapper : modify MLIR by wrapping the kernel into a new method and calling the `nanoTime()` method before and after calling the kernel. Finally, calculate the difference as the time cost
* default: `py`

### --warm_up [int]
* number of warm-up executions before timing starts
* default: 100

### --repeat [int]
* number of timed executions; the cost is averaged over these runs
* default: 100

## Pattern Options
Each pattern has its own unique options.
Expand Down
126 changes: 120 additions & 6 deletions test/benchgc/src/benchgc/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,15 @@
set_default_fill,
)
from benchgc.arg.arg import Arg
from benchgc.bench import mlir_wrapper_bench, py_timeit_bench
from benchgc.bench import (
batch_mlir_wrapper_bench,
batch_py_timeit_bench,
mlir_wrapper_bench,
py_timeit_bench,
)
from benchgc.mlir.arg import get_mlir_args
from benchgc.pattern import get_pattern_clz
from benchgc.tuner.tuner import GATuner, GridTuner, Tuner, TuningSpace
from gc_mlir import ir
from gc_mlir.graph_compiler import GraphCompiler

Expand All @@ -44,7 +50,7 @@ def add_common_options(parser: argparse.ArgumentParser):
"--mode",
required=False,
help="specify the test mode, C for correctness testing, P for performance testing",
choices=["C", "P"],
choices=["C", "P", "T"],
default="C",
type=str,
)
Expand Down Expand Up @@ -198,13 +204,20 @@ def add_common_options(parser: argparse.ArgumentParser):

def add_bench_options(parser: argparse.ArgumentParser):
"""add options for bench mode"""
if parser.parse_known_args()[0].mode == "P":
if parser.parse_known_args()[0].mode in ("P", "T"):
parser.add_argument(
"--bench_kind", type=str, choices=["py", "wrapper"], default="py"
)
parser.add_argument("--warm_up", type=int, default=100)
parser.add_argument("--repeat", type=int, default=100)

parser.add_argument(
"--warm_up",
type=int,
default=100 if parser.parse_known_args()[0].mode == "P" else 2,
)
parser.add_argument(
"--repeat",
type=int,
default=100 if parser.parse_known_args()[0].mode == "P" else 4,
)


def add_pattern_options(parser: argparse.ArgumentParser):
Expand All @@ -213,6 +226,45 @@ def add_pattern_options(parser: argparse.ArgumentParser):
pattern_name = parser.parse_known_args()[0].case
get_pattern_clz(pattern_name).add_args(parser)

def add_tuner_options(parser: argparse.ArgumentParser):
    """Register the command-line options for tuning mode ("T").

    The tuner flags are only added when --mode is "T" so the other modes
    keep a clean --help output; the GA-specific flags are in turn only
    added when --search_alg resolves to "ga".

    Args:
        parser: the shared argument parser being built up incrementally.
    """
    # Peek at the flags known so far; parse_known_args tolerates the
    # tuner options that have not been registered yet.
    if parser.parse_known_args()[0].mode == "T":
        parser.add_argument(
            "--search_alg", type=str, choices=["grid", "ga"], default="grid"
        )
        parser.add_argument(
            "--tuning_batch", type=int, default=Tuner.DEFAULT_BATCH_SIZE
        )
        parser.add_argument("--early_stop", type=int, default=Tuner.DEFAULT_EARLY_STOP)
        parser.add_argument(
            "--max_tuning_iters", type=int, default=Tuner.DEFAULT_MAX_ITERS
        )
        parser.add_argument("--timeout", type=int, default=Tuner.DEFAULT_TIMEOUT)
        parser.add_argument(
            "--space_percent", type=float, default=TuningSpace.DEFAULT_SPACE_PERCENT
        )
        parser.add_argument(
            "--tuner_verbose",
            action="store_true",
            help="print the tuner log",
        )
        parser.add_argument("--checkpoint_path", type=str, default="")

        # Re-parse: --search_alg only became visible just above. This stays
        # inside the mode check so non-tuning modes never touch search_alg.
        if parser.parse_known_args()[0].search_alg == "ga":
            parser.add_argument(
                "--ga_random_seed", type=int, default=GATuner.DEFAULT_RANDOM_SEED
            )
            parser.add_argument(
                "--ga_elite_num", type=int, default=GATuner.DEFAULT_ELITE_NUM
            )
            parser.add_argument(
                "--ga_mutation_prob", type=float, default=GATuner.DEFAULT_MUTATION_PROB
            )
            parser.add_argument(
                "--ga_expected_tune_num",
                type=int,
                default=GATuner.DEFAULT_EXPECTED_TUNE_NUM,
            )

def get_module_and_args(flags: argparse.Namespace):
args: List[Arg] = []
Expand Down Expand Up @@ -391,17 +443,79 @@ def performance_testing(flags: argparse.Namespace, module: ir.Module, args: List
print(json_res)


def performance_tuning(flags: argparse.Namespace, module: ir.Module, args: List[Arg]):
    """Tune the kernel configuration (mode "T").

    Fills the input tensors, wraps the selected batch-bench backend into a
    callback, builds the tuning space and runs the chosen tuner (grid
    search or genetic algorithm).

    Args:
        flags: parsed command-line flags (must include the tuner options).
        module: the MLIR module containing the kernel to tune.
        args: argument descriptors used to materialize the kernel inputs.

    Raises:
        ValueError: if --space_percent is outside (0, 1].
    """
    gc_args: List[torch.Tensor | int] = []
    gc_tensors: Dict[str, torch.Tensor] = {}
    for i, arg in enumerate(args):
        tensor = fill_tensor(flags, arg, i)
        gc_tensors["%arg" + str(i)] = tensor
        # Scalars are passed to the runtime by address, tensors by value.
        if arg.scalar:
            gc_args.append(tensor.data_ptr())
        else:
            gc_args.append(tensor)

    mlir_args = get_mlir_args(gc_args)
    with module.context as ctx, ir.Location.unknown():
        if flags.ir_printing:
            # IR printing interleaves badly with multithreaded pass runs.
            ctx.enable_multithreading(False)
        batch_bench = (
            batch_py_timeit_bench
            if flags.bench_kind == "py"
            else batch_mlir_wrapper_bench
        )

        def tuner_batch_bench(ir_modules):
            """Benchmark a batch of candidate modules with the fixed args."""
            return batch_bench(
                ir_modules,
                flags.entry,
                "any(gc-cpu-pipeline)",
                mlir_args,
                flags.ir_printing,
                flags.repeat,
                flags.warm_up,
            )

        # Validate explicitly rather than with `assert`, which is stripped
        # when Python runs with -O.
        if not 0 < flags.space_percent <= 1.0:
            raise ValueError("--space_percent must be in (0, 1]")
        space = TuningSpace(module, flags.space_percent)
        if flags.search_alg == "grid":
            tuner = GridTuner(
                tuner_batch_bench,
                space,
                flags.tuning_batch,
                flags.early_stop,
                flags.checkpoint_path,
                flags.tuner_verbose,
            )
        else:
            tuner = GATuner(
                tuner_batch_bench,
                space,
                flags.tuning_batch,
                flags.early_stop,
                flags.checkpoint_path,
                flags.tuner_verbose,
                flags.ga_elite_num,
                flags.ga_mutation_prob,
                random_seed=flags.ga_random_seed,
                expected_tune_num=flags.ga_expected_tune_num,
            )
        tuner.run(flags.max_tuning_iters, flags.timeout)


if __name__ == "__main__":
arg_parser = argparse.ArgumentParser(prog="benchmark tool for graph compiler")
add_common_options(arg_parser)
add_bench_options(arg_parser)
add_pattern_options(arg_parser)
add_tuner_options(arg_parser)
flags = arg_parser.parse_args()
benchgc.util.set_seed(flags.seed)
ir_module, module_args = get_module_and_args(flags)
if flags.mode == "C":
correctness_testing(flags, ir_module, module_args)
elif flags.mode == "P":
performance_testing(flags, ir_module, module_args)
elif flags.mode == "T":
performance_tuning(flags, ir_module, module_args)
else:
pass
11 changes: 6 additions & 5 deletions test/benchgc/src/benchgc/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,21 +117,22 @@ def batch_py_timeit_bench(
ir_modules: List[ir.Module],
entry_name: str,
pipeline: str,
mlir_args: list,
mlir_args: List[Any],
ir_printing=False,
repeat_time=5,
warm_up=2,
) -> List[Tuple[float, float]]:
"""benchmark a batch of mlir with python timeit."""
compiler = GraphCompiler(pipeline)
engines = []
funcs = []
compile_costs = []
for m in ir_modules:
compile_begin = timeit.default_timer()
engine = compiler.compile_and_jit(m, ir_printing=ir_printing)
engines.append(engine)
compile_cost = (timeit.default_timer() - compile_begin) * 1000
compile_costs.append(compile_cost)
funcs.append(engine.lookup(entry_name))

# Copied from execution_engine.py so that the cost of cast does not affect perf result.
packed_args = (ctypes.c_void_p * len(mlir_args))()
Expand All @@ -141,11 +142,11 @@ def batch_py_timeit_bench(
def run_bench(func, arg):
func(arg)

for func in funcs:
for func in [engine.lookup(entry_name) for engine in engines]:
timeit.timeit(lambda: run_bench(func, packed_args), number=warm_up)

execute_costs = []
for func in funcs:
for func in [engine.lookup(entry_name) for engine in engines]:
total_time = timeit.timeit(
lambda: run_bench(func, packed_args), number=repeat_time
)
Expand All @@ -158,7 +159,7 @@ def batch_mlir_wrapper_bench(
ir_modules: ir.Module,
entry_name: str,
pipeline: str,
mlir_args: list,
mlir_args: List[Any],
ir_printing=False,
repeat_time=5,
warm_up=2,
Expand Down
22 changes: 22 additions & 0 deletions test/benchgc/src/benchgc/tuner/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
################################################################################
# Copyright (C) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
# SPDX-License-Identifier: Apache-2.0
################################################################################


file(GLOB PYTHON_SCRIPTS "*.py")
foreach(PY_SCRIPT ${PYTHON_SCRIPTS})
configure_file(${PY_SCRIPT} ${CMAKE_BINARY_DIR}/test/benchgc/src/benchgc/tuner/ COPYONLY)
endforeach()
Loading
Loading