From 8e5cf9c977d756b1a166584d24de8fd6917ab2a9 Mon Sep 17 00:00:00 2001 From: Djordje Ramic Date: Tue, 10 Dec 2024 17:20:30 +0000 Subject: [PATCH] Addressing review comments - 1 --- .../Rock/Tuning/QuickTuningPerfconfigs.inc | 12 +- .../Rock/Tuning/GridwiseGemmParams.cpp | 4 +- .../performance/analysis/quickTuningGen.py | 150 +++++++++--------- 3 files changed, 85 insertions(+), 81 deletions(-) diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc index 87108f23834b..54042cfc83ce 100644 --- a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc +++ b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc @@ -160,7 +160,7 @@ const InitParamsAccel PopulateParamsXDL::initParametersFp16Conv[PopulateParamsXD // END_CONV_XDL_f16_DEFS // BEGIN_GEMM_XDL_i8_DEFS -const InitParamsAccel PopulateParamsXDL::initParametersForward8BitGemm[PopulateParamsXDL::nInitParametersForward8BitGemm] = { +const InitParamsAccel PopulateParamsXDL::initParametersI8Gemm[PopulateParamsXDL::nInitParametersI8Gemm] = { {64,64,16,32,16,4,1,true,true}, {64,128,8,32,16,8,1,true,true}, {32,64,8,16,16,16,1,true,true}, @@ -223,8 +223,8 @@ static const InitParamsAccel initParametersFp16Conv[nInitParametersFp16Conv]; // END_CONV_XDL_f16_DECS // BEGIN_GEMM_XDL_i8_DECS -static constexpr size_t nInitParametersForward8BitGemm = 14; -static const InitParamsAccel initParametersForward8BitGemm[nInitParametersForward8BitGemm]; +static constexpr size_t nInitParametersI8Gemm = 14; +static const InitParamsAccel initParametersI8Gemm[nInitParametersI8Gemm]; // END_GEMM_XDL_i8_DECS // BEGIN_CONV_XDL_i8_DECS @@ -290,7 +290,7 @@ const InitParamsAccel PopulateParamsWmma::initParametersFp16Conv[PopulateParamsW // END_CONV_Wmma_f16_DEFS // BEGIN_GEMM_Wmma_i8_DEFS -const InitParamsAccel PopulateParamsWmma::initParametersForward8BitGemm[PopulateParamsWmma::nInitParametersForward8BitGemm] = { +const InitParamsAccel PopulateParamsWmma::initParametersI8Gemm[PopulateParamsWmma::nInitParametersI8Gemm] = { {128,64,8,32,64,16,1,true,true}, {64,128,8,64,32,16,1,true,true}, {128,128,4,64,64,16,1,true,true}, @@ -340,8 +340,8 @@ static const InitParamsAccel initParametersFp16Conv[nInitParametersFp16Conv]; // END_CONV_Wmma_f16_DECS // BEGIN_GEMM_Wmma_i8_DECS -static constexpr size_t nInitParametersForward8BitGemm = 15; -static const InitParamsAccel initParametersForward8BitGemm[nInitParametersForward8BitGemm]; +static constexpr size_t nInitParametersI8Gemm = 15; +static const InitParamsAccel initParametersI8Gemm[nInitParametersI8Gemm]; // END_GEMM_Wmma_i8_DECS // BEGIN_CONV_Wmma_i8_DECS diff --git a/mlir/lib/Dialect/Rock/Tuning/GridwiseGemmParams.cpp b/mlir/lib/Dialect/Rock/Tuning/GridwiseGemmParams.cpp index 5e1342cf1831..6f17ade10227 100644 --- a/mlir/lib/Dialect/Rock/Tuning/GridwiseGemmParams.cpp +++ b/mlir/lib/Dialect/Rock/Tuning/GridwiseGemmParams.cpp @@ -547,7 +547,7 @@ PopulateParamsXDL::getTuningParameters(KernelType opType, Type dataTypeA, if (opType == KernelType::Gemm) { switch (dataTypeA.getIntOrFloatBitWidth()) { case 8: - params = {initParametersForward8BitGemm, nInitParametersForward8BitGemm}; + params = {initParametersI8Gemm, nInitParametersI8Gemm}; break; case 16: params = {initParametersFp16Gemm, nInitParametersFp16Gemm}; @@ -715,7 +715,7 @@ PopulateParamsWmma::getTuningParameters(KernelType opType, Type dataTypeA, if (opType == KernelType::Gemm) { switch (dataTypeA.getIntOrFloatBitWidth()) { case 8: - params = {initParametersForward8BitGemm, nInitParametersForward8BitGemm}; + params = {initParametersI8Gemm, nInitParametersI8Gemm}; break; case 16: params = {initParametersFp16Gemm, nInitParametersFp16Gemm}; diff --git a/mlir/utils/performance/analysis/quickTuningGen.py b/mlir/utils/performance/analysis/quickTuningGen.py index d8c2da10627d..ecee25b85a2f 100644 --- a/mlir/utils/performance/analysis/quickTuningGen.py +++ b/mlir/utils/performance/analysis/quickTuningGen.py @@ -59,7 +59,7 @@ def replace_section( with open(file_path, 'w') as file: file.write(content) - def isAccel(self, arch, datatype): + def is_accel(self, arch, datatype): instruction_type = self.get_instruction_type(arch, datatype) if instruction_type == "XDL" or instruction_type == "Wmma": return True @@ -97,67 +97,73 @@ def init_inc_file(self, file_path): file.write("// THIS IS AN AUTOGENERATED FILE.\n") file.write("// DO NOT EDIT THIS FILE DIRECTLY!\n\n") file.write("// clang-format off\n") - for instrction_type, datatypes in instruction_types_to_datatypes.items(): - file.write(f"#ifdef {instrction_type}_DEFINITIONS_GEN\n\n") + for instruction_type, datatypes in instruction_types_to_datatypes.items(): + file.write(f"#ifdef {instruction_type}_DEFINITIONS_GEN\n\n") for datatype in datatypes: for marker in markers: file.write( - f"{marker}_{instrction_type}_{datatype}_DEFS\n\n") + f"{marker}_{instruction_type}_{datatype}_DEFS\n\n") file.write(f"#endif\n\n") - file.write(f"#ifdef {instrction_type}_DECLARATIONS_GEN\n\n") + file.write(f"#ifdef {instruction_type}_DECLARATIONS_GEN\n\n") for datatype in datatypes: for marker in markers: file.write( - f"{marker}_{instrction_type}_{datatype}_DECS\n\n") + f"{marker}_{instruction_type}_{datatype}_DECS\n\n") file.write(f"#endif\n\n") - def get_init_params_definitions(self, arch, dtype, op): - """ - Generates initialization parameter definitions for a given data type and operation. - """ - accel_type = 'Accel' if self.isAccel(arch, dtype) else 'NonAccel' - instruction_type = self.get_instruction_type(arch, dtype) - op_cap = op.capitalize() - - if dtype == 'f32': - init_params = f"initParameters{op_cap}" - n_init_params = f"nInitParameters{op_cap}" - if not self.isAccel(arch, dtype): + def get_init_params_definitions(self, arch, dtype, op): + """ + Generates initialization parameter definitions for a given data type and operation. + """ + accel_type = 'Accel' if self.is_accel(arch, dtype) else 'NonAccel' + instruction_type = self.get_instruction_type(arch, dtype) + op_cap = op.capitalize() + + if dtype == 'f32': + init_params = f"initParameters{op_cap}" + n_init_params = f"nInitParameters{op_cap}" + if not self.is_accel(arch, dtype): instruction_type = '' - elif dtype == 'f16': - init_params = f"initParametersFp16{op_cap}" - n_init_params = f"nInitParametersFp16{op_cap}" - elif dtype == 'i8': - init_params = f"initParametersForward8Bit{op_cap}" - n_init_params = f"nInitParametersForward8Bit{op_cap}" - else: - raise ValueError("Unsupported dtype") - - return f"const InitParams{accel_type} PopulateParams{instruction_type}::{init_params}[PopulateParams{instruction_type}::{n_init_params}]" - - def get_init_params_declaration(self, arch, dtype, op): - """ - Generates initialization parameter declarations for a given data type and operation. - """ - op_cap = op.capitalize() - accel_type = 'Accel' if self.isAccel(arch, dtype) else 'NonAccel' - - if dtype == 'f32': - init_params = f"initParameters{op_cap}" - n_init_params = f"nInitParameters{op_cap}" - elif dtype == 'f16': - init_params = f"initParametersFp16{op_cap}" - n_init_params = f"nInitParametersFp16{op_cap}" - elif dtype == 'i8': - init_params = f"initParametersForward8Bit{op_cap}" - n_init_params = f"nInitParametersForward8Bit{op_cap}" - else: - raise ValueError("Unsupported dtype") - - return (f"static const InitParams{accel_type} {init_params}[{n_init_params}]", - f"static constexpr size_t {n_init_params}") - + elif dtype == 'f16': + init_params = f"initParametersFp16{op_cap}" + n_init_params = f"nInitParametersFp16{op_cap}" + elif dtype == 'i8' and op == 'conv': + init_params = f"initParametersForward8Bit{op_cap}" + n_init_params = f"nInitParametersForward8Bit{op_cap}" + elif dtype == 'i8' and op == 'gemm': + init_params = f"initParametersI8{op_cap}" + n_init_params = f"nInitParametersI8{op_cap}" + else: + raise ValueError("Unsupported dtype") + + return f"const InitParams{accel_type} PopulateParams{instruction_type}::{init_params}[PopulateParams{instruction_type}::{n_init_params}]" + + def get_init_params_declaration(self, arch, dtype, op): + """ + Generates initialization parameter declarations for a given data type and operation. + """ + op_cap = op.capitalize() + accel_type = 'Accel' if self.is_accel(arch, dtype) else 'NonAccel' + + if dtype == 'f32': + init_params = f"initParameters{op_cap}" + n_init_params = f"nInitParameters{op_cap}" + elif dtype == 'f16': + init_params = f"initParametersFp16{op_cap}" + n_init_params = f"nInitParametersFp16{op_cap}" + elif dtype == 'i8' and op == 'conv': + init_params = f"initParametersForward8Bit{op_cap}" + n_init_params = f"nInitParametersForward8Bit{op_cap}" + elif dtype == 'i8' and op == 'gemm': + init_params = f"initParametersI8{op_cap}" + n_init_params = f"nInitParametersI8{op_cap}" + else: + raise ValueError("Unsupported dtype") + + return (f"static const InitParams{accel_type} {init_params}[{n_init_params}]", + f"static constexpr size_t {n_init_params}") + def update_config_file(self, result): """ @@ -167,11 +173,11 @@ def update_config_file(self, result): if not os.path.exists(file_path): self.init_inc_file(file_path) - datatype_names_defs= { - 'f32': self.get_init_params_definitions(self.arch, 'f32', self.op), - 'f16': self.get_init_params_definitions(self.arch, 'f16', self.op), - 'i8': self.get_init_params_definitions(self.arch, 'i8', self.op) - } + datatype_names_defs= { + 'f32': self.get_init_params_definitions(self.arch, 'f32', self.op), + 'f16': self.get_init_params_definitions(self.arch, 'f16', self.op), + 'i8': self.get_init_params_definitions(self.arch, 'i8', self.op) + } for datatype, perfconfigs in result.items(): lines = [] @@ -192,13 +198,13 @@ def update_config_file(self, result): f"// END_{self.op.upper()}_{self.get_instruction_type(self.arch, datatype)}_{datatype}_DEFS", new_content) - datatype_names_decs = {} - datatype_n_decs = {} + datatype_names_decs = {} + datatype_n_decs = {} - for dtype in ['f32', 'f16', 'i8']: - init_params_dec, n_params_dec = self.get_init_params_declaration(self.arch, dtype, self.op) - datatype_names_decs[dtype] = init_params_dec - datatype_n_decs[dtype] = n_params_dec + for dtype in ['f32', 'f16', 'i8']: + init_params_dec, n_params_dec = self.get_init_params_declaration(self.arch, dtype, self.op) + datatype_names_decs[dtype] = init_params_dec + datatype_n_decs[dtype] = n_params_dec for datatype, perfconfigs in result.items(): lines = [] @@ -238,7 +244,7 @@ def get_unique_perfconfigs_list(self, problems_to_perfconfigs): def get_top_n_perfconfigs_per_problems(self, df, targetColumns): """ - Identifies the top perfcofnigs for each problem based on a threshold + Identifies the top perfconfigs for each problem based on a threshold """ grouped = df.groupby(targetColumns) problem_df = {} @@ -253,7 +259,7 @@ def find(self): """ Finds the minimal set of perfconfigs that cover all problems using set cover optimizaiton. - Returns : A dictionary containing data types as keys and thier + Returns : A dictionary containing data types as keys and their corresponding selected perfconfigs. """ result = {} @@ -291,26 +297,24 @@ def find(self): n = len(problems) m = len(perfconfigs) - problem_to_index = { - problem: idx for idx, - problem in enumerate(problems)} - perfconfig_to_index = { - perfconfig: idx for idx, - perfconfig in enumerate(perfconfigs)} + + perfconfig_to_index = {perfconfig : idx for idx, + perfconfig in enumerate(perfconfigs)} # Create coverage matrix A = np.zeros((n, m), dtype=int) - for problem, perfconfig_list in problems_to_perfconfigs.items(): - i = problem_to_index[problem] - for perfconfig in perfconfig_list: + for i, problem in enumerate(problems): + for perfconfig in problems_to_perfconfigs[problem]: j = perfconfig_to_index[perfconfig] A[i][j] = 1 # Linear programming model to minimize the number of perfconfigs prob = pulp.LpProblem("SetCoverProblems", pulp.LpMinimize) x = pulp.LpVariable.dicts("x", range(m), cat='Binary') + # Set up objective function to minimize the sum of selected perfconfigs prob += pulp.lpSum([x[j]] for j in range(m)) for i in range(n): + # Add a constraint for each problem ensuring at least one perfconfig is selected prob += pulp.lpSum([A[i][j] * x[j] for j in range(m)]) >= 1, f"Cover_problem_{i}"