From 1ed825730c256b9e1c21a2280fa89631e1e2001f Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram <96096277+nirvedhmeshram@users.noreply.github.com> Date: Mon, 24 Jun 2024 12:02:08 -0500 Subject: [PATCH] Fix bug in artifact creation of the LX6 instructions (#453) There was a bug where we were appending LX6 instructions to an existing vector for subsequent entry points rather than making a new vector for each entry point. This change fixes that and hence fixes https://github.com/nod-ai/iree-amd-aie/issues/447 which was indeed a kernel timeout due to bad artifacts. Also adds a multi-dispatch e2e test to CI. --- build_tools/ci/cpu_comparison/run_test.sh | 3 ++ .../test_files/three_matmuls.mlir | 31 +++++++++++++++++++ .../AMD-AIE/iree-amd-aie/Target/AIETarget.cpp | 3 +- 3 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 build_tools/ci/cpu_comparison/test_files/three_matmuls.mlir diff --git a/build_tools/ci/cpu_comparison/run_test.sh b/build_tools/ci/cpu_comparison/run_test.sh index 40fef78d9..93362daea 100755 --- a/build_tools/ci/cpu_comparison/run_test.sh +++ b/build_tools/ci/cpu_comparison/run_test.sh @@ -326,6 +326,9 @@ function run_test() { # Example of running a test directly from an .mlir file with a function. run_test --test_file ${THIS_DIR}/test_files/matmul_int32.mlir +# An example of an arbitrary graph with three matmuls which form three dispatches. +run_test --test_file ${THIS_DIR}/test_files/three_matmuls.mlir + # Example of generating a matmul test from a template, and then running it. 
test_name=${OUTPUT_DIR}/test_from_template.mlir matmul_template_dir=${THIS_DIR}/matmul_template diff --git a/build_tools/ci/cpu_comparison/test_files/three_matmuls.mlir b/build_tools/ci/cpu_comparison/test_files/three_matmuls.mlir new file mode 100644 index 000000000..fe4fd3fcf --- /dev/null +++ b/build_tools/ci/cpu_comparison/test_files/three_matmuls.mlir @@ -0,0 +1,31 @@ +// This test shows arbitrary matmuls that would have producer consumer relationships +// across different dispatches running on CI. + +// These 4 lines are required by the script which generates input data: +// +// input 32x32xf32 +// input 32x32xf32 +// input 32x4xf32 +// input 4x32xf32 + +!A_TYPE = tensor<32x32xf32> +!B_TYPE = tensor<32x4xf32> +!C_TYPE = tensor<4x32xf32> +!D_TYPE = tensor<4x4xf32> +func.func @two_mm(%lhs : !A_TYPE, + %rhs : !A_TYPE, %rhs_2 : !B_TYPE, %lhs_2 : !C_TYPE) -> !D_TYPE { + %empty = tensor.empty() : !A_TYPE + %empty_2 = tensor.empty() : !B_TYPE + %empty_3 = tensor.empty() : !D_TYPE + %cst = arith.constant 0.0 : f32 + %fill = linalg.fill ins(%cst : f32) outs(%empty : !A_TYPE) -> !A_TYPE + %fill_2 = linalg.fill ins(%cst : f32) outs(%empty_2 : !B_TYPE) -> !B_TYPE + %fill_3 = linalg.fill ins(%cst : f32) outs(%empty_3 : !D_TYPE) -> !D_TYPE + %2 = linalg.matmul ins(%lhs, %rhs : !A_TYPE, !A_TYPE) + outs(%fill : !A_TYPE) -> !A_TYPE + %3 = linalg.matmul ins(%2, %rhs_2 : !A_TYPE, !B_TYPE) + outs(%fill_2 : !B_TYPE) -> !B_TYPE + %4 = linalg.matmul ins(%lhs_2, %3 : !C_TYPE, !B_TYPE) + outs(%fill_3 : !D_TYPE) -> !D_TYPE + return %4 : !D_TYPE +} diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp index 7eff26b59..5a71a2521 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp @@ -253,7 +253,6 @@ LogicalResult AIETargetBackend::serializeExecutable( SmallString<128> 
aie2xclbin(options.mlirAieInstallDir); llvm::sys::path::append(aie2xclbin, "bin", "aie2xclbin"); - std::vector npuInstrs; std::unique_ptr xclbinIn; FlatbufferBuilder builder; @@ -359,6 +358,8 @@ LogicalResult AIETargetBackend::serializeExecutable( std::ifstream instrFile(static_cast(npuInstPath)); std::string line; + // Vector to store LX6 instructions. + std::vector npuInstrs; while (std::getline(instrFile, line)) { std::istringstream iss(line); uint32_t a;