From 437611752055a0f3af168a8d20f7e35979927460 Mon Sep 17 00:00:00 2001
From: Kunwar Grover <fargoveam@gmail.com>
Date: Tue, 29 Oct 2024 16:59:36 +0000
Subject: [PATCH] [GPU] Do not treat pad as a tilable producer for operand
 promotion (#18918)

PadOp doesn't have an implementation for deriving thread configuration
from derived_thread_config, so ignore promoting it until an
implementation is added.
---
 .../Common/GPU/GPUPromoteMatmulOperands.cpp   | 12 +++++++---
 .../GPU/test/gpu_promote_matmul_operands.mlir | 24 +++++++++++++++++++
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPromoteMatmulOperands.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPromoteMatmulOperands.cpp
index dd498fad50e8..5e50a956bd82 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPromoteMatmulOperands.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPromoteMatmulOperands.cpp
@@ -53,9 +53,15 @@ void promoteOperand(OpBuilder &builder, Operation *op, unsigned index) {
         return;
       }
     }
-    setLoweringConfig(producer, IREE::GPU::DerivedThreadConfigAttr::get(
-                                    builder.getContext()));
-    return;
+
+    // We only support thread tile size derivation of linalgOp and Im2colOp for
+    // now.
+    if (isa<linalg::LinalgOp, IREE::LinalgExt::Im2colOp>(
+            producer.getOperation())) {
+      setLoweringConfig(producer, IREE::GPU::DerivedThreadConfigAttr::get(
+                                      builder.getContext()));
+      return;
+    }
   }
 
   auto tensorType = dyn_cast<RankedTensorType>(operand.getType());
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_promote_matmul_operands.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_promote_matmul_operands.mlir
index f05cf7b1890b..643b12c01e39 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_promote_matmul_operands.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_promote_matmul_operands.mlir
@@ -82,3 +82,27 @@ func.func @no_promote_fill(%b: tensor<128x128xf32>) -> tensor<4x128xf32> {
 // CHECK-LABEL: func.func @no_promote_fill
 // CHECK-NOT: iree_gpu.derived_thread_config
 // CHECK: return
+
+// -----
+
+#lowering_config = #iree_gpu.lowering_config<{promote_operands = [0]}>
+
+func.func @promote_pad(%a : tensor<4x127xf32>, %b: tensor<128x128xf32>) -> tensor<4x128xf32> {
+  %cst = arith.constant 0.000000e+00 : f32
+  %empty = tensor.empty() : tensor<4x128xf32>
+  %fill = linalg.fill ins(%cst : f32) outs(%empty : tensor<4x128xf32>) -> tensor<4x128xf32>
+  %padded = tensor.pad %a low[0, 0] high[0, 1] {
+  ^bb0(%arg0: index, %arg1: index):
+    tensor.yield %cst : f32
+  } : tensor<4x127xf32> to tensor<4x128xf32>
+  %mm = linalg.matmul {lowering_config = #lowering_config}
+      ins(%padded, %b : tensor<4x128xf32>, tensor<128x128xf32>) outs(%fill : tensor<4x128xf32>) -> tensor<4x128xf32>
+  return %mm : tensor<4x128xf32>
+}
+
+// Verify that pad is promoted with linalg.copy
+// CHECK-LABEL: func.func @promote_pad
+// CHECK: tensor.pad
+// CHECK: linalg.copy
+// CHECK-SAME: derived_thread_config
+// CHECK: return