Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Error: convert-arith-to-emitc #121076

Open
pyl3000 opened this issue Dec 25, 2024 · 2 comments
Open

Error: convert-arith-to-emitc #121076

pyl3000 opened this issue Dec 25, 2024 · 2 comments
Labels

Comments

@pyl3000
Copy link

pyl3000 commented Dec 25, 2024

When I lower the arith dialect to the emitc dialect using mlir-opt --convert-arith-to-emitc, I get the following error:

Image

The input file 7-conv-emitc.mlir is as follows:

#map = affine_map<(d0, d1) -> (d0 + d1)>
module {
  memref.global "private" @global_seed : memref<i64> = dense<0>
  func.func @torch.aten.convolution$nobias(%arg0: tensor<1x24x16x128x128xf16>, %arg1: tensor<54x24x1x1x1xf16>) -> tensor<1x54x16x128x128xf16> {
    %c24 = arith.constant 24 : index
    %c128 = arith.constant 128 : index
    %c16 = arith.constant 16 : index
    %c54 = arith.constant 54 : index
    %c1 = arith.constant 1 : index
    %c0 = arith.constant 0 : index
    %cst = arith.constant 0.000000e+00 : f32
    %0 = bufferization.to_memref %arg1 : memref<54x24x1x1x1xf16, strided<[?, ?, ?, ?, ?], offset: ?>>
    %1 = bufferization.to_memref %arg0 : memref<1x24x16x128x128xf16, strided<[?, ?, ?, ?, ?], offset: ?>>
    %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x54x16x128x128xf32>
    emitc.for %arg2 = %c0 to %c1 step %c1 {
      emitc.for %arg3 = %c0 to %c54 step %c1 {
        emitc.for %arg4 = %c0 to %c16 step %c1 {
          emitc.for %arg5 = %c0 to %c128 step %c1 {
            emitc.for %arg6 = %c0 to %c128 step %c1 {
              memref.store %cst, %alloc[%arg2, %arg3, %arg4, %arg5, %arg6] : memref<1x54x16x128x128xf32>
            }
          }
        }
      }
    }
    emitc.for %arg2 = %c0 to %c1 step %c1 {
      emitc.for %arg3 = %c0 to %c16 step %c1 {
        emitc.for %arg4 = %c0 to %c128 step %c1 {
          emitc.for %arg5 = %c0 to %c128 step %c1 {
            emitc.for %arg6 = %c0 to %c54 step %c1 {
              emitc.for %arg7 = %c0 to %c1 step %c1 {
                emitc.for %arg8 = %c0 to %c1 step %c1 {
                  emitc.for %arg9 = %c0 to %c1 step %c1 {
                    emitc.for %arg10 = %c0 to %c24 step %c1 {
                      %3 = affine.apply #map(%arg3, %arg7)
                      %4 = affine.apply #map(%arg4, %arg8)
                      %5 = affine.apply #map(%arg5, %arg9)
                      %6 = memref.load %1[%arg2, %arg10, %3, %4, %5] : memref<1x24x16x128x128xf16, strided<[?, ?, ?, ?, ?], offset: ?>>
                      %7 = memref.load %0[%arg6, %arg10, %arg7, %arg8, %arg9] : memref<54x24x1x1x1xf16, strided<[?, ?, ?, ?, ?], offset: ?>>
                      %8 = memref.load %alloc[%arg2, %arg6, %arg3, %arg4, %arg5] : memref<1x54x16x128x128xf32>
                      %9 = arith.extf %6 : f16 to f32
                      %10 = arith.extf %7 : f16 to f32
                      %11 = arith.mulf %9, %10 : f32
                      %12 = arith.addf %8, %11 : f32
                      memref.store %12, %alloc[%arg2, %arg6, %arg3, %arg4, %arg5] : memref<1x54x16x128x128xf32>
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
    %alloc_0 = memref.alloc() {alignment = 64 : i64} : memref<1x54x16x128x128xf16>
    emitc.for %arg2 = %c0 to %c1 step %c1 {
      emitc.for %arg3 = %c0 to %c54 step %c1 {
        emitc.for %arg4 = %c0 to %c16 step %c1 {
          emitc.for %arg5 = %c0 to %c128 step %c1 {
            emitc.for %arg6 = %c0 to %c128 step %c1 {
              %3 = memref.load %alloc[%c0, %arg3, %arg4, %arg5, %arg6] : memref<1x54x16x128x128xf32>
              %4 = arith.truncf %3 : f32 to f16
              memref.store %4, %alloc_0[%arg2, %arg3, %arg4, %arg5, %arg6] : memref<1x54x16x128x128xf16>
            }
          }
        }
      }
    }
    %2 = bufferization.to_tensor %alloc_0 : memref<1x54x16x128x128xf16>
    return %2 : tensor<1x54x16x128x128xf16>
  }
  func.func @q_conv_test(%arg0: tensor<?x?x?x?xi8>, %arg1: tensor<?x?x?x?xi8>, %arg2: tensor<?xf32>) -> tensor<?x?x?x?xf32> {
    %c1 = arith.constant 1 : index
    %c3_i32 = arith.constant 3 : i32
    %c7_i32 = arith.constant 7 : i32
    %cst = arith.constant 1.000000e-04 : f64
    %cst_0 = arith.constant 0.000000e+00 : f32
    %c3 = arith.constant 3 : index
    %c2 = arith.constant 2 : index
    %cst_1 = arith.constant 2.14748365E+9 : f32
    %cst_2 = arith.constant -2.14748365E+9 : f32
    %c0 = arith.constant 0 : index
    %c1_i64 = arith.constant 1 : i64
    %0 = bufferization.to_memref %arg1 : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
    %1 = bufferization.to_memref %arg0 : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
    %2 = bufferization.to_memref %arg1 : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
    %3 = bufferization.to_memref %arg1 : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
    %4 = bufferization.to_memref %arg1 : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
    %5 = bufferization.to_memref %arg0 : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
    %6 = bufferization.to_memref %arg0 : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
    %7 = bufferization.to_memref %arg0 : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
    %8 = bufferization.to_memref %arg2 : memref<?xf32, strided<[?], offset: ?>>
    %9 = bufferization.to_memref %arg2 : memref<?xf32, strided<[?], offset: ?>>
    %dim = memref.dim %9, %c0 : memref<?xf32, strided<[?], offset: ?>>
    %alloc = memref.alloc(%dim) {alignment = 64 : i64} : memref<?xi32>
    %dim_3 = memref.dim %8, %c0 : memref<?xf32, strided<[?], offset: ?>>
    emitc.for %arg3 = %c0 to %dim_3 step %c1 {
      %21 = memref.load %8[%arg3] : memref<?xf32, strided<[?], offset: ?>>
      %22 = arith.truncf %cst : f64 to f32
      %23 = arith.divf %21, %22 : f32
      %24 = math.roundeven %23 : f32
      %25 = arith.addf %24, %cst_0 : f32
      %26 = arith.maximumf %25, %cst_2 : f32
      %27 = arith.minimumf %26, %cst_1 : f32
      %28 = arith.fptosi %27 : f32 to i32
      memref.store %28, %alloc[%arg3] : memref<?xi32>
    }
    %alloc_4 = memref.alloc(%dim) {alignment = 64 : i64} : memref<?xf32>
    emitc.for %arg3 = %c0 to %dim step %c1 {
      %21 = memref.load %alloc[%arg3] : memref<?xi32>
      %22 = arith.sitofp %21 : i32 to f32
      %23 = arith.truncf %cst : f64 to f32
      %24 = arith.mulf %22, %23 : f32
      memref.store %24, %alloc_4[%arg3] : memref<?xf32>
    }
    emitc.for %arg3 = %c0 to %dim step %c1 {
      %21 = memref.load %alloc_4[%arg3] : memref<?xf32>
      %22 = arith.truncf %cst : f64 to f32
      %23 = arith.divf %21, %22 : f32
      %24 = math.roundeven %23 : f32
      %25 = arith.addf %24, %cst_0 : f32
      %26 = arith.maximumf %25, %cst_2 : f32
      %27 = arith.minimumf %26, %cst_1 : f32
      %28 = arith.fptosi %27 : f32 to i32
      memref.store %28, %alloc[%arg3] : memref<?xi32>
    }
    %dim_5 = memref.dim %7, %c0 : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
    %dim_6 = memref.dim %6, %c2 : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
    %dim_7 = memref.dim %5, %c3 : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
    %dim_8 = memref.dim %4, %c0 : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
    %dim_9 = memref.dim %3, %c2 : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
    %dim_10 = memref.dim %2, %c3 : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
    %10 = arith.index_cast %dim_9 : index to i64
    %11 = arith.index_cast %dim_6 : index to i64
    %12 = arith.subi %10, %c1_i64 : i64
    %13 = arith.subi %11, %12 : i64
    %14 = arith.index_cast %13 : i64 to index
    %15 = arith.index_cast %dim_10 : index to i64
    %16 = arith.index_cast %dim_7 : index to i64
    %17 = arith.subi %15, %c1_i64 : i64
    %18 = arith.subi %16, %17 : i64
    %19 = arith.index_cast %18 : i64 to index
    %alloc_11 = memref.alloc(%dim_5, %dim_8, %14, %19) {alignment = 64 : i64} : memref<?x?x?x?xi32>
    emitc.for %arg3 = %c0 to %dim_5 step %c1 {
      emitc.for %arg4 = %c0 to %dim step %c1 {
        emitc.for %arg5 = %c0 to %14 step %c1 {
          emitc.for %arg6 = %c0 to %19 step %c1 {
            %21 = memref.load %alloc[%arg4] : memref<?xi32>
            memref.store %21, %alloc_11[%arg3, %arg4, %arg5, %arg6] : memref<?x?x?x?xi32>
          }
        }
      }
    }
    %dim_12 = memref.dim %1, %c0 : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
    %dim_13 = memref.dim %1, %c1 : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
    %dim_14 = memref.dim %0, %c0 : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
    %dim_15 = memref.dim %0, %c2 : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
    %dim_16 = memref.dim %0, %c3 : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
    emitc.for %arg3 = %c0 to %dim_12 step %c1 {
      emitc.for %arg4 = %c0 to %dim_14 step %c1 {
        emitc.for %arg5 = %c0 to %14 step %c1 {
          emitc.for %arg6 = %c0 to %19 step %c1 {
            emitc.for %arg7 = %c0 to %dim_13 step %c1 {
              emitc.for %arg8 = %c0 to %dim_15 step %c1 {
                emitc.for %arg9 = %c0 to %dim_16 step %c1 {
                  %21 = affine.apply #map(%arg5, %arg8)
                  %22 = affine.apply #map(%arg6, %arg9)
                  %23 = memref.load %1[%arg3, %arg7, %21, %22] : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
                  %24 = memref.load %0[%arg4, %arg7, %arg8, %arg9] : memref<?x?x?x?xi8, strided<[?, ?, ?, ?], offset: ?>>
                  %25 = memref.load %alloc_11[%arg3, %arg4, %arg5, %arg6] : memref<?x?x?x?xi32>
                  %26 = arith.extsi %23 : i8 to i32
                  %27 = arith.subi %26, %c7_i32 : i32
                  %28 = arith.extsi %24 : i8 to i32
                  %29 = arith.subi %28, %c3_i32 : i32
                  %30 = arith.muli %27, %29 : i32
                  %31 = arith.addi %25, %30 : i32
                  memref.store %31, %alloc_11[%arg3, %arg4, %arg5, %arg6] : memref<?x?x?x?xi32>
                }
              }
            }
          }
        }
      }
    }
    %alloc_17 = memref.alloc(%dim_5, %dim_8, %14, %19) {alignment = 64 : i64} : memref<?x?x?x?xf32>
    emitc.for %arg3 = %c0 to %dim_5 step %c1 {
      emitc.for %arg4 = %c0 to %dim_8 step %c1 {
        emitc.for %arg5 = %c0 to %14 step %c1 {
          emitc.for %arg6 = %c0 to %19 step %c1 {
            %21 = memref.load %alloc_11[%arg3, %arg4, %arg5, %arg6] : memref<?x?x?x?xi32>
            %22 = arith.sitofp %21 : i32 to f32
            %23 = arith.truncf %cst : f64 to f32
            %24 = arith.mulf %22, %23 : f32
            memref.store %24, %alloc_17[%arg3, %arg4, %arg5, %arg6] : memref<?x?x?x?xf32>
          }
        }
      }
    }
    %20 = bufferization.to_tensor %alloc_17 : memref<?x?x?x?xf32>
    return %20 : tensor<?x?x?x?xf32>
  }
  func.func @conv_broadcast(%arg0: tensor<1x80x3000xf32>, %arg1: tensor<1024x80x3xf32>, %arg2: tensor<1024xf32>) -> tensor<1x1024x3000xf32> {
    %c3 = arith.constant 3 : index
    %c3000 = arith.constant 3000 : index
    %c1024 = arith.constant 1024 : index
    %c3002 = arith.constant 3002 : index
    %c80 = arith.constant 80 : index
    %c1 = arith.constant 1 : index
    %c0 = arith.constant 0 : index
    %cst = arith.constant 0.000000e+00 : f32
    %0 = bufferization.to_memref %arg0 : memref<1x80x3000xf32, strided<[?, ?, ?], offset: ?>>
    %1 = bufferization.to_memref %arg1 : memref<1024x80x3xf32, strided<[?, ?, ?], offset: ?>>
    %2 = bufferization.to_memref %arg2 : memref<1024xf32, strided<[?], offset: ?>>
    %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x80x3002xf32>
    emitc.for %arg3 = %c0 to %c1 step %c1 {
      emitc.for %arg4 = %c0 to %c80 step %c1 {
        emitc.for %arg5 = %c0 to %c3002 step %c1 {
          memref.store %cst, %alloc[%arg3, %arg4, %arg5] : memref<1x80x3002xf32>
        }
      }
    }
    %reinterpret_cast = memref.reinterpret_cast %alloc to offset: [1], sizes: [1, 80, 3000], strides: [240160, 3002, 1] : memref<1x80x3002xf32> to memref<1x80x3000xf32, strided<[240160, 3002, 1], offset: 1>>
    memref.copy %0, %reinterpret_cast : memref<1x80x3000xf32, strided<[?, ?, ?], offset: ?>> to memref<1x80x3000xf32, strided<[240160, 3002, 1], offset: 1>>
    %alloc_0 = memref.alloc() {alignment = 64 : i64} : memref<1x1024x3000xf32>
    emitc.for %arg3 = %c0 to %c1 step %c1 {
      emitc.for %arg4 = %c0 to %c1024 step %c1 {
        emitc.for %arg5 = %c0 to %c3000 step %c1 {
          %4 = memref.load %2[%arg4] : memref<1024xf32, strided<[?], offset: ?>>
          memref.store %4, %alloc_0[%arg3, %arg4, %arg5] : memref<1x1024x3000xf32>
        }
      }
    }
    emitc.for %arg3 = %c0 to %c1 step %c1 {
      emitc.for %arg4 = %c0 to %c1024 step %c1 {
        emitc.for %arg5 = %c0 to %c3000 step %c1 {
          emitc.for %arg6 = %c0 to %c80 step %c1 {
            emitc.for %arg7 = %c0 to %c3 step %c1 {
              %4 = affine.apply #map(%arg5, %arg7)
              %5 = memref.load %alloc[%arg3, %arg6, %4] : memref<1x80x3002xf32>
              %6 = memref.load %1[%arg4, %arg6, %arg7] : memref<1024x80x3xf32, strided<[?, ?, ?], offset: ?>>
              %7 = memref.load %alloc_0[%arg3, %arg4, %arg5] : memref<1x1024x3000xf32>
              %8 = arith.mulf %5, %6 : f32
              %9 = arith.addf %7, %8 : f32
              memref.store %9, %alloc_0[%arg3, %arg4, %arg5] : memref<1x1024x3000xf32>
            }
          }
        }
      }
    }
    %3 = bufferization.to_tensor %alloc_0 : memref<1x1024x3000xf32>
    return %3 : tensor<1x1024x3000xf32>
  }
}

How can I fix this?

@llvmbot
Copy link
Member

llvmbot commented Dec 25, 2024

@llvm/issue-subscribers-mlir

Author: zzzzzzzzz (pyl3000)

When I lower the arith dialect to the emitc dialect using `mlir-opt --convert-arith-to-emitc`, I get the following error:

Image

The input file 7-conv-emitc.mlir is as follows:

#map = affine_map&lt;(d0, d1) -&gt; (d0 + d1)&gt;
module {
  memref.global "private" @<!-- -->global_seed : memref&lt;i64&gt; = dense&lt;0&gt;
  func.func @<!-- -->torch.aten.convolution$nobias(%arg0: tensor&lt;1x24x16x128x128xf16&gt;, %arg1: tensor&lt;54x24x1x1x1xf16&gt;) -&gt; tensor&lt;1x54x16x128x128xf16&gt; {
    %c24 = arith.constant 24 : index
    %c128 = arith.constant 128 : index
    %c16 = arith.constant 16 : index
    %c54 = arith.constant 54 : index
    %c1 = arith.constant 1 : index
    %c0 = arith.constant 0 : index
    %cst = arith.constant 0.000000e+00 : f32
    %0 = bufferization.to_memref %arg1 : memref&lt;54x24x1x1x1xf16, strided&lt;[?, ?, ?, ?, ?], offset: ?&gt;&gt;
    %1 = bufferization.to_memref %arg0 : memref&lt;1x24x16x128x128xf16, strided&lt;[?, ?, ?, ?, ?], offset: ?&gt;&gt;
    %alloc = memref.alloc() {alignment = 64 : i64} : memref&lt;1x54x16x128x128xf32&gt;
    emitc.for %arg2 = %c0 to %c1 step %c1 {
      emitc.for %arg3 = %c0 to %c54 step %c1 {
        emitc.for %arg4 = %c0 to %c16 step %c1 {
          emitc.for %arg5 = %c0 to %c128 step %c1 {
            emitc.for %arg6 = %c0 to %c128 step %c1 {
              memref.store %cst, %alloc[%arg2, %arg3, %arg4, %arg5, %arg6] : memref&lt;1x54x16x128x128xf32&gt;
            }
          }
        }
      }
    }
    emitc.for %arg2 = %c0 to %c1 step %c1 {
      emitc.for %arg3 = %c0 to %c16 step %c1 {
        emitc.for %arg4 = %c0 to %c128 step %c1 {
          emitc.for %arg5 = %c0 to %c128 step %c1 {
            emitc.for %arg6 = %c0 to %c54 step %c1 {
              emitc.for %arg7 = %c0 to %c1 step %c1 {
                emitc.for %arg8 = %c0 to %c1 step %c1 {
                  emitc.for %arg9 = %c0 to %c1 step %c1 {
                    emitc.for %arg10 = %c0 to %c24 step %c1 {
                      %3 = affine.apply #map(%arg3, %arg7)
                      %4 = affine.apply #map(%arg4, %arg8)
                      %5 = affine.apply #map(%arg5, %arg9)
                      %6 = memref.load %1[%arg2, %arg10, %3, %4, %5] : memref&lt;1x24x16x128x128xf16, strided&lt;[?, ?, ?, ?, ?], offset: ?&gt;&gt;
                      %7 = memref.load %0[%arg6, %arg10, %arg7, %arg8, %arg9] : memref&lt;54x24x1x1x1xf16, strided&lt;[?, ?, ?, ?, ?], offset: ?&gt;&gt;
                      %8 = memref.load %alloc[%arg2, %arg6, %arg3, %arg4, %arg5] : memref&lt;1x54x16x128x128xf32&gt;
                      %9 = arith.extf %6 : f16 to f32
                      %10 = arith.extf %7 : f16 to f32
                      %11 = arith.mulf %9, %10 : f32
                      %12 = arith.addf %8, %11 : f32
                      memref.store %12, %alloc[%arg2, %arg6, %arg3, %arg4, %arg5] : memref&lt;1x54x16x128x128xf32&gt;
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
    %alloc_0 = memref.alloc() {alignment = 64 : i64} : memref&lt;1x54x16x128x128xf16&gt;
    emitc.for %arg2 = %c0 to %c1 step %c1 {
      emitc.for %arg3 = %c0 to %c54 step %c1 {
        emitc.for %arg4 = %c0 to %c16 step %c1 {
          emitc.for %arg5 = %c0 to %c128 step %c1 {
            emitc.for %arg6 = %c0 to %c128 step %c1 {
              %3 = memref.load %alloc[%c0, %arg3, %arg4, %arg5, %arg6] : memref&lt;1x54x16x128x128xf32&gt;
              %4 = arith.truncf %3 : f32 to f16
              memref.store %4, %alloc_0[%arg2, %arg3, %arg4, %arg5, %arg6] : memref&lt;1x54x16x128x128xf16&gt;
            }
          }
        }
      }
    }
    %2 = bufferization.to_tensor %alloc_0 : memref&lt;1x54x16x128x128xf16&gt;
    return %2 : tensor&lt;1x54x16x128x128xf16&gt;
  }
  func.func @<!-- -->q_conv_test(%arg0: tensor&lt;?x?x?x?xi8&gt;, %arg1: tensor&lt;?x?x?x?xi8&gt;, %arg2: tensor&lt;?xf32&gt;) -&gt; tensor&lt;?x?x?x?xf32&gt; {
    %c1 = arith.constant 1 : index
    %c3_i32 = arith.constant 3 : i32
    %c7_i32 = arith.constant 7 : i32
    %cst = arith.constant 1.000000e-04 : f64
    %cst_0 = arith.constant 0.000000e+00 : f32
    %c3 = arith.constant 3 : index
    %c2 = arith.constant 2 : index
    %cst_1 = arith.constant 2.14748365E+9 : f32
    %cst_2 = arith.constant -2.14748365E+9 : f32
    %c0 = arith.constant 0 : index
    %c1_i64 = arith.constant 1 : i64
    %0 = bufferization.to_memref %arg1 : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
    %1 = bufferization.to_memref %arg0 : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
    %2 = bufferization.to_memref %arg1 : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
    %3 = bufferization.to_memref %arg1 : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
    %4 = bufferization.to_memref %arg1 : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
    %5 = bufferization.to_memref %arg0 : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
    %6 = bufferization.to_memref %arg0 : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
    %7 = bufferization.to_memref %arg0 : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
    %8 = bufferization.to_memref %arg2 : memref&lt;?xf32, strided&lt;[?], offset: ?&gt;&gt;
    %9 = bufferization.to_memref %arg2 : memref&lt;?xf32, strided&lt;[?], offset: ?&gt;&gt;
    %dim = memref.dim %9, %c0 : memref&lt;?xf32, strided&lt;[?], offset: ?&gt;&gt;
    %alloc = memref.alloc(%dim) {alignment = 64 : i64} : memref&lt;?xi32&gt;
    %dim_3 = memref.dim %8, %c0 : memref&lt;?xf32, strided&lt;[?], offset: ?&gt;&gt;
    emitc.for %arg3 = %c0 to %dim_3 step %c1 {
      %21 = memref.load %8[%arg3] : memref&lt;?xf32, strided&lt;[?], offset: ?&gt;&gt;
      %22 = arith.truncf %cst : f64 to f32
      %23 = arith.divf %21, %22 : f32
      %24 = math.roundeven %23 : f32
      %25 = arith.addf %24, %cst_0 : f32
      %26 = arith.maximumf %25, %cst_2 : f32
      %27 = arith.minimumf %26, %cst_1 : f32
      %28 = arith.fptosi %27 : f32 to i32
      memref.store %28, %alloc[%arg3] : memref&lt;?xi32&gt;
    }
    %alloc_4 = memref.alloc(%dim) {alignment = 64 : i64} : memref&lt;?xf32&gt;
    emitc.for %arg3 = %c0 to %dim step %c1 {
      %21 = memref.load %alloc[%arg3] : memref&lt;?xi32&gt;
      %22 = arith.sitofp %21 : i32 to f32
      %23 = arith.truncf %cst : f64 to f32
      %24 = arith.mulf %22, %23 : f32
      memref.store %24, %alloc_4[%arg3] : memref&lt;?xf32&gt;
    }
    emitc.for %arg3 = %c0 to %dim step %c1 {
      %21 = memref.load %alloc_4[%arg3] : memref&lt;?xf32&gt;
      %22 = arith.truncf %cst : f64 to f32
      %23 = arith.divf %21, %22 : f32
      %24 = math.roundeven %23 : f32
      %25 = arith.addf %24, %cst_0 : f32
      %26 = arith.maximumf %25, %cst_2 : f32
      %27 = arith.minimumf %26, %cst_1 : f32
      %28 = arith.fptosi %27 : f32 to i32
      memref.store %28, %alloc[%arg3] : memref&lt;?xi32&gt;
    }
    %dim_5 = memref.dim %7, %c0 : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
    %dim_6 = memref.dim %6, %c2 : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
    %dim_7 = memref.dim %5, %c3 : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
    %dim_8 = memref.dim %4, %c0 : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
    %dim_9 = memref.dim %3, %c2 : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
    %dim_10 = memref.dim %2, %c3 : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
    %10 = arith.index_cast %dim_9 : index to i64
    %11 = arith.index_cast %dim_6 : index to i64
    %12 = arith.subi %10, %c1_i64 : i64
    %13 = arith.subi %11, %12 : i64
    %14 = arith.index_cast %13 : i64 to index
    %15 = arith.index_cast %dim_10 : index to i64
    %16 = arith.index_cast %dim_7 : index to i64
    %17 = arith.subi %15, %c1_i64 : i64
    %18 = arith.subi %16, %17 : i64
    %19 = arith.index_cast %18 : i64 to index
    %alloc_11 = memref.alloc(%dim_5, %dim_8, %14, %19) {alignment = 64 : i64} : memref&lt;?x?x?x?xi32&gt;
    emitc.for %arg3 = %c0 to %dim_5 step %c1 {
      emitc.for %arg4 = %c0 to %dim step %c1 {
        emitc.for %arg5 = %c0 to %14 step %c1 {
          emitc.for %arg6 = %c0 to %19 step %c1 {
            %21 = memref.load %alloc[%arg4] : memref&lt;?xi32&gt;
            memref.store %21, %alloc_11[%arg3, %arg4, %arg5, %arg6] : memref&lt;?x?x?x?xi32&gt;
          }
        }
      }
    }
    %dim_12 = memref.dim %1, %c0 : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
    %dim_13 = memref.dim %1, %c1 : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
    %dim_14 = memref.dim %0, %c0 : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
    %dim_15 = memref.dim %0, %c2 : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
    %dim_16 = memref.dim %0, %c3 : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
    emitc.for %arg3 = %c0 to %dim_12 step %c1 {
      emitc.for %arg4 = %c0 to %dim_14 step %c1 {
        emitc.for %arg5 = %c0 to %14 step %c1 {
          emitc.for %arg6 = %c0 to %19 step %c1 {
            emitc.for %arg7 = %c0 to %dim_13 step %c1 {
              emitc.for %arg8 = %c0 to %dim_15 step %c1 {
                emitc.for %arg9 = %c0 to %dim_16 step %c1 {
                  %21 = affine.apply #map(%arg5, %arg8)
                  %22 = affine.apply #map(%arg6, %arg9)
                  %23 = memref.load %1[%arg3, %arg7, %21, %22] : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
                  %24 = memref.load %0[%arg4, %arg7, %arg8, %arg9] : memref&lt;?x?x?x?xi8, strided&lt;[?, ?, ?, ?], offset: ?&gt;&gt;
                  %25 = memref.load %alloc_11[%arg3, %arg4, %arg5, %arg6] : memref&lt;?x?x?x?xi32&gt;
                  %26 = arith.extsi %23 : i8 to i32
                  %27 = arith.subi %26, %c7_i32 : i32
                  %28 = arith.extsi %24 : i8 to i32
                  %29 = arith.subi %28, %c3_i32 : i32
                  %30 = arith.muli %27, %29 : i32
                  %31 = arith.addi %25, %30 : i32
                  memref.store %31, %alloc_11[%arg3, %arg4, %arg5, %arg6] : memref&lt;?x?x?x?xi32&gt;
                }
              }
            }
          }
        }
      }
    }
    %alloc_17 = memref.alloc(%dim_5, %dim_8, %14, %19) {alignment = 64 : i64} : memref&lt;?x?x?x?xf32&gt;
    emitc.for %arg3 = %c0 to %dim_5 step %c1 {
      emitc.for %arg4 = %c0 to %dim_8 step %c1 {
        emitc.for %arg5 = %c0 to %14 step %c1 {
          emitc.for %arg6 = %c0 to %19 step %c1 {
            %21 = memref.load %alloc_11[%arg3, %arg4, %arg5, %arg6] : memref&lt;?x?x?x?xi32&gt;
            %22 = arith.sitofp %21 : i32 to f32
            %23 = arith.truncf %cst : f64 to f32
            %24 = arith.mulf %22, %23 : f32
            memref.store %24, %alloc_17[%arg3, %arg4, %arg5, %arg6] : memref&lt;?x?x?x?xf32&gt;
          }
        }
      }
    }
    %20 = bufferization.to_tensor %alloc_17 : memref&lt;?x?x?x?xf32&gt;
    return %20 : tensor&lt;?x?x?x?xf32&gt;
  }
  func.func @<!-- -->conv_broadcast(%arg0: tensor&lt;1x80x3000xf32&gt;, %arg1: tensor&lt;1024x80x3xf32&gt;, %arg2: tensor&lt;1024xf32&gt;) -&gt; tensor&lt;1x1024x3000xf32&gt; {
    %c3 = arith.constant 3 : index
    %c3000 = arith.constant 3000 : index
    %c1024 = arith.constant 1024 : index
    %c3002 = arith.constant 3002 : index
    %c80 = arith.constant 80 : index
    %c1 = arith.constant 1 : index
    %c0 = arith.constant 0 : index
    %cst = arith.constant 0.000000e+00 : f32
    %0 = bufferization.to_memref %arg0 : memref&lt;1x80x3000xf32, strided&lt;[?, ?, ?], offset: ?&gt;&gt;
    %1 = bufferization.to_memref %arg1 : memref&lt;1024x80x3xf32, strided&lt;[?, ?, ?], offset: ?&gt;&gt;
    %2 = bufferization.to_memref %arg2 : memref&lt;1024xf32, strided&lt;[?], offset: ?&gt;&gt;
    %alloc = memref.alloc() {alignment = 64 : i64} : memref&lt;1x80x3002xf32&gt;
    emitc.for %arg3 = %c0 to %c1 step %c1 {
      emitc.for %arg4 = %c0 to %c80 step %c1 {
        emitc.for %arg5 = %c0 to %c3002 step %c1 {
          memref.store %cst, %alloc[%arg3, %arg4, %arg5] : memref&lt;1x80x3002xf32&gt;
        }
      }
    }
    %reinterpret_cast = memref.reinterpret_cast %alloc to offset: [1], sizes: [1, 80, 3000], strides: [240160, 3002, 1] : memref&lt;1x80x3002xf32&gt; to memref&lt;1x80x3000xf32, strided&lt;[240160, 3002, 1], offset: 1&gt;&gt;
    memref.copy %0, %reinterpret_cast : memref&lt;1x80x3000xf32, strided&lt;[?, ?, ?], offset: ?&gt;&gt; to memref&lt;1x80x3000xf32, strided&lt;[240160, 3002, 1], offset: 1&gt;&gt;
    %alloc_0 = memref.alloc() {alignment = 64 : i64} : memref&lt;1x1024x3000xf32&gt;
    emitc.for %arg3 = %c0 to %c1 step %c1 {
      emitc.for %arg4 = %c0 to %c1024 step %c1 {
        emitc.for %arg5 = %c0 to %c3000 step %c1 {
          %4 = memref.load %2[%arg4] : memref&lt;1024xf32, strided&lt;[?], offset: ?&gt;&gt;
          memref.store %4, %alloc_0[%arg3, %arg4, %arg5] : memref&lt;1x1024x3000xf32&gt;
        }
      }
    }
    emitc.for %arg3 = %c0 to %c1 step %c1 {
      emitc.for %arg4 = %c0 to %c1024 step %c1 {
        emitc.for %arg5 = %c0 to %c3000 step %c1 {
          emitc.for %arg6 = %c0 to %c80 step %c1 {
            emitc.for %arg7 = %c0 to %c3 step %c1 {
              %4 = affine.apply #map(%arg5, %arg7)
              %5 = memref.load %alloc[%arg3, %arg6, %4] : memref&lt;1x80x3002xf32&gt;
              %6 = memref.load %1[%arg4, %arg6, %arg7] : memref&lt;1024x80x3xf32, strided&lt;[?, ?, ?], offset: ?&gt;&gt;
              %7 = memref.load %alloc_0[%arg3, %arg4, %arg5] : memref&lt;1x1024x3000xf32&gt;
              %8 = arith.mulf %5, %6 : f32
              %9 = arith.addf %7, %8 : f32
              memref.store %9, %alloc_0[%arg3, %arg4, %arg5] : memref&lt;1x1024x3000xf32&gt;
            }
          }
        }
      }
    }
    %3 = bufferization.to_tensor %alloc_0 : memref&lt;1x1024x3000xf32&gt;
    return %3 : tensor&lt;1x1024x3000xf32&gt;
  }
}

How can I fix this?

@jacquesguan
Copy link
Contributor

I create a PR #121184 to support it.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

No branches or pull requests

4 participants