diff --git a/xla/service/gpu/fusions/ir/BUILD b/xla/service/gpu/fusions/ir/BUILD index 91c6eeea6da66..5a3de755dd660 100644 --- a/xla/service/gpu/fusions/ir/BUILD +++ b/xla/service/gpu/fusions/ir/BUILD @@ -135,7 +135,7 @@ cc_library( ":xla_gpu_ops_inc_gen", ":xla_gpu_types_inc_gen", "//xla/service/gpu/model:indexing_analysis", - "@com_google_absl//absl/strings:str_format", + "//xla/service/gpu/model:indexing_map_serialization", "@llvm-project//llvm:Support", "@llvm-project//mlir:ArithDialect", "@llvm-project//mlir:BytecodeOpInterface", diff --git a/xla/service/gpu/fusions/ir/tests/attrs.mlir b/xla/service/gpu/fusions/ir/tests/attrs.mlir index bc37a3ac56fc7..b990103ea2cfa 100644 --- a/xla/service/gpu/fusions/ir/tests/attrs.mlir +++ b/xla/service/gpu/fusions/ir/tests/attrs.mlir @@ -9,17 +9,17 @@ // CHECK-SAME: s0 in [0, 32], // CHECK-SAME: d0 + s0 in [1, 10], // CHECK-SAME: d0 mod 2 in [0, 1], -// CHECK-SAME: is_simplified: true +// CHECK-SAME: is_simplified: true" // CHECK-SAME: > -#map = #xla_gpu.indexing_map<(d0, d1, d2)[s0] -> (d0), - domain: - d0 in [1, 2], - d1 in [5, 8], - d2 in [10, 12], - s0 in [0, 32], - d0 mod 2 in [0, 1], - d0 + s0 in [1, 10], - is_simplified: true +#map = #xla_gpu.indexing_map<"(d0, d1, d2)[s0] -> (d0)," + "domain:" + "d0 in [1, 2]," + "d1 in [5, 8]," + "d2 in [10, 12]," + "s0 in [0, 32]," + "d0 mod 2 in [0, 1]," + "d0 + s0 in [1, 10]," + "is_simplified: true" > func.func private @indexing_map_attr(!xla_gpu.indexed_vector<64x64x32xf64, #map>) @@ -39,20 +39,21 @@ func.func private @indexing_map_attr(!xla_gpu.indexed_vector<64x64x32xf64, #map> // CHECK-SAME: d0 + s0 in [1, 10] // CHECK-SAME: d0 mod 2 in [0, 1] // CHECK-SAME: d1 + s1 + s2 in [1, 32] -// CHECK-SAME: is_simplified: false +// CHECK-SAME: is_simplified: false" // CHECK-SAME: > -#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1, s2] -> (d0 + s0, d1 + s1, d1 + s2), - domain: - d0 in [1, 2], - d1 in [5, 8], - s0 in [0, 10], - s1 in [0, 5], - s2 in [0, 32], - d0 mod 2 in [0, 1], - d0 + s0 in [1, 10], - d1 + s1 + s2 in [1, 32], - is_simplified: false - > +#map = #xla_gpu.indexing_map< + "(d0, d1)[s0, s1, s2] -> (d0 + s0, d1 + s1, d1 + s2)," + "domain:" + "d0 in [1, 2]," + "d1 in [5, 8]," + "s0 in [0, 10]," + "s1 in [0, 5]," + "s2 in [0, 32]," + "d0 mod 2 in [0, 1]," + "d0 + s0 in [1, 10]," + "d1 + s1 + s2 in [1, 32]," + "is_simplified: false" + > func.func private @more_range_vars(!xla_gpu.indexed_vector<100x32xf64, #map>) // CHECK-LABEL: @more_range_vars // CHECK: !xla_gpu.indexed_vector<100x32xf64, #[[$INDEX_MAP]]> @@ -64,13 +65,13 @@ func.func private @more_range_vars(!xla_gpu.indexed_vector<100x32xf64, #map>) // CHECK-SAME: domain: // CHECK-SAME: d0 in [0, 100] // CHECK-SAME: s0 in [-3, -1] -// CHECK-SAME: is_simplified: false +// CHECK-SAME: is_simplified: false" // CHECK-SAME: > -#map = #xla_gpu.indexing_map<(d0)[s0] -> (d0), - domain: - d0 in [0, 100], - s0 in [-3, -1], - is_simplified: false +#map = #xla_gpu.indexing_map<"(d0)[s0] -> (d0)," + "domain:" + "d0 in [0, 100]," + "s0 in [-3, -1]," + "is_simplified: false" > func.func private @indexing_map_small(!xla_gpu.indexed_vector<100xf64, #map>) // CHECK-LABEL: @indexing_map_small @@ -85,15 +86,15 @@ func.func private @indexing_map_small(!xla_gpu.indexed_vector<100xf64, #map>) // CHECK-SAME: d1 in [5, 8] // CHECK-SAME: d2 in [10, 12] // CHECK-SAME: s0 in [0, 32] -// CHECK-SAME: is_simplified: false +// CHECK-SAME: is_simplified: false" // CHECK-SAME: > -#map = #xla_gpu.indexing_map<(d0, d1, d2)[s0] -> (d0), - domain: - d0 in [1, 2], - d1 in [5, 8], - d2 in [10, 12], - s0 in [0, 32], - is_simplified: false +#map = #xla_gpu.indexing_map<"(d0, d1, d2)[s0] -> (d0)," + "domain:" + "d0 in [1, 2]," + "d1 in [5, 8]," + "d2 in [10, 12]," + "s0 in [0, 32]," + "is_simplified: false" > func.func private @no_constraints(!xla_gpu.indexed_vector<32xf64, #map>) // CHECK-LABEL: @no_constraints @@ -106,13 +107,13 @@ func.func private @no_constraints(!xla_gpu.indexed_vector<32xf64, #map>) // CHECK-SAME: domain: // CHECK-SAME: s0 in [3, 5] // CHECK-SAME: s0 mod 2 in [0, 1] -// CHECK-SAME: is_simplified: false +// CHECK-SAME: is_simplified: false" // CHECK-SAME: > -#map = #xla_gpu.indexing_map<()[s0] -> (s0), - domain: - s0 in [3, 5], - s0 mod 2 in [0, 1], - is_simplified: false +#map = #xla_gpu.indexing_map<"()[s0] -> (s0)," + "domain:" + "s0 in [3, 5]," + "s0 mod 2 in [0, 1]," + "is_simplified: false" > func.func private @no_dimensions(!xla_gpu.indexed_vector<100xf64, #map>) // CHECK-LABEL: @no_dimensions @@ -125,13 +126,13 @@ func.func private @no_dimensions(!xla_gpu.indexed_vector<100xf64, #map>) // CHECK-SAME: domain: // CHECK-SAME: d0 in [3, 5] // CHECK-SAME: d0 mod 2 in [0, 1] -// CHECK-SAME: is_simplified: false +// CHECK-SAME: is_simplified: false" // CHECK-SAME: > -#map = #xla_gpu.indexing_map<(d0) -> (d0), - domain: - d0 in [3, 5], - d0 mod 2 in [0, 1], - is_simplified: false +#map = #xla_gpu.indexing_map<"(d0) -> (d0)," + "domain:" + "d0 in [3, 5]," + "d0 mod 2 in [0, 1]," + "is_simplified: false" > func.func private @no_symbols(!xla_gpu.indexed_vector<100xf64, #map>) // CHECK-LABEL: @no_symbols @@ -142,7 +143,7 @@ func.func private @no_symbols(!xla_gpu.indexed_vector<100xf64, #map>) // CHECK: #[[$INDEX_MAP:.*]] = #xla_gpu.indexing_map< // CHECK-SAME: () -> () // CHECK-SAME: > -#map = #xla_gpu.indexing_map<() -> ()> +#map = #xla_gpu.indexing_map<"() -> ()"> func.func private @empty(!xla_gpu.indexed_vector<100xf64, #map>) // CHECK-LABEL: @empty // CHECK: !xla_gpu.indexed_vector<100xf64, #[[$INDEX_MAP]]> @@ -151,7 +152,8 @@ func.func private @empty(!xla_gpu.indexed_vector<100xf64, #map>) func.func private @tensor_layout( %in0: tensor<42xf32, #xla_gpu.layout<"shmem", - (d0) -> (), domain: d0 in [0, 42], is_simplified: true>>) -// CHECK: #layout = #xla_gpu.layout<"shmem", (d0) -> (), -// CHECK-SAME: domain: d0 in [0, 42], is_simplified: true> -// CHECK: tensor<42xf32, #layout> \ No newline at end of file + "(d0) -> ()," + "domain: d0 in [0, 42], is_simplified: true">>) +// CHECK: #layout = #xla_gpu.layout<"shmem", "(d0) -> (), +// CHECK-SAME: domain: d0 in [0, 42], is_simplified: true"> +// CHECK: tensor<42xf32, #layout> diff --git a/xla/service/gpu/fusions/ir/tests/canonicalize.mlir b/xla/service/gpu/fusions/ir/tests/canonicalize.mlir index 495456a5ab36d..bfca90e5c64f5 100644 --- a/xla/service/gpu/fusions/ir/tests/canonicalize.mlir +++ b/xla/service/gpu/fusions/ir/tests/canonicalize.mlir @@ -1,15 +1,13 @@ // RUN: mlir_fusions_opt %s --split-input-file -canonicalize | FileCheck %s -#map0 = #xla_gpu.indexing_map<()[s0, s1] -> (1 + s0 + s1 mod 3 - s1, s0 mod 2), - domain: s0 in [-10, 10], s1 in [0, 2], - is_simplified: false> +#map0 = #xla_gpu.indexing_map<"()[s0, s1] -> (1 + s0 + s1 mod 3 - s1, s0 mod 2), domain: s0 in [-10, 10], s1 in [0, 2], is_simplified: false"> func.func @simplify_apply_indexing(%s0: index, %s1: index) -> (index, index) { %0:2 = xla_gpu.apply_indexing #map0 [%s0, %s1] func.return %0#0, %0#1 : index, index } -// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 + 1, d0 mod 2), +// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 + 1, d0 mod 2), // CHECK-SAME: domain: d0 in [-10, 10] -// CHECK-SAME: is_simplified: true> +// CHECK-SAME: is_simplified: true"> // CHECK-LABEL: func.func @simplify_apply_indexing // CHECK-SAME: %[[ARG_0:.*]]: index, %[[ARG_1:.*]]: index) @@ -17,14 +15,13 @@ func.func @simplify_apply_indexing(%s0: index, %s1: index) -> (index, index) { // ----- -#map0 = #xla_gpu.indexing_map<(d0, d1, d2)[s0, s1] -> (1 + s0 + s1 mod 4 - s1, s0 mod 2, d0 + d2), - domain: d0 in [0, 1], d1 in [0, 2], d2 in [0, 3], s0 in [-11, 11], s1 in [0, 3], is_simplified: false> +#map0 = #xla_gpu.indexing_map<"(d0, d1, d2)[s0, s1] -> (1 + s0 + s1 mod 4 - s1, s0 mod 2, d0 + d2), domain: d0 in [0, 1], d1 in [0, 2], d2 in [0, 3], s0 in [-11, 11], s1 in [0, 3], is_simplified: false"> func.func @simplify_apply_indexing_remove_dims(%d0: index, %d1: index, %d2: index, %s0: index, %s1: index) -> (index, index, index) { %0:3 = xla_gpu.apply_indexing #map0(%d0, %d1, %d2)[%s0, %s1] func.return %0#0, %0#1, %0#2 : index, index, index } -// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1, d2) -> (d2 + 1, d2 mod 2, d0 + d1), +// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1, d2) -> (d2 + 1, d2 mod 2, d0 + d1), // CHECK-SAME: domain: d0 in [0, 1], d1 in [0, 3], d2 in [-11, 11] // CHECK-LABEL: func.func @simplify_apply_indexing_remove_dims @@ -38,23 +35,22 @@ func.func @simplify_apply_indexing_remove_dims(%d0: index, %d1: index, // ----- -#map0 = #xla_gpu.indexing_map<(d0) -> (d0 mod 10), domain: d0 in [0, 9], is_simplified: true> +#map0 = #xla_gpu.indexing_map<"(d0) -> (d0 mod 10), domain: d0 in [0, 9], is_simplified: true"> func.func @do_not_simplify_if_is_simplified_is_true(%d0: index) -> (index) { %0 = xla_gpu.apply_indexing #map0(%d0) func.return %0 : index } -// CHECK: #xla_gpu.indexing_map<(d0) -> (d0 mod 10) +// CHECK: #xla_gpu.indexing_map<"(d0) -> (d0 mod 10) // ----- -#map0 = #xla_gpu.indexing_map<(d0, d1)[s0] -> (d0 + s0, 4, d1, 1, s0), - domain: d0 in [-10, 10], d1 in [0, 2], s0 in [-1, 1], is_simplified: false> +#map0 = #xla_gpu.indexing_map<"(d0, d1)[s0] -> (d0 + s0, 4, d1, 1, s0), domain: d0 in [-10, 10], d1 in [0, 2], s0 in [-1, 1], is_simplified: false"> func.func @fold_indexing_map_results(%d0: index, %d1: index, %s0: index) -> (index, index, index, index, index) { %0:5 = xla_gpu.apply_indexing #map0 (%d0, %d1)[%s0] func.return %0#0, %0#1, %0#2, %0#3, %0#4 : index, index, index, index, index } -// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1), +// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1), // CHECK-LABEL: func.func @fold_indexing_map_results // CHECK-SAME: %[[ARG_0:.*]]: index, %[[ARG_1:.*]]: index, %[[ARG_2:.*]]: index) @@ -67,13 +63,13 @@ func.func @fold_indexing_map_results(%d0: index, %d1: index, %s0: index) // ----- -#map0 = #xla_gpu.indexing_map<(d0, d1)[s0] -> (d0 + s0, s0 + 4, d1 mod 2, 1 + d1, s0), - domain: d0 in [-10, 10], d1 in [0, 2], s0 in [-1, 1], is_simplified: false> +#map0 = #xla_gpu.indexing_map<"(d0, d1)[s0] -> (d0 + s0, s0 + 4, d1 mod 2, 1 + d1, s0)," + "domain: d0 in [-10, 10], d1 in [0, 2], s0 in [-1, 1], is_simplified: false"> func.func @remove_unused_results(%d0: index, %d1: index, %s0: index) -> (index) { %0:5 = xla_gpu.apply_indexing #map0 (%d0, %d1)[%s0] func.return %0#2 : index } -// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 mod 2), +// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 mod 2), // CHECK-SAME: domain: d0 in [0, 2] // CHECK-LABEL: func.func @remove_unused_results @@ -84,8 +80,9 @@ func.func @remove_unused_results(%d0: index, %d1: index, %s0: index) -> (index) // ----- -#map0 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0 + d1 + s0 + s1 mod 3), - domain: d0 in [0, 10], d1 in [0, 5], s0 in [-10, 10], s1 in [0, 4], is_simplified: false> +#map0 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0 + d1 + s0 + s1 mod 3)," + "domain: d0 in [0, 10], d1 in [0, 5], s0 in [-10, 10], s1 in [0, 4]," + "is_simplified: false"> func.func @fold_operands(%d0: index) -> index { %d1 = arith.constant 1 : index %s0 = arith.constant 2 : index @@ -93,7 +90,7 @@ func.func @fold_operands(%d0: index) -> index { %0 = xla_gpu.apply_indexing #map0 (%d0, %d1)[%s0, %s1] func.return %0 : index } -// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 + 3), +// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 + 3), // CHECK-SAME: domain: d0 in [0, 10] // CHECK-LABEL: func.func @fold_operands @@ -104,8 +101,8 @@ func.func @fold_operands(%d0: index) -> index { func.func @fold_operands_and_results(%arg0: index, %arg1: index) -> (index, index) { - %0:2 = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0, d1) -> (0, d1), - domain: d0 in [0, 4], d1 in [0, 5], is_simplified: false>(%arg0, %arg1) + %0:2 = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0, d1) -> (0, d1)," + "domain: d0 in [0, 4], d1 in [0, 5], is_simplified: false">(%arg0, %arg1) return %0#0, %0#1 : index, index } @@ -117,14 +114,15 @@ func.func @fold_operands_and_results(%arg0: index, %arg1: index) // ----- func.func @fold_sequence(%arg0: index, %arg1: index) -> index { - %0 = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1), - domain: d0 in [0, 5], d1 in [0, 4], is_simplified: false>(%arg0, %arg1) - %1 = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0) -> (d0 mod 100 + 42), - domain: d0 in [0, 10000], is_simplified: false>(%0) + %0 = xla_gpu.apply_indexing #xla_gpu.indexing_map< + "(d0, d1) -> (d0 + d1), domain: d0 in [0, 5], d1 in [0, 4]," + "is_simplified: false">(%arg0, %arg1) + %1 = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0) -> (d0 mod 100 + 42)," + "domain: d0 in [0, 10000], is_simplified: false">(%0) func.return %1 : index } -// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1 + 42), +// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1 + 42), // CHECK-SAME: domain: d0 in [0, 5], d1 in [0, 4] // CHECK-LABEL: func.func @fold_sequence // CHECK-SAME: %[[ARG0:.*]]: index, %[[ARG1:.*]]: index) @@ -134,14 +132,15 @@ func.func @fold_sequence(%arg0: index, %arg1: index) -> index { // ----- func.func @fold_sequence_sym(%arg0: index, %arg1: index) -> index { - %0 = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1), - domain: d0 in [0, 5], d1 in [0, 4], is_simplified: false>(%arg0, %arg1) - %1 = xla_gpu.apply_indexing #xla_gpu.indexing_map<()[s0] -> (s0 mod 100 + 42), - domain: s0 in [0, 10000], is_simplified: false>(%0) + %0 = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1), " + "domain: d0 in [0, 5], d1 in [0, 4], is_simplified: false">(%arg0, %arg1) + %1 = xla_gpu.apply_indexing #xla_gpu.indexing_map< + "()[s0] -> (s0 mod 100 + 42), domain: s0 in [0, 10000]," + "is_simplified: false">(%0) func.return %1 : index } -// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1 + 42), +// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1 + 42), // CHECK-SAME: domain: d0 in [0, 5], d1 in [0, 4] // CHECK-LABEL: func.func @fold_sequence_sym // CHECK-SAME: %[[ARG0:.*]]: index, %[[ARG1:.*]]: index) @@ -150,12 +149,11 @@ func.func @fold_sequence_sym(%arg0: index, %arg1: index) -> index { // ----- -#indexing_map1 = #xla_gpu.indexing_map<(d0, d1) -> (d1 * 2 + d0 + 8512), - domain: d0 in [0, 1], d1 in [0, 607], is_simplified: false> -#indexing_map2 = #xla_gpu.indexing_map< - (d0, d1, d2) -> (((d1 floordiv 32 + 1) mod 3) * 64 - + (d1 mod 32) * 2 + (d0 floordiv 192) * 192 + d2), - domain: d0 in [0, 9407], d1 in [0, 607], d2 in [0, 1], is_simplified: false> +#indexing_map1 = #xla_gpu.indexing_map<"(d0, d1) -> (d1 * 2 + d0 + 8512)," + "domain: d0 in [0, 1], d1 in [0, 607], is_simplified: false"> +#indexing_map2 = #xla_gpu.indexing_map<"(d0, d1, d2) -> (" + "((d1 floordiv 32 + 1) mod 3) * 64 + (d1 mod 32) * 2 + (d0 floordiv 192) * 192 + d2)," + "domain: d0 in [0, 9407], d1 in [0, 607], d2 in [0, 1], is_simplified: false"> func.func @fold_sequence_no_simplification_needed(%i: index) -> index { %thread_id_x = gpu.thread_id x {xla.range = [0 : index, 607 : index]} @@ -168,12 +166,12 @@ func.func @fold_sequence_no_simplification_needed(%i: index) -> index { // ----- -#indexing_map1 = #xla_gpu.indexing_map<(d0) -> (3 * d0), - domain: d0 in [0, 9407], is_simplified: false> -#indexing_map2 = #xla_gpu.indexing_map<(d0, d1, d2) -> (d0 floordiv 32 + 1), - domain: d0 in [0, 9407], d1 in [0, 607], d2 in [0, 1], is_simplified: false> -#indexing_map3 = #xla_gpu.indexing_map<(d0, d1, d2) -> (d0 floordiv 32 + 2), - domain: d0 in [0, 9407], d1 in [0, 607], d2 in [0, 1], is_simplified: false> +#indexing_map1 = #xla_gpu.indexing_map< + "(d0) -> (3 * d0), domain: d0 in [0, 9407], is_simplified: false"> +#indexing_map2 = #xla_gpu.indexing_map<"(d0, d1, d2) -> (d0 floordiv 32 + 1)," + "domain: d0 in [0, 9407], d1 in [0, 607], d2 in [0, 1], is_simplified: false"> +#indexing_map3 = #xla_gpu.indexing_map<"(d0, d1, d2) -> (d0 floordiv 32 + 2)," + "domain: d0 in [0, 9407], d1 in [0, 607], d2 in [0, 1], is_simplified: false"> func.func @no_fold_when_producer_has_two_users(%i: index) -> (index, index) { %thread_id_x = gpu.thread_id x {xla.range = [0 : index, 607 : index]} @@ -187,14 +185,14 @@ func.func @no_fold_when_producer_has_two_users(%i: index) -> (index, index) { // ----- func.func @fold_sequence_shared_operands(%arg0: index, %arg1: index) -> index { - %0 = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1), - domain: d0 in [0, 5], d1 in [0, 4], is_simplified: false>(%arg0, %arg1) - %1 = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1), - domain: d0 in [0, 4], d1 in [0, 10000], is_simplified: false>(%arg1, %0) + %0 = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1)," + "domain: d0 in [0, 5], d1 in [0, 4], is_simplified: false">(%arg0, %arg1) + %1 = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1)," + "domain: d0 in [0, 4], d1 in [0, 10000], is_simplified: false">(%arg1, %0) func.return %1 : index } -// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1) -> (d0 * 2 + d1), +// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 2 + d1), // CHECK-SAME: domain: d0 in [0, 4], d1 in [0, 5] // CHECK-LABEL: func.func @fold_sequence_shared_operands // CHECK-SAME: %[[ARG0:.*]]: index, %[[ARG1:.*]]: index) @@ -235,15 +233,15 @@ func.func @atomic_rmw_cst(%in: tensor<2x3xf32>, %i: index, %j: index) // ----- -#map0 = #xla_gpu.indexing_map<(d0)[s0] -> (2 * d0 * s0), - domain: d0 in [0, 3], s0 in [0, 2], is_simplified: false> +#map0 = #xla_gpu.indexing_map<"(d0)[s0] -> (2 * d0 * s0)," + "domain: d0 in [0, 3], s0 in [0, 2], is_simplified: false"> func.func @apply_indexing_move_syms_to_dims(%dim0: index, %sym0: index) -> index { %0 = xla_gpu.apply_indexing #map0(%dim0)[%sym0] func.return %0 : index } -// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1) -> ((d0 * d1) * 2), +// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> ((d0 * d1) * 2), // CHECK-SAME: domain: d0 in [0, 3], d1 in [0, 2] // CHECK-LABEL: func.func @apply_indexing_move_syms_to_dims // CHECK-NEXT: xla_gpu.apply_indexing #[[$MAP]] @@ -251,8 +249,10 @@ func.func @apply_indexing_move_syms_to_dims(%dim0: index, %sym0: index) // // ----- -#map0 = #xla_gpu.indexing_map<(d0) -> (4 * d0), domain: d0 in [0, 3], is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 12], s0 in [0, 1024], s1 in [0, 32], is_simplified: false> +#map0 = #xla_gpu.indexing_map<"(d0) -> (4 * d0), domain: d0 in [0, 3]," + "is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1)," + "domain: d0 in [0, 12], s0 in [0, 1024], s1 in [0, 32], is_simplified: false"> func.func @loop_of_apply_indexing(%input: tensor<1024x32xf32>, %init: f32, %dim: index) -> (f32) { %idx = xla_gpu.apply_indexing #map0(%dim) %sum = xla_gpu.loop (%idx)[%i, %j] -> (%r0, %r1) in #map1 iter_args(%sum_ = %init) -> (f32) { @@ -263,7 +263,7 @@ func.func @loop_of_apply_indexing(%input: tensor<1024x32xf32>, %init: f32, %dim: func.return %sum : f32 } -// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 * 4 + s0, s1), +// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 * 4 + s0, s1), // CHECK-SAME: domain: d0 in [0, 3], s0 in [0, 1024], s1 in [0, 32] // CHECK-LABEL: func.func @loop_of_apply_indexing // CHECK-SAME: %[[ARG0:.*]]: tensor<1024x32xf32>, %[[ARG1:.*]]: f32, %[[ARG2:.*]]: index) @@ -272,8 +272,10 @@ func.func @loop_of_apply_indexing(%input: tensor<1024x32xf32>, %init: f32, %dim: // ----- -#map0 = #xla_gpu.indexing_map<(d0)[s0] -> (2 * d0 * s0), domain: d0 in [0, 3], s0 in [0, 2], is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0 + s1), domain: d0 in [0, 12], s0 in [0, 1024], s1 in [0, 32], is_simplified: false> +#map0 = #xla_gpu.indexing_map<"(d0)[s0] -> (2 * d0 * s0)," + "domain: d0 in [0, 3], s0 in [0, 2], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0 + s1)," + "domain: d0 in [0, 12], s0 in [0, 1024], s1 in [0, 32], is_simplified: false"> func.func @loop_of_apply_indexing_with_syms(%dim0: index, %sym0: index, %input: tensor<1024x32xf32>, %init: f32) -> (f32) { %0 = xla_gpu.apply_indexing #map0(%dim0)[%sym0] %sum = xla_gpu.loop (%0)[%i, %j] -> (%r0) in #map1 iter_args(%sum_ = %init) -> (f32) { @@ -284,7 +286,7 @@ func.func @loop_of_apply_indexing_with_syms(%dim0: index, %sym0: index, %input: func.return %sum : f32 } -// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> ((d0 * d1) * 2 + s0 + s1), +// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> ((d0 * d1) * 2 + s0 + s1), // CHECK-SAME: domain: d0 in [0, 3], d1 in [0, 2], s0 in [0, 1024], s1 in [0, 32] // CHECK-LABEL: func.func @loop_of_apply_indexing_with_syms // CHECK-SAME: %[[ARG0:.*]]: index, %[[ARG1:.*]]: index diff --git a/xla/service/gpu/fusions/ir/tests/invalid.mlir b/xla/service/gpu/fusions/ir/tests/invalid.mlir index 922b3f3bbfff0..3c50b5afcd806 100644 --- a/xla/service/gpu/fusions/ir/tests/invalid.mlir +++ b/xla/service/gpu/fusions/ir/tests/invalid.mlir @@ -1,13 +1,6 @@ // RUN: mlir_fusions_opt %s -split-input-file -verify-diagnostics -#map0 = #xla_gpu.indexing_map< - (d0, d1)[s0] -> (d0, d1 + s0), - domain: - d0 in [1, 2], - d1 in [5, 8], - s0 in [0, 32], - is_simplified: false -> +#map0 = #xla_gpu.indexing_map<"(d0, d1)[s0] -> (d0, d1 + s0), domain: d0 in [1, 2], d1 in [5, 8], s0 in [0, 32], is_simplified: false"> func.func @apply_indexing(%d0: index, %d1: index, %s0: index) -> (index, index) { // expected-error @+1 {{operand count must match the number of dimensions and symbols in the affine map}} %0:2 = xla_gpu.apply_indexing #map0 (%d0) @@ -16,16 +9,7 @@ func.func @apply_indexing(%d0: index, %d1: index, %s0: index) -> (index, index) // ----- -#map0 = #xla_gpu.indexing_map< - (d0, d1)[s0] -> (d0, d1 + s0), - domain: - d0 in [1, 2], - d1 in [5, 8], - s0 in [0, 32], - d0 mod 2 in [0, 1], - d0 + s0 in [1, 10], - is_simplified: false -> +#map0 = #xla_gpu.indexing_map<"(d0, d1)[s0] -> (d0, d1 + s0), domain: d0 in [1, 2], d1 in [5, 8], s0 in [0, 32], d0 mod 2 in [0, 1], d0 + s0 in [1, 10], is_simplified: false"> func.func @cannot_have_constraints(%d0: index, %d1: index, %s0: index) -> (index, index) { // expected-error @+1 {{apply indexing op cannot have any constraints}} %0:2 = xla_gpu.apply_indexing #map0 (%d0, %d1)[%s0] @@ -34,7 +18,7 @@ func.func @cannot_have_constraints(%d0: index, %d1: index, %s0: index) -> (index // ----- -#map = #xla_gpu.indexing_map<()[s0, s1] -> (s0, s1), domain: s0 in [0, 1024], s1 in [0, 32], is_simplified: false> +#map = #xla_gpu.indexing_map<"()[s0, s1] -> (s0, s1), domain: s0 in [0, 1024], s1 in [0, 32], is_simplified: false"> func.func @loop_result_num_mismatch(%input: tensor<1024x32xf32>, %init: f32) -> (f32) { // expected-error @+1 {{mismatch in number of loop-carried values and results}} @@ -52,7 +36,7 @@ func.func @loop_result_num_mismatch(%input: tensor<1024x32xf32>, // ----- -#map = #xla_gpu.indexing_map<()[s0] -> (s0, s0), domain: s0 in [0, 1024], is_simplified: false> +#map = #xla_gpu.indexing_map<"()[s0] -> (s0, s0), domain: s0 in [0, 1024], is_simplified: false"> func.func @loop_iv_num_mismatch(%input: tensor<1024x32xf32>, %init: f32) -> (f32) { // expected-error @+1 {{mismatch in number of induction variables 2 and RangeVars}} @@ -70,8 +54,7 @@ func.func @loop_iv_num_mismatch(%input: tensor<1024x32xf32>, // ----- -#map = #xla_gpu.indexing_map<()[s0, s1] -> (s0, s1), - domain: s0 in [0, 1024], s1 in [0, 32], is_simplified: false> +#map = #xla_gpu.indexing_map<"()[s0, s1] -> (s0, s1), domain: s0 in [0, 1024], s1 in [0, 32], is_simplified: false"> func.func @loop_types_mismatch(%input: tensor<1024x32xf32>, %init: f32) -> (i32) { // expected-error @+1 {{block iter arg type = 'f32', result type = 'i32' and init operand type = 'f32' should match}} @@ -89,8 +72,7 @@ func.func @loop_types_mismatch(%input: tensor<1024x32xf32>, %init: f32) -> (i32) // ----- -#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (s0, s1), - domain: d0 in [0, 3], s0 in [0, 1024], s1 in [0, 32], is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (s0, s1), domain: d0 in [0, 3], s0 in [0, 1024], s1 in [0, 32], is_simplified: false"> func.func @loop_op(%input: tensor<1024x32xf32>, %init: f32, %dim: index) -> (f32) { // expected-error @+1 {{mismatch in number of dims operands 0 and DimVars in the indexing map}} @@ -105,9 +87,7 @@ func.func @loop_op(%input: tensor<1024x32xf32>, %init: f32, %dim: index) -> (f32 // ----- -#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1024], s1 in [0, 32], - is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1024], s1 in [0, 32], is_simplified: false"> func.func @indicies_mismatch(%input: tensor<32x64xf32>, %thread_id: index, %output: tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x64xf32, #map> { @@ -119,10 +99,8 @@ func.func @indicies_mismatch(%input: tensor<32x64xf32>, %thread_id: index, // ----- -#map = #xla_gpu.indexing_map<()[s0, s1] -> (s0, s1), - domain: s0 in [0, 1024], s1 in [0, 32], is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false> +#map = #xla_gpu.indexing_map<"()[s0, s1] -> (s0, s1), domain: s0 in [0, 1024], s1 in [0, 32], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false"> func.func @no_thread_id_in(%input: tensor<32x64xf32>, %output: tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x64xf32, #map1> { @@ -134,10 +112,8 @@ func.func @no_thread_id_in(%input: tensor<32x64xf32>, // ----- -#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false> -#map1 = #xla_gpu.indexing_map<()[s0, s1] -> (s0, s1), - domain: s0 in [0, 1024], s1 in [0, 32], is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"()[s0, s1] -> (s0, s1), domain: s0 in [0, 1024], s1 in [0, 32], is_simplified: false"> func.func @no_thread_id_out(%input: tensor<32x64xf32>, %thread_id: index, %output: tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x64xf32, #map1> { @@ -149,10 +125,8 @@ func.func @no_thread_id_out(%input: tensor<32x64xf32>, %thread_id: index, // ----- -#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 64], s0 in [0, 1024], s1 in [0, 32], is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 64], s0 in [0, 1024], s1 in [0, 32], is_simplified: false"> func.func @thread_id_bounds_mismatch(%input: tensor<32x64xf32>, %thread_id: index, %output: tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x64xf32, #map1> { // expected-error @+1 {{thread_id dimension must have the same bounds in both indexing maps}} %0 = xla_gpu.materialize @exp(%input) at #map(%thread_id) : (tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x64xf32, #map1> @@ -161,11 +135,8 @@ func.func @thread_id_bounds_mismatch(%input: tensor<32x64xf32>, %thread_id: inde // ----- -#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], d0 + s0 in [0, 1024], - is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], d0 + s0 in [0, 1024], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false"> func.func @thread_id_constraints_mismatch(%input: tensor<32x64xf32>, %thread_id: index, %output: tensor<32x64xf32>) @@ -178,10 +149,8 @@ func.func @thread_id_constraints_mismatch(%input: tensor<32x64xf32>, // ----- -#map = #xla_gpu.indexing_map<(d0)[s0] -> (d0 + s0, s0), - domain: d0 in [0, 32], s0 in [0, 1024], is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0)[s0] -> (d0 + s0, s0), domain: d0 in [0, 32], s0 in [0, 1024], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false"> func.func @symbol_count_mismatch(%input: tensor<32x64xf32>, %thread_id: index, %output: tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x64xf32, #map1> { // expected-error @+1 {{number of symbols in both indexing_maps must match}} %0 = xla_gpu.materialize @exp(%input) at #map(%thread_id) : (tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x64xf32, #map1> @@ -190,10 +159,8 @@ func.func @symbol_count_mismatch(%input: tensor<32x64xf32>, %thread_id: index, % // ----- -#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false"> func.func @symbol_domain_mismatch(%input: tensor<32x64xf32>, %thread_id: index, %output: tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x64xf32, #map1> { // expected-error @+1 {{domain of symbols of indexing_maps must match}} %0 = xla_gpu.materialize @exp(%input) at #map(%thread_id) : (tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x64xf32, #map1> @@ -202,12 +169,8 @@ func.func @symbol_domain_mismatch(%input: tensor<32x64xf32>, %thread_id: index, // ----- -#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 + s1 in [0, 1024], - is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 + s1 in [0, 32], - is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 + s1 in [0, 1024], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 + s1 in [0, 32], is_simplified: false"> func.func @symbol_constraints_mismatch(%input: tensor<32x64xf32>, %thread_id: index, %output: tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x64xf32, #map1> { @@ -219,12 +182,8 @@ func.func @symbol_constraints_mismatch(%input: tensor<32x64xf32>, // ----- -#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 mod 2 in [0, 0], - is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 + s1 in [0, 32], - is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 mod 2 in [0, 0], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 + s1 in [0, 32], is_simplified: false"> func.func @symbol_constraint_mismatch(%input: tensor<32x64xf32>, %thread_id: index, %output: tensor<32x64xf32>) @@ -236,12 +195,8 @@ func.func @symbol_constraint_mismatch(%input: tensor<32x64xf32>, // ----- -#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 + s1 in [0, 1024], - is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 + s1 in [0, 32], - is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 + s1 in [0, 1024], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 + s1 in [0, 32], is_simplified: false"> func.func @symbol_constraint_interval_mismatch(%input: tensor<32x64xf32>, %thread_id: index, %output: tensor<32x64xf32>) @@ -254,12 +209,8 @@ func.func @symbol_constraint_interval_mismatch(%input: tensor<32x64xf32>, // ----- -#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], - is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0 + s0, d1 + s1), - domain: d0 in [0, 32], d1 in [0, 64], s0 in [0, 1024], s1 in [0, 64], - is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0 + s0, d1 + s1), domain: d0 in [0, 32], d1 in [0, 64], s0 in [0, 1024], s1 in [0, 64], is_simplified: false"> func.func @vector_mapping_depends_on_block_id(%input: tensor<32x64xf32>, %thread_id: index, %output: tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x64xf32, #map1> { @@ -271,13 +222,8 @@ func.func @vector_mapping_depends_on_block_id(%input: tensor<32x64xf32>, // ----- -#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], d1 in [0, 64], s0 in [0, 1024], s1 in [0, 64], - d1 mod 2 in [0, 0], - is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], - is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], d1 in [0, 64], s0 in [0, 1024], s1 in [0, 64], d1 mod 2 in [0, 0], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], is_simplified: false"> func.func @block_id_constraints_mismatch(%input: tensor<32x64xf32>, %thread_id: index, %block_id: index, %output: tensor<32x64xf32>) @@ -290,13 +236,8 @@ func.func @block_id_constraints_mismatch(%input: tensor<32x64xf32>, // ----- -#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], - is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], d1 in [0, 64], s0 in [0, 1024], s1 in [0, 64], - d1 mod 2 in [0, 0], - is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], d1 in [0, 64], s0 in [0, 1024], s1 in [0, 64], d1 mod 2 in [0, 0], is_simplified: false"> func.func @block_id_constraints_mismatch(%input: tensor<32x64xf32>, %thread_id: index, %block_id: index, %output: tensor<32x64xf32>) @@ -309,14 +250,8 @@ func.func @block_id_constraints_mismatch(%input: tensor<32x64xf32>, // ----- -#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], d1 in [0, 64], s0 in [0, 1024], s1 in [0, 64], - d1 mod 2 in [0, 0], - is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0 + s0, s1), - domain: d0 in [0, 32], d1 in [0, 64], s0 in [0, 1024], s1 in [0, 64], - d1 mod 4 in [0, 0], - is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], d1 in [0, 64], s0 in [0, 1024], s1 in [0, 64], d1 mod 2 in [0, 0], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], d1 in [0, 64], s0 in [0, 1024], s1 in [0, 64], d1 mod 4 in [0, 0], is_simplified: false"> func.func @block_id_constraints_mismatch(%input: tensor<32x64xf32>, %thread_id: index, %block_id: index, %output: tensor<32x64xf32>) @@ -329,12 +264,8 @@ func.func @block_id_constraints_mismatch(%input: tensor<32x64xf32>, // ----- -#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1), - domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 1], s1 in [0, 1], - is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0, d1)[s0] -> (d0 mod 16 + s0, d1), - domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1], - is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1), domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 1], s1 in [0, 1], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0, d1)[s0] -> (d0 mod 16 + s0, d1), domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1], is_simplified: false"> func.func @insert(%input: !xla_gpu.indexed_vector<32x64xf32, #map>, %i: index, %j: index, %output: tensor<32x64xf32>) -> tensor<32x64xf32> { @@ -346,12 +277,8 @@ func.func @insert(%input: !xla_gpu.indexed_vector<32x64xf32, #map>, // ----- -#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1), - domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 1], s1 in [0, 1], - is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0, d1, d2) -> (d0 mod 16, d1, d2), - domain: d0 in [0, 32], d1 in [0, 2], d2 in [0, 5], - is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1), domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 1], s1 in [0, 1], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0, d1, d2) -> (d0 mod 16, d1, d2), domain: d0 in [0, 32], d1 in [0, 2], d2 in [0, 5], is_simplified: false"> func.func @insert(%input: !xla_gpu.indexed_vector<32x64xf32, #map>, %i: index, %j: index, %output: tensor<32x64xf32>) -> tensor<32x64xf32> { diff --git a/xla/service/gpu/fusions/ir/tests/ops.mlir b/xla/service/gpu/fusions/ir/tests/ops.mlir index 572202bf148ce..81e08968db759 100644 --- a/xla/service/gpu/fusions/ir/tests/ops.mlir +++ b/xla/service/gpu/fusions/ir/tests/ops.mlir @@ -56,19 +56,13 @@ func.func @caller(%a: f32, %b: f32) -> f32 { // ----- -#map0 = #xla_gpu.indexing_map< -(d0, d1)[s0] -> (d0, d1 + s0), - domain: - d0 in [1, 2], - d1 in [5, 8], - s0 in [0, 32], - is_simplified: false -> +#map0 = #xla_gpu.indexing_map<"(d0, d1)[s0] -> (d0, d1 + s0)," + "domain: d0 in [1, 2], d1 in [5, 8], s0 in [0, 32], is_simplified: false"> func.func @apply_indexing(%d0: index, %d1: index, %s0: index) -> (index, index) { %0:2 = xla_gpu.apply_indexing #map0 (%d0, %d1)[%s0] func.return %0#0, %0#1 : index, index } -// CHECK: #[[$MAP0:.*]] = #xla_gpu.indexing_map< +// CHECK: #[[$MAP0:.*]] = #xla_gpu.indexing_map<" // CHECK-SAME: (d0, d1)[s0] -> (d0, d1 + s0) // CHECK-SAME: domain: // CHECK-SAME: d0 in [1, 2] @@ -83,18 +77,13 @@ func.func @apply_indexing(%d0: index, %d1: index, %s0: index) -> (index, index) // ----- -#map0 = #xla_gpu.indexing_map< -(d0, d1) -> (d0, d1), - domain: - d0 in [0, 2], - d1 in [1, 3], - is_simplified: false -> +#map0 = #xla_gpu.indexing_map<"(d0, d1) -> (d0, d1)," + "domain: d0 in [0, 2], d1 in [1, 3], is_simplified: false"> func.func @apply_indexing_no_symbols(%d0: index, %d1: index) -> (index, index) { %0:2 = xla_gpu.apply_indexing #map0 (%d0, %d1) func.return %0#0, %0#1 : index, index } -// CHECK: #[[$MAP0:.*]] = #xla_gpu.indexing_map< +// CHECK: #[[$MAP0:.*]] = #xla_gpu.indexing_map<" // CHECK-SAME: (d0, d1) -> (d0, d1) // CHECK-SAME: domain: // CHECK-SAME: d0 in [0, 2] @@ -108,17 +97,13 @@ func.func @apply_indexing_no_symbols(%d0: index, %d1: index) -> (index, index) { // ----- -#map0 = #xla_gpu.indexing_map< - ()[s0] -> (s0, s0), - domain: - s0 in [2, 4], - is_simplified: false -> +#map0 = #xla_gpu.indexing_map<"()[s0] -> (s0, s0)," + "domain: s0 in [2, 4], is_simplified: false"> func.func @apply_indexing_no_dims(%s0: index) -> (index, index) { %0:2 = xla_gpu.apply_indexing #map0 [%s0] func.return %0#0, %0#1 : index, index } -// CHECK: #[[$MAP0:.*]] = #xla_gpu.indexing_map< +// CHECK: #[[$MAP0:.*]] = #xla_gpu.indexing_map<" // CHECK-SAME: ()[s0] -> (s0, s0) // CHECK-SAME: domain: // CHECK-SAME: s0 in [2, 4] @@ -130,8 +115,8 @@ func.func @apply_indexing_no_dims(%s0: index) -> (index, index) { // ----- -#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (s0, s1), - domain: d0 in [0, 3], s0 in [0, 1024], s1 in [0, 32], is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (s0, s1), " + "domain: d0 in [0, 3], s0 in [0, 1024], s1 in [0, 32], is_simplified: false"> func.func @loop_op(%input: tensor<1024x32xf32>, %init: f32, %dim: index) -> (f32) { %sum = xla_gpu.loop (%dim)[%i, %j] -> (%r0, %r1) @@ -155,15 +140,12 @@ func.func @loop_op(%input: tensor<1024x32xf32>, %init: f32, func.func private @exp(%p0: tensor<32x64xf32>, %i: index, %j: index) -> f32 -#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0 + s0, d1 + s1), - domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1024], s1 in [0, 32], - is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (s0, s1), - domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1024], s1 in [0, 32], - is_simplified: false> -#map2 = #xla_gpu.indexing_map<(d0, d1) -> (d0, d1), - domain: d0 in [0, 32], d1 in [0, 2], - is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0 + s0, d1 + s1)," + "domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1024], s1 in [0, 32], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (s0, s1)," + "domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1024], s1 in [0, 32], is_simplified: false"> +#map2 = #xla_gpu.indexing_map<"(d0, d1) -> (d0, d1)," + "domain: d0 in [0, 32], d1 in [0, 2], is_simplified: false"> func.func @materialize_and_insert(%input: tensor<32x64xf32>, %i: index, %j: index, %output: tensor<32x64xf32>) -> tensor<32x64xf32> { @@ -174,11 +156,11 @@ func.func @materialize_and_insert(%input: tensor<32x64xf32>, %i: index, func.return %1 : tensor<32x64xf32> } -// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0 + s0, d1 + s1) +// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0 + s0, d1 + s1) // CHECK-SAME: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1024], s1 in [0, 32] -// CHECK: #[[$MAP1:.*]] = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (s0, s1) +// CHECK: #[[$MAP1:.*]] = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (s0, s1) // CHECK-SAME: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1024], s1 in [0, 32] -// CHECK: #[[$MAP2:.*]] = #xla_gpu.indexing_map<(d0, d1) -> (d0, d1) +// CHECK: #[[$MAP2:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> (d0, d1) // CHECK-SAME: d0 in [0, 32], d1 in [0, 2], // CHECK-LABEL: @materialize_and_insert // CHECK: %[[MATERIALIZED:.*]] = xla_gpu.materialize @exp(%{{.*}}) at @@ -233,13 +215,14 @@ func.func @reduce_middle_dim(%in: tensor<16x8x4xf32>, %init: f32) // ----- -#map = #xla_gpu.indexing_map<(d0, d1) -> (d0 * 64 + d1), domain: d0 in [0, 15], d1 in [0, 63], is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 64 + d1)," + "domain: d0 in [0, 15], d1 in [0, 63], is_simplified: false"> func.func @reindex(%in0: tensor<1024xf32>) -> tensor<16x64xf32> { %0 = xla_gpu.reindex %in0 at #map : tensor<1024xf32> -> tensor<16x64xf32> func.return %0 : tensor<16x64xf32> } -// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1) -> (d0 * 64 + d1) +// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 64 + d1) // CHECK-LABEL: func.func @reindex( // CHECK-SAME: %[[IN1:.*]]: tensor<1024xf32> // CHECK: xla_gpu.reindex %[[IN1]] at #[[$MAP]] : @@ -247,7 +230,8 @@ func.func @reindex(%in0: tensor<1024xf32>) -> tensor<16x64xf32> { // ----- -#map = #xla_gpu.indexing_map<(d0, d1) -> (d0 * 64 + d1), domain: d0 in [0, 15], d1 in [0, 63], is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 64 + d1)," + "domain: d0 in [0, 15], d1 in [0, 63], is_simplified: false"> func.func @reindex_pad(%in0: tensor<1022xf32>) -> tensor<16x64xf32> { %c0 = arith.constant 0.0 : f32 %0 = xla_gpu.reindex %in0 at #map default %c0 @@ -255,7 +239,7 @@ func.func @reindex_pad(%in0: tensor<1022xf32>) -> tensor<16x64xf32> { func.return %0 : tensor<16x64xf32> } -// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1) -> (d0 * 64 + d1) +// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 64 + d1) // CHECK-LABEL: func.func @reindex_pad( // CHECK-SAME: %[[IN1:.*]]: tensor<1022xf32> // CHECK: %[[C0:.*]] = arith.constant 0.00 @@ -278,4 +262,4 @@ func.func @shuffler(%a: f32, %b: i32) -> (f32, i32) { // CHECK: xla_gpu.shuffle_reduce(%[[IN1]], %[[IN2]]) to 4 // CHECK-SAME: combiner=@do_nothing {xla.range = [0 : index, 42 : index]} -// CHECK-SAME: : f32, i32 \ No newline at end of file +// CHECK-SAME: : f32, i32 diff --git a/xla/service/gpu/fusions/ir/xla_gpu_attrs.cc b/xla/service/gpu/fusions/ir/xla_gpu_attrs.cc index cb9ba368702c9..577ec1262970c 100644 --- a/xla/service/gpu/fusions/ir/xla_gpu_attrs.cc +++ b/xla/service/gpu/fusions/ir/xla_gpu_attrs.cc @@ -14,10 +14,10 @@ limitations under the License. ==============================================================================*/ #include +#include #include #include -#include "absl/strings/str_format.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/TypeSwitch.h" // IWYU pragma: keep #include "llvm/Support/LogicalResult.h" @@ -30,6 +30,7 @@ limitations under the License. #include "mlir/Support/LLVM.h" #include "xla/service/gpu/fusions/ir/xla_gpu_ops.h" #include "xla/service/gpu/model/indexing_map.h" +#include "xla/service/gpu/model/indexing_map_serialization.h" namespace xla { namespace gpu { @@ -43,144 +44,36 @@ using mlir::AsmPrinter; using mlir::failure; using mlir::success; -constexpr llvm::StringRef kIsSimplifiedKeyword = "is_simplified"; - -ParseResult ParseInterval(AsmParser& parser, Interval& interval) { - // ParseResult converts to `true` if parsing failed. - return failure(parser.parseLSquare() || parser.parseInteger(interval.lower) || - parser.parseComma() || parser.parseInteger(interval.upper) || - parser.parseRSquare()); -} - -ParseResult parseBool(AsmParser& parser, bool* result) { - if (succeeded(parser.parseOptionalKeyword("true"))) { - *result = true; - return success(); +// Parses a chain of string attributes into an indexing map. +// Example: +// "()[s0, s1] -> (1 + s0 + s1 mod 3 - s1, s0 mod 2)," +// " domain: s0 in [-10, 10], s1 in [0, 2]," +// " is_simplified: false" +// will be parsed as 3 StringAttrs, concatenated into a single string, and then +// parsed into an IndexingMap. +std::optional parseChainOfStringsAsIndexingMap( + mlir::AsmParser& parser) { + mlir::StringAttr indexing_map_attr; + std::string indexing_map_str; + while (parser.parseOptionalAttribute(indexing_map_attr).has_value()) { + indexing_map_str.append(indexing_map_attr.getValue()); } - if (succeeded(parser.parseOptionalKeyword("false"))) { - *result = false; - return success(); - } - return failure(); -} - -void PrintDimVars(AsmPrinter& p, ArrayRef dim_vars) { - for (const auto [index, dim_var] : llvm::enumerate(dim_vars)) { - p << "d" << index << " in " << dim_var.bounds << ", "; - } -} - -ParseResult ParseDimVars(AsmParser& parser, ArrayRef dim_names, - SmallVector& dim_vars) { - dim_vars.reserve(dim_names.size()); - for (const auto& [index, dim_name] : llvm::enumerate(dim_names)) { - if (parser.parseKeyword(dim_name) || parser.parseKeyword("in") || - ParseInterval(parser, dim_vars.emplace_back().bounds) || - parser.parseComma()) { - return failure(); - } - } - return success(); -} - -void PrintRangeVars(AsmPrinter& p, ArrayRef range_vars) { - for (const auto [index, range_var] : llvm::enumerate(range_vars)) { - p << "s" << index << " in " << range_var.range << ", "; - } -} - -ParseResult ParseRangeVars(AsmParser& parser, - ArrayRef range_symbol_names, - SmallVector& range_vars) { - range_vars.reserve(range_symbol_names.size()); - for (const auto& [index, range_symbol_name] : - llvm::enumerate(range_symbol_names)) { - if (parser.parseKeyword(range_symbol_name) || parser.parseKeyword("in") || - ParseInterval(parser, range_vars.emplace_back().range) || - parser.parseComma()) { - return failure(); - } - } - return success(); -} - -void PrintConstraints(AsmPrinter& p, - ArrayRef> constraints) { - for (const auto& [expr, interval] : constraints) { - p << expr << " in " << interval << ", "; - } -} - -mlir::Attribute parseIndexingMapImpl(mlir::AsmParser& parser) { - mlir::AffineMap map; - if (parser.parseAffineMap(map)) { - return {}; - } - - // Store real strings to back up StringRef throughout ParseConstraints. - SmallVector dim_strings(map.getNumDims()); - SmallVector symbol_strings(map.getNumSymbols()); - SmallVector> symbolSet; - symbolSet.reserve(map.getNumDims() + map.getNumSymbols()); - for (int i = 0; i < map.getNumDims(); ++i) { - dim_strings[i] = absl::StrFormat("d%d", i); - symbolSet.push_back( - {dim_strings[i], mlir::getAffineDimExpr(i, parser.getContext())}); - } - for (int i = 0; i < map.getNumSymbols(); ++i) { - symbol_strings[i] = absl::StrFormat("s%d", i); - symbolSet.push_back( - {symbol_strings[i], mlir::getAffineSymbolExpr(i, parser.getContext())}); - } - if (map.getNumDims() + map.getNumSymbols() == 0) { - if (parser.parseGreater()) return {}; - return IndexingMapAttr::get(parser.getContext(), map, /*dim_vars=*/{}, - /*range_vars=*/{}, - /*constraints=*/{}, /*is_simplified=*/true); - } - if (parser.parseComma() || parser.parseKeyword("domain") || - parser.parseColon()) { - return {}; - } - - SmallVector dim_vars; - if (ParseDimVars(parser, dim_strings, dim_vars)) { - return {}; - } - SmallVector range_vars; - if (ParseRangeVars(parser, symbol_strings, range_vars)) { - return {}; - } - - SmallVector> constraints; - while (failed(parser.parseOptionalKeyword(kIsSimplifiedKeyword))) { - auto& constraint = constraints.emplace_back(); - if (parser.parseAffineExpr(symbolSet, constraint.first) || - parser.parseKeyword("in") || ParseInterval(parser, constraint.second) || - parser.parseComma()) { - return {}; - } - constraints.push_back(constraint); - } - - bool is_simplified = false; - if (parser.parseColon() || parseBool(parser, &is_simplified) || - parser.parseGreater()) { - return {}; - } - return IndexingMapAttr::get(parser.getContext(), map, dim_vars, range_vars, - constraints, is_simplified); + return ParseIndexingMap(indexing_map_str, parser.getContext()); } mlir::Attribute IndexingMapAttr::parse(mlir::AsmParser& parser, mlir::Type) { if (parser.parseLess()) { return {}; } - return parseIndexingMapImpl(parser); + auto indexing_map = parseChainOfStringsAsIndexingMap(parser); + if (!indexing_map.has_value() || parser.parseGreater()) { + return {}; + } + return IndexingMapAttr::get(parser.getContext(), *indexing_map); } void IndexingMapAttr::print(mlir::AsmPrinter& printer) const { - printer << "<" << getIndexingMap().ToString() << ">"; + printer << "<\"" << getIndexingMap().ToString() << "\">"; } IndexingMapAttr IndexingMapAttr::get(mlir::MLIRContext* context, @@ -230,18 +123,19 @@ mlir::Attribute LayoutAttr::parse(mlir::AsmParser& parser, mlir::Type) { if (!memspace.has_value()) { return {}; } - auto thread_map = mlir::cast(parseIndexingMapImpl(parser)); - if (!thread_map) { + std::optional indexing_map = + parseChainOfStringsAsIndexingMap(parser); + if (!indexing_map.has_value() || parser.parseGreater()) { return {}; } - mlir::MLIRContext* context = parser.getContext(); - auto memory_space_attr = MemorySpaceAttr::get(context, *memspace); - return LayoutAttr::get(context, memory_space_attr, thread_map); + auto* context = parser.getContext(); + return LayoutAttr::get(context, MemorySpaceAttr::get(context, *memspace), + IndexingMapAttr::get(context, *indexing_map)); } void LayoutAttr::print(mlir::AsmPrinter& printer) const { printer << "<\"" << stringifyMemorySpace(getMemorySpace().getValue()) - << "\", " << getThreadMap().getIndexingMap().ToString() << '>'; + << "\", \"" << getThreadMap().getIndexingMap().ToString() << "\">"; } } // namespace gpu diff --git a/xla/service/gpu/fusions/mlir/elemental_hlo_to_mlir_test.cc b/xla/service/gpu/fusions/mlir/elemental_hlo_to_mlir_test.cc index e683e199ed03c..5c87db0045dac 100644 --- a/xla/service/gpu/fusions/mlir/elemental_hlo_to_mlir_test.cc +++ b/xla/service/gpu/fusions/mlir/elemental_hlo_to_mlir_test.cc @@ -234,10 +234,10 @@ TEST_F(ElementalHloToMlirTest, ReduceWindow) { // CHECK: %[[INIT:.*]] = tensor.extract %[[ARG1]][] // CHECK: %[[RET:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[C7]] // CHECK-SAME: step %[[C1]] iter_args(%[[ACC:.*]] = %[[INIT]]) - // CHECK: %[[J0:.*]] = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0) -> (d0 * 4), domain: d0 in [0, 2], is_simplified: true>(%[[Y]]) + // CHECK: %[[J0:.*]] = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0) -> (d0 * 4), domain: d0 in [0, 2], is_simplified: true">(%[[Y]]) // CHECK: %[[J1:.*]] = xla_gpu.apply_indexing - // CHECK-SAME: #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1 - 3), - // CHECK-SAME: d0 in [0, 7], d1 in [0, 6], is_simplified: true>(%[[Z]], %[[I]]) + // CHECK-SAME: #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1 - 3), + // CHECK-SAME: d0 in [0, 7], d1 in [0, 6], is_simplified: true">(%[[Z]], %[[I]]) // CHECK: %[[VAL:.*]] = tensor.extract %[[ARG0]] // CHECK-SAME: [%[[X]], %[[J0]], %[[J1]]] // CHECK: %[[UPD:.*]] = func.call @add_sum(%[[ACC]], @@ -284,8 +284,8 @@ TEST_F(ElementalHloToMlirTest, ReduceWindowWithRescaling) { // If symbol rescaling wasn't working we would have a // `d1 floordiv ` in the map: // CHECK: %[[K:.*]] = xla_gpu.apply_indexing - // CHECK-SAME: #xla_gpu.indexing_map<(d0, d1) -> (d0 * 2 + d1), - // CHECK-SAME: d0 in [0, 18], d1 in [0, 3], is_simplified: true>(%[[X]], %[[I]]) + // CHECK-SAME: #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 2 + d1), + // CHECK-SAME: d0 in [0, 18], d1 in [0, 3], is_simplified: true">(%[[X]], %[[I]]) // CHECK: tensor.extract %[[ARG0]][%[[K]], %[[Y]], %[[Z]]] )")); @@ -505,7 +505,7 @@ TEST_F(ElementalHloToMlirTest, Pad) { // CHECK-DAG: %[[C4:.*]] = arith.constant 4 // CHECK-DAG: %[[C7:.*]] = arith.constant 7 // CHECK: %[[CONSTRAINT_VAL:.*]] = xla_gpu.apply_indexing - // CHECK-SAME: <(d0) -> ((d0 - 1) mod 2), domain: d0 in [1, 7], is_simplified: true>(%[[X]]) + // CHECK-SAME: <"(d0) -> ((d0 - 1) mod 2), domain: d0 in [1, 7], is_simplified: true">(%[[X]]) // CHECK: %[[CONSTRAINT:.*]] = arith.cmpi eq, %[[CONSTRAINT_VAL]], %[[C0]] // CHECK-DAG: %[[X_L:.*]] = arith.cmpi sge, %[[X]], %[[C1]] // CHECK-DAG: %[[X_H:.*]] = arith.cmpi sle, %[[X]], %[[C7]] @@ -517,9 +517,9 @@ TEST_F(ElementalHloToMlirTest, Pad) { // CHECK: %[[FROM_INPUT:.*]] = arith.andi %[[X_AND_CONSTRAINT]], %[[Y_BOUNDS]] // CHECK: %[[RET:.*]] = scf.if %[[FROM_INPUT]] // CHECK: %[[IN0:.*]] = xla_gpu.apply_indexing - // CHECK-SAME: <(d0) -> ((d0 - 1) floordiv 2), domain: d0 in [1, 7], is_simplified: true>(%[[X]]) + // CHECK-SAME: <"(d0) -> ((d0 - 1) floordiv 2), domain: d0 in [1, 7], is_simplified: true">(%[[X]]) // CHECK: %[[IN1:.*]] = xla_gpu.apply_indexing - // CHECK-SAME: <(d0) -> (d0 - 4), domain: d0 in [4, 7], is_simplified: true>(%[[Y]]) + // CHECK-SAME: <"(d0) -> (d0 - 4), domain: d0 in [4, 7], is_simplified: true">(%[[Y]]) // CHECK: %[[VAL:.*]] = tensor.extract %[[ARG0]][%[[IN0]], %[[IN1]]] // CHECK: scf.yield %[[VAL]] // CHECK: } else { @@ -547,7 +547,7 @@ TEST_F(ElementalHloToMlirTest, PadUnsigned) { // CHECK-DAG: %[[C4:.*]] = arith.constant 4 // CHECK-DAG: %[[C7:.*]] = arith.constant 7 // CHECK: %[[CONSTRAINT_VAL:.*]] = xla_gpu.apply_indexing - // CHECK-SAME: <(d0) -> ((d0 - 1) mod 2), domain: d0 in [1, 7], is_simplified: true>(%[[X]]) + // CHECK-SAME: <"(d0) -> ((d0 - 1) mod 2), domain: d0 in [1, 7], is_simplified: true">(%[[X]]) // CHECK: %[[CONSTRAINT:.*]] = arith.cmpi eq, %[[CONSTRAINT_VAL]], %[[C0]] // CHECK-DAG: %[[X_L:.*]] = arith.cmpi sge, %[[X]], %[[C1]] // CHECK-DAG: %[[X_H:.*]] = arith.cmpi sle, %[[X]], %[[C7]] @@ -559,9 +559,9 @@ TEST_F(ElementalHloToMlirTest, PadUnsigned) { // CHECK: %[[FROM_INPUT:.*]] = arith.andi %[[X_AND_CONSTRAINT]], %[[Y_BOUNDS]] // CHECK: %[[RET:.*]] = scf.if %[[FROM_INPUT]] // CHECK: %[[IN0:.*]] = xla_gpu.apply_indexing - // CHECK-SAME: <(d0) -> ((d0 - 1) floordiv 2), domain: d0 in [1, 7], is_simplified: true>(%[[X]]) + // CHECK-SAME: <"(d0) -> ((d0 - 1) floordiv 2), domain: d0 in [1, 7], is_simplified: true">(%[[X]]) // CHECK: %[[IN1:.*]] = xla_gpu.apply_indexing - // CHECK-SAME: <(d0) -> (d0 - 4), domain: d0 in [4, 7], is_simplified: true>(%[[Y]]) + // CHECK-SAME: <"(d0) -> (d0 - 4), domain: d0 in [4, 7], is_simplified: true">(%[[Y]]) // CHECK: %[[VAL:.*]] = tensor.extract %[[ARG0]][%[[IN0]], %[[IN1]]] // CHECK: scf.yield %[[VAL]] // CHECK: } else { @@ -878,11 +878,11 @@ TEST_F(ElementalHloToMlirTest, ConvolutionSimple) { // CHECK-NEXT: %[[R2:.+]] = scf.for %[[I:.+]] = %[[C0]] to %[[C4]] step %[[C1]] iter_args(%[[ACC:.+]] = %[[A1]]) -> (f32) { // CHECK: %[[R3:.+]] = scf.if {{.+}} -> (f32) { // CHECK: %[[XX0:.+]] = xla_gpu.apply_indexing - // CHECK-SAME: #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1), - // CHECK-SAME: d0 in [0, 5], d1 in [0, 2], is_simplified: true>(%[[W]], %[[X]]) + // CHECK-SAME: #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1), + // CHECK-SAME: d0 in [0, 5], d1 in [0, 2], is_simplified: true">(%[[W]], %[[X]]) // CHECK: %[[XX1:.+]] = xla_gpu.apply_indexing - // CHECK-SAME: #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1), - // CHECK-SAME: d0 in [0, 7], d1 in [0, 4], is_simplified: true>(%[[H]], %[[Y]]) + // CHECK-SAME: #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1), + // CHECK-SAME: d0 in [0, 7], d1 in [0, 4], is_simplified: true">(%[[H]], %[[Y]]) // CHECK-DAG: %[[VL:.+]] = tensor.extract %[[LHS]][%[[B]], %[[XX0]], %[[XX1]], %[[I]]] : tensor<2x8x12x4xf32> // CHECK-DAG: %[[VR:.+]] = tensor.extract %[[RHS]][%[[I]], %[[X]], %[[Y]], %[[O]]] : tensor<4x3x5x16xf32> // CHECK: %[[MUL:.+]] = arith.mulf %[[VL]], %[[VR]] : f32 @@ -924,11 +924,11 @@ TEST_F(ElementalHloToMlirTest, ConvolutionWithWindowStrides) { // CHECK-NEXT: %[[R2:.+]] = scf.for %[[I:.+]] = %[[C0]] to %[[C4]] step %[[C1]] iter_args(%[[ACC:.+]] = %[[A1]]) -> (f32) { // CHECK: %[[R3:.+]] = scf.if {{.+}} -> (f32) { // CHECK: %[[XX0:.+]] = xla_gpu.apply_indexing - // CHECK-SAME: #xla_gpu.indexing_map<(d0, d1) -> (d0 * 2 + d1), - // CHECK-SAME: d0 in [0, 2], d1 in [0, 2], is_simplified: true>(%[[W]], %[[X]]) + // CHECK-SAME: #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 2 + d1), + // CHECK-SAME: d0 in [0, 2], d1 in [0, 2], is_simplified: true">(%[[W]], %[[X]]) // CHECK: %[[XX1:.+]] = xla_gpu.apply_indexing - // CHECK-SAME: #xla_gpu.indexing_map<(d0, d1) -> (d0 * 2 + d1), - // CHECK-SAME: d0 in [0, 3], d1 in [0, 4], is_simplified: true>(%[[H]], %[[Y]]) + // CHECK-SAME: #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 2 + d1), + // CHECK-SAME: d0 in [0, 3], d1 in [0, 4], is_simplified: true">(%[[H]], %[[Y]]) // CHECK-DAG: %[[VL:.+]] = tensor.extract %[[LHS]][%[[B]], %[[XX0]], %[[XX1]], %[[I]]] : tensor<2x8x12x4xf32> // CHECK-DAG: %[[VR:.+]] = tensor.extract %[[RHS]][%[[I]], %[[X]], %[[Y]], %[[O]]] : tensor<4x3x5x16xf32> // CHECK: %[[MUL:.+]] = arith.mulf %[[VL]], %[[VR]] : f32 @@ -971,21 +971,21 @@ TEST_F(ElementalHloToMlirTest, ConvolutionWithPadding) { // CHECK: %[[R0:.+]] = scf.for %[[X:.+]] = %[[C0]] to %[[C3]] step %[[C1]] iter_args(%[[A0:.+]] = %[[INIT]]) -> (f32) { // CHECK-NEXT: %[[R1:.+]] = scf.for %[[Y:.+]] = %[[C0]] to %[[C5]] step %[[C1]] iter_args(%[[A1:.+]] = %[[A0]]) -> (f32) { // CHECK-NEXT: %[[R2:.+]] = scf.for %[[I:.+]] = %[[C0]] to %[[C4]] step %[[C1]] iter_args(%[[ACC:.+]] = %[[A1]]) -> (f32) { - // CHECK-DAG: %[[TESTX:.+]] = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1), domain: d0 in [0, 7], d1 in [0, 2], is_simplified: true>(%[[W]], %[[X]]) + // CHECK-DAG: %[[TESTX:.+]] = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1), domain: d0 in [0, 7], d1 in [0, 2], is_simplified: true">(%[[W]], %[[X]]) // CHECK-DAG: %[[TXGE:.+]] = arith.cmpi sge, %[[TESTX]], %[[C1]] : index // CHECK-DAG: %[[TXLE:.+]] = arith.cmpi sle, %[[TESTX]], %[[C8]] : index // CHECK-DAG: %[[TX:.+]] = arith.andi %[[TXGE]], %[[TXLE]] : i1 - // CHECK-DAG: %[[TESTY:.+]] = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1), domain: d0 in [0, 11], d1 in [0, 4], is_simplified: true>(%[[H]], %[[Y]]) + // CHECK-DAG: %[[TESTY:.+]] = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1), domain: d0 in [0, 11], d1 in [0, 4], is_simplified: true">(%[[H]], %[[Y]]) // CHECK-DAG: %[[TYGE:.+]] = arith.cmpi sge, %[[TESTY]], %[[C2]] : index // CHECK-DAG: %[[TYLE:.+]] = arith.cmpi sle, %[[TESTY]], %[[C13]] : index // CHECK-DAG: %[[TY:.+]] = arith.andi %[[TYGE]], %[[TYLE]] : i1 // CHECK: %[[R3:.+]] = scf.if {{.+}} -> (f32) { // CHECK: %[[XX0:.+]] = xla_gpu.apply_indexing - // CHECK-SAME: #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1 - 1), - // CHECK-SAME: d0 in [0, 7], d1 in [0, 2], is_simplified: true>(%[[W]], %[[X]]) + // CHECK-SAME: #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1 - 1), + // CHECK-SAME: d0 in [0, 7], d1 in [0, 2], is_simplified: true">(%[[W]], %[[X]]) // CHECK: %[[XX1:.+]] = xla_gpu.apply_indexing - // CHECK-SAME: #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1 - 2), - // CHECK-SAME: d0 in [0, 11], d1 in [0, 4], is_simplified: true>(%[[H]], %[[Y]]) + // CHECK-SAME: #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1 - 2), + // CHECK-SAME: d0 in [0, 11], d1 in [0, 4], is_simplified: true">(%[[H]], %[[Y]]) // CHECK-DAG: %[[VL:.+]] = tensor.extract %[[LHS]][%[[B]], %[[XX0]], %[[XX1]], %[[I]]] : tensor<2x8x12x4xf32> // CHECK-DAG: %[[VR:.+]] = tensor.extract %[[RHS]][%[[I]], %[[X]], %[[Y]], %[[O]]] : tensor<4x3x5x16xf32> // CHECK: %[[MUL:.+]] = arith.mulf %[[VL]], %[[VR]] : f32 @@ -1025,17 +1025,17 @@ TEST_F(ElementalHloToMlirTest, ConvolutionWithLhsDilation) { // CHECK: %[[R0:.+]] = scf.for %[[X:.+]] = %[[C0]] to %[[C3]] step %[[C1]] iter_args(%[[A0:.+]] = %[[INIT]]) -> (f32) { // CHECK-NEXT: %[[R1:.+]] = scf.for %[[Y:.+]] = %[[C0]] to %[[C5]] step %[[C1]] iter_args(%[[A1:.+]] = %[[A0]]) -> (f32) { // CHECK-NEXT: %[[R2:.+]] = scf.for %[[I:.+]] = %[[C0]] to %[[C4]] step %[[C1]] iter_args(%[[ACC:.+]] = %[[A1]]) -> (f32) { - // CHECK-DAG: %[[TESTX:.+]] = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0, d1) -> ((d0 + d1) mod 2), domain: d0 in [0, 12], d1 in [0, 2], is_simplified: true>(%[[W]], %[[X]]) + // CHECK-DAG: %[[TESTX:.+]] = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0, d1) -> ((d0 + d1) mod 2), domain: d0 in [0, 12], d1 in [0, 2], is_simplified: true">(%[[W]], %[[X]]) // CHECK-DAG: %[[TX:.+]] = arith.cmpi eq, %[[TESTX]], %[[C0]] : index - // CHECK-DAG: %[[TESTY:.+]] = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0, d1) -> ((d0 + d1) mod 2), domain: d0 in [0, 18], d1 in [0, 4], is_simplified: true>(%[[H]], %[[Y]]) + // CHECK-DAG: %[[TESTY:.+]] = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0, d1) -> ((d0 + d1) mod 2), domain: d0 in [0, 18], d1 in [0, 4], is_simplified: true">(%[[H]], %[[Y]]) // CHECK-DAG: %[[TY:.+]] = arith.cmpi eq, %[[TESTY]], %[[C0]] : index // CHECK: %[[R3:.+]] = scf.if {{.+}} -> (f32) { // CHECK: %[[XX0:.+]] = xla_gpu.apply_indexing - // CHECK-SAME: #xla_gpu.indexing_map<(d0, d1) -> ((d0 + d1) floordiv 2), - // CHECK-SAME: d0 in [0, 12], d1 in [0, 2], is_simplified: true>(%[[W]], %[[X]]) + // CHECK-SAME: #xla_gpu.indexing_map<"(d0, d1) -> ((d0 + d1) floordiv 2), + // CHECK-SAME: d0 in [0, 12], d1 in [0, 2], is_simplified: true">(%[[W]], %[[X]]) // CHECK: %[[XX1:.+]] = xla_gpu.apply_indexing - // CHECK-SAME: #xla_gpu.indexing_map<(d0, d1) -> ((d0 + d1) floordiv 2), - // CHECK-SAME: d0 in [0, 18], d1 in [0, 4], is_simplified: true>(%[[H]], %[[Y]]) + // CHECK-SAME: #xla_gpu.indexing_map<"(d0, d1) -> ((d0 + d1) floordiv 2), + // CHECK-SAME: d0 in [0, 18], d1 in [0, 4], is_simplified: true">(%[[H]], %[[Y]]) // CHECK-DAG: %[[VL:.+]] = tensor.extract %[[LHS]][%[[B]], %[[XX0]], %[[XX1]], %[[I]]] : tensor<2x8x12x4xf32> // CHECK-DAG: %[[VR:.+]] = tensor.extract %[[RHS]][%[[I]], %[[X]], %[[Y]], %[[O]]] : tensor<4x3x5x16xf32> // CHECK: %[[MUL:.+]] = arith.mulf %[[VL]], %[[VR]] : f32 @@ -1077,11 +1077,11 @@ TEST_F(ElementalHloToMlirTest, ConvolutionWithRhsDilation) { // CHECK-NEXT: %[[R2:.+]] = scf.for %[[I:.+]] = %[[C0]] to %[[C4]] step %[[C1]] iter_args(%[[ACC:.+]] = %[[A1]]) -> (f32) { // CHECK: %[[R3:.+]] = scf.if {{.+}} -> (f32) { // CHECK: %[[XX0:.+]] = xla_gpu.apply_indexing - // CHECK-SAME: #xla_gpu.indexing_map<(d0, d1) -> (d1 * 2 + d0), - // CHECK-SAME: d0 in [0, 3], d1 in [0, 2], is_simplified: true>(%[[W]], %[[X]]) + // CHECK-SAME: #xla_gpu.indexing_map<"(d0, d1) -> (d1 * 2 + d0), + // CHECK-SAME: d0 in [0, 3], d1 in [0, 2], is_simplified: true">(%[[W]], %[[X]]) // CHECK: %[[XX1:.+]] = xla_gpu.apply_indexing - // CHECK-SAME: #xla_gpu.indexing_map<(d0, d1) -> (d1 * 2 + d0), - // CHECK-SAME: d0 in [0, 3], d1 in [0, 4], is_simplified: true>(%[[H]], %[[Y]]) + // CHECK-SAME: #xla_gpu.indexing_map<"(d0, d1) -> (d1 * 2 + d0), + // CHECK-SAME: d0 in [0, 3], d1 in [0, 4], is_simplified: true">(%[[H]], %[[Y]]) // CHECK-DAG: %[[VL:.+]] = tensor.extract %[[LHS]][%[[B]], %[[XX0]], %[[XX1]], %[[I]]] : tensor<2x8x12x4xf32> // CHECK-DAG: %[[VR:.+]] = tensor.extract %[[RHS]][%[[I]], %[[X]], %[[Y]], %[[O]]] : tensor<4x3x5x16xf32> // CHECK: %[[MUL:.+]] = arith.mulf %[[VL]], %[[VR]] : f32 @@ -1123,14 +1123,14 @@ TEST_F(ElementalHloToMlirTest, ConvolutionWithFeatureGroupCount) { // CHECK-NEXT: %[[R2:.+]] = scf.for %[[I:.+]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ACC:.+]] = %[[A1]]) -> (f32) { // CHECK: %[[R3:.+]] = scf.if {{.+}} -> (f32) { // CHECK: %[[XX0:.+]] = xla_gpu.apply_indexing - // CHECK-SAME: #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1), - // CHECK-SAME: d0 in [0, 5], d1 in [0, 2], is_simplified: true>(%[[W]], %[[X]]) + // CHECK-SAME: #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1), + // CHECK-SAME: d0 in [0, 5], d1 in [0, 2], is_simplified: true">(%[[W]], %[[X]]) // CHECK: %[[XX1:.+]] = xla_gpu.apply_indexing - // CHECK-SAME: #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1), - // CHECK-SAME: d0 in [0, 7], d1 in [0, 4], is_simplified: true>(%[[H]], %[[Y]]) + // CHECK-SAME: #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1), + // CHECK-SAME: d0 in [0, 7], d1 in [0, 4], is_simplified: true">(%[[H]], %[[Y]]) // CHECK: %[[XX2:.+]] = xla_gpu.apply_indexing - // CHECK-SAME: #xla_gpu.indexing_map<(d0, d1) -> ((d0 floordiv 8) * 2 + d1), - // CHECK-SAME: d0 in [0, 15], d1 in [0, 1], is_simplified: true>(%[[O]], %[[I]]) + // CHECK-SAME: #xla_gpu.indexing_map<"(d0, d1) -> ((d0 floordiv 8) * 2 + d1), + // CHECK-SAME: d0 in [0, 15], d1 in [0, 1], is_simplified: true">(%[[O]], %[[I]]) // CHECK-DAG: %[[VL:.+]] = tensor.extract %[[LHS]][%[[B]], %[[XX0]], %[[XX1]], %[[XX2]]] : tensor<2x8x12x4xf32> // CHECK-DAG: %[[VR:.+]] = tensor.extract %[[RHS]][%[[I]], %[[X]], %[[Y]], %[[O]]] : tensor<2x3x5x16xf32> // CHECK: %[[MUL:.+]] = arith.mulf %[[VL]], %[[VR]] : f32 @@ -1174,11 +1174,11 @@ TEST_F(ElementalHloToMlirTest, ConvolutionWithBatchGroupCount) { // CHECK-NEXT: %[[R3:.+]] = scf.for %[[G:.+]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ACC:.+]] = %[[A2]]) -> (f32) { // CHECK: %[[R4:.+]] = scf.if {{.+}} -> (f32) { // CHECK: %[[XX0:.+]] = xla_gpu.apply_indexing - // CHECK-SAME: #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1), - // CHECK-SAME: d0 in [0, 5], d1 in [0, 2], is_simplified: true>(%[[W]], %[[X]]) + // CHECK-SAME: #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1), + // CHECK-SAME: d0 in [0, 5], d1 in [0, 2], is_simplified: true">(%[[W]], %[[X]]) // CHECK: %[[XX1:.+]] = xla_gpu.apply_indexing - // CHECK-SAME: #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1), - // CHECK-SAME: d0 in [0, 7], d1 in [0, 4], is_simplified: true>(%[[H]], %[[Y]]) + // CHECK-SAME: #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1), + // CHECK-SAME: d0 in [0, 7], d1 in [0, 4], is_simplified: true">(%[[H]], %[[Y]]) // CHECK-DAG: %[[VL:.+]] = tensor.extract %[[LHS]][%[[G]], %[[XX0]], %[[XX1]], %[[I]]] : tensor<2x8x12x4xf32> // CHECK-DAG: %[[VR:.+]] = tensor.extract %[[RHS]][%[[I]], %[[X]], %[[Y]], %[[O]]] : tensor<4x3x5x16xf32> // CHECK: %[[MUL:.+]] = arith.mulf %[[VL]], %[[VR]] : f32 @@ -1644,8 +1644,8 @@ TEST_F(ElementalHloToMlirTest, MixedIndexingTuple) { // CHECK-SAME: %[[X:.*]]: index {{{.*}}}, %[[Y:.*]]: index {{{.*}}} // CHECK: %[[A:.*]] = tensor.extract %[[P0]][%[[X]], %[[Y]]] // CHECK: %[[IDX:.*]] = xla_gpu.apply_indexing - // CHECK-SAME: #xla_gpu.indexing_map<(d0, d1) -> (d0 * 10 + d1), - // CHECK-SAME: d0 in [0, 9], d1 in [0, 9], is_simplified: true>(%[[X]], %[[Y]]) + // CHECK-SAME: #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 10 + d1), + // CHECK-SAME: d0 in [0, 9], d1 in [0, 9], is_simplified: true">(%[[X]], %[[Y]]) // CHECK: %[[B:.*]] = tensor.extract %[[P1]][%[[IDX]]] // CHECK: return %[[A]], %[[B]] )")); @@ -1668,8 +1668,8 @@ TEST_F(ElementalHloToMlirTest, NestedTuple) { // CHECK-SAME: %[[X:.*]]: index {{{.*}}}, %[[Y:.*]]: index {{{.*}}} // CHECK: %[[P0_V:.*]] = xla_gpu.pure_call @main_p0 // CHECK: %[[IDX:.*]] = - // CHECK-SAME: #xla_gpu.indexing_map<(d0, d1) -> (d0 * 10 + d1), - // CHECK-SAME: d0 in [0, 9], d1 in [0, 9], is_simplified: true>(%[[X]], %[[Y]]) + // CHECK-SAME: #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 10 + d1), + // CHECK-SAME: d0 in [0, 9], d1 in [0, 9], is_simplified: true">(%[[X]], %[[Y]]) // CHECK: %[[P1_V:.*]] = xla_gpu.pure_call @main_p1 // CHECK-SAME: (%[[P0]], %[[P1]], %[[IDX]]) // CHECK: return %[[P0_V]], %[[P1_V]], %[[P1_V]], %[[P1_V]], %[[P0_V]] diff --git a/xla/service/gpu/fusions/tests/concatenate/concat_1d.hlo b/xla/service/gpu/fusions/tests/concatenate/concat_1d.hlo index f99ff371ef38d..5ac91b201c616 100644 --- a/xla/service/gpu/fusions/tests/concatenate/concat_1d.hlo +++ b/xla/service/gpu/fusions/tests/concatenate/concat_1d.hlo @@ -8,10 +8,10 @@ fusion { param2 = f32[300] parameter(2) ROOT concat = f32[900] concatenate(param0, param1, param2), dimensions={0} } -// CHECK-DAG: #[[MAP:.*]] = #xla_gpu.indexing_map<(d0, d1) -> (d1 * 128 + d0) -// CHECK-DAG: #[[LOOPMAP_1:.*]] = #xla_gpu.indexing_map<(d0, d1, d2, d3, d4, d5)[s0, s1] -> (d3 * 128 + d0) -// CHECK-DAG: #[[LOOPMAP_2:.*]] = #xla_gpu.indexing_map<(d0, d1, d2, d3, d4, d5)[s0, s1] -> (d3 * 128 + d0 + 200) -// CHECK-DAG: #[[LOOPMAP_3:.*]] = #xla_gpu.indexing_map<(d0, d1, d2, d3, d4, d5)[s0, s1] -> (d3 * 128 + d0 + 600) +// CHECK-DAG: #[[MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> (d1 * 128 + d0) +// CHECK-DAG: #[[LOOPMAP_1:.*]] = #xla_gpu.indexing_map<"(d0, d1, d2, d3, d4, d5)[s0, s1] -> (d3 * 128 + d0) +// CHECK-DAG: #[[LOOPMAP_2:.*]] = #xla_gpu.indexing_map<"(d0, d1, d2, d3, d4, d5)[s0, s1] -> (d3 * 128 + d0 + 200) +// CHECK-DAG: #[[LOOPMAP_3:.*]] = #xla_gpu.indexing_map<"(d0, d1, d2, d3, d4, d5)[s0, s1] -> (d3 * 128 + d0 + 600) // CHECK: func.func @main // CHECK-SAME: %[[ARG_0:[a-zA-Z0-9]*]]: {{[^,]*}}, diff --git a/xla/service/gpu/fusions/tests/loop/tuple_heterogeneous.hlo b/xla/service/gpu/fusions/tests/loop/tuple_heterogeneous.hlo index 3b5e454584137..4f93eacbfab93 100644 --- a/xla/service/gpu/fusions/tests/loop/tuple_heterogeneous.hlo +++ b/xla/service/gpu/fusions/tests/loop/tuple_heterogeneous.hlo @@ -12,8 +12,8 @@ fusion { ROOT tuple = (f64[8], f64[2,4]) tuple(minimum, bc) } -// CHECK: #[[MAJOR:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 floordiv 4), -// CHECK: #[[MINOR:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 mod 4), +// CHECK: #[[MAJOR:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 floordiv 4), +// CHECK: #[[MINOR:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 mod 4), // CHECK: xla_gpu.loop ({{.*}})[{{.*}}] -> (%[[RA:.*]]) in // CHECK-DAG: %[[MAJOR_IDX:.*]] = xla_gpu.apply_indexing #[[MAJOR]] diff --git a/xla/service/gpu/fusions/tests/scatter/unique_indices.hlo b/xla/service/gpu/fusions/tests/scatter/unique_indices.hlo index a0663dd88308f..88043829ebc8f 100644 --- a/xla/service/gpu/fusions/tests/scatter/unique_indices.hlo +++ b/xla/service/gpu/fusions/tests/scatter/unique_indices.hlo @@ -24,7 +24,7 @@ scatter { unique_indices=true, to_apply=add } -// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 floordiv 2) +// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 floordiv 2) // CHECK-LABEL: func.func @main( // CHECK-SAME: %[[OPERAND:[a-zA-Z0-9]*]]: tensor<10x5xf32> @@ -60,4 +60,4 @@ scatter { // CHECK: %[[COMBINED:.*]] = arith.addf %[[CURRENT]], %[[UPD_ELEM]] // CHECK: %[[UPDATED:.*]] = tensor.insert %[[COMBINED]] // CHECK-SAME: into %{{[a-z0-9]+}}[%{{.*}}, %[[RC]]] : tensor<10x5xf32> -// CHECK: xla_gpu.yield %[[UPDATED]] : tensor<10x5xf32> \ No newline at end of file +// CHECK: xla_gpu.yield %[[UPDATED]] : tensor<10x5xf32> diff --git a/xla/service/gpu/fusions/transforms/tests/flatten_tensors.mlir b/xla/service/gpu/fusions/transforms/tests/flatten_tensors.mlir index 1691d3fd748c2..e88324f698d48 100644 --- a/xla/service/gpu/fusions/transforms/tests/flatten_tensors.mlir +++ b/xla/service/gpu/fusions/transforms/tests/flatten_tensors.mlir @@ -8,7 +8,7 @@ func.func @tensor_extract( : tensor<2x3xf32, dense<[0, 1]> : tensor<2xi64>> func.return %v : f32 } -// CHECK: #[[$MAP:.+]] = #xla_gpu.indexing_map<(d0, d1) -> (d1 * 2 + d0), domain: d0 in [0, 1], d1 in [0, 2], is_simplified: true> +// CHECK: #[[$MAP:.+]] = #xla_gpu.indexing_map<"(d0, d1) -> (d1 * 2 + d0), domain: d0 in [0, 1], d1 in [0, 2], is_simplified: true"> // CHECK-LABEL: func.func @tensor_extract( // CHECK-SAME: %[[SRC:.*]]: tensor<6xf32>, @@ -67,7 +67,7 @@ func.func @atomic_rmw(%in: tensor<2x4xf32>, %i: index, %j: index) } return %ret : tensor<2x4xf32> } -// CHECK: #[[$MAP:.+]] = #xla_gpu.indexing_map<(d0, d1) -> (d0 * 4 + d1), domain: d0 in [0, 1], d1 in [0, 3], is_simplified: true> +// CHECK: #[[$MAP:.+]] = #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 4 + d1), domain: d0 in [0, 1], d1 in [0, 3], is_simplified: true"> // CHECK-LABEL: func.func @atomic_rmw( // CHECK-SAME: %[[TENSOR:.*]]: tensor<8xf32>, %[[I:.*]]: index, // CHECK-SAME: %[[J:.*]]: index) -> tensor<8xf32> { @@ -93,8 +93,8 @@ func.func @for_loop(%t0: tensor<32x1024xf32>, %t1: tensor<64x8x4xf32>) } {some_attr} return %for#0, %for#1, %c0_f32 : tensor<32x1024xf32>, tensor<64x8x4xf32>, f32 } -// CHECK: #[[$MAP0:.+]] = #xla_gpu.indexing_map<(d0) -> (d0 + 1024) -// CHECK: #[[$MAP1:.+]] = #xla_gpu.indexing_map<(d0) -> (d0 * 32 + 5) +// CHECK: #[[$MAP0:.+]] = #xla_gpu.indexing_map<"(d0) -> (d0 + 1024) +// CHECK: #[[$MAP1:.+]] = #xla_gpu.indexing_map<"(d0) -> (d0 * 32 + 5) // CHECK-LABEL: func.func @for_loop( // CHECK-SAME: %[[T0:.*]]: tensor<32768xf32>, // CHECK-SAME: %[[T1:.*]]: tensor<2048xf32>) -> (tensor<32768xf32>, tensor<2048xf32>, f32) { @@ -114,12 +114,9 @@ func.func @for_loop(%t0: tensor<32x1024xf32>, %t1: tensor<64x8x4xf32>) // ----- -#map = #xla_gpu.indexing_map<(d0, d1) -> ((d1 * 128 + d0) floordiv 36), - domain: d0 in [0, 127], d1 in [0, 393749], is_simplified: true> -#map1 = #xla_gpu.indexing_map<(d0, d1) -> (((d1 * 128 + d0) floordiv 9) mod 4), - domain: d0 in [0, 127], d1 in [0, 393749], is_simplified: true> -#map2 = #xla_gpu.indexing_map<(d0, d1) -> ((d1 * 128 + d0) mod 9), - domain: d0 in [0, 127], d1 in [0, 393749], is_simplified: true> +#map = #xla_gpu.indexing_map<"(d0, d1) -> ((d1 * 128 + d0) floordiv 36), domain: d0 in [0, 127], d1 in [0, 393749], is_simplified: true"> +#map1 = #xla_gpu.indexing_map<"(d0, d1) -> (((d1 * 128 + d0) floordiv 9) mod 4), domain: d0 in [0, 127], d1 in [0, 393749], is_simplified: true"> +#map2 = #xla_gpu.indexing_map<"(d0, d1) -> ((d1 * 128 + d0) mod 9), domain: d0 in [0, 127], d1 in [0, 393749], is_simplified: true"> func.func @if_op(%arg0: tensor<4000x4x9xf32>, %arg1: tensor<1400x1xi32>, %arg2: tensor<1400x1x4x9xf32>, %arg3: tensor<4000x4x9xf32>) -> tensor<4000x4x9xf32> { @@ -225,7 +222,7 @@ func.func @vector_extract(%arg0: vector<2x3xf32>, %arg1: index) -> f32 { %v = vector.extract %arg0[%arg1, 2] : f32 from vector<2x3xf32> func.return %v : f32 } -// CHECK: #[[$MAP:.+]] = #xla_gpu.indexing_map<(d0) -> (d0 * 3 + 2), +// CHECK: #[[$MAP:.+]] = #xla_gpu.indexing_map<"(d0) -> (d0 * 3 + 2), // CHECK-SAME: domain: d0 in [0, 1] // CHECK-LABEL: func.func @vector_extract( @@ -241,7 +238,7 @@ func.func @vector_insert(%arg0: vector<10x24xf32>, %i: index) %out = vector.insert %scalar, %arg0 [1, %i] : f32 into vector<10x24xf32> func.return %out : vector<10x24xf32> } -// CHECK: #[[$MAP:.+]] = #xla_gpu.indexing_map<(d0) -> (d0 + 24), +// CHECK: #[[$MAP:.+]] = #xla_gpu.indexing_map<"(d0) -> (d0 + 24), // CHECK-SAME: domain: d0 in [0, 23] // CHECK-LABEL: func.func @vector_insert( // CHECK-SAME: %[[VECTOR:.*]]: vector<240xf32>, %[[I:.*]]: index) -> @@ -290,8 +287,8 @@ func.func @for_loop_vector(%t0: vector<32x1024xf32>, %t1: vector<64x8x4xf32>) return %for#0, %for#1, %c0_f32 : vector<32x1024xf32>, vector<64x8x4xf32>, f32 } -// CHECK: #[[$MAP0:.+]] = #xla_gpu.indexing_map<(d0) -> (d0 + 1024) -// CHECK: #[[$MAP1:.+]] = #xla_gpu.indexing_map<(d0) -> (d0 * 32 + 5) +// CHECK: #[[$MAP0:.+]] = #xla_gpu.indexing_map<"(d0) -> (d0 + 1024) +// CHECK: #[[$MAP1:.+]] = #xla_gpu.indexing_map<"(d0) -> (d0 * 32 + 5) // CHECK-LABEL: func.func @for_loop_vector( // CHECK-SAME: %[[V0:.*]]: vector<32768xf32>, // CHECK-SAME: %[[V1:.*]]: vector<2048xf32>) -> diff --git a/xla/service/gpu/fusions/transforms/tests/fuse_loops.mlir b/xla/service/gpu/fusions/transforms/tests/fuse_loops.mlir index 557335b6a7ff7..594c8e1deec7d 100644 --- a/xla/service/gpu/fusions/transforms/tests/fuse_loops.mlir +++ b/xla/service/gpu/fusions/transforms/tests/fuse_loops.mlir @@ -1,24 +1,24 @@ // RUN: mlir_fusions_opt -split-input-file %s -xla-gpu-fuse-loops \ // RUN: | FileCheck %s -#indexing_map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> - (d1 floordiv 30, - ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32, - (d1 mod 6) * 32 + d0 mod 32), - domain: - d0 in [0, 127], d1 in [0, 599], - s0 in [0, 7], s1 in [0, 0], - (d1 mod 6) * 32 + d0 mod 32 in [0, 169], - is_simplified: true> -#indexing_map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> - (0, - d0 mod 32, - d0 floordiv 32 + s0 * 4), - domain: - d0 in [0, 127], d1 in [0, 599], - s0 in [0, 7], s1 in [0, 0], - (d1 mod 6) * 32 + d0 mod 32 in [0, 169], - is_simplified: true> +#indexing_map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->" +" (d1 floordiv 30," +" ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32," +" (d1 mod 6) * 32 + d0 mod 32)," +" domain:" +" d0 in [0, 127], d1 in [0, 599]," +" s0 in [0, 7], s1 in [0, 0]," +" (d1 mod 6) * 32 + d0 mod 32 in [0, 169]," +" is_simplified: true"> +#indexing_map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->" +" (0," +" d0 mod 32," +" d0 floordiv 32 + s0 * 4)," +" domain:" +" d0 in [0, 127], d1 in [0, 599]," +" s0 in [0, 7], s1 in [0, 0]," +" (d1 mod 6) * 32 + d0 mod 32 in [0, 169]," +" is_simplified: true"> func.func @fuse_loops(%arg0: tensor<20x160x170xf32>) -> tensor<1x32x33xf32> { %cst = arith.constant dense<0.000000e+00> : vector<8x1xf32> %c0 = arith.constant 0 : index @@ -43,7 +43,7 @@ func.func @fuse_loops(%arg0: tensor<20x160x170xf32>) -> tensor<1x32x33xf32> { } -// CHECK: #[[$FUSED_MAP:.*]] = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> +// CHECK: #[[$FUSED_MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> // CHECK-SAME: (d1 floordiv 30, ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32, // CHECK-SAME: (d1 mod 6) * 32 + d0 mod 32, 0, d0 mod 32, d0 floordiv 32 + s0 * 4), // CHECK-SAME: domain: d0 in [0, 127], d1 in [0, 599], @@ -60,24 +60,24 @@ func.func @fuse_loops(%arg0: tensor<20x160x170xf32>) -> tensor<1x32x33xf32> { // ----- -#indexing_map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> - (d1 floordiv 30, - ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32, - (d1 mod 6) * 32 + d0 mod 32), - domain: - d0 in [0, 127], d1 in [0, 599], - s0 in [0, 7], s1 in [0, 0], - (d1 mod 6) * 32 + d0 mod 32 in [0, 169], - is_simplified: true> -#indexing_map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> - (0, - d0 mod 32, - d0 floordiv 32 + s0 * 4), - domain: - d0 in [0, 127], d1 in [0, 599], - s0 in [0, 7], s1 in [0, 0], - (d1 mod 6) * 32 + d0 mod 32 in [0, 169], - is_simplified: true> +#indexing_map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->" +" (d1 floordiv 30," +" ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32," +" (d1 mod 6) * 32 + d0 mod 32)," +" domain:" +" d0 in [0, 127], d1 in [0, 599]," +" s0 in [0, 7], s1 in [0, 0]," +" (d1 mod 6) * 32 + d0 mod 32 in [0, 169]," +" is_simplified: true"> +#indexing_map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->" +" (0," +" d0 mod 32," +" d0 floordiv 32 + s0 * 4)," +" domain:" +" d0 in [0, 127], d1 in [0, 599]," +" s0 in [0, 7], s1 in [0, 0]," +" (d1 mod 6) * 32 + d0 mod 32 in [0, 169]," +" is_simplified: true"> func.func @do_not_fuse_index_mismatch(%arg0: tensor<20x160x170xf32>) -> tensor<1x32x33xf32> { %cst = arith.constant dense<0.000000e+00> : vector<8x1xf32> %c0 = arith.constant 0 : index @@ -108,24 +108,24 @@ func.func @do_not_fuse_index_mismatch(%arg0: tensor<20x160x170xf32>) -> tensor<1 // ----- -#indexing_map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> - (d1 floordiv 30, - ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32, - (d1 mod 6) * 32 + d0 mod 32), - domain: - d0 in [0, 127], d1 in [0, 599], - s0 in [0, 7], s1 in [0, 0], - (d1 mod 6) * 32 + d0 mod 32 in [0, 169], - is_simplified: true> -#indexing_map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> - (0, - d0 mod 32, - d0 floordiv 32 + s0 * 4), - domain: - d0 in [0, 127], d1 in [0, 599], - s0 in [0, 7], s1 in [0, 0], - (d1 mod 6) * 32 + d0 mod 32 in [0, 169], - is_simplified: true> +#indexing_map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->" +" (d1 floordiv 30," +" ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32," +" (d1 mod 6) * 32 + d0 mod 32)," +" domain:" +" d0 in [0, 127], d1 in [0, 599]," +" s0 in [0, 7], s1 in [0, 0]," +" (d1 mod 6) * 32 + d0 mod 32 in [0, 169]," +" is_simplified: true"> +#indexing_map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->" +" (0," +" d0 mod 32," +" d0 floordiv 32 + s0 * 4)," +" domain:" +" d0 in [0, 127], d1 in [0, 599]," +" s0 in [0, 7], s1 in [0, 0]," +" (d1 mod 6) * 32 + d0 mod 32 in [0, 169]," +" is_simplified: true"> func.func @do_not_fuse_multiple_uses(%arg0: tensor<20x160x170xf32>) -> tensor<1x32x33xf32> { %cst = arith.constant dense<0.000000e+00> : vector<8x1xf32> %c0 = arith.constant 0 : index @@ -158,24 +158,24 @@ func.func @do_not_fuse_multiple_uses(%arg0: tensor<20x160x170xf32>) -> tensor<1x // ----- -#indexing_map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> - (d1 floordiv 30, - ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32, - (d1 mod 6) * 32 + d0 mod 32), - domain: - d0 in [0, 127], d1 in [0, 599], - s0 in [0, 7], s1 in [0, 0], - (d1 mod 6) * 32 + d0 mod 32 in [0, 169], - is_simplified: true> -#indexing_map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> - (0, - d0 mod 32, - d0 floordiv 32 + s0 * 4), - domain: - d0 in [0, 127], d1 in [0, 599], - s0 in [0, 5], s1 in [0, 0], - (d1 mod 6) * 32 + d0 mod 32 in [0, 169], - is_simplified: true> +#indexing_map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->" +" (d1 floordiv 30," +" ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32," +" (d1 mod 6) * 32 + d0 mod 32)," +" domain:" +" d0 in [0, 127], d1 in [0, 599]," +" s0 in [0, 7], s1 in [0, 0]," +" (d1 mod 6) * 32 + d0 mod 32 in [0, 169]," +" is_simplified: true"> +#indexing_map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->" +" (0," +" d0 mod 32," +" d0 floordiv 32 + s0 * 4)," +" domain:" +" d0 in [0, 127], d1 in [0, 599]," +" s0 in [0, 5], s1 in [0, 0]," +" (d1 mod 6) * 32 + d0 mod 32 in [0, 169]," +" is_simplified: true"> func.func @do_not_fuse_map_domain_mismatch(%arg0: tensor<20x160x170xf32>) -> tensor<1x32x33xf32> { %cst = arith.constant dense<0.000000e+00> : vector<8x1xf32> %c0 = arith.constant 0 : index @@ -207,24 +207,24 @@ func.func @do_not_fuse_map_domain_mismatch(%arg0: tensor<20x160x170xf32>) -> ten // ----- -#indexing_map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> - (d1 floordiv 30, - ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32, - (d1 mod 6) * 32 + d0 mod 32), - domain: - d0 in [0, 127], d1 in [0, 599], - s0 in [0, 7], s1 in [0, 0], - (d1 mod 6) * 32 + d0 mod 32 in [0, 169], - is_simplified: true> -#indexing_map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> - (0, - d0 mod 32, - d0 floordiv 32 + s0 * 4), - domain: - d0 in [0, 127], d1 in [0, 599], - s0 in [0, 7], s1 in [0, 0], - (d1 mod 5) * 32 + d0 mod 32 in [0, 169], - is_simplified: true> +#indexing_map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->" +" (d1 floordiv 30," +" ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32," +" (d1 mod 6) * 32 + d0 mod 32)," +" domain:" +" d0 in [0, 127], d1 in [0, 599]," +" s0 in [0, 7], s1 in [0, 0]," +" (d1 mod 6) * 32 + d0 mod 32 in [0, 169]," +" is_simplified: true"> +#indexing_map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->" +" (0," +" d0 mod 32," +" d0 floordiv 32 + s0 * 4)," +" domain:" +" d0 in [0, 127], d1 in [0, 599]," +" s0 in [0, 7], s1 in [0, 0]," +" (d1 mod 5) * 32 + d0 mod 32 in [0, 169]," +" is_simplified: true"> func.func @do_not_fuse_map_constraint_mismatch(%arg0: tensor<20x160x170xf32>) -> tensor<1x32x33xf32> { %cst = arith.constant dense<0.000000e+00> : vector<8x1xf32> %c0 = arith.constant 0 : index @@ -256,24 +256,24 @@ func.func @do_not_fuse_map_constraint_mismatch(%arg0: tensor<20x160x170xf32>) -> // ----- -#indexing_map = #xla_gpu.indexing_map<(d0, d1)[s0, s1, s2] -> - (d1 floordiv 30, - ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32, - (d1 mod 6) * 32 + d0 mod 32), - domain: - d0 in [0, 127], d1 in [0, 599], - s0 in [0, 7], s1 in [0, 0], s2 in [0, 1], - (d1 mod 6) * 32 + d0 mod 32 in [0, 169], - is_simplified: true> -#indexing_map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1, s2] -> - (0, - d0 mod 32, - d0 floordiv 32 + s0 * 4), - domain: - d0 in [0, 127], d1 in [0, 599], - s0 in [0, 7], s1 in [0, 0], s2 in [0, 1], - (d1 mod 6) * 32 + d0 mod 32 in [0, 169], - is_simplified: true> +#indexing_map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1, s2] ->" +" (d1 floordiv 30," +" ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32," +" (d1 mod 6) * 32 + d0 mod 32)," +" domain:" +" d0 in [0, 127], d1 in [0, 599]," +" s0 in [0, 7], s1 in [0, 0], s2 in [0, 1]," +" (d1 mod 6) * 32 + d0 mod 32 in [0, 169]," +" is_simplified: true"> +#indexing_map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1, s2] ->" +" (0," +" d0 mod 32," +" d0 floordiv 32 + s0 * 4)," +" domain:" +" d0 in [0, 127], d1 in [0, 599]," +" s0 in [0, 7], s1 in [0, 0], s2 in [0, 1]," +" (d1 mod 6) * 32 + d0 mod 32 in [0, 169]," +" is_simplified: true"> func.func @do_not_fuse_unused_loop_iv(%arg0: tensor<20x160x170xf32>) -> tensor<1x32x33xf32> { %cst = arith.constant dense<0.000000e+00> : vector<8x1xf32> %c0 = arith.constant 0 : index diff --git a/xla/service/gpu/fusions/transforms/tests/lower_xla_gpu_loops_to_scf.mlir b/xla/service/gpu/fusions/transforms/tests/lower_xla_gpu_loops_to_scf.mlir index f02f7012b80cf..427e764d12b91 100644 --- a/xla/service/gpu/fusions/transforms/tests/lower_xla_gpu_loops_to_scf.mlir +++ b/xla/service/gpu/fusions/transforms/tests/lower_xla_gpu_loops_to_scf.mlir @@ -1,9 +1,9 @@ // RUN: mlir_fusions_opt %s -xla-gpu-lower-xla-gpu-loops-to-scf \ // RUN: --split-input-file | FileCheck %s -#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (s0 + 1, s1 - 1), - domain: d0 in [0, 3], s0 in [0, 1024], s1 in [0, 32], s0 + s1 in [0, 90], - is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (s0 + 1, s1 - 1)," + "domain: d0 in [0, 3], s0 in [0, 1024], s1 in [0, 32], s0 + s1 in [0, 90]," + "is_simplified: false"> func.func @loop_op(%input: tensor<1024x32xf32>, %init: f32, %dim: index) -> (f32) { %sum = xla_gpu.loop (%dim)[%i, %j] -> (%ra, %rb) @@ -15,9 +15,9 @@ func.func @loop_op(%input: tensor<1024x32xf32>, %init: f32, %dim: index) -> (f32 func.return %sum : f32 } -// CHECK-DAG: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0)[s0, s1] -> (s0 + s1), -// CHECK-DAG: #[[$MAPA:.*]] = #xla_gpu.indexing_map<(d0)[s0, s1] -> (s0 + 1), -// CHECK-DAG: #[[$MAPB:.*]] = #xla_gpu.indexing_map<(d0)[s0, s1] -> (s1 - 1), +// CHECK-DAG: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (s0 + s1), +// CHECK-DAG: #[[$MAPA:.*]] = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (s0 + 1), +// CHECK-DAG: #[[$MAPB:.*]] = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (s1 - 1), // CHECK-LABEL: func.func @loop_op( // CHECK-SAME: %[[IN:.*]]: tensor<1024x32xf32>, @@ -60,9 +60,9 @@ func.func @loop_op(%input: tensor<1024x32xf32>, %init: f32, %dim: index) -> (f32 // ----- -#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (s0 + 1, s1 - 1), - domain: d0 in [0, 3], s0 in [0, 1024], s1 in [0, 32], s0 + s1 in [0, 90], - is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (s0 + 1, s1 - 1)," + "domain: d0 in [0, 3], s0 in [0, 1024], s1 in [0, 32], s0 + s1 in [0, 90]," + "is_simplified: false"> func.func @loop_yields_value_from_above(%input: tensor<1024x32xf32>, %init: f32, %dim: index) -> (f32) { diff --git a/xla/service/gpu/fusions/transforms/tests/lower_xla_gpu_to_scf.mlir b/xla/service/gpu/fusions/transforms/tests/lower_xla_gpu_to_scf.mlir index dd15bdaafc533..347ed9a943ef8 100644 --- a/xla/service/gpu/fusions/transforms/tests/lower_xla_gpu_to_scf.mlir +++ b/xla/service/gpu/fusions/transforms/tests/lower_xla_gpu_to_scf.mlir @@ -124,12 +124,8 @@ func.func @predicated_extract( func.func private @exp(%p0: tensor<32x64xf32>, %i: index, %j: index) -> f32 -#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1), - domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 1], s1 in [0, 1], - is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0*2+s0, s1), - domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1], s1 in [0, 1], - is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1), domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 1], s1 in [0, 1], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0*2+s0, s1), domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1], s1 in [0, 1], is_simplified: false"> func.func @materialize(%input: tensor<32x64xf32>, %i: index, %j: index) -> !xla_gpu.indexed_vector<32x2x2xf32, #map1> { @@ -137,8 +133,8 @@ func.func @materialize(%input: tensor<32x64xf32>, %i: index, %j: index) : (tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x2x2xf32, #map1> func.return %0 : !xla_gpu.indexed_vector<32x2x2xf32, #map1> } -// CHECK-DAG: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d1 * 32 + d0 * 2 + s0, s1) -// CHECK-DAG: #[[$MAP1:.*]] = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0 * 2 + s0, s1) +// CHECK-DAG: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d1 * 32 + d0 * 2 + s0, s1) +// CHECK-DAG: #[[$MAP1:.*]] = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0 * 2 + s0, s1) // CHECK: @materialize(%[[INPUT:.*]]: tensor<32x64xf32>, %[[INDEX1:.*]]: index, %[[INDEX2:.*]]: index) @@ -153,12 +149,8 @@ func.func @materialize(%input: tensor<32x64xf32>, %i: index, %j: index) // ----- -#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1), - domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 1], s1 in [0, 1], - is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0, d1) -> (d0 mod 16, d1), - domain: d0 in [0, 32], d1 in [0, 2], - is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1), domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 1], s1 in [0, 1], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0, d1) -> (d0 mod 16, d1), domain: d0 in [0, 32], d1 in [0, 2], is_simplified: false"> func.func @insert(%input: !xla_gpu.indexed_vector<32x64xf32, #map>, %i: index, %j: index, %output: tensor<32x64xf32>) -> tensor<32x64xf32> { @@ -166,8 +158,8 @@ func.func @insert(%input: !xla_gpu.indexed_vector<32x64xf32, #map>, : !xla_gpu.indexed_vector<32x64xf32, #map> -> tensor<32x64xf32> func.return %0 : tensor<32x64xf32> } -// CHECK-DAG: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d1 * 32 + d0 * 2 + s0, s1) -// CHECK-DAG: #[[$MAP1:.*]] = #xla_gpu.indexing_map<(d0, d1) -> (d0 mod 16, d1) +// CHECK-DAG: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d1 * 32 + d0 * 2 + s0, s1) +// CHECK-DAG: #[[$MAP1:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> (d0 mod 16, d1) // CHECK: @insert(%[[INPUT:.*]]: !xla_gpu.indexed_vector<32x64xf32, #[[$MAP]]>, // CHECK-SAME: %[[I:.*]]: index, %[[J:.*]]: index, @@ -179,7 +171,7 @@ func.func @insert(%input: !xla_gpu.indexed_vector<32x64xf32, #map>, // CHECK: %[[SCALAR:.*]] = vector.extract %{{.*}}[%[[S0]], %[[S1]]] // CHECK-SAME: : f32 from vector<2x2xf32> -// CHECK: %[[MAP1_RESULT:.*]]:2 = xla_gpu.apply_indexing +// CHECK: %[[MAP1_RESULT:.*]]:2 = xla_gpu.apply_indexing // CHECK-SAME: #[[$MAP1]](%[[MAP_RESULT1]], %[[MAP_RESULT2]]) // CHECK: %[[NEW_TENSOR:.*]] = tensor.insert %[[SCALAR]] // CHECK-SAME: into %[[TENSOR]][%[[MAP1_RESULT]]#0, %[[MAP1_RESULT]]#1] @@ -189,15 +181,9 @@ func.func @insert(%input: !xla_gpu.indexed_vector<32x64xf32, #map>, func.func private @exp(%p0: tensor<32x64xf32>, %i: index, %j: index) -> f32 -#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1), - domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 1], s1 in [0, 1], - is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0*2+s0, s1), - domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1], s1 in [0, 1], - is_simplified: false> -#map2 = #xla_gpu.indexing_map<(d0, d1) -> (d0, d1), - domain: d0 in [0, 32], d1 in [0, 2], - is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1), domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 1], s1 in [0, 1], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0*2+s0, s1), domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1], s1 in [0, 1], is_simplified: false"> +#map2 = #xla_gpu.indexing_map<"(d0, d1) -> (d0, d1), domain: d0 in [0, 32], d1 in [0, 2], is_simplified: false"> func.func @materialize_and_insert(%input: tensor<32x64xf32>, %i: index, %j: index, %output: tensor<32x64xf32>) -> tensor<32x64xf32> { @@ -213,12 +199,8 @@ func.func @materialize_and_insert(%input: tensor<32x64xf32>, %i: index, func.func private @exp(%p0: tensor<32x64xcomplex>, %i: index, %j: index) -> complex -#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1), - domain: d0 in [0, 32], d1 in [0, 8], - s0 in [0, 2], s1 in [0, 3], is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0*2+s0, s1), - domain: d0 in [0, 32], d1 in [0, 2], - s0 in [0, 2], s1 in [0, 3], is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1), domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 2], s1 in [0, 3], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0*2+s0, s1), domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 2], s1 in [0, 3], is_simplified: false"> func.func @materialize_complex( %input: tensor<32x64xcomplex>, %output: tensor<32x64xcomplex>, @@ -245,11 +227,8 @@ func.func @materialize_complex( // ----- -#map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0*2+s0, s1), - domain: d0 in [0, 32], d1 in [0, 2], - s0 in [0, 2], s1 in [0, 3], is_simplified: false> -#map2 = #xla_gpu.indexing_map<(d0, d1) -> (d0, d1), - domain: d0 in [0, 32], d1 in [0, 2], is_simplified: false> +#map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0*2+s0, s1), domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 2], s1 in [0, 3], is_simplified: false"> +#map2 = #xla_gpu.indexing_map<"(d0, d1) -> (d0, d1), domain: d0 in [0, 32], d1 in [0, 2], is_simplified: false"> func.func @insert_complex( %input: !xla_gpu.indexed_vector<32x3x4xcomplex, #map1>, %output: tensor<32x64xcomplex>, @@ -274,4 +253,4 @@ func.func @insert_complex( // CHECK: %[[IMAG:.*]] = vector.extract %[[VECTOR]][%[[C1]], %[[I]], %[[J]]] // CHECK: %[[COMPLEX:.*]] = complex.create %[[REAL]], %[[IMAG]] // CHECK: %[[INSERTED:.*]] = tensor.insert %[[COMPLEX]] into %[[ITER]] -// CHECK: xla_gpu.yield %[[INSERTED]] : tensor<32x64xcomplex> \ No newline at end of file +// CHECK: xla_gpu.yield %[[INSERTED]] : tensor<32x64xcomplex> diff --git a/xla/service/gpu/fusions/transforms/tests/optimize_loops.mlir b/xla/service/gpu/fusions/transforms/tests/optimize_loops.mlir index dd7d639e3273e..17f478b2838dd 100644 --- a/xla/service/gpu/fusions/transforms/tests/optimize_loops.mlir +++ b/xla/service/gpu/fusions/transforms/tests/optimize_loops.mlir @@ -1,11 +1,7 @@ // RUN: mlir_fusions_opt %s -split-input-file -xla-gpu-optimize-loops | FileCheck %s -#map = #xla_gpu.indexing_map<(d0) -> (d0 floordiv 8), - domain: d0 in [0, 31], is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0) -> (d0 mod 8), - domain: d0 in [0, 31], is_simplified: false> -#map2 = #xla_gpu.indexing_map<(d0, d1)[s0] -> (d1 * 2 + d0 + s0 * 512), - domain: d0 in [0, 1], d1 in [0, 255], s0 in [0, 7], is_simplified: false> +#map = #xla_gpu.indexing_map<"(d0) -> (d0 floordiv 8), domain: d0 in [0, 31], is_simplified: false"> #map1 = #xla_gpu.indexing_map<"(d0) -> (d0 mod 8), domain: d0 in [0, 31], is_simplified: false"> +#map2 = #xla_gpu.indexing_map<"(d0, d1)[s0] -> (d1 * 2 + d0 + s0 * 512), domain: d0 in [0, 1], d1 in [0, 255], s0 in [0, 7], is_simplified: false"> module { func.func @fully_unroll(%arg0: tensor<4x8x4096xf32>, %arg1: tensor<4096xbf16>, %arg2: tensor<4x8xf32>, %arg3: tensor<4096xbf16>, @@ -127,7 +123,7 @@ module { } } -// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 + 1), +// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 + 1), // CHECK-LABEL: @pipeline_extract // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C30:.*]] = arith.constant 30 : index @@ -154,7 +150,7 @@ module { %cst = arith.constant dense<[0.0, 0.0]> : vector<2xf32> %cst0 = arith.constant 0.0 : f32 %ret = scf.for %i = %c0 to %c17 step %c1 iter_args (%iter = %cst) -> (vector<2xf32>) { - %base = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0) -> (d0 * 2), domain: d0 in [0, 15], is_simplified: false>(%i) + %base = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0) -> (d0 * 2), domain: d0 in [0, 15], is_simplified: false">(%i) %val = vector.transfer_read %arg[%base], %cst0 : tensor<34xf32>, vector<2xf32> %log = math.log %val : vector<2xf32> %add = arith.addf %log, %iter : vector<2xf32> @@ -164,8 +160,8 @@ module { } } -// CHECK-DAG: #[[$MAP0:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 * 2), -// CHECK-DAG: #[[$MAP1:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 + 1), +// CHECK-DAG: #[[$MAP0:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 * 2), +// CHECK-DAG: #[[$MAP1:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 + 1), // CHECK-LABEL: @pipeline_transfer // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index diff --git a/xla/service/gpu/fusions/transforms/tests/peel_loops.mlir b/xla/service/gpu/fusions/transforms/tests/peel_loops.mlir index 8959fbb826bdd..f965b069a772c 100644 --- a/xla/service/gpu/fusions/transforms/tests/peel_loops.mlir +++ b/xla/service/gpu/fusions/transforms/tests/peel_loops.mlir @@ -1,16 +1,9 @@ // RUN: mlir_fusions_opt -split-input-file %s -xla-gpu-peel-loops \ // RUN: | FileCheck %s -#map = #xla_gpu.indexing_map< - (d0)[s0, s1] -> (s0, s1), - domain: - d0 in [0, 3], - s0 in [0, 7], - s1 in [0, 10], - d0 + s0 in [0, 9], - d0 + s1 in [0, 12], - is_simplified: false -> +#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (s0, s1), domain:" + "d0 in [0, 3], s0 in [0, 7], s1 in [0, 10], d0 + s0 in [0, 9]," + "d0 + s1 in [0, 12], is_simplified: false"> func.func @peel_both_loops(%input: tensor<16x32xf32>, %init: f32, %dim: index) -> (f32) { %sum = xla_gpu.loop (%dim)[%i, %j] -> (%r0, %r1) @@ -21,9 +14,9 @@ func.func @peel_both_loops(%input: tensor<16x32xf32>, } func.return %sum : f32 } -// CHECK: #[[$PEELED_MAP:.*]] = #xla_gpu.indexing_map<(d0)[s0, s1] -> (s0, s1), domain: d0 in [0, 3], s0 in [0, 6], s1 in [0, 9], is_simplified: true> -// CHECK: #[[$TAIL_MAP0:.*]] = #xla_gpu.indexing_map<(d0)[s0, s1] -> (7, s1), domain: d0 in [0, 2], s0 in [7, 7], s1 in [0, 9], is_simplified: true> -// CHECK: #[[$TAIL_MAP1:.*]] = #xla_gpu.indexing_map<(d0)[s0, s1] -> (s0, 10), domain: d0 in [0, 2], s0 in [0, 7], s1 in [10, 10], is_simplified: true> +// CHECK: #[[$PEELED_MAP:.*]] = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (s0, s1), domain: d0 in [0, 3], s0 in [0, 6], s1 in [0, 9], is_simplified: true"> +// CHECK: #[[$TAIL_MAP0:.*]] = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (7, s1), domain: d0 in [0, 2], s0 in [7, 7], s1 in [0, 9], is_simplified: true"> +// CHECK: #[[$TAIL_MAP1:.*]] = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (s0, 10), domain: d0 in [0, 2], s0 in [0, 7], s1 in [10, 10], is_simplified: true"> // CHECK-LABEL: func.func @peel_both_loops( // CHECK-SAME: %[[INPUT:.*]]: tensor<16x32xf32>, @@ -48,13 +41,8 @@ func.func @peel_both_loops(%input: tensor<16x32xf32>, // ----- -#map = #xla_gpu.indexing_map< - (d0)[s0] -> (s0), - domain: - d0 in [0, 3], - s0 in [0, 7], - is_simplified: false -> +#map = #xla_gpu.indexing_map<"(d0)[s0] -> (s0)," + "domain: d0 in [0, 3], s0 in [0, 7], is_simplified: false"> func.func @not_constrained_symbol(%input: tensor<16xf32>, %init: f32, %dim: index) -> (f32) { %sum = xla_gpu.loop (%dim)[%i] -> (%r0) @@ -72,12 +60,12 @@ func.func @not_constrained_symbol(%input: tensor<16xf32>, %init: f32, // ----- #map = #xla_gpu.indexing_map< - (d0)[s0] -> (s0), - domain: - d0 in [0, 3], - s0 in [0, 7], - s0 mod 5 in [0, 1], - is_simplified: false +" (d0)[s0] -> (s0)," +" domain:" +" d0 in [0, 3]," +" s0 in [0, 7]," +" s0 mod 5 in [0, 1]," +" is_simplified: false" > func.func @constraint_exists_after_peeling(%input: tensor<16xf32>, %init: f32, %dim: index) -> (f32) { @@ -91,4 +79,4 @@ func.func @constraint_exists_after_peeling(%input: tensor<16xf32>, %init: f32, } // CHECK-LABEL: func.func @constraint_exists_after_peeling // CHECK: xla_gpu.loop -// CHECK-NOT: xla_gpu.loop \ No newline at end of file +// CHECK-NOT: xla_gpu.loop diff --git a/xla/service/gpu/fusions/transforms/tests/rewrite_reductions.mlir b/xla/service/gpu/fusions/transforms/tests/rewrite_reductions.mlir index 94c6cddd4a8a4..5f8b9ba5413d8 100644 --- a/xla/service/gpu/fusions/transforms/tests/rewrite_reductions.mlir +++ b/xla/service/gpu/fusions/transforms/tests/rewrite_reductions.mlir @@ -19,7 +19,7 @@ func.func @row_reduction(%arg0: tensor<128x1027xf32>) return %0 : tensor<128xf32> } -// CHECK: #[[$PAD_AND_RESHAPE:.*]] = #xla_gpu.indexing_map<(d0, d1, d2, d3) -> (d0, d1 * 128 + d2 * 32 + d3), +// CHECK: #[[$PAD_AND_RESHAPE:.*]] = #xla_gpu.indexing_map<"(d0, d1, d2, d3) -> (d0, d1 * 128 + d2 * 32 + d3), // CHECK-SAME: domain: d0 in [0, 127], d1 in [0, 8], d2 in [0, 3], d3 in [0, 31], d1 * 128 + d2 * 32 + d3 in [0, 1026] // CHECK-LABEL: @row_reduction // CHECK-SAME: %[[IN:.*]]: tensor<128x1027xf32> @@ -77,9 +77,9 @@ func.func @column(%arg0: tensor<2x32x32xf32>) return %0 : tensor<2x32xf32> } -// CHECK: #[[$RESHAPE:.*]] = #xla_gpu.indexing_map<(d0, d1, d2, d3) -> (d0, d1 * 4 + d2, d3) +// CHECK: #[[$RESHAPE:.*]] = #xla_gpu.indexing_map<"(d0, d1, d2, d3) -> (d0, d1 * 4 + d2, d3) // CHECK-SAME: d1 * 4 + d2 in [0, 31] -// CHECK: #[[$TRANSPOSE:.*]] = #xla_gpu.indexing_map<(d0, d1, d2) -> (d0, d2, d1) +// CHECK: #[[$TRANSPOSE:.*]] = #xla_gpu.indexing_map<"(d0, d1, d2) -> (d0, d2, d1) // CHECK-LABEL: @column // CHECK-SAME: %[[IN:.*]]: tensor<2x32x32xf32> // CHECK: %[[C0:.*]] = arith.constant 0.00 diff --git a/xla/service/gpu/fusions/transforms/tests/simplify_affine.mlir b/xla/service/gpu/fusions/transforms/tests/simplify_affine.mlir index db78b88abd51e..bfddbd60e2bde 100644 --- a/xla/service/gpu/fusions/transforms/tests/simplify_affine.mlir +++ b/xla/service/gpu/fusions/transforms/tests/simplify_affine.mlir @@ -63,8 +63,9 @@ func.func @op_and_for_ranges(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.pt %1 = gpu.block_id x scf.for %i = %c0 to %c4 step %c1 { %2 = xla_gpu.apply_indexing - #xla_gpu.indexing_map<()[s0, s1, s2] -> (s0 * 512 + s1 * 4 + s2 + (s1 floordiv 128) + (s2 floordiv 4)), - domain: s0 in [0, 3071], s1 in [0, 127], s2 in [0, 3], is_simplified: false>[%1, %0, %i] + #xla_gpu.indexing_map< + "()[s0, s1, s2] -> (s0 * 512 + s1 * 4 + s2 + (s1 floordiv 128) + (s2 floordiv 4))," + "domain: s0 in [0, 3071], s1 in [0, 127], s2 in [0, 3], is_simplified: false">[%1, %0, %i] %3 = arith.index_castui %2 : index to i64 %4 = llvm.getelementptr %arg0[%3] : (!llvm.ptr, i64) -> !llvm.ptr, f32 %5 = llvm.load %4 invariant : !llvm.ptr -> f32 @@ -92,8 +93,9 @@ func.func @op_and_for_ranges(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.pt func.func @arg_ranges(%arg0: index, %arg1: index) -> index { %0 = xla_gpu.apply_indexing - #xla_gpu.indexing_map<()[s0, s1] -> (s0 floordiv 100 + s1 floordiv 100), - domain: s0 in [0, 42], s1 in [0, 1000], is_simplified: false>[%arg0, %arg1] + #xla_gpu.indexing_map< + "()[s0, s1] -> (s0 floordiv 100 + s1 floordiv 100)," + "domain: s0 in [0, 42], s1 in [0, 1000], is_simplified: false">[%arg0, %arg1] return %0 : index } @@ -106,8 +108,8 @@ func.func @arg_ranges(%arg0: index, %arg1: index) -> index { func.func @cant_lower(%arg0: index, %arg1: index) -> (index, index) { %0:2 = xla_gpu.apply_indexing - #xla_gpu.indexing_map<()[s0, s1] -> (s0 floordiv 100 + s1 floordiv 100, s0 + s1), - domain: s0 in [-10, 42], s1 in [0, 1000], is_simplified: false>[%arg0, %arg1] + #xla_gpu.indexing_map<"()[s0, s1] -> (s0 floordiv 100 + s1 floordiv 100, s0 + s1)," + "domain: s0 in [-10, 42], s1 in [0, 1000], is_simplified: false">[%arg0, %arg1] return %0#0, %0#1 : index, index } @@ -124,8 +126,9 @@ func.func @order_summands(%arg1: index) { scf.for %arg2 = %c0 to %c4 step %c1 { scf.for %arg3 = %c0 to %c4 step %c1 { %0 = xla_gpu.apply_indexing - #xla_gpu.indexing_map<()[s0, s1, s2] -> ((s0 + s1) floordiv 3 + s0 * 512 + s1 * 4 + s2 * 10), - domain: s0 in [0, 3], s1 in [0, 3], s2 in [0, 3], is_simplified: false>[%arg2, %arg1, %arg3] + #xla_gpu.indexing_map< + "()[s0, s1, s2] -> ((s0 + s1) floordiv 3 + s0 * 512 + s1 * 4 + s2 * 10)," + "domain: s0 in [0, 3], s1 in [0, 3], s2 in [0, 3], is_simplified: false">[%arg2, %arg1, %arg3] "dummy.op"(%0) : (index) -> () } } diff --git a/xla/service/gpu/fusions/transforms/tests/simplify_arith.mlir b/xla/service/gpu/fusions/transforms/tests/simplify_arith.mlir index aaeb665815dcc..9524c3d32cc6c 100644 --- a/xla/service/gpu/fusions/transforms/tests/simplify_arith.mlir +++ b/xla/service/gpu/fusions/transforms/tests/simplify_arith.mlir @@ -248,7 +248,8 @@ func.func @refine_constraints(%tensor: tensor<100xf32>) -> tensor<100xf32> { %c42_f32 = arith.constant 42.0 : f32 %loop = scf.for %i = %c0 to %c3 step %c1 iter_args(%in_ = %tensor) -> (tensor<100xf32>) { - %0 = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0) -> (d0 mod 4), domain: d0 in [0, 9], is_simplified: false>(%i) + %0 = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0) -> (d0 mod 4)," + "domain: d0 in [0, 9], is_simplified: false">(%i) %updated = tensor.insert %c42_f32 into %in_[%0] : tensor<100xf32> scf.yield %updated :tensor<100xf32> } @@ -262,10 +263,11 @@ func.func @refine_constraints(%tensor: tensor<100xf32>) -> tensor<100xf32> { // ----- -#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (((d0 * 4 + d1 * 512 + s1) floordiv 9 + s0 * 32768) mod 2400000), - domain: d0 in [0, 127], d1 in [0, 575], s0 in [0, 73], s1 in [0, 3], is_simplified: false> -#map1 = #xla_gpu.indexing_map<(d0, d1)[s0] -> ((d0 * 4 + d1 * 512 + s0) mod 9), - domain: d0 in [0, 127], d1 in [0, 575], s0 in [0, 3], is_simplified: false> +#map = #xla_gpu.indexing_map< + "(d0, d1)[s0, s1] -> (((d0 * 4 + d1 * 512 + s1) floordiv 9 + s0 * 32768) mod 2400000)," + "domain: d0 in [0, 127], d1 in [0, 575], s0 in [0, 73], s1 in [0, 3], is_simplified: false"> +#map1 = #xla_gpu.indexing_map<"(d0, d1)[s0] -> ((d0 * 4 + d1 * 512 + s0) mod 9)," + "domain: d0 in [0, 127], d1 in [0, 575], s0 in [0, 3], is_simplified: false"> func.func @refine_constraints_for_symbol(%arg0: tensor<2400000x9xf32>, %arg1: tensor<2400000x9xf32>) -> tensor<2400000x9xf32> { %c0 = arith.constant 0 : index @@ -289,12 +291,23 @@ func.func @refine_constraints_for_symbol(%arg0: tensor<2400000x9xf32>, } return %0 : tensor<2400000x9xf32> } -// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1, d2, d3) -> (d2 * 32768 + (d0 * 4 + d1 * 512 + d3) floordiv 9), +// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1, d2, d3) -> (d2 * 32768 + (d0 * 4 + d1 * 512 + d3) floordiv 9), // CHECK-LABEL: func.func @refine_constraints_for_symbol // ----- -#map = #xla_gpu.indexing_map<(d0, d1, d2, d3, d4, d5)[s0] -> ((d0 * 4 + s0) floordiv 6, (d0 * 4 + s0) mod 6), domain: d0 in [0, 29], d1 in [0, 0], d2 in [0, 0], d3 in [0, 0], d4 in [0, 0], d5 in [0, 0], s0 in [0, 3], d0 * 4 + s0 in [0, 29], is_simplified: false> +#map = #xla_gpu.indexing_map< + "(d0, d1, d2, d3, d4, d5)[s0] -> ((d0 * 4 + s0) floordiv 6, (d0 * 4 + s0) mod 6)," + "domain:" + "d0 in [0, 29]," + "d1 in [0, 0]," + "d2 in [0, 0]," + "d3 in [0, 0]," + "d4 in [0, 0]," + "d5 in [0, 0]," + "s0 in [0, 3]," + "d0 * 4 + s0 in [0, 29]," + "is_simplified: false"> func.func @dus(%arg0: tensor<20x30xf32>, %arg1: tensor<5x6xf32>, %arg2: i32, %arg3: i32, %arg4: tensor<20x30xf32>) -> tensor<20x30xf32> { %c24 = arith.constant 24 : index %c15 = arith.constant 15 : index diff --git a/xla/service/gpu/fusions/transforms/tests/vectorize_loads_stores.mlir b/xla/service/gpu/fusions/transforms/tests/vectorize_loads_stores.mlir index c77d035e6271b..0c734ca19882e 100644 --- a/xla/service/gpu/fusions/transforms/tests/vectorize_loads_stores.mlir +++ b/xla/service/gpu/fusions/transforms/tests/vectorize_loads_stores.mlir @@ -1,8 +1,8 @@ // RUN: mlir_fusions_opt -allow-unregistered-dialect %s -split-input-file \ // RUN: -xla-gpu-vectorize-loads-stores -cse -canonicalize | FileCheck %s -#map = #xla_gpu.indexing_map<(d0)[s0] -> (d0 * 2 + s0), - domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true> +#map = #xla_gpu.indexing_map<"(d0)[s0] -> (d0 * 2 + s0)," + "domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true"> func.func @simple_read(%arg0: tensor<128xf32>) -> (f32) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -20,7 +20,7 @@ func.func @simple_read(%arg0: tensor<128xf32>) -> (f32) { } return %outer : f32 } -// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 * 2), domain: d0 in [0, 63], is_simplified: true> +// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 * 2), domain: d0 in [0, 63], is_simplified: true"> // CHECK-LABEL: @simple_read // CHECK-SAME: (%[[ARG0:.*]]: tensor // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index @@ -36,8 +36,8 @@ func.func @simple_read(%arg0: tensor<128xf32>) -> (f32) { // ----- -#map = #xla_gpu.indexing_map<(d0)[s0] -> (d0 * 2 + s0 + 1), - domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true> +#map = #xla_gpu.indexing_map<"(d0)[s0] -> (d0 * 2 + s0 + 1)," + "domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true"> func.func @misaligned_indexing_map(%arg0: tensor<128xf32>) -> (f32) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -60,8 +60,8 @@ func.func @misaligned_indexing_map(%arg0: tensor<128xf32>) -> (f32) { // ----- -#map = #xla_gpu.indexing_map<(d0)[s0] -> (d0 * 3 + s0), - domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true> +#map = #xla_gpu.indexing_map<"(d0)[s0] -> (d0 * 3 + s0)," + "domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true"> func.func @misaligned_indexing_map_2(%arg0: tensor<128xf32>) -> (f32) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -84,8 +84,8 @@ func.func @misaligned_indexing_map_2(%arg0: tensor<128xf32>) -> (f32) { // ----- -#map = #xla_gpu.indexing_map<(d0)[s0] -> (3 * d0 + s0), - domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true> +#map = #xla_gpu.indexing_map<"(d0)[s0] -> (3 * d0 + s0)," + "domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true"> func.func @misaligned_shape(%arg0: tensor<192xf32>) -> (f32) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -108,8 +108,8 @@ func.func @misaligned_shape(%arg0: tensor<192xf32>) -> (f32) { // ----- -#map = #xla_gpu.indexing_map<(d0)[s0] -> (d0 + s0 * 2), - domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true> +#map = #xla_gpu.indexing_map<"(d0)[s0] -> (d0 + s0 * 2)," + "domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true"> func.func @wrong_stride(%arg0: tensor<128xf32>) -> (f32) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -134,8 +134,8 @@ func.func @wrong_stride(%arg0: tensor<128xf32>) -> (f32) { // We could vectorize this as a float vector load of double the size, but we // don't currently. -#map = #xla_gpu.indexing_map<(d0)[s0] -> (2 * d0 + s0), - domain: d0 in [0, 127], s0 in [0, 1], is_simplified: true> +#map = #xla_gpu.indexing_map<"(d0)[s0] -> (2 * d0 + s0)," + "domain: d0 in [0, 127], s0 in [0, 1], is_simplified: true"> func.func @simple_read_complex(%arg0: tensor<128xcomplex>, %i: index) -> (complex) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -250,10 +250,12 @@ func.func @write_not_yielded(%arg0: tensor<64xf32>) -> tensor<64xf32> { // ----- -#map = #xla_gpu.indexing_map<(d0, d1)[s0] -> (d1 * 2 + d0 + s0 * 512), - domain: d0 in [0, 7], d1 in [0, 255], s0 in [0, 7], is_simplified: true> -#map1 = #xla_gpu.indexing_map<(d0, d1, d2)[s0] -> (d0 * 32 + d2 * 2 + d1 + s0 * 512), - domain: d0 in [0, 7], d1 in [0, 1], d2 in [0, 255], s0 in [0, 7], is_simplified: true> +#map = #xla_gpu.indexing_map<"(d0, d1)[s0] -> (d1 * 2 + d0 + s0 * 512)," + "domain: d0 in [0, 7], d1 in [0, 255], s0 in [0, 7], is_simplified: true"> +#map1 = #xla_gpu.indexing_map< + "(d0, d1, d2)[s0] -> (d0 * 32 + d2 * 2 + d1 + s0 * 512)," + "domain: d0 in [0, 7], d1 in [0, 1], d2 in [0, 255], s0 in [0, 7]," + "is_simplified: true"> func.func @multiple(%arg0: tensor<131072xf32>, %arg1: tensor<4096xbf16>, %arg2: tensor<32xf32>, %arg3: tensor<131072xf32>, %arg4: index) -> (tensor<131072xf32>, f32) { @@ -280,8 +282,8 @@ func.func @multiple(%arg0: tensor<131072xf32>, %arg1: tensor<4096xbf16>, } return %0#0, %0#1 : tensor<131072xf32>, f32 } -// CHECK-DAG: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1) -> (d0 * 2 + d1 * 512), domain: d0 in [0, 255], d1 in [0, 7], is_simplified: true> -// CHECK-DAG: #[[$MAP1:.*]] = #xla_gpu.indexing_map<(d0, d1, d2) -> (d0 * 32 + d1 * 2 + d2 * 512), domain: d0 in [0, 7], d1 in [0, 255], d2 in [0, 7], is_simplified: true> +// CHECK-DAG: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 2 + d1 * 512), domain: d0 in [0, 255], d1 in [0, 7], is_simplified: true"> +// CHECK-DAG: #[[$MAP1:.*]] = #xla_gpu.indexing_map<"(d0, d1, d2) -> (d0 * 32 + d1 * 2 + d2 * 512), domain: d0 in [0, 7], d1 in [0, 255], d2 in [0, 7], is_simplified: true"> // CHECK-LABEL: @multiple // CHECK-SAME: (%[[ARG0:.*]]: tensor{{.*}}, %[[ARG1:.*]]: tensor{{.*}}, %[[ARG2:.*]]: tensor{{.*}}, %[[ARG3:.*]]: tensor{{.*}}, %[[ARG4:.*]]: index) // CHECK: %[[C0:.*]] = arith.constant 0 : index @@ -304,8 +306,8 @@ func.func @multiple(%arg0: tensor<131072xf32>, %arg1: tensor<4096xbf16>, // ----- -#map = #xla_gpu.indexing_map<(d0)[s0] -> ((d0 * 4) mod 64 + s0), - domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true> +#map = #xla_gpu.indexing_map<"(d0)[s0] -> ((d0 * 4) mod 64 + s0)," + "domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true"> func.func @remainder_with_modulo(%arg0: tensor<128xf32>) -> (f32) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -323,7 +325,7 @@ func.func @remainder_with_modulo(%arg0: tensor<128xf32>) -> (f32) { } return %outer : f32 } -// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0) -> ((d0 mod 16) * 4), +// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0) -> ((d0 mod 16) * 4), // CHECK-LABEL: @remainder_with_modulo // CHECK: %[[C0:.*]] = arith.constant 0 : index // CHECK: scf.for %[[I:.*]] = %[[C0]] @@ -332,8 +334,8 @@ func.func @remainder_with_modulo(%arg0: tensor<128xf32>) -> (f32) { // ----- -#map = #xla_gpu.indexing_map<(d0)[s0] -> ((d0 * 4) mod 65 + s0), - domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true> +#map = #xla_gpu.indexing_map<"(d0)[s0] -> ((d0 * 4) mod 65 + s0)," + "domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true"> func.func @remainder_with_modulo_misaligned(%arg0: tensor<128xf32>) -> (f32) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -356,10 +358,10 @@ func.func @remainder_with_modulo_misaligned(%arg0: tensor<128xf32>) -> (f32) { // ----- -#map0 = #xla_gpu.indexing_map<(d0) -> (d0 + 5), - domain: d0 in [0, 63], is_simplified: true> -#map1 = #xla_gpu.indexing_map<(d0)[s0] -> (d0 * 2 + s0), - domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true> +#map0 = #xla_gpu.indexing_map<"(d0) -> (d0 + 5)," + "domain: d0 in [0, 63], is_simplified: true"> +#map1 = #xla_gpu.indexing_map<"(d0)[s0] -> (d0 * 2 + s0)," + "domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true"> module { func.func @apply_indexing_sequence(%arg0: tensor<128xf32>) -> (f32) { %c0 = arith.constant 0 : index @@ -381,8 +383,8 @@ module { } } -// CHECK: #[[$MAP0:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 * 2 + 10), -// CHECK-SAME: domain: d0 in [0, 63], is_simplified: true> +// CHECK: #[[$MAP0:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 * 2 + 10), +// CHECK-SAME: domain: d0 in [0, 63], is_simplified: true"> // CHECK-LABEL: @apply_indexing_sequence // CHECK: %[[BASE:.*]] = xla_gpu.apply_indexing #[[$MAP0]] // CHECK: vector.transfer_read {{.*}}[%[[BASE]]] @@ -390,10 +392,10 @@ module { // ----- -#map0 = #xla_gpu.indexing_map<(d0) -> (d0 + 5), - domain: d0 in [0, 63], is_simplified: true> -#map1 = #xla_gpu.indexing_map<(d0)[s0] -> (d0 * 2 + s0), - domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true> +#map0 = #xla_gpu.indexing_map<"(d0) -> (d0 + 5)," + "domain: d0 in [0, 63], is_simplified: true"> +#map1 = #xla_gpu.indexing_map<"(d0)[s0] -> (d0 * 2 + s0)," + "domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true"> module { func.func @apply_indexing_sequence_same_block(%arg0: tensor<128xf32>) -> (f32) { %c0 = arith.constant 0 : index @@ -418,4 +420,4 @@ module { } // CHECK-LABEL: @apply_indexing_sequence_same_block -// CHECK-NOT: vector.transfer_read \ No newline at end of file +// CHECK-NOT: vector.transfer_read diff --git a/xla/service/gpu/fusions/triton/triton_fusion_emitter_device_test.cc b/xla/service/gpu/fusions/triton/triton_fusion_emitter_device_test.cc index a2de97c39bfe0..f136f7190d1a6 100644 --- a/xla/service/gpu/fusions/triton/triton_fusion_emitter_device_test.cc +++ b/xla/service/gpu/fusions/triton/triton_fusion_emitter_device_test.cc @@ -213,7 +213,7 @@ ENTRY main { "num_warps":"1"}}}})"; TF_EXPECT_OK(CreateTritonIrAndFileCheck(this, kHloText, "triton_softmax_computation", R"( -CHECK: #indexing_map = #xla_gpu.indexing_map<(d0) -> (d0 * 127), domain: d0 in [0, 124], is_simplified: true> +CHECK: #indexing_map = #xla_gpu.indexing_map<"(d0) -> (d0 * 127), domain: d0 in [0, 124], is_simplified: true"> CHECK: tt.func @triton_fn(%[[P0:[^:]*]]: !tt.ptr {tt.divisibility = 16 : i32}, %[[P1:[^:]*]]: !tt.ptr {tt.divisibility = 16 : i32}) { CHECK-DAG: %[[ZERO:.*]] = arith.constant 0 : i32 CHECK-DAG: %[[C125:.*]] = arith.constant 125 : i64 @@ -278,7 +278,7 @@ ENTRY main { "num_warps":"1"}}}})"; TF_EXPECT_OK(CreateTritonIrAndFileCheck(this, kHloText, "triton_softmax_computation", R"( -CHECK: #indexing_map = #xla_gpu.indexing_map<(d0) -> (d0 * 127), domain: d0 in [0, 124], is_simplified: true> +CHECK: #indexing_map = #xla_gpu.indexing_map<"(d0) -> (d0 * 127), domain: d0 in [0, 124], is_simplified: true"> CHECK: tt.func @triton_fn( CHECK-SAME: %[[P0:[A-Za-z0-9_]*]]: !tt.ptr CHECK-SAME: %[[P1:[A-Za-z0-9_]*]]: !tt.ptr @@ -349,9 +349,9 @@ ENTRY main { TF_EXPECT_OK(CreateTritonIrAndFileCheck(this, kHloText, "triton_softmax_computation", R"( -CHECK: #[[MAP:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 floordiv 125), domain: d0 in [0, 1249], is_simplified: true> -CHECK: #[[MAP1:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 mod 125), domain: d0 in [0, 1249], is_simplified: true> -CHECK: #[[MAP2:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 * 127), domain: d0 in [0, 1249], is_simplified: true> +CHECK: #[[MAP:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 floordiv 125), domain: d0 in [0, 1249], is_simplified: true"> +CHECK: #[[MAP1:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 mod 125), domain: d0 in [0, 1249], is_simplified: true"> +CHECK: #[[MAP2:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 * 127), domain: d0 in [0, 1249], is_simplified: true"> CHECK: tt.func @triton_fn(%[[P0:[^:]*]]: !tt.ptr {tt.divisibility = 16 : i32}, %[[P1:[^:]*]]: !tt.ptr {tt.divisibility = 16 : i32}, %[[P2:[^:]*]]: !tt.ptr {tt.divisibility = 16 : i32}, %[[P3:[^:]*]]: !tt.ptr {tt.divisibility = 16 : i32}) { CHECK-DAG: %[[ZERO:.*]] = arith.constant 0 : i32 CHECK-DAG: %[[ZERO_64:.*]] = arith.constant 0 : i64 @@ -542,8 +542,8 @@ ENTRY main { TF_ASSERT_OK(CreateTritonIrAndFileCheck(this, kHloText, "triton_softmax_computation", R"( -// CHECK: #xla_gpu.indexing_map<(d0) -> (d0 floordiv 32), domain: d0 in [0, 2047], is_simplified: true> -// CHECK: #xla_gpu.indexing_map<(d0) -> (d0 mod 32), domain: d0 in [0, 2047], is_simplified: true> +// CHECK: #xla_gpu.indexing_map<"(d0) -> (d0 floordiv 32), domain: d0 in [0, 2047], is_simplified: true"> +// CHECK: #xla_gpu.indexing_map<"(d0) -> (d0 mod 32), domain: d0 in [0, 2047], is_simplified: true"> // CHECK-LABEL: tt.func @triton_fn( // CHECK-SAME: %[[P0:[A-Za-z0-9_]*]]: !tt.ptr // CHECK-SAME: %[[P1:[A-Za-z0-9_]*]]: !tt.ptr