diff --git a/xla/service/gpu/fusions/ir/BUILD b/xla/service/gpu/fusions/ir/BUILD
index 91c6eeea6da66..5a3de755dd660 100644
--- a/xla/service/gpu/fusions/ir/BUILD
+++ b/xla/service/gpu/fusions/ir/BUILD
@@ -135,7 +135,7 @@ cc_library(
         ":xla_gpu_ops_inc_gen",
         ":xla_gpu_types_inc_gen",
         "//xla/service/gpu/model:indexing_analysis",
-        "@com_google_absl//absl/strings:str_format",
+        "//xla/service/gpu/model:indexing_map_serialization",
         "@llvm-project//llvm:Support",
         "@llvm-project//mlir:ArithDialect",
         "@llvm-project//mlir:BytecodeOpInterface",
diff --git a/xla/service/gpu/fusions/ir/tests/attrs.mlir b/xla/service/gpu/fusions/ir/tests/attrs.mlir
index bc37a3ac56fc7..b990103ea2cfa 100644
--- a/xla/service/gpu/fusions/ir/tests/attrs.mlir
+++ b/xla/service/gpu/fusions/ir/tests/attrs.mlir
@@ -9,17 +9,17 @@
 // CHECK-SAME: s0 in [0, 32],
 // CHECK-SAME: d0 + s0 in [1, 10],
 // CHECK-SAME: d0 mod 2 in [0, 1],
-// CHECK-SAME: is_simplified: true
+// CHECK-SAME: is_simplified: true"
 // CHECK-SAME: >
-#map = #xla_gpu.indexing_map<(d0, d1, d2)[s0] -> (d0),
-                            domain:
-                            d0 in [1, 2],
-                            d1 in [5, 8],
-                            d2 in [10, 12],
-                            s0 in [0, 32],
-                            d0 mod 2 in [0, 1],
-                            d0 + s0 in [1, 10],
-                            is_simplified: true
+#map = #xla_gpu.indexing_map<"(d0, d1, d2)[s0] -> (d0),"
+                             "domain:"
+                             "d0 in [1, 2],"
+                             "d1 in [5, 8],"
+                             "d2 in [10, 12],"
+                             "s0 in [0, 32],"
+                             "d0 mod 2 in [0, 1],"
+                             "d0 + s0 in [1, 10],"
+                             "is_simplified: true"
                             >
 
 func.func private @indexing_map_attr(!xla_gpu.indexed_vector<64x64x32xf64, #map>)
@@ -39,20 +39,21 @@ func.func private @indexing_map_attr(!xla_gpu.indexed_vector<64x64x32xf64, #map>
 // CHECK-SAME: d0 + s0 in [1, 10]
 // CHECK-SAME: d0 mod 2 in [0, 1]
 // CHECK-SAME: d1 + s1 + s2 in [1, 32]
-// CHECK-SAME: is_simplified: false
+// CHECK-SAME: is_simplified: false"
 // CHECK-SAME: >
-#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1, s2] -> (d0 + s0, d1 + s1, d1 + s2),
-                            domain:
-                            d0 in [1, 2],
-                            d1 in [5, 8],
-                            s0 in [0, 10],
-                            s1 in [0, 5],
-                            s2 in [0, 32],
-                            d0 mod 2 in [0, 1],
-                            d0 + s0 in [1, 10],
-                            d1 + s1 + s2 in [1, 32],
-                            is_simplified: false
-                            >
+#map = #xla_gpu.indexing_map<
+  "(d0, d1)[s0, s1, s2] -> (d0 + s0, d1 + s1, d1 + s2),"
+  "domain:"
+  "d0 in [1, 2],"
+  "d1 in [5, 8],"
+  "s0 in [0, 10],"
+  "s1 in [0, 5],"
+  "s2 in [0, 32],"
+  "d0 mod 2 in [0, 1],"
+  "d0 + s0 in [1, 10],"
+  "d1 + s1 + s2 in [1, 32],"
+  "is_simplified: false"
+  >
 func.func private @more_range_vars(!xla_gpu.indexed_vector<100x32xf64, #map>)
 // CHECK-LABEL: @more_range_vars
 // CHECK: !xla_gpu.indexed_vector<100x32xf64, #[[$INDEX_MAP]]>
@@ -64,13 +65,13 @@ func.func private @more_range_vars(!xla_gpu.indexed_vector<100x32xf64, #map>)
 // CHECK-SAME: domain:
 // CHECK-SAME: d0 in [0, 100]
 // CHECK-SAME: s0 in [-3, -1]
-// CHECK-SAME: is_simplified: false
+// CHECK-SAME: is_simplified: false"
 // CHECK-SAME: >
-#map = #xla_gpu.indexing_map<(d0)[s0] -> (d0),
-                            domain:
-                            d0 in [0, 100],
-                            s0 in [-3, -1],
-                            is_simplified: false
+#map = #xla_gpu.indexing_map<"(d0)[s0] -> (d0),"
+                             "domain:"
+                             "d0 in [0, 100],"
+                             "s0 in [-3, -1],"
+                             "is_simplified: false"
                             >
 func.func private @indexing_map_small(!xla_gpu.indexed_vector<100xf64, #map>)
 // CHECK-LABEL: @indexing_map_small
@@ -85,15 +86,15 @@ func.func private @indexing_map_small(!xla_gpu.indexed_vector<100xf64, #map>)
 // CHECK-SAME: d1 in [5, 8]
 // CHECK-SAME: d2 in [10, 12]
 // CHECK-SAME: s0 in [0, 32]
-// CHECK-SAME: is_simplified: false
+// CHECK-SAME: is_simplified: false"
 // CHECK-SAME: >
-#map = #xla_gpu.indexing_map<(d0, d1, d2)[s0] -> (d0),
-                            domain:
-                            d0 in [1, 2],
-                            d1 in [5, 8],
-                            d2 in [10, 12],
-                            s0 in [0, 32],
-                            is_simplified: false
+#map = #xla_gpu.indexing_map<"(d0, d1, d2)[s0] -> (d0),"
+                             "domain:"
+                             "d0 in [1, 2],"
+                             "d1 in [5, 8],"
+                             "d2 in [10, 12],"
+                             "s0 in [0, 32],"
+                             "is_simplified: false"
                             >
 func.func private @no_constraints(!xla_gpu.indexed_vector<32xf64, #map>)
 // CHECK-LABEL: @no_constraints
@@ -106,13 +107,13 @@ func.func private @no_constraints(!xla_gpu.indexed_vector<32xf64, #map>)
 // CHECK-SAME: domain:
 // CHECK-SAME: s0 in [3, 5]
 // CHECK-SAME: s0 mod 2 in [0, 1]
-// CHECK-SAME: is_simplified: false
+// CHECK-SAME: is_simplified: false"
 // CHECK-SAME: >
-#map = #xla_gpu.indexing_map<()[s0] -> (s0),
-                            domain:
-                            s0 in [3, 5],
-                            s0 mod 2 in [0, 1],
-                            is_simplified: false
+#map = #xla_gpu.indexing_map<"()[s0] -> (s0),"
+                             "domain:"
+                             "s0 in [3, 5],"
+                             "s0 mod 2 in [0, 1],"
+                             "is_simplified: false"
                             >
 func.func private @no_dimensions(!xla_gpu.indexed_vector<100xf64, #map>)
 // CHECK-LABEL: @no_dimensions
@@ -125,13 +126,13 @@ func.func private @no_dimensions(!xla_gpu.indexed_vector<100xf64, #map>)
 // CHECK-SAME: domain:
 // CHECK-SAME: d0 in [3, 5]
 // CHECK-SAME: d0 mod 2 in [0, 1]
-// CHECK-SAME: is_simplified: false
+// CHECK-SAME: is_simplified: false"
 // CHECK-SAME: >
-#map = #xla_gpu.indexing_map<(d0) -> (d0),
-                            domain:
-                            d0 in [3, 5],
-                            d0 mod 2 in [0, 1],
-                            is_simplified: false
+#map = #xla_gpu.indexing_map<"(d0) -> (d0),"
+                             "domain:"
+                             "d0 in [3, 5],"
+                             "d0 mod 2 in [0, 1],"
+                             "is_simplified: false"
                             >
 func.func private @no_symbols(!xla_gpu.indexed_vector<100xf64, #map>)
 // CHECK-LABEL: @no_symbols
@@ -142,7 +143,7 @@ func.func private @no_symbols(!xla_gpu.indexed_vector<100xf64, #map>)
 // CHECK: #[[$INDEX_MAP:.*]] = #xla_gpu.indexing_map<
 // CHECK-SAME: () -> ()
 // CHECK-SAME: >
-#map = #xla_gpu.indexing_map<() -> ()>
+#map = #xla_gpu.indexing_map<"() -> ()">
 func.func private @empty(!xla_gpu.indexed_vector<100xf64, #map>)
 // CHECK-LABEL: @empty
 // CHECK: !xla_gpu.indexed_vector<100xf64, #[[$INDEX_MAP]]>
@@ -151,7 +152,8 @@ func.func private @empty(!xla_gpu.indexed_vector<100xf64, #map>)
 
 func.func private @tensor_layout(
   %in0: tensor<42xf32, #xla_gpu.layout<"shmem",
-     (d0) -> (), domain: d0 in [0, 42], is_simplified: true>>)
-// CHECK:      #layout = #xla_gpu.layout<"shmem", (d0) -> (),
-// CHECK-SAME:   domain: d0 in [0, 42], is_simplified: true>
-// CHECK: tensor<42xf32, #layout>
\ No newline at end of file
+     "(d0) -> (),"
+     "domain: d0 in [0, 42], is_simplified: true">>)
+// CHECK:      #layout = #xla_gpu.layout<"shmem", "(d0) -> (),
+// CHECK-SAME:   domain: d0 in [0, 42], is_simplified: true">
+// CHECK: tensor<42xf32, #layout>
diff --git a/xla/service/gpu/fusions/ir/tests/canonicalize.mlir b/xla/service/gpu/fusions/ir/tests/canonicalize.mlir
index 495456a5ab36d..bfca90e5c64f5 100644
--- a/xla/service/gpu/fusions/ir/tests/canonicalize.mlir
+++ b/xla/service/gpu/fusions/ir/tests/canonicalize.mlir
@@ -1,15 +1,13 @@
 // RUN: mlir_fusions_opt %s --split-input-file -canonicalize | FileCheck %s
 
-#map0 = #xla_gpu.indexing_map<()[s0, s1] -> (1 + s0 + s1 mod 3 - s1, s0 mod 2),
-                                domain: s0 in [-10, 10], s1 in [0, 2],
-                                is_simplified: false>
+#map0 = #xla_gpu.indexing_map<"()[s0, s1] -> (1 + s0 + s1 mod 3 - s1, s0 mod 2), domain: s0 in [-10, 10], s1 in [0, 2], is_simplified: false">
 func.func @simplify_apply_indexing(%s0: index, %s1: index) -> (index, index) {
   %0:2 = xla_gpu.apply_indexing #map0 [%s0, %s1]
   func.return %0#0, %0#1 : index, index
 }
-// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 + 1, d0 mod 2),
+// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 + 1, d0 mod 2),
 // CHECK-SAME:                     domain: d0 in [-10, 10]
-// CHECK-SAME:                     is_simplified: true>
+// CHECK-SAME:                     is_simplified: true">
 
 // CHECK-LABEL: func.func @simplify_apply_indexing
 // CHECK-SAME:      %[[ARG_0:.*]]: index, %[[ARG_1:.*]]: index)
@@ -17,14 +15,13 @@ func.func @simplify_apply_indexing(%s0: index, %s1: index) -> (index, index) {
 
 // -----
 
-#map0 = #xla_gpu.indexing_map<(d0, d1, d2)[s0, s1] -> (1 + s0 + s1 mod 4 - s1, s0 mod 2, d0 + d2),
-                               domain: d0 in [0, 1], d1 in [0, 2], d2 in [0, 3], s0 in [-11, 11], s1 in [0, 3], is_simplified: false>
+#map0 = #xla_gpu.indexing_map<"(d0, d1, d2)[s0, s1] -> (1 + s0 + s1 mod 4 - s1, s0 mod 2, d0 + d2), domain: d0 in [0, 1], d1 in [0, 2], d2 in [0, 3], s0 in [-11, 11], s1 in [0, 3], is_simplified: false">
 func.func @simplify_apply_indexing_remove_dims(%d0: index, %d1: index,
     %d2: index, %s0: index, %s1: index) -> (index, index, index) {
   %0:3 = xla_gpu.apply_indexing #map0(%d0, %d1, %d2)[%s0, %s1]
   func.return %0#0, %0#1, %0#2 : index, index, index
 }
-// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1, d2) -> (d2 + 1, d2 mod 2, d0 + d1),
+// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1, d2) -> (d2 + 1, d2 mod 2, d0 + d1),
 // CHECK-SAME:                     domain: d0 in [0, 1], d1 in [0, 3], d2 in [-11, 11]
 
 // CHECK-LABEL: func.func @simplify_apply_indexing_remove_dims
@@ -38,23 +35,22 @@ func.func @simplify_apply_indexing_remove_dims(%d0: index, %d1: index,
 
 // -----
 
-#map0 = #xla_gpu.indexing_map<(d0) -> (d0 mod 10), domain: d0 in [0, 9], is_simplified: true>
+#map0 = #xla_gpu.indexing_map<"(d0) -> (d0 mod 10), domain: d0 in [0, 9], is_simplified: true">
 func.func @do_not_simplify_if_is_simplified_is_true(%d0: index) -> (index) {
   %0 = xla_gpu.apply_indexing #map0(%d0)
   func.return %0 : index
 }
-// CHECK: #xla_gpu.indexing_map<(d0) -> (d0 mod 10)
+// CHECK: #xla_gpu.indexing_map<"(d0) -> (d0 mod 10)
 
 // -----
 
-#map0 = #xla_gpu.indexing_map<(d0, d1)[s0] -> (d0 + s0, 4, d1, 1, s0),
-                                domain: d0 in [-10, 10], d1 in [0, 2], s0 in [-1, 1], is_simplified: false>
+#map0 = #xla_gpu.indexing_map<"(d0, d1)[s0] -> (d0 + s0, 4, d1, 1, s0), domain: d0 in [-10, 10], d1 in [0, 2], s0 in [-1, 1], is_simplified: false">
 func.func @fold_indexing_map_results(%d0: index, %d1: index, %s0: index)
     -> (index, index, index, index, index) {
   %0:5 = xla_gpu.apply_indexing #map0 (%d0, %d1)[%s0]
   func.return %0#0, %0#1, %0#2, %0#3, %0#4  : index, index, index, index, index
 }
-// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1),
+// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1),
 
 // CHECK-LABEL: func.func @fold_indexing_map_results
 // CHECK-SAME:  %[[ARG_0:.*]]: index, %[[ARG_1:.*]]: index, %[[ARG_2:.*]]: index)
@@ -67,13 +63,13 @@ func.func @fold_indexing_map_results(%d0: index, %d1: index, %s0: index)
 
 // -----
 
-#map0 = #xla_gpu.indexing_map<(d0, d1)[s0] -> (d0 + s0, s0 + 4, d1 mod 2, 1 + d1, s0),
-                                domain: d0 in [-10, 10], d1 in [0, 2], s0 in [-1, 1], is_simplified: false>
+#map0 = #xla_gpu.indexing_map<"(d0, d1)[s0] -> (d0 + s0, s0 + 4, d1 mod 2, 1 + d1, s0),"
+  "domain: d0 in [-10, 10], d1 in [0, 2], s0 in [-1, 1], is_simplified: false">
 func.func @remove_unused_results(%d0: index, %d1: index, %s0: index) -> (index) {
   %0:5 = xla_gpu.apply_indexing #map0 (%d0, %d1)[%s0]
   func.return %0#2 : index
 }
-// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 mod 2),
+// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 mod 2),
 // CHECK-SAME:                     domain: d0 in [0, 2]
 
 // CHECK-LABEL: func.func @remove_unused_results
@@ -84,8 +80,9 @@ func.func @remove_unused_results(%d0: index, %d1: index, %s0: index) -> (index)
 
 // -----
 
-#map0 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0 + d1 + s0 + s1 mod 3),
-                                domain: d0 in [0, 10], d1 in [0, 5], s0 in [-10, 10], s1 in [0, 4], is_simplified: false>
+#map0 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0 + d1 + s0 + s1 mod 3),"
+  "domain: d0 in [0, 10], d1 in [0, 5], s0 in [-10, 10], s1 in [0, 4],"
+  "is_simplified: false">
 func.func @fold_operands(%d0: index) -> index {
   %d1 = arith.constant 1 : index
   %s0 = arith.constant 2 : index
@@ -93,7 +90,7 @@ func.func @fold_operands(%d0: index) -> index {
   %0 = xla_gpu.apply_indexing #map0 (%d0, %d1)[%s0, %s1]
   func.return %0 : index
 }
-// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 + 3),
+// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 + 3),
 // CHECK-SAME:                     domain: d0 in [0, 10]
 
 // CHECK-LABEL: func.func @fold_operands
@@ -104,8 +101,8 @@ func.func @fold_operands(%d0: index) -> index {
 
 func.func @fold_operands_and_results(%arg0: index, %arg1: index)
   -> (index, index) {
-  %0:2 = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0, d1) -> (0, d1),
-    domain: d0 in [0, 4], d1 in [0, 5], is_simplified: false>(%arg0, %arg1)
+  %0:2 = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0, d1) -> (0, d1),"
+    "domain: d0 in [0, 4], d1 in [0, 5], is_simplified: false">(%arg0, %arg1)
   return %0#0, %0#1 : index, index
 }
 
@@ -117,14 +114,15 @@ func.func @fold_operands_and_results(%arg0: index, %arg1: index)
 // -----
 
 func.func @fold_sequence(%arg0: index, %arg1: index) -> index {
-  %0 = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1),
-    domain: d0 in [0, 5], d1 in [0, 4], is_simplified: false>(%arg0, %arg1)
-  %1 = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0) -> (d0 mod 100 + 42),
-    domain: d0 in [0, 10000], is_simplified: false>(%0)
+  %0 = xla_gpu.apply_indexing #xla_gpu.indexing_map<
+    "(d0, d1) -> (d0 + d1), domain: d0 in [0, 5], d1 in [0, 4],"
+    "is_simplified: false">(%arg0, %arg1)
+  %1 = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0) -> (d0 mod 100 + 42),"
+    "domain: d0 in [0, 10000], is_simplified: false">(%0)
   func.return %1 : index
 }
 
-// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1 + 42),
+// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1 + 42),
 // CHECK-SAME:                     domain: d0 in [0, 5], d1 in [0, 4]
 // CHECK-LABEL: func.func @fold_sequence
 // CHECK-SAME:      %[[ARG0:.*]]: index, %[[ARG1:.*]]: index)
@@ -134,14 +132,15 @@ func.func @fold_sequence(%arg0: index, %arg1: index) -> index {
 // -----
 
 func.func @fold_sequence_sym(%arg0: index, %arg1: index) -> index {
-  %0 = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1),
-    domain: d0 in [0, 5], d1 in [0, 4], is_simplified: false>(%arg0, %arg1)
-  %1 = xla_gpu.apply_indexing #xla_gpu.indexing_map<()[s0] -> (s0 mod 100 + 42),
-    domain: s0 in [0, 10000], is_simplified: false>(%0)
+  %0 = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1), "
+    "domain: d0 in [0, 5], d1 in [0, 4], is_simplified: false">(%arg0, %arg1)
+  %1 = xla_gpu.apply_indexing #xla_gpu.indexing_map<
+    "()[s0] -> (s0 mod 100 + 42), domain: s0 in [0, 10000],"
+    "is_simplified: false">(%0)
   func.return %1 : index
 }
 
-// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1 + 42),
+// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1 + 42),
 // CHECK-SAME:                     domain: d0 in [0, 5], d1 in [0, 4]
 // CHECK-LABEL: func.func @fold_sequence_sym
 // CHECK-SAME:      %[[ARG0:.*]]: index, %[[ARG1:.*]]: index)
@@ -150,12 +149,11 @@ func.func @fold_sequence_sym(%arg0: index, %arg1: index) -> index {
 
 // -----
 
-#indexing_map1 = #xla_gpu.indexing_map<(d0, d1) -> (d1 * 2 + d0 + 8512),
-  domain: d0 in [0, 1], d1 in [0, 607], is_simplified: false>
-#indexing_map2 = #xla_gpu.indexing_map<
-  (d0, d1, d2) -> (((d1 floordiv 32 + 1) mod 3) * 64
-                  + (d1 mod 32) * 2 + (d0 floordiv 192) * 192 + d2),
-  domain: d0 in [0, 9407], d1 in [0, 607], d2 in [0, 1], is_simplified: false>
+#indexing_map1 = #xla_gpu.indexing_map<"(d0, d1) -> (d1 * 2 + d0 + 8512),"
+  "domain: d0 in [0, 1], d1 in [0, 607], is_simplified: false">
+#indexing_map2 = #xla_gpu.indexing_map<"(d0, d1, d2) -> ("
+  "((d1 floordiv 32 + 1) mod 3) * 64 + (d1 mod 32) * 2 + (d0 floordiv 192) * 192 + d2),"
+  "domain: d0 in [0, 9407], d1 in [0, 607], d2 in [0, 1], is_simplified: false">
 
 func.func @fold_sequence_no_simplification_needed(%i: index) -> index {
   %thread_id_x = gpu.thread_id  x {xla.range = [0 : index, 607 : index]}
@@ -168,12 +166,12 @@ func.func @fold_sequence_no_simplification_needed(%i: index) -> index {
 
 // -----
 
-#indexing_map1 = #xla_gpu.indexing_map<(d0) -> (3 * d0),
-  domain: d0 in [0, 9407], is_simplified: false>
-#indexing_map2 = #xla_gpu.indexing_map<(d0, d1, d2) -> (d0 floordiv 32 + 1),
-  domain: d0 in [0, 9407], d1 in [0, 607], d2 in [0, 1], is_simplified: false>
-#indexing_map3 = #xla_gpu.indexing_map<(d0, d1, d2) -> (d0 floordiv 32 + 2),
-  domain: d0 in [0, 9407], d1 in [0, 607], d2 in [0, 1], is_simplified: false>
+#indexing_map1 = #xla_gpu.indexing_map<
+  "(d0) -> (3 * d0), domain: d0 in [0, 9407], is_simplified: false">
+#indexing_map2 = #xla_gpu.indexing_map<"(d0, d1, d2) -> (d0 floordiv 32 + 1),"
+  "domain: d0 in [0, 9407], d1 in [0, 607], d2 in [0, 1], is_simplified: false">
+#indexing_map3 = #xla_gpu.indexing_map<"(d0, d1, d2) -> (d0 floordiv 32 + 2),"
+  "domain: d0 in [0, 9407], d1 in [0, 607], d2 in [0, 1], is_simplified: false">
 
 func.func @no_fold_when_producer_has_two_users(%i: index) -> (index, index) {
   %thread_id_x = gpu.thread_id  x {xla.range = [0 : index, 607 : index]}
@@ -187,14 +185,14 @@ func.func @no_fold_when_producer_has_two_users(%i: index) -> (index, index) {
 // -----
 
 func.func @fold_sequence_shared_operands(%arg0: index, %arg1: index) -> index {
-  %0 = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1),
-    domain: d0 in [0, 5], d1 in [0, 4], is_simplified: false>(%arg0, %arg1)
-  %1 = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1),
-    domain: d0 in [0, 4], d1 in [0, 10000], is_simplified: false>(%arg1, %0)
+  %0 = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1),"
+    "domain: d0 in [0, 5], d1 in [0, 4], is_simplified: false">(%arg0, %arg1)
+  %1 = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1),"
+    "domain: d0 in [0, 4], d1 in [0, 10000], is_simplified: false">(%arg1, %0)
   func.return %1 : index
 }
 
-// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1) -> (d0 * 2 + d1),
+// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 2 + d1),
 // CHECK-SAME:                     domain: d0 in [0, 4], d1 in [0, 5]
 // CHECK-LABEL: func.func @fold_sequence_shared_operands
 // CHECK-SAME:      %[[ARG0:.*]]: index, %[[ARG1:.*]]: index)
@@ -235,15 +233,15 @@ func.func @atomic_rmw_cst(%in: tensor<2x3xf32>, %i: index, %j: index)
 
 // -----
 
-#map0 = #xla_gpu.indexing_map<(d0)[s0] -> (2 * d0 * s0),
-                              domain: d0 in [0, 3], s0 in [0, 2], is_simplified: false>
+#map0 = #xla_gpu.indexing_map<"(d0)[s0] -> (2 * d0 * s0),"
+  "domain: d0 in [0, 3], s0 in [0, 2], is_simplified: false">
 func.func @apply_indexing_move_syms_to_dims(%dim0: index, %sym0: index)
     -> index {
   %0 = xla_gpu.apply_indexing #map0(%dim0)[%sym0]
   func.return %0 : index
 }
 
-// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1) -> ((d0 * d1) * 2),
+// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> ((d0 * d1) * 2),
 // CHECK-SAME:                     domain: d0 in [0, 3], d1 in [0, 2]
 // CHECK-LABEL: func.func @apply_indexing_move_syms_to_dims
 // CHECK-NEXT:  xla_gpu.apply_indexing #[[$MAP]]
@@ -251,8 +249,10 @@ func.func @apply_indexing_move_syms_to_dims(%dim0: index, %sym0: index)
 
 // // -----
 
-#map0 = #xla_gpu.indexing_map<(d0) -> (4 * d0), domain: d0 in [0, 3], is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 12], s0 in [0, 1024], s1 in [0, 32], is_simplified: false>
+#map0 = #xla_gpu.indexing_map<"(d0) -> (4 * d0), domain: d0 in [0, 3],"
+  "is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1),"
+  "domain: d0 in [0, 12], s0 in [0, 1024], s1 in [0, 32], is_simplified: false">
 func.func @loop_of_apply_indexing(%input: tensor<1024x32xf32>, %init: f32, %dim: index) -> (f32) {
   %idx = xla_gpu.apply_indexing #map0(%dim)
   %sum = xla_gpu.loop (%idx)[%i, %j] -> (%r0, %r1) in #map1 iter_args(%sum_ = %init) -> (f32) {
@@ -263,7 +263,7 @@ func.func @loop_of_apply_indexing(%input: tensor<1024x32xf32>, %init: f32, %dim:
   func.return %sum : f32
 }
 
-// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 * 4 + s0, s1),
+// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 * 4 + s0, s1),
 // CHECK-SAME:                     domain: d0 in [0, 3], s0 in [0, 1024], s1 in [0, 32]
 // CHECK-LABEL: func.func @loop_of_apply_indexing
 // CHECK-SAME:      %[[ARG0:.*]]: tensor<1024x32xf32>, %[[ARG1:.*]]: f32, %[[ARG2:.*]]: index)
@@ -272,8 +272,10 @@ func.func @loop_of_apply_indexing(%input: tensor<1024x32xf32>, %init: f32, %dim:
 
 // -----
 
-#map0 = #xla_gpu.indexing_map<(d0)[s0] -> (2 * d0 * s0), domain: d0 in [0, 3], s0 in [0, 2], is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0 + s1), domain: d0 in [0, 12], s0 in [0, 1024], s1 in [0, 32], is_simplified: false>
+#map0 = #xla_gpu.indexing_map<"(d0)[s0] -> (2 * d0 * s0),"
+  "domain: d0 in [0, 3], s0 in [0, 2], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0 + s1),"
+  "domain: d0 in [0, 12], s0 in [0, 1024], s1 in [0, 32], is_simplified: false">
 func.func @loop_of_apply_indexing_with_syms(%dim0: index, %sym0: index, %input: tensor<1024x32xf32>, %init: f32) -> (f32) {
   %0 = xla_gpu.apply_indexing #map0(%dim0)[%sym0]
   %sum = xla_gpu.loop (%0)[%i, %j] -> (%r0) in #map1 iter_args(%sum_ = %init) -> (f32) {
@@ -284,7 +286,7 @@ func.func @loop_of_apply_indexing_with_syms(%dim0: index, %sym0: index, %input:
   func.return %sum : f32
 }
 
-// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> ((d0 * d1) * 2 + s0 + s1),
+// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> ((d0 * d1) * 2 + s0 + s1),
 // CHECK-SAME:                     domain: d0 in [0, 3], d1 in [0, 2], s0 in [0, 1024], s1 in [0, 32]
 // CHECK-LABEL: func.func @loop_of_apply_indexing_with_syms
 // CHECK-SAME:      %[[ARG0:.*]]: index, %[[ARG1:.*]]: index
diff --git a/xla/service/gpu/fusions/ir/tests/invalid.mlir b/xla/service/gpu/fusions/ir/tests/invalid.mlir
index 922b3f3bbfff0..3c50b5afcd806 100644
--- a/xla/service/gpu/fusions/ir/tests/invalid.mlir
+++ b/xla/service/gpu/fusions/ir/tests/invalid.mlir
@@ -1,13 +1,6 @@
 // RUN: mlir_fusions_opt  %s -split-input-file -verify-diagnostics
 
-#map0 = #xla_gpu.indexing_map<
- (d0, d1)[s0] -> (d0, d1 + s0),
- domain:
- d0 in [1, 2],
- d1 in [5, 8],
- s0 in [0, 32],
- is_simplified: false
->
+#map0 = #xla_gpu.indexing_map<"(d0, d1)[s0] -> (d0, d1 + s0), domain: d0 in [1, 2], d1 in [5, 8], s0 in [0, 32], is_simplified: false">
 func.func @apply_indexing(%d0: index, %d1: index, %s0: index) -> (index, index) {
   // expected-error @+1 {{operand count must match the number of dimensions and symbols in the affine map}}
   %0:2 = xla_gpu.apply_indexing #map0 (%d0)
@@ -16,16 +9,7 @@ func.func @apply_indexing(%d0: index, %d1: index, %s0: index) -> (index, index)
 
 // -----
 
-#map0 = #xla_gpu.indexing_map<
- (d0, d1)[s0] -> (d0, d1 + s0),
- domain:
- d0 in [1, 2],
- d1 in [5, 8],
- s0 in [0, 32],
- d0 mod 2 in [0, 1],
- d0 + s0 in [1, 10],
- is_simplified: false
->
+#map0 = #xla_gpu.indexing_map<"(d0, d1)[s0] -> (d0, d1 + s0), domain: d0 in [1, 2], d1 in [5, 8], s0 in [0, 32], d0 mod 2 in [0, 1], d0 + s0 in [1, 10], is_simplified: false">
 func.func @cannot_have_constraints(%d0: index, %d1: index, %s0: index) -> (index, index) {
   // expected-error @+1 {{apply indexing op cannot have any constraints}}
   %0:2 = xla_gpu.apply_indexing #map0 (%d0, %d1)[%s0]
@@ -34,7 +18,7 @@ func.func @cannot_have_constraints(%d0: index, %d1: index, %s0: index) -> (index
 
 // -----
 
-#map = #xla_gpu.indexing_map<()[s0, s1] -> (s0, s1), domain: s0 in [0, 1024], s1 in [0, 32], is_simplified: false>
+#map = #xla_gpu.indexing_map<"()[s0, s1] -> (s0, s1), domain: s0 in [0, 1024], s1 in [0, 32], is_simplified: false">
 func.func @loop_result_num_mismatch(%input: tensor<1024x32xf32>,
     %init: f32) -> (f32) {
   // expected-error @+1 {{mismatch in number of loop-carried values and results}}
@@ -52,7 +36,7 @@ func.func @loop_result_num_mismatch(%input: tensor<1024x32xf32>,
 
 // -----
 
-#map = #xla_gpu.indexing_map<()[s0] -> (s0, s0), domain: s0 in [0, 1024], is_simplified: false>
+#map = #xla_gpu.indexing_map<"()[s0] -> (s0, s0), domain: s0 in [0, 1024], is_simplified: false">
 func.func @loop_iv_num_mismatch(%input: tensor<1024x32xf32>,
     %init: f32) -> (f32) {
   // expected-error @+1 {{mismatch in number of induction variables 2 and RangeVars}}
@@ -70,8 +54,7 @@ func.func @loop_iv_num_mismatch(%input: tensor<1024x32xf32>,
 
 // -----
 
-#map = #xla_gpu.indexing_map<()[s0, s1] -> (s0, s1),
-  domain: s0 in [0, 1024], s1 in [0, 32], is_simplified: false>
+#map = #xla_gpu.indexing_map<"()[s0, s1] -> (s0, s1), domain: s0 in [0, 1024], s1 in [0, 32], is_simplified: false">
 
 func.func @loop_types_mismatch(%input: tensor<1024x32xf32>, %init: f32) -> (i32) {
   // expected-error @+1 {{block iter arg type = 'f32', result type = 'i32' and init operand type = 'f32' should match}}
@@ -89,8 +72,7 @@ func.func @loop_types_mismatch(%input: tensor<1024x32xf32>, %init: f32) -> (i32)
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (s0, s1),
-  domain: d0 in [0, 3], s0 in [0, 1024], s1 in [0, 32], is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (s0, s1), domain: d0 in [0, 3], s0 in [0, 1024], s1 in [0, 32], is_simplified: false">
 
 func.func @loop_op(%input: tensor<1024x32xf32>, %init: f32, %dim: index) -> (f32) {
   // expected-error @+1 {{mismatch in number of dims operands 0 and DimVars in the indexing map}}
@@ -105,9 +87,7 @@ func.func @loop_op(%input: tensor<1024x32xf32>, %init: f32, %dim: index) -> (f32
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1024], s1 in [0, 32],
-  is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1024], s1 in [0, 32], is_simplified: false">
 
 func.func @indicies_mismatch(%input: tensor<32x64xf32>, %thread_id: index,
     %output: tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x64xf32, #map> {
@@ -119,10 +99,8 @@ func.func @indicies_mismatch(%input: tensor<32x64xf32>, %thread_id: index,
 
 // -----
 
-#map = #xla_gpu.indexing_map<()[s0, s1] -> (s0, s1),
-  domain: s0 in [0, 1024], s1 in [0, 32], is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false>
+#map = #xla_gpu.indexing_map<"()[s0, s1] -> (s0, s1), domain: s0 in [0, 1024], s1 in [0, 32], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false">
 
 func.func @no_thread_id_in(%input: tensor<32x64xf32>,
     %output: tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x64xf32, #map1> {
@@ -134,10 +112,8 @@ func.func @no_thread_id_in(%input: tensor<32x64xf32>,
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false>
-#map1 = #xla_gpu.indexing_map<()[s0, s1] -> (s0, s1),
-  domain: s0 in [0, 1024], s1 in [0, 32], is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"()[s0, s1] -> (s0, s1), domain: s0 in [0, 1024], s1 in [0, 32], is_simplified: false">
 
 func.func @no_thread_id_out(%input: tensor<32x64xf32>, %thread_id: index,
     %output: tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x64xf32, #map1> {
@@ -149,10 +125,8 @@ func.func @no_thread_id_out(%input: tensor<32x64xf32>, %thread_id: index,
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 64], s0 in [0, 1024], s1 in [0, 32], is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 64], s0 in [0, 1024], s1 in [0, 32], is_simplified: false">
 func.func @thread_id_bounds_mismatch(%input: tensor<32x64xf32>, %thread_id: index, %output: tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x64xf32, #map1> {
   // expected-error @+1 {{thread_id dimension must have the same bounds in both indexing maps}}
   %0 = xla_gpu.materialize @exp(%input) at #map(%thread_id) : (tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x64xf32, #map1>
@@ -161,11 +135,8 @@ func.func @thread_id_bounds_mismatch(%input: tensor<32x64xf32>, %thread_id: inde
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], d0 + s0 in [0, 1024],
-  is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], d0 + s0 in [0, 1024], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false">
 
 func.func @thread_id_constraints_mismatch(%input: tensor<32x64xf32>,
     %thread_id: index, %output: tensor<32x64xf32>)
@@ -178,10 +149,8 @@ func.func @thread_id_constraints_mismatch(%input: tensor<32x64xf32>,
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0)[s0] -> (d0 + s0, s0),
-  domain: d0 in [0, 32], s0 in [0, 1024], is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0)[s0] -> (d0 + s0, s0), domain: d0 in [0, 32], s0 in [0, 1024], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false">
 func.func @symbol_count_mismatch(%input: tensor<32x64xf32>, %thread_id: index, %output: tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x64xf32, #map1> {
   // expected-error @+1 {{number of symbols in both indexing_maps must match}}
   %0 = xla_gpu.materialize @exp(%input) at #map(%thread_id) : (tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x64xf32, #map1>
@@ -190,10 +159,8 @@ func.func @symbol_count_mismatch(%input: tensor<32x64xf32>, %thread_id: index, %
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 32], is_simplified: false">
 func.func @symbol_domain_mismatch(%input: tensor<32x64xf32>, %thread_id: index, %output: tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x64xf32, #map1> {
   // expected-error @+1 {{domain of symbols of indexing_maps must match}}
   %0 = xla_gpu.materialize @exp(%input) at #map(%thread_id) : (tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x64xf32, #map1>
@@ -202,12 +169,8 @@ func.func @symbol_domain_mismatch(%input: tensor<32x64xf32>, %thread_id: index,
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 + s1 in [0, 1024],
-  is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 + s1 in [0, 32],
-  is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 + s1 in [0, 1024], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 + s1 in [0, 32], is_simplified: false">
 func.func @symbol_constraints_mismatch(%input: tensor<32x64xf32>,
     %thread_id: index, %output: tensor<32x64xf32>)
     -> !xla_gpu.indexed_vector<32x64xf32, #map1> {
@@ -219,12 +182,8 @@ func.func @symbol_constraints_mismatch(%input: tensor<32x64xf32>,
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 mod 2 in [0, 0],
-  is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 + s1 in [0, 32],
-  is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 mod 2 in [0, 0], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 + s1 in [0, 32], is_simplified: false">
 
 func.func @symbol_constraint_mismatch(%input: tensor<32x64xf32>,
   %thread_id: index, %output: tensor<32x64xf32>)
@@ -236,12 +195,8 @@ func.func @symbol_constraint_mismatch(%input: tensor<32x64xf32>,
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 + s1 in [0, 1024],
-    is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 + s1 in [0, 32],
-    is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 + s1 in [0, 1024], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], s0 + s1 in [0, 32], is_simplified: false">
 
 func.func @symbol_constraint_interval_mismatch(%input: tensor<32x64xf32>,
     %thread_id: index, %output: tensor<32x64xf32>)
@@ -254,12 +209,8 @@ func.func @symbol_constraint_interval_mismatch(%input: tensor<32x64xf32>,
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64],
-  is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0 + s0, d1 + s1),
-  domain: d0 in [0, 32], d1 in [0, 64], s0 in [0, 1024], s1 in [0, 64],
-  is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0 + s0, d1 + s1), domain: d0 in [0, 32], d1 in [0, 64], s0 in [0, 1024], s1 in [0, 64], is_simplified: false">
 func.func @vector_mapping_depends_on_block_id(%input: tensor<32x64xf32>,
     %thread_id: index, %output: tensor<32x64xf32>)
     -> !xla_gpu.indexed_vector<32x64xf32, #map1> {
@@ -271,13 +222,8 @@ func.func @vector_mapping_depends_on_block_id(%input: tensor<32x64xf32>,
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], d1 in [0, 64], s0 in [0, 1024], s1 in [0, 64],
-          d1 mod 2 in [0, 0],
-  is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64],
-  is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], d1 in [0, 64], s0 in [0, 1024], s1 in [0, 64], d1 mod 2 in [0, 0], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], is_simplified: false">
 
 func.func @block_id_constraints_mismatch(%input: tensor<32x64xf32>,
     %thread_id: index, %block_id: index, %output: tensor<32x64xf32>)
@@ -290,13 +236,8 @@ func.func @block_id_constraints_mismatch(%input: tensor<32x64xf32>,
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64],
-  is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], d1 in [0, 64], s0 in [0, 1024], s1 in [0, 64],
-          d1 mod 2 in [0, 0],
-  is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], s0 in [0, 1024], s1 in [0, 64], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], d1 in [0, 64], s0 in [0, 1024], s1 in [0, 64], d1 mod 2 in [0, 0], is_simplified: false">
 
 func.func @block_id_constraints_mismatch(%input: tensor<32x64xf32>,
     %thread_id: index, %block_id: index, %output: tensor<32x64xf32>)
@@ -309,14 +250,8 @@ func.func @block_id_constraints_mismatch(%input: tensor<32x64xf32>,
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], d1 in [0, 64], s0 in [0, 1024], s1 in [0, 64],
-    d1 mod 2 in [0, 0],
-  is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0 + s0, s1),
-  domain: d0 in [0, 32], d1 in [0, 64], s0 in [0, 1024], s1 in [0, 64],
-    d1 mod 4 in [0, 0],
-  is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], d1 in [0, 64], s0 in [0, 1024], s1 in [0, 64], d1 mod 2 in [0, 0], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0 + s0, s1), domain: d0 in [0, 32], d1 in [0, 64], s0 in [0, 1024], s1 in [0, 64], d1 mod 4 in [0, 0], is_simplified: false">
 
 func.func @block_id_constraints_mismatch(%input: tensor<32x64xf32>,
     %thread_id: index, %block_id: index, %output: tensor<32x64xf32>)
@@ -329,12 +264,8 @@ func.func @block_id_constraints_mismatch(%input: tensor<32x64xf32>,
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1),
-  domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 1], s1 in [0, 1],
-  is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0, d1)[s0] -> (d0 mod 16 + s0, d1),
-  domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1],
-  is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1), domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 1], s1 in [0, 1], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0, d1)[s0] -> (d0 mod 16 + s0, d1), domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1], is_simplified: false">
 
 func.func @insert(%input: !xla_gpu.indexed_vector<32x64xf32, #map>,
     %i: index, %j: index, %output: tensor<32x64xf32>) -> tensor<32x64xf32> {
@@ -346,12 +277,8 @@ func.func @insert(%input: !xla_gpu.indexed_vector<32x64xf32, #map>,
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1),
-  domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 1], s1 in [0, 1],
-  is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0, d1, d2) -> (d0 mod 16, d1, d2),
-  domain: d0 in [0, 32], d1 in [0, 2], d2 in [0, 5],
-  is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1), domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 1], s1 in [0, 1], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0, d1, d2) -> (d0 mod 16, d1, d2), domain: d0 in [0, 32], d1 in [0, 2], d2 in [0, 5], is_simplified: false">
 
 func.func @insert(%input: !xla_gpu.indexed_vector<32x64xf32, #map>,
     %i: index, %j: index, %output: tensor<32x64xf32>) -> tensor<32x64xf32> {
diff --git a/xla/service/gpu/fusions/ir/tests/ops.mlir b/xla/service/gpu/fusions/ir/tests/ops.mlir
index 572202bf148ce..81e08968db759 100644
--- a/xla/service/gpu/fusions/ir/tests/ops.mlir
+++ b/xla/service/gpu/fusions/ir/tests/ops.mlir
@@ -56,19 +56,13 @@ func.func @caller(%a: f32, %b: f32) -> f32 {
 
 // -----
 
-#map0 = #xla_gpu.indexing_map<
-(d0, d1)[s0] -> (d0, d1 + s0),
- domain:
- d0 in [1, 2],
- d1 in [5, 8],
- s0 in [0, 32],
- is_simplified: false
->
+#map0 = #xla_gpu.indexing_map<"(d0, d1)[s0] -> (d0, d1 + s0),"
+  "domain: d0 in [1, 2], d1 in [5, 8], s0 in [0, 32], is_simplified: false">
 func.func @apply_indexing(%d0: index, %d1: index, %s0: index) -> (index, index) {
   %0:2 = xla_gpu.apply_indexing #map0 (%d0, %d1)[%s0]
   func.return %0#0, %0#1 : index, index
 }
-// CHECK: #[[$MAP0:.*]] = #xla_gpu.indexing_map<
+// CHECK: #[[$MAP0:.*]] = #xla_gpu.indexing_map<"
 // CHECK-SAME:              (d0, d1)[s0] -> (d0, d1 + s0)
 // CHECK-SAME:              domain:
 // CHECK-SAME:              d0 in [1, 2]
@@ -83,18 +77,13 @@ func.func @apply_indexing(%d0: index, %d1: index, %s0: index) -> (index, index)
 
 // -----
 
-#map0 = #xla_gpu.indexing_map<
-(d0, d1) -> (d0, d1),
- domain:
- d0 in [0, 2],
- d1 in [1, 3],
- is_simplified: false
->
+#map0 = #xla_gpu.indexing_map<"(d0, d1) -> (d0, d1),"
+  "domain: d0 in [0, 2], d1 in [1, 3], is_simplified: false">
 func.func @apply_indexing_no_symbols(%d0: index, %d1: index) -> (index, index) {
   %0:2 = xla_gpu.apply_indexing #map0 (%d0, %d1)
   func.return %0#0, %0#1 : index, index
 }
-// CHECK: #[[$MAP0:.*]] = #xla_gpu.indexing_map<
+// CHECK: #[[$MAP0:.*]] = #xla_gpu.indexing_map<"
 // CHECK-SAME:              (d0, d1) -> (d0, d1)
 // CHECK-SAME:              domain:
 // CHECK-SAME:              d0 in [0, 2]
@@ -108,17 +97,13 @@ func.func @apply_indexing_no_symbols(%d0: index, %d1: index) -> (index, index) {
 
 // -----
 
-#map0 = #xla_gpu.indexing_map<
- ()[s0] -> (s0, s0),
- domain:
- s0 in [2, 4],
- is_simplified: false
->
+#map0 = #xla_gpu.indexing_map<"()[s0] -> (s0, s0),"
+  "domain: s0 in [2, 4], is_simplified: false">
 func.func @apply_indexing_no_dims(%s0: index) -> (index, index) {
   %0:2 = xla_gpu.apply_indexing #map0 [%s0]
   func.return %0#0, %0#1 : index, index
 }
-// CHECK: #[[$MAP0:.*]] = #xla_gpu.indexing_map<
+// CHECK: #[[$MAP0:.*]] = #xla_gpu.indexing_map<"
 // CHECK-SAME:              ()[s0] -> (s0, s0)
 // CHECK-SAME:              domain:
 // CHECK-SAME:              s0 in [2, 4]
@@ -130,8 +115,8 @@ func.func @apply_indexing_no_dims(%s0: index) -> (index, index) {
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (s0, s1),
-         domain: d0 in [0, 3], s0 in [0, 1024], s1 in [0, 32], is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (s0, s1), "
+  "domain: d0 in [0, 3], s0 in [0, 1024], s1 in [0, 32], is_simplified: false">
 func.func @loop_op(%input: tensor<1024x32xf32>, %init: f32,
                    %dim: index) -> (f32) {
   %sum = xla_gpu.loop (%dim)[%i, %j] -> (%r0, %r1)
@@ -155,15 +140,12 @@ func.func @loop_op(%input: tensor<1024x32xf32>, %init: f32,
 
 func.func private @exp(%p0: tensor<32x64xf32>, %i: index, %j: index) -> f32
 
-#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0 + s0, d1 + s1),
-  domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1024], s1 in [0, 32],
-  is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (s0, s1),
-  domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1024], s1 in [0, 32],
-  is_simplified: false>
-#map2 = #xla_gpu.indexing_map<(d0, d1) -> (d0, d1),
-  domain: d0 in [0, 32], d1 in [0, 2],
-  is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0 + s0, d1 + s1),"
+  "domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1024], s1 in [0, 32], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (s0, s1),"
+  "domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1024], s1 in [0, 32], is_simplified: false">
+#map2 = #xla_gpu.indexing_map<"(d0, d1) -> (d0, d1),"
+  "domain: d0 in [0, 32], d1 in [0, 2], is_simplified: false">
 
 func.func @materialize_and_insert(%input: tensor<32x64xf32>, %i: index,
     %j: index, %output: tensor<32x64xf32>) -> tensor<32x64xf32> {
@@ -174,11 +156,11 @@ func.func @materialize_and_insert(%input: tensor<32x64xf32>, %i: index,
   func.return %1 : tensor<32x64xf32>
 }
 
-// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0 + s0, d1 + s1)
+// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0 + s0, d1 + s1)
 // CHECK-SAME: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1024], s1 in [0, 32]
-// CHECK: #[[$MAP1:.*]] = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (s0, s1)
+// CHECK: #[[$MAP1:.*]] = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (s0, s1)
 // CHECK-SAME: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1024], s1 in [0, 32]
-// CHECK: #[[$MAP2:.*]] = #xla_gpu.indexing_map<(d0, d1) -> (d0, d1)
+// CHECK: #[[$MAP2:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> (d0, d1)
 // CHECK-SAME: d0 in [0, 32], d1 in [0, 2],
 // CHECK-LABEL: @materialize_and_insert
 // CHECK: %[[MATERIALIZED:.*]] = xla_gpu.materialize @exp(%{{.*}}) at
@@ -233,13 +215,14 @@ func.func @reduce_middle_dim(%in: tensor<16x8x4xf32>, %init: f32)
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0, d1) -> (d0 * 64 + d1), domain: d0 in [0, 15], d1 in [0, 63], is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 64 + d1),"
+  "domain: d0 in [0, 15], d1 in [0, 63], is_simplified: false">
 func.func @reindex(%in0: tensor<1024xf32>) -> tensor<16x64xf32> {
   %0 = xla_gpu.reindex %in0 at #map : tensor<1024xf32> -> tensor<16x64xf32>
   func.return %0 : tensor<16x64xf32>
 }
 
-// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1) -> (d0 * 64 + d1)
+// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 64 + d1)
 // CHECK-LABEL: func.func @reindex(
 // CHECK-SAME:    %[[IN1:.*]]: tensor<1024xf32>
 // CHECK:         xla_gpu.reindex %[[IN1]] at #[[$MAP]] :
@@ -247,7 +230,8 @@ func.func @reindex(%in0: tensor<1024xf32>) -> tensor<16x64xf32> {
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0, d1) -> (d0 * 64 + d1), domain: d0 in [0, 15], d1 in [0, 63], is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 64 + d1),"
+  "domain: d0 in [0, 15], d1 in [0, 63], is_simplified: false">
 func.func @reindex_pad(%in0: tensor<1022xf32>) -> tensor<16x64xf32> {
   %c0 = arith.constant 0.0 : f32
   %0 = xla_gpu.reindex %in0 at #map default %c0
@@ -255,7 +239,7 @@ func.func @reindex_pad(%in0: tensor<1022xf32>) -> tensor<16x64xf32> {
   func.return %0 : tensor<16x64xf32>
 }
 
-// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1) -> (d0 * 64 + d1)
+// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 64 + d1)
 // CHECK-LABEL: func.func @reindex_pad(
 // CHECK-SAME:    %[[IN1:.*]]: tensor<1022xf32>
 // CHECK:         %[[C0:.*]] = arith.constant 0.00
@@ -278,4 +262,4 @@ func.func @shuffler(%a: f32, %b: i32) -> (f32, i32) {
 
 // CHECK:        xla_gpu.shuffle_reduce(%[[IN1]], %[[IN2]]) to 4
 // CHECK-SAME:    combiner=@do_nothing {xla.range = [0 : index, 42 : index]}
-// CHECK-SAME:    : f32, i32
\ No newline at end of file
+// CHECK-SAME:    : f32, i32
diff --git a/xla/service/gpu/fusions/ir/xla_gpu_attrs.cc b/xla/service/gpu/fusions/ir/xla_gpu_attrs.cc
index cb9ba368702c9..577ec1262970c 100644
--- a/xla/service/gpu/fusions/ir/xla_gpu_attrs.cc
+++ b/xla/service/gpu/fusions/ir/xla_gpu_attrs.cc
@@ -14,10 +14,10 @@ limitations under the License.
 ==============================================================================*/
 
 #include <cstdint>
+#include <optional>
 #include <string>
 #include <utility>
 
-#include "absl/strings/str_format.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/TypeSwitch.h"  // IWYU pragma: keep
 #include "llvm/Support/LogicalResult.h"
@@ -30,6 +30,7 @@ limitations under the License.
 #include "mlir/Support/LLVM.h"
 #include "xla/service/gpu/fusions/ir/xla_gpu_ops.h"
 #include "xla/service/gpu/model/indexing_map.h"
+#include "xla/service/gpu/model/indexing_map_serialization.h"
 
 namespace xla {
 namespace gpu {
@@ -43,144 +44,36 @@ using mlir::AsmPrinter;
 using mlir::failure;
 using mlir::success;
 
-constexpr llvm::StringRef kIsSimplifiedKeyword = "is_simplified";
-
-ParseResult ParseInterval(AsmParser& parser, Interval& interval) {
-  // ParseResult converts to `true` if parsing failed.
-  return failure(parser.parseLSquare() || parser.parseInteger(interval.lower) ||
-                 parser.parseComma() || parser.parseInteger(interval.upper) ||
-                 parser.parseRSquare());
-}
-
-ParseResult parseBool(AsmParser& parser, bool* result) {
-  if (succeeded(parser.parseOptionalKeyword("true"))) {
-    *result = true;
-    return success();
+// Parses a chain of string attributes into an indexing map.
+// Example:
+// "()[s0, s1] -> (1 + s0 + s1 mod 3 - s1, s0 mod 2),"
+//   " domain: s0 in [-10, 10], s1 in [0, 2],"
+//   " is_simplified: false"
+// will be parsed as 3 StringAttrs, concatenated into a single string, and then
+// parsed into an IndexingMap.
+std::optional<IndexingMap> parseChainOfStringsAsIndexingMap(
+    mlir::AsmParser& parser) {
+  mlir::StringAttr indexing_map_attr;
+  std::string indexing_map_str;
+  while (parser.parseOptionalAttribute(indexing_map_attr).has_value()) {
+    indexing_map_str.append(indexing_map_attr.getValue());
   }
-  if (succeeded(parser.parseOptionalKeyword("false"))) {
-    *result = false;
-    return success();
-  }
-  return failure();
-}
-
-void PrintDimVars(AsmPrinter& p, ArrayRef<DimVar> dim_vars) {
-  for (const auto [index, dim_var] : llvm::enumerate(dim_vars)) {
-    p << "d" << index << " in " << dim_var.bounds << ", ";
-  }
-}
-
-ParseResult ParseDimVars(AsmParser& parser, ArrayRef<std::string> dim_names,
-                         SmallVector<DimVar>& dim_vars) {
-  dim_vars.reserve(dim_names.size());
-  for (const auto& [index, dim_name] : llvm::enumerate(dim_names)) {
-    if (parser.parseKeyword(dim_name) || parser.parseKeyword("in") ||
-        ParseInterval(parser, dim_vars.emplace_back().bounds) ||
-        parser.parseComma()) {
-      return failure();
-    }
-  }
-  return success();
-}
-
-void PrintRangeVars(AsmPrinter& p, ArrayRef<RangeVar> range_vars) {
-  for (const auto [index, range_var] : llvm::enumerate(range_vars)) {
-    p << "s" << index << " in " << range_var.range << ", ";
-  }
-}
-
-ParseResult ParseRangeVars(AsmParser& parser,
-                           ArrayRef<std::string> range_symbol_names,
-                           SmallVector<RangeVar>& range_vars) {
-  range_vars.reserve(range_symbol_names.size());
-  for (const auto& [index, range_symbol_name] :
-       llvm::enumerate(range_symbol_names)) {
-    if (parser.parseKeyword(range_symbol_name) || parser.parseKeyword("in") ||
-        ParseInterval(parser, range_vars.emplace_back().range) ||
-        parser.parseComma()) {
-      return failure();
-    }
-  }
-  return success();
-}
-
-void PrintConstraints(AsmPrinter& p,
-                      ArrayRef<std::pair<AffineExpr, Interval>> constraints) {
-  for (const auto& [expr, interval] : constraints) {
-    p << expr << " in " << interval << ", ";
-  }
-}
-
-mlir::Attribute parseIndexingMapImpl(mlir::AsmParser& parser) {
-  mlir::AffineMap map;
-  if (parser.parseAffineMap(map)) {
-    return {};
-  }
-
-  // Store real strings to back up StringRef throughout ParseConstraints.
-  SmallVector<std::string> dim_strings(map.getNumDims());
-  SmallVector<std::string> symbol_strings(map.getNumSymbols());
-  SmallVector<std::pair<llvm::StringRef, AffineExpr>> symbolSet;
-  symbolSet.reserve(map.getNumDims() + map.getNumSymbols());
-  for (int i = 0; i < map.getNumDims(); ++i) {
-    dim_strings[i] = absl::StrFormat("d%d", i);
-    symbolSet.push_back(
-        {dim_strings[i], mlir::getAffineDimExpr(i, parser.getContext())});
-  }
-  for (int i = 0; i < map.getNumSymbols(); ++i) {
-    symbol_strings[i] = absl::StrFormat("s%d", i);
-    symbolSet.push_back(
-        {symbol_strings[i], mlir::getAffineSymbolExpr(i, parser.getContext())});
-  }
-  if (map.getNumDims() + map.getNumSymbols() == 0) {
-    if (parser.parseGreater()) return {};
-    return IndexingMapAttr::get(parser.getContext(), map, /*dim_vars=*/{},
-                                /*range_vars=*/{},
-                                /*constraints=*/{}, /*is_simplified=*/true);
-  }
-  if (parser.parseComma() || parser.parseKeyword("domain") ||
-      parser.parseColon()) {
-    return {};
-  }
-
-  SmallVector<DimVar> dim_vars;
-  if (ParseDimVars(parser, dim_strings, dim_vars)) {
-    return {};
-  }
-  SmallVector<RangeVar> range_vars;
-  if (ParseRangeVars(parser, symbol_strings, range_vars)) {
-    return {};
-  }
-
-  SmallVector<std::pair<AffineExpr, Interval>> constraints;
-  while (failed(parser.parseOptionalKeyword(kIsSimplifiedKeyword))) {
-    auto& constraint = constraints.emplace_back();
-    if (parser.parseAffineExpr(symbolSet, constraint.first) ||
-        parser.parseKeyword("in") || ParseInterval(parser, constraint.second) ||
-        parser.parseComma()) {
-      return {};
-    }
-    constraints.push_back(constraint);
-  }
-
-  bool is_simplified = false;
-  if (parser.parseColon() || parseBool(parser, &is_simplified) ||
-      parser.parseGreater()) {
-    return {};
-  }
-  return IndexingMapAttr::get(parser.getContext(), map, dim_vars, range_vars,
-                              constraints, is_simplified);
+  return ParseIndexingMap(indexing_map_str, parser.getContext());
 }
 
 mlir::Attribute IndexingMapAttr::parse(mlir::AsmParser& parser, mlir::Type) {
   if (parser.parseLess()) {
     return {};
   }
-  return parseIndexingMapImpl(parser);
+  auto indexing_map = parseChainOfStringsAsIndexingMap(parser);
+  if (!indexing_map.has_value() || parser.parseGreater()) {
+    return {};
+  }
+  return IndexingMapAttr::get(parser.getContext(), *indexing_map);
 }
 
 void IndexingMapAttr::print(mlir::AsmPrinter& printer) const {
-  printer << "<" << getIndexingMap().ToString() << ">";
+  printer << "<\"" << getIndexingMap().ToString() << "\">";
 }
 
 IndexingMapAttr IndexingMapAttr::get(mlir::MLIRContext* context,
@@ -230,18 +123,19 @@ mlir::Attribute LayoutAttr::parse(mlir::AsmParser& parser, mlir::Type) {
   if (!memspace.has_value()) {
     return {};
   }
-  auto thread_map = mlir::cast<IndexingMapAttr>(parseIndexingMapImpl(parser));
-  if (!thread_map) {
+  std::optional<IndexingMap> indexing_map =
+      parseChainOfStringsAsIndexingMap(parser);
+  if (!indexing_map.has_value() || parser.parseGreater()) {
     return {};
   }
-  mlir::MLIRContext* context = parser.getContext();
-  auto memory_space_attr = MemorySpaceAttr::get(context, *memspace);
-  return LayoutAttr::get(context, memory_space_attr, thread_map);
+  auto* context = parser.getContext();
+  return LayoutAttr::get(context, MemorySpaceAttr::get(context, *memspace),
+                         IndexingMapAttr::get(context, *indexing_map));
 }
 
 void LayoutAttr::print(mlir::AsmPrinter& printer) const {
   printer << "<\"" << stringifyMemorySpace(getMemorySpace().getValue())
-          << "\", " << getThreadMap().getIndexingMap().ToString() << '>';
+          << "\", \"" << getThreadMap().getIndexingMap().ToString() << "\">";
 }
 
 }  // namespace gpu
diff --git a/xla/service/gpu/fusions/mlir/elemental_hlo_to_mlir_test.cc b/xla/service/gpu/fusions/mlir/elemental_hlo_to_mlir_test.cc
index e683e199ed03c..5c87db0045dac 100644
--- a/xla/service/gpu/fusions/mlir/elemental_hlo_to_mlir_test.cc
+++ b/xla/service/gpu/fusions/mlir/elemental_hlo_to_mlir_test.cc
@@ -234,10 +234,10 @@ TEST_F(ElementalHloToMlirTest, ReduceWindow) {
     // CHECK:      %[[INIT:.*]] = tensor.extract %[[ARG1]][]
     // CHECK:      %[[RET:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[C7]]
     // CHECK-SAME:   step %[[C1]] iter_args(%[[ACC:.*]] = %[[INIT]])
-    // CHECK:      %[[J0:.*]] = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0) -> (d0 * 4), domain: d0 in [0, 2], is_simplified: true>(%[[Y]])
+    // CHECK:      %[[J0:.*]] = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0) -> (d0 * 4), domain: d0 in [0, 2], is_simplified: true">(%[[Y]])
     // CHECK:      %[[J1:.*]] = xla_gpu.apply_indexing
-    // CHECK-SAME:     #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1 - 3),
-    // CHECK-SAME:              d0 in [0, 7], d1 in [0, 6], is_simplified: true>(%[[Z]], %[[I]])
+    // CHECK-SAME:     #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1 - 3),
+    // CHECK-SAME:              d0 in [0, 7], d1 in [0, 6], is_simplified: true">(%[[Z]], %[[I]])
     // CHECK:          %[[VAL:.*]] = tensor.extract %[[ARG0]]
     // CHECK-SAME:        [%[[X]], %[[J0]], %[[J1]]]
     // CHECK:          %[[UPD:.*]] = func.call @add_sum(%[[ACC]],
@@ -284,8 +284,8 @@ TEST_F(ElementalHloToMlirTest, ReduceWindowWithRescaling) {
     // If symbol rescaling wasn't working we would have a
     // `d1 floordiv <base_dilation>` in the map:
     // CHECK:      %[[K:.*]] = xla_gpu.apply_indexing
-    // CHECK-SAME:   #xla_gpu.indexing_map<(d0, d1) -> (d0 * 2 + d1),
-    // CHECK-SAME:   d0 in [0, 18], d1 in [0, 3], is_simplified: true>(%[[X]], %[[I]])
+    // CHECK-SAME:   #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 2 + d1),
+    // CHECK-SAME:   d0 in [0, 18], d1 in [0, 3], is_simplified: true">(%[[X]], %[[I]])
 
     // CHECK:      tensor.extract %[[ARG0]][%[[K]], %[[Y]], %[[Z]]]
   )"));
@@ -505,7 +505,7 @@ TEST_F(ElementalHloToMlirTest, Pad) {
     // CHECK-DAG:    %[[C4:.*]] = arith.constant 4
     // CHECK-DAG:    %[[C7:.*]] = arith.constant 7
     // CHECK:        %[[CONSTRAINT_VAL:.*]] = xla_gpu.apply_indexing
-    // CHECK-SAME:     <(d0) -> ((d0 - 1) mod 2), domain: d0 in [1, 7], is_simplified: true>(%[[X]])
+    // CHECK-SAME:     <"(d0) -> ((d0 - 1) mod 2), domain: d0 in [1, 7], is_simplified: true">(%[[X]])
     // CHECK:        %[[CONSTRAINT:.*]] = arith.cmpi eq, %[[CONSTRAINT_VAL]], %[[C0]]
     // CHECK-DAG:        %[[X_L:.*]] = arith.cmpi sge, %[[X]], %[[C1]]
     // CHECK-DAG:        %[[X_H:.*]] = arith.cmpi sle, %[[X]], %[[C7]]
@@ -517,9 +517,9 @@ TEST_F(ElementalHloToMlirTest, Pad) {
     // CHECK:        %[[FROM_INPUT:.*]] = arith.andi %[[X_AND_CONSTRAINT]], %[[Y_BOUNDS]]
     // CHECK:        %[[RET:.*]] = scf.if %[[FROM_INPUT]]
     // CHECK:          %[[IN0:.*]] = xla_gpu.apply_indexing
-    // CHECK-SAME:         <(d0) -> ((d0 - 1) floordiv 2), domain: d0 in [1, 7], is_simplified: true>(%[[X]])
+    // CHECK-SAME:         <"(d0) -> ((d0 - 1) floordiv 2), domain: d0 in [1, 7], is_simplified: true">(%[[X]])
     // CHECK:          %[[IN1:.*]] = xla_gpu.apply_indexing
-    // CHECK-SAME:         <(d0) -> (d0 - 4), domain: d0 in [4, 7], is_simplified: true>(%[[Y]])
+    // CHECK-SAME:         <"(d0) -> (d0 - 4), domain: d0 in [4, 7], is_simplified: true">(%[[Y]])
     // CHECK:          %[[VAL:.*]] = tensor.extract %[[ARG0]][%[[IN0]], %[[IN1]]]
     // CHECK:          scf.yield %[[VAL]]
     // CHECK:        } else {
@@ -547,7 +547,7 @@ TEST_F(ElementalHloToMlirTest, PadUnsigned) {
     // CHECK-DAG:    %[[C4:.*]] = arith.constant 4
     // CHECK-DAG:    %[[C7:.*]] = arith.constant 7
     // CHECK:        %[[CONSTRAINT_VAL:.*]] = xla_gpu.apply_indexing
-    // CHECK-SAME:     <(d0) -> ((d0 - 1) mod 2), domain: d0 in [1, 7], is_simplified: true>(%[[X]])
+    // CHECK-SAME:     <"(d0) -> ((d0 - 1) mod 2), domain: d0 in [1, 7], is_simplified: true">(%[[X]])
     // CHECK:        %[[CONSTRAINT:.*]] = arith.cmpi eq, %[[CONSTRAINT_VAL]], %[[C0]]
     // CHECK-DAG:        %[[X_L:.*]] = arith.cmpi sge, %[[X]], %[[C1]]
     // CHECK-DAG:        %[[X_H:.*]] = arith.cmpi sle, %[[X]], %[[C7]]
@@ -559,9 +559,9 @@ TEST_F(ElementalHloToMlirTest, PadUnsigned) {
     // CHECK:        %[[FROM_INPUT:.*]] = arith.andi %[[X_AND_CONSTRAINT]], %[[Y_BOUNDS]]
     // CHECK:        %[[RET:.*]] = scf.if %[[FROM_INPUT]]
     // CHECK:          %[[IN0:.*]] = xla_gpu.apply_indexing
-    // CHECK-SAME:         <(d0) -> ((d0 - 1) floordiv 2), domain: d0 in [1, 7], is_simplified: true>(%[[X]])
+    // CHECK-SAME:         <"(d0) -> ((d0 - 1) floordiv 2), domain: d0 in [1, 7], is_simplified: true">(%[[X]])
     // CHECK:          %[[IN1:.*]] = xla_gpu.apply_indexing
-    // CHECK-SAME:         <(d0) -> (d0 - 4), domain: d0 in [4, 7], is_simplified: true>(%[[Y]])
+    // CHECK-SAME:         <"(d0) -> (d0 - 4), domain: d0 in [4, 7], is_simplified: true">(%[[Y]])
     // CHECK:          %[[VAL:.*]] = tensor.extract %[[ARG0]][%[[IN0]], %[[IN1]]]
     // CHECK:          scf.yield %[[VAL]]
     // CHECK:        } else {
@@ -878,11 +878,11 @@ TEST_F(ElementalHloToMlirTest, ConvolutionSimple) {
     // CHECK-NEXT: %[[R2:.+]] = scf.for %[[I:.+]] = %[[C0]] to %[[C4]] step %[[C1]] iter_args(%[[ACC:.+]] = %[[A1]]) -> (f32) {
     // CHECK:      %[[R3:.+]] = scf.if {{.+}} -> (f32) {
     // CHECK:      %[[XX0:.+]] = xla_gpu.apply_indexing
-    // CHECK-SAME:   #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1),
-    // CHECK-SAME:   d0 in [0, 5], d1 in [0, 2], is_simplified: true>(%[[W]], %[[X]])
+    // CHECK-SAME:   #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1),
+    // CHECK-SAME:   d0 in [0, 5], d1 in [0, 2], is_simplified: true">(%[[W]], %[[X]])
     // CHECK:      %[[XX1:.+]] = xla_gpu.apply_indexing
-    // CHECK-SAME:   #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1),
-    // CHECK-SAME:   d0 in [0, 7], d1 in [0, 4], is_simplified: true>(%[[H]], %[[Y]])
+    // CHECK-SAME:   #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1),
+    // CHECK-SAME:   d0 in [0, 7], d1 in [0, 4], is_simplified: true">(%[[H]], %[[Y]])
     // CHECK-DAG:    %[[VL:.+]] = tensor.extract %[[LHS]][%[[B]], %[[XX0]], %[[XX1]], %[[I]]] : tensor<2x8x12x4xf32>
     // CHECK-DAG:    %[[VR:.+]] = tensor.extract %[[RHS]][%[[I]], %[[X]], %[[Y]], %[[O]]] : tensor<4x3x5x16xf32>
     // CHECK:        %[[MUL:.+]] = arith.mulf %[[VL]], %[[VR]] : f32
@@ -924,11 +924,11 @@ TEST_F(ElementalHloToMlirTest, ConvolutionWithWindowStrides) {
     // CHECK-NEXT: %[[R2:.+]] = scf.for %[[I:.+]] = %[[C0]] to %[[C4]] step %[[C1]] iter_args(%[[ACC:.+]] = %[[A1]]) -> (f32) {
     // CHECK:      %[[R3:.+]] = scf.if {{.+}} -> (f32) {
     // CHECK:        %[[XX0:.+]] = xla_gpu.apply_indexing
-    // CHECK-SAME:     #xla_gpu.indexing_map<(d0, d1) -> (d0 * 2 + d1),
-    // CHECK-SAME:     d0 in [0, 2], d1 in [0, 2], is_simplified: true>(%[[W]], %[[X]])
+    // CHECK-SAME:     #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 2 + d1),
+    // CHECK-SAME:     d0 in [0, 2], d1 in [0, 2], is_simplified: true">(%[[W]], %[[X]])
     // CHECK:        %[[XX1:.+]] = xla_gpu.apply_indexing
-    // CHECK-SAME:     #xla_gpu.indexing_map<(d0, d1) -> (d0 * 2 + d1),
-    // CHECK-SAME:     d0 in [0, 3], d1 in [0, 4], is_simplified: true>(%[[H]], %[[Y]])
+    // CHECK-SAME:     #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 2 + d1),
+    // CHECK-SAME:     d0 in [0, 3], d1 in [0, 4], is_simplified: true">(%[[H]], %[[Y]])
     // CHECK-DAG:    %[[VL:.+]] = tensor.extract %[[LHS]][%[[B]], %[[XX0]], %[[XX1]], %[[I]]] : tensor<2x8x12x4xf32>
     // CHECK-DAG:    %[[VR:.+]] = tensor.extract %[[RHS]][%[[I]], %[[X]], %[[Y]], %[[O]]] : tensor<4x3x5x16xf32>
     // CHECK:        %[[MUL:.+]] = arith.mulf %[[VL]], %[[VR]] : f32
@@ -971,21 +971,21 @@ TEST_F(ElementalHloToMlirTest, ConvolutionWithPadding) {
     // CHECK:      %[[R0:.+]] = scf.for %[[X:.+]] = %[[C0]] to %[[C3]] step %[[C1]] iter_args(%[[A0:.+]] = %[[INIT]]) -> (f32) {
     // CHECK-NEXT: %[[R1:.+]] = scf.for %[[Y:.+]] = %[[C0]] to %[[C5]] step %[[C1]] iter_args(%[[A1:.+]] = %[[A0]]) -> (f32) {
     // CHECK-NEXT: %[[R2:.+]] = scf.for %[[I:.+]] = %[[C0]] to %[[C4]] step %[[C1]] iter_args(%[[ACC:.+]] = %[[A1]]) -> (f32) {
-    // CHECK-DAG:  %[[TESTX:.+]] = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1), domain: d0 in [0, 7], d1 in [0, 2], is_simplified: true>(%[[W]], %[[X]])
+    // CHECK-DAG:  %[[TESTX:.+]] = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1), domain: d0 in [0, 7], d1 in [0, 2], is_simplified: true">(%[[W]], %[[X]])
     // CHECK-DAG:  %[[TXGE:.+]] = arith.cmpi sge, %[[TESTX]], %[[C1]] : index
     // CHECK-DAG:  %[[TXLE:.+]] = arith.cmpi sle, %[[TESTX]], %[[C8]] : index
     // CHECK-DAG:  %[[TX:.+]] = arith.andi %[[TXGE]], %[[TXLE]] : i1
-    // CHECK-DAG:  %[[TESTY:.+]] = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1), domain: d0 in [0, 11], d1 in [0, 4], is_simplified: true>(%[[H]], %[[Y]])
+    // CHECK-DAG:  %[[TESTY:.+]] = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1), domain: d0 in [0, 11], d1 in [0, 4], is_simplified: true">(%[[H]], %[[Y]])
     // CHECK-DAG:  %[[TYGE:.+]] = arith.cmpi sge, %[[TESTY]], %[[C2]] : index
     // CHECK-DAG:  %[[TYLE:.+]] = arith.cmpi sle, %[[TESTY]], %[[C13]] : index
     // CHECK-DAG:  %[[TY:.+]] = arith.andi %[[TYGE]], %[[TYLE]] : i1
     // CHECK:      %[[R3:.+]] = scf.if {{.+}} -> (f32) {
     // CHECK:        %[[XX0:.+]] = xla_gpu.apply_indexing
-    // CHECK-SAME:     #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1 - 1),
-    // CHECK-SAME:     d0 in [0, 7], d1 in [0, 2], is_simplified: true>(%[[W]], %[[X]])
+    // CHECK-SAME:     #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1 - 1),
+    // CHECK-SAME:     d0 in [0, 7], d1 in [0, 2], is_simplified: true">(%[[W]], %[[X]])
     // CHECK:        %[[XX1:.+]] = xla_gpu.apply_indexing
-    // CHECK-SAME:     #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1 - 2),
-    // CHECK-SAME:     d0 in [0, 11], d1 in [0, 4], is_simplified: true>(%[[H]], %[[Y]])
+    // CHECK-SAME:     #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1 - 2),
+    // CHECK-SAME:     d0 in [0, 11], d1 in [0, 4], is_simplified: true">(%[[H]], %[[Y]])
     // CHECK-DAG:    %[[VL:.+]] = tensor.extract %[[LHS]][%[[B]], %[[XX0]], %[[XX1]], %[[I]]] : tensor<2x8x12x4xf32>
     // CHECK-DAG:    %[[VR:.+]] = tensor.extract %[[RHS]][%[[I]], %[[X]], %[[Y]], %[[O]]] : tensor<4x3x5x16xf32>
     // CHECK:        %[[MUL:.+]] = arith.mulf %[[VL]], %[[VR]] : f32
@@ -1025,17 +1025,17 @@ TEST_F(ElementalHloToMlirTest, ConvolutionWithLhsDilation) {
     // CHECK:      %[[R0:.+]] = scf.for %[[X:.+]] = %[[C0]] to %[[C3]] step %[[C1]] iter_args(%[[A0:.+]] = %[[INIT]]) -> (f32) {
     // CHECK-NEXT: %[[R1:.+]] = scf.for %[[Y:.+]] = %[[C0]] to %[[C5]] step %[[C1]] iter_args(%[[A1:.+]] = %[[A0]]) -> (f32) {
     // CHECK-NEXT: %[[R2:.+]] = scf.for %[[I:.+]] = %[[C0]] to %[[C4]] step %[[C1]] iter_args(%[[ACC:.+]] = %[[A1]]) -> (f32) {
-    // CHECK-DAG:  %[[TESTX:.+]] = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0, d1) -> ((d0 + d1) mod 2), domain: d0 in [0, 12], d1 in [0, 2], is_simplified: true>(%[[W]], %[[X]])
+    // CHECK-DAG:  %[[TESTX:.+]] = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0, d1) -> ((d0 + d1) mod 2), domain: d0 in [0, 12], d1 in [0, 2], is_simplified: true">(%[[W]], %[[X]])
     // CHECK-DAG:  %[[TX:.+]] = arith.cmpi eq, %[[TESTX]], %[[C0]] : index
-    // CHECK-DAG:  %[[TESTY:.+]] = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0, d1) -> ((d0 + d1) mod 2), domain: d0 in [0, 18], d1 in [0, 4], is_simplified: true>(%[[H]], %[[Y]])
+    // CHECK-DAG:  %[[TESTY:.+]] = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0, d1) -> ((d0 + d1) mod 2), domain: d0 in [0, 18], d1 in [0, 4], is_simplified: true">(%[[H]], %[[Y]])
     // CHECK-DAG:  %[[TY:.+]] = arith.cmpi eq, %[[TESTY]], %[[C0]] : index
     // CHECK:      %[[R3:.+]] = scf.if {{.+}} -> (f32) {
     // CHECK:        %[[XX0:.+]] = xla_gpu.apply_indexing
-    // CHECK-SAME:     #xla_gpu.indexing_map<(d0, d1) -> ((d0 + d1) floordiv 2),
-    // CHECK-SAME:     d0 in [0, 12], d1 in [0, 2], is_simplified: true>(%[[W]], %[[X]])
+    // CHECK-SAME:     #xla_gpu.indexing_map<"(d0, d1) -> ((d0 + d1) floordiv 2),
+    // CHECK-SAME:     d0 in [0, 12], d1 in [0, 2], is_simplified: true">(%[[W]], %[[X]])
     // CHECK:        %[[XX1:.+]] = xla_gpu.apply_indexing
-    // CHECK-SAME:     #xla_gpu.indexing_map<(d0, d1) -> ((d0 + d1) floordiv 2),
-    // CHECK-SAME:     d0 in [0, 18], d1 in [0, 4], is_simplified: true>(%[[H]], %[[Y]])
+    // CHECK-SAME:     #xla_gpu.indexing_map<"(d0, d1) -> ((d0 + d1) floordiv 2),
+    // CHECK-SAME:     d0 in [0, 18], d1 in [0, 4], is_simplified: true">(%[[H]], %[[Y]])
     // CHECK-DAG:    %[[VL:.+]] = tensor.extract %[[LHS]][%[[B]], %[[XX0]], %[[XX1]], %[[I]]] : tensor<2x8x12x4xf32>
     // CHECK-DAG:    %[[VR:.+]] = tensor.extract %[[RHS]][%[[I]], %[[X]], %[[Y]], %[[O]]] : tensor<4x3x5x16xf32>
     // CHECK:        %[[MUL:.+]] = arith.mulf %[[VL]], %[[VR]] : f32
@@ -1077,11 +1077,11 @@ TEST_F(ElementalHloToMlirTest, ConvolutionWithRhsDilation) {
     // CHECK-NEXT: %[[R2:.+]] = scf.for %[[I:.+]] = %[[C0]] to %[[C4]] step %[[C1]] iter_args(%[[ACC:.+]] = %[[A1]]) -> (f32) {
     // CHECK:      %[[R3:.+]] = scf.if {{.+}} -> (f32) {
     // CHECK:        %[[XX0:.+]] = xla_gpu.apply_indexing
-    // CHECK-SAME:     #xla_gpu.indexing_map<(d0, d1) -> (d1 * 2 + d0),
-    // CHECK-SAME:     d0 in [0, 3], d1 in [0, 2], is_simplified: true>(%[[W]], %[[X]])
+    // CHECK-SAME:     #xla_gpu.indexing_map<"(d0, d1) -> (d1 * 2 + d0),
+    // CHECK-SAME:     d0 in [0, 3], d1 in [0, 2], is_simplified: true">(%[[W]], %[[X]])
     // CHECK:        %[[XX1:.+]] = xla_gpu.apply_indexing
-    // CHECK-SAME:     #xla_gpu.indexing_map<(d0, d1) -> (d1 * 2 + d0),
-    // CHECK-SAME:     d0 in [0, 3], d1 in [0, 4], is_simplified: true>(%[[H]], %[[Y]])
+    // CHECK-SAME:     #xla_gpu.indexing_map<"(d0, d1) -> (d1 * 2 + d0),
+    // CHECK-SAME:     d0 in [0, 3], d1 in [0, 4], is_simplified: true">(%[[H]], %[[Y]])
     // CHECK-DAG:    %[[VL:.+]] = tensor.extract %[[LHS]][%[[B]], %[[XX0]], %[[XX1]], %[[I]]] : tensor<2x8x12x4xf32>
     // CHECK-DAG:    %[[VR:.+]] = tensor.extract %[[RHS]][%[[I]], %[[X]], %[[Y]], %[[O]]] : tensor<4x3x5x16xf32>
     // CHECK:        %[[MUL:.+]] = arith.mulf %[[VL]], %[[VR]] : f32
@@ -1123,14 +1123,14 @@ TEST_F(ElementalHloToMlirTest, ConvolutionWithFeatureGroupCount) {
     // CHECK-NEXT: %[[R2:.+]] = scf.for %[[I:.+]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ACC:.+]] = %[[A1]]) -> (f32) {
     // CHECK:      %[[R3:.+]] = scf.if {{.+}} -> (f32) {
     // CHECK:        %[[XX0:.+]] = xla_gpu.apply_indexing
-    // CHECK-SAME:     #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1),
-    // CHECK-SAME:     d0 in [0, 5], d1 in [0, 2], is_simplified: true>(%[[W]], %[[X]])
+    // CHECK-SAME:     #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1),
+    // CHECK-SAME:     d0 in [0, 5], d1 in [0, 2], is_simplified: true">(%[[W]], %[[X]])
     // CHECK:        %[[XX1:.+]] = xla_gpu.apply_indexing
-    // CHECK-SAME:     #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1),
-    // CHECK-SAME:     d0 in [0, 7], d1 in [0, 4], is_simplified: true>(%[[H]], %[[Y]])
+    // CHECK-SAME:     #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1),
+    // CHECK-SAME:     d0 in [0, 7], d1 in [0, 4], is_simplified: true">(%[[H]], %[[Y]])
     // CHECK:        %[[XX2:.+]] = xla_gpu.apply_indexing
-    // CHECK-SAME:     #xla_gpu.indexing_map<(d0, d1) -> ((d0 floordiv 8) * 2 + d1),
-    // CHECK-SAME:     d0 in [0, 15], d1 in [0, 1], is_simplified: true>(%[[O]], %[[I]])
+    // CHECK-SAME:     #xla_gpu.indexing_map<"(d0, d1) -> ((d0 floordiv 8) * 2 + d1),
+    // CHECK-SAME:     d0 in [0, 15], d1 in [0, 1], is_simplified: true">(%[[O]], %[[I]])
     // CHECK-DAG:    %[[VL:.+]] = tensor.extract %[[LHS]][%[[B]], %[[XX0]], %[[XX1]], %[[XX2]]] : tensor<2x8x12x4xf32>
     // CHECK-DAG:    %[[VR:.+]] = tensor.extract %[[RHS]][%[[I]], %[[X]], %[[Y]], %[[O]]] : tensor<2x3x5x16xf32>
     // CHECK:        %[[MUL:.+]] = arith.mulf %[[VL]], %[[VR]] : f32
@@ -1174,11 +1174,11 @@ TEST_F(ElementalHloToMlirTest, ConvolutionWithBatchGroupCount) {
     // CHECK-NEXT: %[[R3:.+]] = scf.for %[[G:.+]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ACC:.+]] = %[[A2]]) -> (f32) {
     // CHECK:      %[[R4:.+]] = scf.if {{.+}} -> (f32) {
     // CHECK:        %[[XX0:.+]] = xla_gpu.apply_indexing
-    // CHECK-SAME:     #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1),
-    // CHECK-SAME:     d0 in [0, 5], d1 in [0, 2], is_simplified: true>(%[[W]], %[[X]])
+    // CHECK-SAME:     #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1),
+    // CHECK-SAME:     d0 in [0, 5], d1 in [0, 2], is_simplified: true">(%[[W]], %[[X]])
     // CHECK:        %[[XX1:.+]] = xla_gpu.apply_indexing
-    // CHECK-SAME:     #xla_gpu.indexing_map<(d0, d1) -> (d0 + d1),
-    // CHECK-SAME:     d0 in [0, 7], d1 in [0, 4], is_simplified: true>(%[[H]], %[[Y]])
+    // CHECK-SAME:     #xla_gpu.indexing_map<"(d0, d1) -> (d0 + d1),
+    // CHECK-SAME:     d0 in [0, 7], d1 in [0, 4], is_simplified: true">(%[[H]], %[[Y]])
     // CHECK-DAG:    %[[VL:.+]] = tensor.extract %[[LHS]][%[[G]], %[[XX0]], %[[XX1]], %[[I]]] : tensor<2x8x12x4xf32>
     // CHECK-DAG:    %[[VR:.+]] = tensor.extract %[[RHS]][%[[I]], %[[X]], %[[Y]], %[[O]]] : tensor<4x3x5x16xf32>
     // CHECK:        %[[MUL:.+]] = arith.mulf %[[VL]], %[[VR]] : f32
@@ -1644,8 +1644,8 @@ TEST_F(ElementalHloToMlirTest, MixedIndexingTuple) {
     // CHECK-SAME:     %[[X:.*]]: index {{{.*}}}, %[[Y:.*]]: index {{{.*}}}
     // CHECK:        %[[A:.*]] = tensor.extract %[[P0]][%[[X]], %[[Y]]]
     // CHECK:        %[[IDX:.*]] = xla_gpu.apply_indexing
-    // CHECK-SAME:       #xla_gpu.indexing_map<(d0, d1) -> (d0 * 10 + d1),
-    // CHECK-SAME:       d0 in [0, 9], d1 in [0, 9], is_simplified: true>(%[[X]], %[[Y]])
+    // CHECK-SAME:       #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 10 + d1),
+    // CHECK-SAME:       d0 in [0, 9], d1 in [0, 9], is_simplified: true">(%[[X]], %[[Y]])
     // CHECK:        %[[B:.*]] = tensor.extract %[[P1]][%[[IDX]]]
     // CHECK:        return %[[A]], %[[B]]
   )"));
@@ -1668,8 +1668,8 @@ TEST_F(ElementalHloToMlirTest, NestedTuple) {
     // CHECK-SAME:     %[[X:.*]]: index {{{.*}}}, %[[Y:.*]]: index {{{.*}}}
     // CHECK:          %[[P0_V:.*]] = xla_gpu.pure_call @main_p0
     // CHECK:          %[[IDX:.*]] =
-    // CHECK-SAME:       #xla_gpu.indexing_map<(d0, d1) -> (d0 * 10 + d1),
-    // CHECK-SAME:       d0 in [0, 9], d1 in [0, 9], is_simplified: true>(%[[X]], %[[Y]])
+    // CHECK-SAME:       #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 10 + d1),
+    // CHECK-SAME:       d0 in [0, 9], d1 in [0, 9], is_simplified: true">(%[[X]], %[[Y]])
     // CHECK:          %[[P1_V:.*]] = xla_gpu.pure_call @main_p1
     // CHECK-SAME:       (%[[P0]], %[[P1]], %[[IDX]])
     // CHECK:          return %[[P0_V]], %[[P1_V]], %[[P1_V]], %[[P1_V]], %[[P0_V]]
diff --git a/xla/service/gpu/fusions/tests/concatenate/concat_1d.hlo b/xla/service/gpu/fusions/tests/concatenate/concat_1d.hlo
index f99ff371ef38d..5ac91b201c616 100644
--- a/xla/service/gpu/fusions/tests/concatenate/concat_1d.hlo
+++ b/xla/service/gpu/fusions/tests/concatenate/concat_1d.hlo
@@ -8,10 +8,10 @@ fusion {
   param2 = f32[300] parameter(2)
   ROOT concat = f32[900] concatenate(param0, param1, param2), dimensions={0}
 }
-// CHECK-DAG: #[[MAP:.*]] = #xla_gpu.indexing_map<(d0, d1) -> (d1 * 128 + d0)
-// CHECK-DAG: #[[LOOPMAP_1:.*]] = #xla_gpu.indexing_map<(d0, d1, d2, d3, d4, d5)[s0, s1] -> (d3 * 128 + d0)
-// CHECK-DAG: #[[LOOPMAP_2:.*]] = #xla_gpu.indexing_map<(d0, d1, d2, d3, d4, d5)[s0, s1] -> (d3 * 128 + d0 + 200)
-// CHECK-DAG: #[[LOOPMAP_3:.*]] = #xla_gpu.indexing_map<(d0, d1, d2, d3, d4, d5)[s0, s1] -> (d3 * 128 + d0 + 600)
+// CHECK-DAG: #[[MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> (d1 * 128 + d0)
+// CHECK-DAG: #[[LOOPMAP_1:.*]] = #xla_gpu.indexing_map<"(d0, d1, d2, d3, d4, d5)[s0, s1] -> (d3 * 128 + d0)
+// CHECK-DAG: #[[LOOPMAP_2:.*]] = #xla_gpu.indexing_map<"(d0, d1, d2, d3, d4, d5)[s0, s1] -> (d3 * 128 + d0 + 200)
+// CHECK-DAG: #[[LOOPMAP_3:.*]] = #xla_gpu.indexing_map<"(d0, d1, d2, d3, d4, d5)[s0, s1] -> (d3 * 128 + d0 + 600)
 
 // CHECK:       func.func @main
 // CHECK-SAME:    %[[ARG_0:[a-zA-Z0-9]*]]: {{[^,]*}},
diff --git a/xla/service/gpu/fusions/tests/loop/tuple_heterogeneous.hlo b/xla/service/gpu/fusions/tests/loop/tuple_heterogeneous.hlo
index 3b5e454584137..4f93eacbfab93 100644
--- a/xla/service/gpu/fusions/tests/loop/tuple_heterogeneous.hlo
+++ b/xla/service/gpu/fusions/tests/loop/tuple_heterogeneous.hlo
@@ -12,8 +12,8 @@ fusion {
   ROOT tuple = (f64[8], f64[2,4]) tuple(minimum, bc)
 }
 
-// CHECK: #[[MAJOR:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 floordiv 4),
-// CHECK: #[[MINOR:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 mod 4),
+// CHECK: #[[MAJOR:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 floordiv 4),
+// CHECK: #[[MINOR:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 mod 4),
 
 // CHECK:     xla_gpu.loop ({{.*}})[{{.*}}] -> (%[[RA:.*]]) in
 // CHECK-DAG:   %[[MAJOR_IDX:.*]] = xla_gpu.apply_indexing #[[MAJOR]]
diff --git a/xla/service/gpu/fusions/tests/scatter/unique_indices.hlo b/xla/service/gpu/fusions/tests/scatter/unique_indices.hlo
index a0663dd88308f..88043829ebc8f 100644
--- a/xla/service/gpu/fusions/tests/scatter/unique_indices.hlo
+++ b/xla/service/gpu/fusions/tests/scatter/unique_indices.hlo
@@ -24,7 +24,7 @@ scatter {
     unique_indices=true,
     to_apply=add
 }
-// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 floordiv 2)
+// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 floordiv 2)
 
 // CHECK-LABEL: func.func @main(
 // CHECK-SAME:    %[[OPERAND:[a-zA-Z0-9]*]]: tensor<10x5xf32>
@@ -60,4 +60,4 @@ scatter {
 // CHECK:        %[[COMBINED:.*]] = arith.addf %[[CURRENT]], %[[UPD_ELEM]]
 // CHECK:        %[[UPDATED:.*]] = tensor.insert %[[COMBINED]]
 // CHECK-SAME:     into %{{[a-z0-9]+}}[%{{.*}}, %[[RC]]] : tensor<10x5xf32>
-// CHECK:        xla_gpu.yield %[[UPDATED]] : tensor<10x5xf32>
\ No newline at end of file
+// CHECK:        xla_gpu.yield %[[UPDATED]] : tensor<10x5xf32>
diff --git a/xla/service/gpu/fusions/transforms/tests/flatten_tensors.mlir b/xla/service/gpu/fusions/transforms/tests/flatten_tensors.mlir
index 1691d3fd748c2..e88324f698d48 100644
--- a/xla/service/gpu/fusions/transforms/tests/flatten_tensors.mlir
+++ b/xla/service/gpu/fusions/transforms/tests/flatten_tensors.mlir
@@ -8,7 +8,7 @@ func.func @tensor_extract(
       : tensor<2x3xf32, dense<[0, 1]> : tensor<2xi64>>
   func.return %v : f32
 }
-// CHECK: #[[$MAP:.+]] = #xla_gpu.indexing_map<(d0, d1) -> (d1 * 2 + d0), domain: d0 in [0, 1], d1 in [0, 2], is_simplified: true>
+// CHECK: #[[$MAP:.+]] = #xla_gpu.indexing_map<"(d0, d1) -> (d1 * 2 + d0), domain: d0 in [0, 1], d1 in [0, 2], is_simplified: true">
 
 // CHECK-LABEL: func.func @tensor_extract(
 // CHECK-SAME:      %[[SRC:.*]]: tensor<6xf32>,
@@ -67,7 +67,7 @@ func.func @atomic_rmw(%in: tensor<2x4xf32>, %i: index, %j: index)
   }
   return %ret : tensor<2x4xf32>
 }
-// CHECK: #[[$MAP:.+]] = #xla_gpu.indexing_map<(d0, d1) -> (d0 * 4 + d1), domain: d0 in [0, 1], d1 in [0, 3], is_simplified: true>
+// CHECK: #[[$MAP:.+]] = #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 4 + d1), domain: d0 in [0, 1], d1 in [0, 3], is_simplified: true">
 // CHECK-LABEL: func.func @atomic_rmw(
 // CHECK-SAME:      %[[TENSOR:.*]]: tensor<8xf32>, %[[I:.*]]: index,
 // CHECK-SAME:      %[[J:.*]]: index) -> tensor<8xf32> {
@@ -93,8 +93,8 @@ func.func @for_loop(%t0: tensor<32x1024xf32>, %t1: tensor<64x8x4xf32>)
   } {some_attr}
     return %for#0, %for#1, %c0_f32 : tensor<32x1024xf32>, tensor<64x8x4xf32>, f32
 }
-// CHECK: #[[$MAP0:.+]] = #xla_gpu.indexing_map<(d0) -> (d0 + 1024)
-// CHECK: #[[$MAP1:.+]] = #xla_gpu.indexing_map<(d0) -> (d0 * 32 + 5)
+// CHECK: #[[$MAP0:.+]] = #xla_gpu.indexing_map<"(d0) -> (d0 + 1024)
+// CHECK: #[[$MAP1:.+]] = #xla_gpu.indexing_map<"(d0) -> (d0 * 32 + 5)
 // CHECK-LABEL: func.func @for_loop(
 // CHECK-SAME:      %[[T0:.*]]: tensor<32768xf32>,
 // CHECK-SAME:      %[[T1:.*]]: tensor<2048xf32>) -> (tensor<32768xf32>, tensor<2048xf32>, f32) {
@@ -114,12 +114,9 @@ func.func @for_loop(%t0: tensor<32x1024xf32>, %t1: tensor<64x8x4xf32>)
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0, d1) -> ((d1 * 128 + d0) floordiv 36),
-  domain: d0 in [0, 127], d1 in [0, 393749], is_simplified: true>
-#map1 = #xla_gpu.indexing_map<(d0, d1) -> (((d1 * 128 + d0) floordiv 9) mod 4),
-  domain: d0 in [0, 127], d1 in [0, 393749], is_simplified: true>
-#map2 = #xla_gpu.indexing_map<(d0, d1) -> ((d1 * 128 + d0) mod 9),
-  domain: d0 in [0, 127], d1 in [0, 393749], is_simplified: true>
+#map = #xla_gpu.indexing_map<"(d0, d1) -> ((d1 * 128 + d0) floordiv 36), domain: d0 in [0, 127], d1 in [0, 393749], is_simplified: true">
+#map1 = #xla_gpu.indexing_map<"(d0, d1) -> (((d1 * 128 + d0) floordiv 9) mod 4), domain: d0 in [0, 127], d1 in [0, 393749], is_simplified: true">
+#map2 = #xla_gpu.indexing_map<"(d0, d1) -> ((d1 * 128 + d0) mod 9), domain: d0 in [0, 127], d1 in [0, 393749], is_simplified: true">
 func.func @if_op(%arg0: tensor<4000x4x9xf32>, %arg1: tensor<1400x1xi32>,
     %arg2: tensor<1400x1x4x9xf32>, %arg3: tensor<4000x4x9xf32>)
      -> tensor<4000x4x9xf32> {
@@ -225,7 +222,7 @@ func.func @vector_extract(%arg0: vector<2x3xf32>, %arg1: index) -> f32 {
   %v = vector.extract %arg0[%arg1, 2] : f32 from vector<2x3xf32>
   func.return %v : f32
 }
-// CHECK: #[[$MAP:.+]] = #xla_gpu.indexing_map<(d0) -> (d0 * 3 + 2),
+// CHECK: #[[$MAP:.+]] = #xla_gpu.indexing_map<"(d0) -> (d0 * 3 + 2),
 // CHECK-SAME: domain: d0 in [0, 1]
 
 // CHECK-LABEL: func.func @vector_extract(
@@ -241,7 +238,7 @@ func.func @vector_insert(%arg0: vector<10x24xf32>, %i: index)
   %out = vector.insert %scalar, %arg0 [1, %i] : f32 into vector<10x24xf32>
   func.return %out : vector<10x24xf32>
 }
-// CHECK: #[[$MAP:.+]] = #xla_gpu.indexing_map<(d0) -> (d0 + 24),
+// CHECK: #[[$MAP:.+]] = #xla_gpu.indexing_map<"(d0) -> (d0 + 24),
 // CHECK-SAME: domain: d0 in [0, 23]
 // CHECK-LABEL: func.func @vector_insert(
 // CHECK-SAME:      %[[VECTOR:.*]]: vector<240xf32>, %[[I:.*]]: index) ->
@@ -290,8 +287,8 @@ func.func @for_loop_vector(%t0: vector<32x1024xf32>, %t1: vector<64x8x4xf32>)
     return %for#0, %for#1, %c0_f32 :
       vector<32x1024xf32>, vector<64x8x4xf32>, f32
 }
-// CHECK: #[[$MAP0:.+]] = #xla_gpu.indexing_map<(d0) -> (d0 + 1024)
-// CHECK: #[[$MAP1:.+]] = #xla_gpu.indexing_map<(d0) -> (d0 * 32 + 5)
+// CHECK: #[[$MAP0:.+]] = #xla_gpu.indexing_map<"(d0) -> (d0 + 1024)
+// CHECK: #[[$MAP1:.+]] = #xla_gpu.indexing_map<"(d0) -> (d0 * 32 + 5)
 // CHECK-LABEL: func.func @for_loop_vector(
 // CHECK-SAME:      %[[V0:.*]]: vector<32768xf32>,
 // CHECK-SAME:      %[[V1:.*]]: vector<2048xf32>) ->
diff --git a/xla/service/gpu/fusions/transforms/tests/fuse_loops.mlir b/xla/service/gpu/fusions/transforms/tests/fuse_loops.mlir
index 557335b6a7ff7..594c8e1deec7d 100644
--- a/xla/service/gpu/fusions/transforms/tests/fuse_loops.mlir
+++ b/xla/service/gpu/fusions/transforms/tests/fuse_loops.mlir
@@ -1,24 +1,24 @@
 // RUN: mlir_fusions_opt -split-input-file %s -xla-gpu-fuse-loops \
 // RUN: | FileCheck %s
 
-#indexing_map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] ->
-    (d1 floordiv 30,
-    ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32,
-    (d1 mod 6) * 32 + d0 mod 32),
-  domain:
-    d0 in [0, 127], d1 in [0, 599],
-    s0 in [0, 7], s1 in [0, 0],
-    (d1 mod 6) * 32 + d0 mod 32 in [0, 169],
-    is_simplified: true>
-#indexing_map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] ->
-    (0,
-    d0 mod 32,
-    d0 floordiv 32 + s0 * 4),
-  domain:
-    d0 in [0, 127], d1 in [0, 599],
-    s0 in [0, 7], s1 in [0, 0],
-    (d1 mod 6) * 32 + d0 mod 32 in [0, 169],
-    is_simplified: true>
+#indexing_map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->"
+"   (d1 floordiv 30,"
+"   ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32,"
+"   (d1 mod 6) * 32 + d0 mod 32),"
+" domain:"
+"   d0 in [0, 127], d1 in [0, 599],"
+"   s0 in [0, 7], s1 in [0, 0],"
+"   (d1 mod 6) * 32 + d0 mod 32 in [0, 169],"
+"   is_simplified: true">
+#indexing_map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->"
+"   (0,"
+"   d0 mod 32,"
+"   d0 floordiv 32 + s0 * 4),"
+" domain:"
+"   d0 in [0, 127], d1 in [0, 599],"
+"   s0 in [0, 7], s1 in [0, 0],"
+"   (d1 mod 6) * 32 + d0 mod 32 in [0, 169],"
+"   is_simplified: true">
 func.func @fuse_loops(%arg0: tensor<20x160x170xf32>) -> tensor<1x32x33xf32> {
   %cst = arith.constant dense<0.000000e+00> : vector<8x1xf32>
   %c0 = arith.constant 0 : index
@@ -43,7 +43,7 @@ func.func @fuse_loops(%arg0: tensor<20x160x170xf32>) -> tensor<1x32x33xf32> {
 }
 
 
-// CHECK: #[[$FUSED_MAP:.*]] = #xla_gpu.indexing_map<(d0, d1)[s0, s1] ->
+// CHECK: #[[$FUSED_MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->
 // CHECK-SAME: (d1 floordiv 30, ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32,
 // CHECK-SAME: (d1 mod 6) * 32 + d0 mod 32, 0, d0 mod 32, d0 floordiv 32 + s0 * 4),
 // CHECK-SAME: domain: d0 in [0, 127], d1 in [0, 599],
@@ -60,24 +60,24 @@ func.func @fuse_loops(%arg0: tensor<20x160x170xf32>) -> tensor<1x32x33xf32> {
 
 // -----
 
-#indexing_map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] ->
-    (d1 floordiv 30,
-    ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32,
-    (d1 mod 6) * 32 + d0 mod 32),
-  domain:
-    d0 in [0, 127], d1 in [0, 599],
-    s0 in [0, 7], s1 in [0, 0],
-    (d1 mod 6) * 32 + d0 mod 32 in [0, 169],
-    is_simplified: true>
-#indexing_map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] ->
-    (0,
-    d0 mod 32,
-    d0 floordiv 32 + s0 * 4),
-  domain:
-    d0 in [0, 127], d1 in [0, 599],
-    s0 in [0, 7], s1 in [0, 0],
-    (d1 mod 6) * 32 + d0 mod 32 in [0, 169],
-    is_simplified: true>
+#indexing_map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->"
+"   (d1 floordiv 30,"
+"   ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32,"
+"   (d1 mod 6) * 32 + d0 mod 32),"
+" domain:"
+"   d0 in [0, 127], d1 in [0, 599],"
+"   s0 in [0, 7], s1 in [0, 0],"
+"   (d1 mod 6) * 32 + d0 mod 32 in [0, 169],"
+"   is_simplified: true">
+#indexing_map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->"
+"   (0,"
+"   d0 mod 32,"
+"   d0 floordiv 32 + s0 * 4),"
+" domain:"
+"   d0 in [0, 127], d1 in [0, 599],"
+"   s0 in [0, 7], s1 in [0, 0],"
+"   (d1 mod 6) * 32 + d0 mod 32 in [0, 169],"
+"   is_simplified: true">
 func.func @do_not_fuse_index_mismatch(%arg0: tensor<20x160x170xf32>) -> tensor<1x32x33xf32> {
   %cst = arith.constant dense<0.000000e+00> : vector<8x1xf32>
   %c0 = arith.constant 0 : index
@@ -108,24 +108,24 @@ func.func @do_not_fuse_index_mismatch(%arg0: tensor<20x160x170xf32>) -> tensor<1
 
 // -----
 
-#indexing_map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] ->
-    (d1 floordiv 30,
-    ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32,
-    (d1 mod 6) * 32 + d0 mod 32),
-  domain:
-    d0 in [0, 127], d1 in [0, 599],
-    s0 in [0, 7], s1 in [0, 0],
-    (d1 mod 6) * 32 + d0 mod 32 in [0, 169],
-    is_simplified: true>
-#indexing_map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] ->
-    (0,
-    d0 mod 32,
-    d0 floordiv 32 + s0 * 4),
-  domain:
-    d0 in [0, 127], d1 in [0, 599],
-    s0 in [0, 7], s1 in [0, 0],
-    (d1 mod 6) * 32 + d0 mod 32 in [0, 169],
-    is_simplified: true>
+#indexing_map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->"
+"   (d1 floordiv 30,"
+"   ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32,"
+"   (d1 mod 6) * 32 + d0 mod 32),"
+" domain:"
+"   d0 in [0, 127], d1 in [0, 599],"
+"   s0 in [0, 7], s1 in [0, 0],"
+"   (d1 mod 6) * 32 + d0 mod 32 in [0, 169],"
+"   is_simplified: true">
+#indexing_map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->"
+"   (0,"
+"   d0 mod 32,"
+"   d0 floordiv 32 + s0 * 4),"
+" domain:"
+"   d0 in [0, 127], d1 in [0, 599],"
+"   s0 in [0, 7], s1 in [0, 0],"
+"   (d1 mod 6) * 32 + d0 mod 32 in [0, 169],"
+"   is_simplified: true">
 func.func @do_not_fuse_multiple_uses(%arg0: tensor<20x160x170xf32>) -> tensor<1x32x33xf32> {
   %cst = arith.constant dense<0.000000e+00> : vector<8x1xf32>
   %c0 = arith.constant 0 : index
@@ -158,24 +158,24 @@ func.func @do_not_fuse_multiple_uses(%arg0: tensor<20x160x170xf32>) -> tensor<1x
 
 // -----
 
-#indexing_map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] ->
-    (d1 floordiv 30,
-    ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32,
-    (d1 mod 6) * 32 + d0 mod 32),
-  domain:
-    d0 in [0, 127], d1 in [0, 599],
-    s0 in [0, 7], s1 in [0, 0],
-    (d1 mod 6) * 32 + d0 mod 32 in [0, 169],
-    is_simplified: true>
-#indexing_map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] ->
-    (0,
-    d0 mod 32,
-    d0 floordiv 32 + s0 * 4),
-  domain:
-    d0 in [0, 127], d1 in [0, 599],
-    s0 in [0, 5], s1 in [0, 0],
-    (d1 mod 6) * 32 + d0 mod 32 in [0, 169],
-    is_simplified: true>
+#indexing_map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->"
+"   (d1 floordiv 30,"
+"   ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32,"
+"   (d1 mod 6) * 32 + d0 mod 32),"
+" domain:"
+"   d0 in [0, 127], d1 in [0, 599],"
+"   s0 in [0, 7], s1 in [0, 0],"
+"   (d1 mod 6) * 32 + d0 mod 32 in [0, 169],"
+"   is_simplified: true">
+#indexing_map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->"
+"   (0,"
+"   d0 mod 32,"
+"   d0 floordiv 32 + s0 * 4),"
+" domain:"
+"   d0 in [0, 127], d1 in [0, 599],"
+"   s0 in [0, 5], s1 in [0, 0],"
+"   (d1 mod 6) * 32 + d0 mod 32 in [0, 169],"
+"   is_simplified: true">
 func.func @do_not_fuse_map_domain_mismatch(%arg0: tensor<20x160x170xf32>) -> tensor<1x32x33xf32> {
   %cst = arith.constant dense<0.000000e+00> : vector<8x1xf32>
   %c0 = arith.constant 0 : index
@@ -207,24 +207,24 @@ func.func @do_not_fuse_map_domain_mismatch(%arg0: tensor<20x160x170xf32>) -> ten
 
 // -----
 
-#indexing_map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] ->
-    (d1 floordiv 30,
-    ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32,
-    (d1 mod 6) * 32 + d0 mod 32),
-  domain:
-    d0 in [0, 127], d1 in [0, 599],
-    s0 in [0, 7], s1 in [0, 0],
-    (d1 mod 6) * 32 + d0 mod 32 in [0, 169],
-    is_simplified: true>
-#indexing_map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] ->
-    (0,
-    d0 mod 32,
-    d0 floordiv 32 + s0 * 4),
-  domain:
-    d0 in [0, 127], d1 in [0, 599],
-    s0 in [0, 7], s1 in [0, 0],
-    (d1 mod 5) * 32 + d0 mod 32 in [0, 169],
-    is_simplified: true>
+#indexing_map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->"
+"   (d1 floordiv 30,"
+"   ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32,"
+"   (d1 mod 6) * 32 + d0 mod 32),"
+" domain:"
+"   d0 in [0, 127], d1 in [0, 599],"
+"   s0 in [0, 7], s1 in [0, 0],"
+"   (d1 mod 6) * 32 + d0 mod 32 in [0, 169],"
+"   is_simplified: true">
+#indexing_map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] ->"
+"   (0,"
+"   d0 mod 32,"
+"   d0 floordiv 32 + s0 * 4),"
+" domain:"
+"   d0 in [0, 127], d1 in [0, 599],"
+"   s0 in [0, 7], s1 in [0, 0],"
+"   (d1 mod 5) * 32 + d0 mod 32 in [0, 169],"
+"   is_simplified: true">
 func.func @do_not_fuse_map_constraint_mismatch(%arg0: tensor<20x160x170xf32>) -> tensor<1x32x33xf32> {
   %cst = arith.constant dense<0.000000e+00> : vector<8x1xf32>
   %c0 = arith.constant 0 : index
@@ -256,24 +256,24 @@ func.func @do_not_fuse_map_constraint_mismatch(%arg0: tensor<20x160x170xf32>) ->
 
 // -----
 
-#indexing_map = #xla_gpu.indexing_map<(d0, d1)[s0, s1, s2] ->
-    (d1 floordiv 30,
-    ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32,
-    (d1 mod 6) * 32 + d0 mod 32),
-  domain:
-    d0 in [0, 127], d1 in [0, 599],
-    s0 in [0, 7], s1 in [0, 0], s2 in [0, 1],
-    (d1 mod 6) * 32 + d0 mod 32 in [0, 169],
-    is_simplified: true>
-#indexing_map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1, s2] ->
-    (0,
-    d0 mod 32,
-    d0 floordiv 32 + s0 * 4),
-  domain:
-    d0 in [0, 127], d1 in [0, 599],
-    s0 in [0, 7], s1 in [0, 0], s2 in [0, 1],
-    (d1 mod 6) * 32 + d0 mod 32 in [0, 169],
-    is_simplified: true>
+#indexing_map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1, s2] ->"
+"   (d1 floordiv 30,"
+"   ((d1 floordiv 6) mod 5) * 32 + s0 * 4 + d0 floordiv 32,"
+"   (d1 mod 6) * 32 + d0 mod 32),"
+" domain:"
+"   d0 in [0, 127], d1 in [0, 599],"
+"   s0 in [0, 7], s1 in [0, 0], s2 in [0, 1],"
+"   (d1 mod 6) * 32 + d0 mod 32 in [0, 169],"
+"   is_simplified: true">
+#indexing_map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1, s2] ->"
+"   (0,"
+"   d0 mod 32,"
+"   d0 floordiv 32 + s0 * 4),"
+" domain:"
+"   d0 in [0, 127], d1 in [0, 599],"
+"   s0 in [0, 7], s1 in [0, 0], s2 in [0, 1],"
+"   (d1 mod 6) * 32 + d0 mod 32 in [0, 169],"
+"   is_simplified: true">
 func.func @do_not_fuse_unused_loop_iv(%arg0: tensor<20x160x170xf32>) -> tensor<1x32x33xf32> {
   %cst = arith.constant dense<0.000000e+00> : vector<8x1xf32>
   %c0 = arith.constant 0 : index
diff --git a/xla/service/gpu/fusions/transforms/tests/lower_xla_gpu_loops_to_scf.mlir b/xla/service/gpu/fusions/transforms/tests/lower_xla_gpu_loops_to_scf.mlir
index f02f7012b80cf..427e764d12b91 100644
--- a/xla/service/gpu/fusions/transforms/tests/lower_xla_gpu_loops_to_scf.mlir
+++ b/xla/service/gpu/fusions/transforms/tests/lower_xla_gpu_loops_to_scf.mlir
@@ -1,9 +1,9 @@
 // RUN: mlir_fusions_opt %s -xla-gpu-lower-xla-gpu-loops-to-scf  \
 // RUN: --split-input-file | FileCheck %s
 
-#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (s0 + 1, s1 - 1),
-  domain: d0 in [0, 3], s0 in [0, 1024], s1 in [0, 32], s0 + s1 in [0, 90],
-  is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (s0 + 1, s1 - 1),"
+  "domain: d0 in [0, 3], s0 in [0, 1024], s1 in [0, 32], s0 + s1 in [0, 90],"
+  "is_simplified: false">
 
 func.func @loop_op(%input: tensor<1024x32xf32>, %init: f32, %dim: index) -> (f32) {
   %sum = xla_gpu.loop (%dim)[%i, %j] -> (%ra, %rb)
@@ -15,9 +15,9 @@ func.func @loop_op(%input: tensor<1024x32xf32>, %init: f32, %dim: index) -> (f32
   func.return %sum : f32
 }
 
-// CHECK-DAG: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0)[s0, s1] -> (s0 + s1),
-// CHECK-DAG: #[[$MAPA:.*]] = #xla_gpu.indexing_map<(d0)[s0, s1] -> (s0 + 1),
-// CHECK-DAG: #[[$MAPB:.*]] = #xla_gpu.indexing_map<(d0)[s0, s1] -> (s1 - 1),
+// CHECK-DAG: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (s0 + s1),
+// CHECK-DAG: #[[$MAPA:.*]] = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (s0 + 1),
+// CHECK-DAG: #[[$MAPB:.*]] = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (s1 - 1),
 
 // CHECK-LABEL: func.func @loop_op(
 // CHECK-SAME:    %[[IN:.*]]: tensor<1024x32xf32>,
@@ -60,9 +60,9 @@ func.func @loop_op(%input: tensor<1024x32xf32>, %init: f32, %dim: index) -> (f32
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0)[s0, s1] -> (s0 + 1, s1 - 1),
-  domain: d0 in [0, 3], s0 in [0, 1024], s1 in [0, 32], s0 + s1 in [0, 90],
-  is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (s0 + 1, s1 - 1),"
+  "domain: d0 in [0, 3], s0 in [0, 1024], s1 in [0, 32], s0 + s1 in [0, 90],"
+  "is_simplified: false">
 
 func.func @loop_yields_value_from_above(%input: tensor<1024x32xf32>, %init: f32,
     %dim: index) -> (f32) {
diff --git a/xla/service/gpu/fusions/transforms/tests/lower_xla_gpu_to_scf.mlir b/xla/service/gpu/fusions/transforms/tests/lower_xla_gpu_to_scf.mlir
index dd15bdaafc533..347ed9a943ef8 100644
--- a/xla/service/gpu/fusions/transforms/tests/lower_xla_gpu_to_scf.mlir
+++ b/xla/service/gpu/fusions/transforms/tests/lower_xla_gpu_to_scf.mlir
@@ -124,12 +124,8 @@ func.func @predicated_extract(
 
 func.func private @exp(%p0: tensor<32x64xf32>, %i: index, %j: index) -> f32
 
-#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1),
-  domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 1], s1 in [0, 1],
-  is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0*2+s0, s1),
-  domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1], s1 in [0, 1],
-  is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1), domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 1], s1 in [0, 1], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0*2+s0, s1), domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1], s1 in [0, 1], is_simplified: false">
 
 func.func @materialize(%input: tensor<32x64xf32>, %i: index, %j: index)
     -> !xla_gpu.indexed_vector<32x2x2xf32, #map1> {
@@ -137,8 +133,8 @@ func.func @materialize(%input: tensor<32x64xf32>, %i: index, %j: index)
     : (tensor<32x64xf32>) -> !xla_gpu.indexed_vector<32x2x2xf32, #map1>
   func.return %0 : !xla_gpu.indexed_vector<32x2x2xf32, #map1>
 }
-// CHECK-DAG: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d1 * 32 + d0 * 2 + s0, s1)
-// CHECK-DAG: #[[$MAP1:.*]] = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0 * 2 + s0, s1)
+// CHECK-DAG: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d1 * 32 + d0 * 2 + s0, s1)
+// CHECK-DAG: #[[$MAP1:.*]] = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0 * 2 + s0, s1)
 
 // CHECK: @materialize(%[[INPUT:.*]]: tensor<32x64xf32>, %[[INDEX1:.*]]: index, %[[INDEX2:.*]]: index)
 
@@ -153,12 +149,8 @@ func.func @materialize(%input: tensor<32x64xf32>, %i: index, %j: index)
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1),
-  domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 1], s1 in [0, 1],
-  is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0, d1) -> (d0 mod 16, d1),
-  domain: d0 in [0, 32], d1 in [0, 2],
-  is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1), domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 1], s1 in [0, 1], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0, d1) -> (d0 mod 16, d1), domain: d0 in [0, 32], d1 in [0, 2], is_simplified: false">
 
 func.func @insert(%input: !xla_gpu.indexed_vector<32x64xf32, #map>,
     %i: index, %j: index, %output: tensor<32x64xf32>) -> tensor<32x64xf32> {
@@ -166,8 +158,8 @@ func.func @insert(%input: !xla_gpu.indexed_vector<32x64xf32, #map>,
     : !xla_gpu.indexed_vector<32x64xf32, #map> -> tensor<32x64xf32>
   func.return %0 : tensor<32x64xf32>
 }
-// CHECK-DAG: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d1 * 32 + d0 * 2 + s0, s1)
-// CHECK-DAG: #[[$MAP1:.*]] = #xla_gpu.indexing_map<(d0, d1) -> (d0 mod 16, d1)
+// CHECK-DAG: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d1 * 32 + d0 * 2 + s0, s1)
+// CHECK-DAG: #[[$MAP1:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> (d0 mod 16, d1)
 
 // CHECK:      @insert(%[[INPUT:.*]]: !xla_gpu.indexed_vector<32x64xf32, #[[$MAP]]>,
 // CHECK-SAME:   %[[I:.*]]: index, %[[J:.*]]: index,
@@ -179,7 +171,7 @@ func.func @insert(%input: !xla_gpu.indexed_vector<32x64xf32, #map>,
 
 // CHECK: %[[SCALAR:.*]] = vector.extract %{{.*}}[%[[S0]], %[[S1]]]
 // CHECK-SAME: : f32 from vector<2x2xf32>
-// CHECK: %[[MAP1_RESULT:.*]]:2 = xla_gpu.apply_indexing 
+// CHECK: %[[MAP1_RESULT:.*]]:2 = xla_gpu.apply_indexing
 // CHECK-SAME: #[[$MAP1]](%[[MAP_RESULT1]], %[[MAP_RESULT2]])
 // CHECK: %[[NEW_TENSOR:.*]] = tensor.insert %[[SCALAR]]
 // CHECK-SAME: into %[[TENSOR]][%[[MAP1_RESULT]]#0, %[[MAP1_RESULT]]#1]
@@ -189,15 +181,9 @@ func.func @insert(%input: !xla_gpu.indexed_vector<32x64xf32, #map>,
 
 func.func private @exp(%p0: tensor<32x64xf32>, %i: index, %j: index) -> f32
 
-#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1),
-  domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 1], s1 in [0, 1],
-  is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0*2+s0, s1),
-  domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1], s1 in [0, 1],
-  is_simplified: false>
-#map2 = #xla_gpu.indexing_map<(d0, d1) -> (d0, d1),
-  domain: d0 in [0, 32], d1 in [0, 2],
-  is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1), domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 1], s1 in [0, 1], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0*2+s0, s1), domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 1], s1 in [0, 1], is_simplified: false">
+#map2 = #xla_gpu.indexing_map<"(d0, d1) -> (d0, d1), domain: d0 in [0, 32], d1 in [0, 2], is_simplified: false">
 
 func.func @materialize_and_insert(%input: tensor<32x64xf32>, %i: index,
     %j: index, %output: tensor<32x64xf32>) -> tensor<32x64xf32> {
@@ -213,12 +199,8 @@ func.func @materialize_and_insert(%input: tensor<32x64xf32>, %i: index,
 
 func.func private @exp(%p0: tensor<32x64xcomplex<f32>>, %i: index, %j: index) -> complex<f32>
 
-#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1),
-  domain: d0 in [0, 32], d1 in [0, 8],
-  s0 in [0, 2], s1 in [0, 3], is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0*2+s0, s1),
-  domain: d0 in [0, 32], d1 in [0, 2],
-  s0 in [0, 2], s1 in [0, 3], is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d1*32+d0*2+s0, s1), domain: d0 in [0, 32], d1 in [0, 8], s0 in [0, 2], s1 in [0, 3], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0*2+s0, s1), domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 2], s1 in [0, 3], is_simplified: false">
 func.func @materialize_complex(
   %input: tensor<32x64xcomplex<f32>>,
   %output: tensor<32x64xcomplex<f32>>,
@@ -245,11 +227,8 @@ func.func @materialize_complex(
 
 // -----
 
-#map1 = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (d0*2+s0, s1),
-  domain: d0 in [0, 32], d1 in [0, 2],
-  s0 in [0, 2], s1 in [0, 3], is_simplified: false>
-#map2 = #xla_gpu.indexing_map<(d0, d1) -> (d0, d1),
-  domain: d0 in [0, 32], d1 in [0, 2], is_simplified: false>
+#map1 = #xla_gpu.indexing_map<"(d0, d1)[s0, s1] -> (d0*2+s0, s1), domain: d0 in [0, 32], d1 in [0, 2], s0 in [0, 2], s1 in [0, 3], is_simplified: false">
+#map2 = #xla_gpu.indexing_map<"(d0, d1) -> (d0, d1), domain: d0 in [0, 32], d1 in [0, 2], is_simplified: false">
 func.func @insert_complex(
   %input: !xla_gpu.indexed_vector<32x3x4xcomplex<f32>, #map1>,
   %output: tensor<32x64xcomplex<f32>>,
@@ -274,4 +253,4 @@ func.func @insert_complex(
 // CHECK: %[[IMAG:.*]] = vector.extract %[[VECTOR]][%[[C1]], %[[I]], %[[J]]]
 // CHECK: %[[COMPLEX:.*]] = complex.create %[[REAL]], %[[IMAG]]
 // CHECK: %[[INSERTED:.*]] = tensor.insert %[[COMPLEX]] into %[[ITER]]
-// CHECK: xla_gpu.yield %[[INSERTED]] : tensor<32x64xcomplex<f32>>
\ No newline at end of file
+// CHECK: xla_gpu.yield %[[INSERTED]] : tensor<32x64xcomplex<f32>>
diff --git a/xla/service/gpu/fusions/transforms/tests/optimize_loops.mlir b/xla/service/gpu/fusions/transforms/tests/optimize_loops.mlir
index dd7d639e3273e..17f478b2838dd 100644
--- a/xla/service/gpu/fusions/transforms/tests/optimize_loops.mlir
+++ b/xla/service/gpu/fusions/transforms/tests/optimize_loops.mlir
@@ -1,11 +1,7 @@
 // RUN: mlir_fusions_opt %s -split-input-file -xla-gpu-optimize-loops | FileCheck %s
 
-#map = #xla_gpu.indexing_map<(d0) -> (d0 floordiv 8),
-                              domain: d0 in [0, 31], is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0) -> (d0 mod 8),
-                              domain: d0 in [0, 31], is_simplified: false>
-#map2 = #xla_gpu.indexing_map<(d0, d1)[s0] -> (d1 * 2 + d0 + s0 * 512),
-                              domain: d0 in [0, 1], d1 in [0, 255], s0 in [0, 7], is_simplified: false>
+#map = #xla_gpu.indexing_map<"(d0) -> (d0 floordiv 8), domain: d0 in [0, 31], is_simplified: false"> #map1 = #xla_gpu.indexing_map<"(d0) -> (d0 mod 8), domain: d0 in [0, 31], is_simplified: false">
+#map2 = #xla_gpu.indexing_map<"(d0, d1)[s0] -> (d1 * 2 + d0 + s0 * 512), domain: d0 in [0, 1], d1 in [0, 255], s0 in [0, 7], is_simplified: false">
 module {
   func.func @fully_unroll(%arg0: tensor<4x8x4096xf32>, %arg1: tensor<4096xbf16>,
       %arg2: tensor<4x8xf32>, %arg3: tensor<4096xbf16>,
@@ -127,7 +123,7 @@ module {
   }
 }
 
-// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 + 1),
+// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 + 1),
 // CHECK-LABEL: @pipeline_extract
 // CHECK-DAG:  %[[C0:.*]] = arith.constant 0 : index
 // CHECK-DAG:  %[[C30:.*]] = arith.constant 30 : index
@@ -154,7 +150,7 @@ module {
     %cst = arith.constant dense<[0.0, 0.0]> : vector<2xf32>
     %cst0  = arith.constant 0.0 : f32
     %ret = scf.for %i = %c0 to %c17 step %c1 iter_args (%iter = %cst) -> (vector<2xf32>) {
-      %base = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0) -> (d0 * 2), domain: d0 in [0, 15], is_simplified: false>(%i)
+      %base = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0) -> (d0 * 2), domain: d0 in [0, 15], is_simplified: false">(%i)
       %val = vector.transfer_read %arg[%base], %cst0 : tensor<34xf32>, vector<2xf32>
       %log = math.log %val : vector<2xf32>
       %add = arith.addf %log, %iter : vector<2xf32>
@@ -164,8 +160,8 @@ module {
   }
 }
 
-// CHECK-DAG: #[[$MAP0:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 * 2),
-// CHECK-DAG: #[[$MAP1:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 + 1),
+// CHECK-DAG: #[[$MAP0:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 * 2),
+// CHECK-DAG: #[[$MAP1:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 + 1),
 // CHECK-LABEL: @pipeline_transfer
 // CHECK-DAG:  %[[C0:.*]] = arith.constant 0 : index
 // CHECK-DAG:  %[[C16:.*]] = arith.constant 16 : index
diff --git a/xla/service/gpu/fusions/transforms/tests/peel_loops.mlir b/xla/service/gpu/fusions/transforms/tests/peel_loops.mlir
index 8959fbb826bdd..f965b069a772c 100644
--- a/xla/service/gpu/fusions/transforms/tests/peel_loops.mlir
+++ b/xla/service/gpu/fusions/transforms/tests/peel_loops.mlir
@@ -1,16 +1,9 @@
 // RUN: mlir_fusions_opt -split-input-file %s -xla-gpu-peel-loops \
 // RUN: | FileCheck %s
 
-#map = #xla_gpu.indexing_map<
-  (d0)[s0, s1] -> (s0, s1),
-  domain:
-  d0 in [0, 3],
-  s0 in [0, 7],
-  s1 in [0, 10],
-  d0 + s0 in [0, 9],
-  d0 + s1 in [0, 12],
-  is_simplified: false
->
+#map = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (s0, s1), domain:"
+  "d0 in [0, 3], s0 in [0, 7], s1 in [0, 10], d0 + s0 in [0, 9],"
+  "d0 + s1 in [0, 12], is_simplified: false">
 func.func @peel_both_loops(%input: tensor<16x32xf32>,
     %init: f32, %dim: index) -> (f32) {
   %sum = xla_gpu.loop (%dim)[%i, %j] -> (%r0, %r1)
@@ -21,9 +14,9 @@ func.func @peel_both_loops(%input: tensor<16x32xf32>,
   }
   func.return %sum : f32
 }
-// CHECK: #[[$PEELED_MAP:.*]] = #xla_gpu.indexing_map<(d0)[s0, s1] -> (s0, s1), domain: d0 in [0, 3], s0 in [0, 6], s1 in [0, 9], is_simplified: true>
-// CHECK: #[[$TAIL_MAP0:.*]] = #xla_gpu.indexing_map<(d0)[s0, s1] -> (7, s1), domain: d0 in [0, 2], s0 in [7, 7], s1 in [0, 9], is_simplified: true>
-// CHECK: #[[$TAIL_MAP1:.*]] = #xla_gpu.indexing_map<(d0)[s0, s1] -> (s0, 10), domain: d0 in [0, 2], s0 in [0, 7], s1 in [10, 10], is_simplified: true>
+// CHECK: #[[$PEELED_MAP:.*]] = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (s0, s1), domain: d0 in [0, 3], s0 in [0, 6], s1 in [0, 9], is_simplified: true">
+// CHECK: #[[$TAIL_MAP0:.*]] = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (7, s1), domain: d0 in [0, 2], s0 in [7, 7], s1 in [0, 9], is_simplified: true">
+// CHECK: #[[$TAIL_MAP1:.*]] = #xla_gpu.indexing_map<"(d0)[s0, s1] -> (s0, 10), domain: d0 in [0, 2], s0 in [0, 7], s1 in [10, 10], is_simplified: true">
 
 // CHECK-LABEL: func.func @peel_both_loops(
 // CHECK-SAME:      %[[INPUT:.*]]: tensor<16x32xf32>,
@@ -48,13 +41,8 @@ func.func @peel_both_loops(%input: tensor<16x32xf32>,
 
 // -----
 
-#map = #xla_gpu.indexing_map<
-  (d0)[s0] -> (s0),
-  domain:
-  d0 in [0, 3],
-  s0 in [0, 7],
-  is_simplified: false
->
+#map = #xla_gpu.indexing_map<"(d0)[s0] -> (s0),"
+  "domain: d0 in [0, 3], s0 in [0, 7], is_simplified: false">
 func.func @not_constrained_symbol(%input: tensor<16xf32>, %init: f32,
     %dim: index) -> (f32) {
   %sum = xla_gpu.loop (%dim)[%i] -> (%r0)
@@ -72,12 +60,12 @@ func.func @not_constrained_symbol(%input: tensor<16xf32>, %init: f32,
 // -----
 
 #map = #xla_gpu.indexing_map<
-  (d0)[s0] -> (s0),
-  domain:
-  d0 in [0, 3],
-  s0 in [0, 7],
-  s0 mod 5 in [0, 1],
-  is_simplified: false
+"  (d0)[s0] -> (s0),"
+"  domain:"
+"  d0 in [0, 3],"
+"  s0 in [0, 7],"
+"  s0 mod 5 in [0, 1],"
+"  is_simplified: false"
 >
 func.func @constraint_exists_after_peeling(%input: tensor<16xf32>, %init: f32,
     %dim: index) -> (f32) {
@@ -91,4 +79,4 @@ func.func @constraint_exists_after_peeling(%input: tensor<16xf32>, %init: f32,
 }
 // CHECK-LABEL: func.func @constraint_exists_after_peeling
 // CHECK:     xla_gpu.loop
-// CHECK-NOT: xla_gpu.loop
\ No newline at end of file
+// CHECK-NOT: xla_gpu.loop
diff --git a/xla/service/gpu/fusions/transforms/tests/rewrite_reductions.mlir b/xla/service/gpu/fusions/transforms/tests/rewrite_reductions.mlir
index 94c6cddd4a8a4..5f8b9ba5413d8 100644
--- a/xla/service/gpu/fusions/transforms/tests/rewrite_reductions.mlir
+++ b/xla/service/gpu/fusions/transforms/tests/rewrite_reductions.mlir
@@ -19,7 +19,7 @@ func.func @row_reduction(%arg0: tensor<128x1027xf32>)
   return %0 : tensor<128xf32>
 }
 
-// CHECK: #[[$PAD_AND_RESHAPE:.*]] = #xla_gpu.indexing_map<(d0, d1, d2, d3) -> (d0, d1 * 128 + d2 * 32 + d3),
+// CHECK: #[[$PAD_AND_RESHAPE:.*]] = #xla_gpu.indexing_map<"(d0, d1, d2, d3) -> (d0, d1 * 128 + d2 * 32 + d3),
 // CHECK-SAME: domain: d0 in [0, 127], d1 in [0, 8], d2 in [0, 3], d3 in [0, 31], d1 * 128 + d2 * 32 + d3 in [0, 1026]
 // CHECK-LABEL: @row_reduction
 // CHECK-SAME:    %[[IN:.*]]: tensor<128x1027xf32>
@@ -77,9 +77,9 @@ func.func @column(%arg0: tensor<2x32x32xf32>)
   return %0 : tensor<2x32xf32>
 }
 
-// CHECK:       #[[$RESHAPE:.*]] = #xla_gpu.indexing_map<(d0, d1, d2, d3) -> (d0, d1 * 4 + d2, d3)
+// CHECK:       #[[$RESHAPE:.*]] = #xla_gpu.indexing_map<"(d0, d1, d2, d3) -> (d0, d1 * 4 + d2, d3)
 // CHECK-SAME:    d1 * 4 + d2 in [0, 31]
-// CHECK:       #[[$TRANSPOSE:.*]] = #xla_gpu.indexing_map<(d0, d1, d2) -> (d0, d2, d1)
+// CHECK:       #[[$TRANSPOSE:.*]] = #xla_gpu.indexing_map<"(d0, d1, d2) -> (d0, d2, d1)
 // CHECK-LABEL: @column
 // CHECK-SAME:    %[[IN:.*]]: tensor<2x32x32xf32>
 // CHECK:         %[[C0:.*]] = arith.constant 0.00
diff --git a/xla/service/gpu/fusions/transforms/tests/simplify_affine.mlir b/xla/service/gpu/fusions/transforms/tests/simplify_affine.mlir
index db78b88abd51e..bfddbd60e2bde 100644
--- a/xla/service/gpu/fusions/transforms/tests/simplify_affine.mlir
+++ b/xla/service/gpu/fusions/transforms/tests/simplify_affine.mlir
@@ -63,8 +63,9 @@ func.func @op_and_for_ranges(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.pt
   %1 = gpu.block_id  x
   scf.for %i = %c0 to %c4 step %c1 {
     %2 = xla_gpu.apply_indexing
-      #xla_gpu.indexing_map<()[s0, s1, s2] -> (s0 * 512 + s1 * 4 + s2 + (s1 floordiv 128) + (s2 floordiv 4)),
-      domain: s0 in [0, 3071], s1 in [0, 127], s2 in [0, 3], is_simplified: false>[%1, %0, %i]
+      #xla_gpu.indexing_map<
+        "()[s0, s1, s2] -> (s0 * 512 + s1 * 4 + s2 + (s1 floordiv 128) + (s2 floordiv 4)),"
+        "domain: s0 in [0, 3071], s1 in [0, 127], s2 in [0, 3], is_simplified: false">[%1, %0, %i]
     %3 = arith.index_castui %2 : index to i64
     %4 = llvm.getelementptr %arg0[%3] : (!llvm.ptr, i64) -> !llvm.ptr, f32
     %5 = llvm.load %4 invariant : !llvm.ptr -> f32
@@ -92,8 +93,9 @@ func.func @op_and_for_ranges(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.pt
 
 func.func @arg_ranges(%arg0: index, %arg1: index) -> index {
   %0 = xla_gpu.apply_indexing
-    #xla_gpu.indexing_map<()[s0, s1] -> (s0 floordiv 100 + s1 floordiv 100),
-    domain: s0 in [0, 42], s1 in [0, 1000], is_simplified: false>[%arg0, %arg1]
+    #xla_gpu.indexing_map<
+      "()[s0, s1] -> (s0 floordiv 100 + s1 floordiv 100),"
+      "domain: s0 in [0, 42], s1 in [0, 1000], is_simplified: false">[%arg0, %arg1]
   return %0 : index
 }
 
@@ -106,8 +108,8 @@ func.func @arg_ranges(%arg0: index, %arg1: index) -> index {
 
 func.func @cant_lower(%arg0: index, %arg1: index) -> (index, index) {
   %0:2 = xla_gpu.apply_indexing
-    #xla_gpu.indexing_map<()[s0, s1] -> (s0 floordiv 100 + s1 floordiv 100, s0 + s1),
-    domain: s0 in [-10, 42], s1 in [0, 1000], is_simplified: false>[%arg0, %arg1]
+    #xla_gpu.indexing_map<"()[s0, s1] -> (s0 floordiv 100 + s1 floordiv 100, s0 + s1),"
+  "domain: s0 in [-10, 42], s1 in [0, 1000], is_simplified: false">[%arg0, %arg1]
   return %0#0, %0#1 : index, index
 }
 
@@ -124,8 +126,9 @@ func.func @order_summands(%arg1: index) {
   scf.for %arg2 = %c0 to %c4 step %c1 {
     scf.for %arg3 = %c0 to %c4 step %c1 {
       %0 = xla_gpu.apply_indexing
-        #xla_gpu.indexing_map<()[s0, s1, s2] -> ((s0 + s1) floordiv 3 + s0 * 512 + s1 * 4 + s2 * 10),
-        domain: s0 in [0, 3], s1 in [0, 3], s2 in [0, 3], is_simplified: false>[%arg2, %arg1, %arg3]
+        #xla_gpu.indexing_map<
+          "()[s0, s1, s2] -> ((s0 + s1) floordiv 3 + s0 * 512 + s1 * 4 + s2 * 10),"
+          "domain: s0 in [0, 3], s1 in [0, 3], s2 in [0, 3], is_simplified: false">[%arg2, %arg1, %arg3]
       "dummy.op"(%0) : (index) -> ()
     }
   }
diff --git a/xla/service/gpu/fusions/transforms/tests/simplify_arith.mlir b/xla/service/gpu/fusions/transforms/tests/simplify_arith.mlir
index aaeb665815dcc..9524c3d32cc6c 100644
--- a/xla/service/gpu/fusions/transforms/tests/simplify_arith.mlir
+++ b/xla/service/gpu/fusions/transforms/tests/simplify_arith.mlir
@@ -248,7 +248,8 @@ func.func @refine_constraints(%tensor: tensor<100xf32>) -> tensor<100xf32> {
   %c42_f32 = arith.constant 42.0 : f32
   %loop = scf.for %i = %c0 to %c3 step %c1
       iter_args(%in_ = %tensor) -> (tensor<100xf32>) {
-    %0 = xla_gpu.apply_indexing #xla_gpu.indexing_map<(d0) -> (d0 mod 4), domain: d0 in [0, 9], is_simplified: false>(%i)
+    %0 = xla_gpu.apply_indexing #xla_gpu.indexing_map<"(d0) -> (d0 mod 4),"
+      "domain: d0 in [0, 9], is_simplified: false">(%i)
     %updated = tensor.insert %c42_f32 into %in_[%0] : tensor<100xf32>
     scf.yield %updated :tensor<100xf32>
   }
@@ -262,10 +263,11 @@ func.func @refine_constraints(%tensor: tensor<100xf32>) -> tensor<100xf32> {
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0, d1)[s0, s1] -> (((d0 * 4 + d1 * 512 + s1) floordiv 9 + s0 * 32768) mod 2400000),
-                             domain: d0 in [0, 127], d1 in [0, 575], s0 in [0, 73], s1 in [0, 3], is_simplified: false>
-#map1 = #xla_gpu.indexing_map<(d0, d1)[s0] -> ((d0 * 4 + d1 * 512 + s0) mod 9),
-                             domain: d0 in [0, 127], d1 in [0, 575], s0 in [0, 3], is_simplified: false>
+#map = #xla_gpu.indexing_map<
+  "(d0, d1)[s0, s1] -> (((d0 * 4 + d1 * 512 + s1) floordiv 9 + s0 * 32768) mod 2400000),"
+  "domain: d0 in [0, 127], d1 in [0, 575], s0 in [0, 73], s1 in [0, 3], is_simplified: false">
+#map1 = #xla_gpu.indexing_map<"(d0, d1)[s0] -> ((d0 * 4 + d1 * 512 + s0) mod 9),"
+  "domain: d0 in [0, 127], d1 in [0, 575], s0 in [0, 3], is_simplified: false">
 func.func @refine_constraints_for_symbol(%arg0: tensor<2400000x9xf32>,
     %arg1: tensor<2400000x9xf32>) -> tensor<2400000x9xf32> {
   %c0 = arith.constant 0 : index
@@ -289,12 +291,23 @@ func.func @refine_constraints_for_symbol(%arg0: tensor<2400000x9xf32>,
   }
   return %0 : tensor<2400000x9xf32>
 }
-// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1, d2, d3) -> (d2 * 32768 + (d0 * 4 + d1 * 512 + d3) floordiv 9),
+// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1, d2, d3) -> (d2 * 32768 + (d0 * 4 + d1 * 512 + d3) floordiv 9),
 // CHECK-LABEL: func.func @refine_constraints_for_symbol
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0, d1, d2, d3, d4, d5)[s0] -> ((d0 * 4 + s0) floordiv 6, (d0 * 4 + s0) mod 6), domain: d0 in [0, 29], d1 in [0, 0], d2 in [0, 0], d3 in [0, 0], d4 in [0, 0], d5 in [0, 0], s0 in [0, 3], d0 * 4 + s0 in [0, 29], is_simplified: false>
+#map = #xla_gpu.indexing_map<
+  "(d0, d1, d2, d3, d4, d5)[s0] -> ((d0 * 4 + s0) floordiv 6, (d0 * 4 + s0) mod 6),"
+  "domain:"
+  "d0 in [0, 29],"
+  "d1 in [0, 0],"
+  "d2 in [0, 0],"
+  "d3 in [0, 0],"
+  "d4 in [0, 0],"
+  "d5 in [0, 0],"
+  "s0 in [0, 3],"
+  "d0 * 4 + s0 in [0, 29],"
+  "is_simplified: false">
 func.func @dus(%arg0: tensor<20x30xf32>, %arg1: tensor<5x6xf32>, %arg2: i32, %arg3: i32, %arg4: tensor<20x30xf32>) -> tensor<20x30xf32> {
   %c24 = arith.constant 24 : index
   %c15 = arith.constant 15 : index
diff --git a/xla/service/gpu/fusions/transforms/tests/vectorize_loads_stores.mlir b/xla/service/gpu/fusions/transforms/tests/vectorize_loads_stores.mlir
index c77d035e6271b..0c734ca19882e 100644
--- a/xla/service/gpu/fusions/transforms/tests/vectorize_loads_stores.mlir
+++ b/xla/service/gpu/fusions/transforms/tests/vectorize_loads_stores.mlir
@@ -1,8 +1,8 @@
 // RUN: mlir_fusions_opt -allow-unregistered-dialect %s -split-input-file \
 // RUN:  -xla-gpu-vectorize-loads-stores -cse -canonicalize | FileCheck %s
 
-#map = #xla_gpu.indexing_map<(d0)[s0] -> (d0 * 2 + s0),
-                             domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true>
+#map = #xla_gpu.indexing_map<"(d0)[s0] -> (d0 * 2 + s0),"
+  "domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true">
 func.func @simple_read(%arg0: tensor<128xf32>) -> (f32) {
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
@@ -20,7 +20,7 @@ func.func @simple_read(%arg0: tensor<128xf32>) -> (f32) {
   }
   return %outer : f32
 }
-// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 * 2), domain: d0 in [0, 63], is_simplified: true>
+// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 * 2), domain: d0 in [0, 63], is_simplified: true">
 // CHECK-LABEL: @simple_read
 // CHECK-SAME:     (%[[ARG0:.*]]: tensor
 // CHECK-DAG:   %[[C0:.*]] = arith.constant 0 : index
@@ -36,8 +36,8 @@ func.func @simple_read(%arg0: tensor<128xf32>) -> (f32) {
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0)[s0] -> (d0 * 2 + s0 + 1),
-                             domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true>
+#map = #xla_gpu.indexing_map<"(d0)[s0] -> (d0 * 2 + s0 + 1),"
+  "domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true">
 func.func @misaligned_indexing_map(%arg0: tensor<128xf32>) -> (f32) {
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
@@ -60,8 +60,8 @@ func.func @misaligned_indexing_map(%arg0: tensor<128xf32>) -> (f32) {
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0)[s0] -> (d0 * 3 + s0),
-                             domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true>
+#map = #xla_gpu.indexing_map<"(d0)[s0] -> (d0 * 3 + s0),"
+  "domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true">
 func.func @misaligned_indexing_map_2(%arg0: tensor<128xf32>) -> (f32) {
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
@@ -84,8 +84,8 @@ func.func @misaligned_indexing_map_2(%arg0: tensor<128xf32>) -> (f32) {
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0)[s0] -> (3 * d0 + s0),
-                             domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true>
+#map = #xla_gpu.indexing_map<"(d0)[s0] -> (3 * d0 + s0),"
+  "domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true">
 func.func @misaligned_shape(%arg0: tensor<192xf32>) -> (f32) {
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
@@ -108,8 +108,8 @@ func.func @misaligned_shape(%arg0: tensor<192xf32>) -> (f32) {
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0)[s0] -> (d0 + s0 * 2),
-                             domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true>
+#map = #xla_gpu.indexing_map<"(d0)[s0] -> (d0 + s0 * 2),"
+  "domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true">
 func.func @wrong_stride(%arg0: tensor<128xf32>) -> (f32) {
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
@@ -134,8 +134,8 @@ func.func @wrong_stride(%arg0: tensor<128xf32>) -> (f32) {
 
 // We could vectorize this as a float vector load of double the size, but we
 // don't currently.
-#map = #xla_gpu.indexing_map<(d0)[s0] -> (2 * d0 + s0),
-                             domain: d0 in [0, 127], s0 in [0, 1], is_simplified: true>
+#map = #xla_gpu.indexing_map<"(d0)[s0] -> (2 * d0 + s0),"
+  "domain: d0 in [0, 127], s0 in [0, 1], is_simplified: true">
 func.func @simple_read_complex(%arg0: tensor<128xcomplex<f32>>, %i: index) -> (complex<f32>) {
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
@@ -250,10 +250,12 @@ func.func @write_not_yielded(%arg0: tensor<64xf32>) -> tensor<64xf32> {
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0, d1)[s0] -> (d1 * 2 + d0 + s0 * 512),
-                             domain: d0 in [0, 7], d1 in [0, 255], s0 in [0, 7], is_simplified: true>
-#map1 = #xla_gpu.indexing_map<(d0, d1, d2)[s0] -> (d0 * 32 + d2 * 2 + d1 + s0 * 512),
-                             domain: d0 in [0, 7], d1 in [0, 1], d2 in [0, 255], s0 in [0, 7], is_simplified: true>
+#map = #xla_gpu.indexing_map<"(d0, d1)[s0] -> (d1 * 2 + d0 + s0 * 512),"
+  "domain: d0 in [0, 7], d1 in [0, 255], s0 in [0, 7], is_simplified: true">
+#map1 = #xla_gpu.indexing_map<
+  "(d0, d1, d2)[s0] -> (d0 * 32 + d2 * 2 + d1 + s0 * 512),"
+  "domain: d0 in [0, 7], d1 in [0, 1], d2 in [0, 255], s0 in [0, 7],"
+  "is_simplified: true">
 func.func @multiple(%arg0: tensor<131072xf32>, %arg1: tensor<4096xbf16>,
       %arg2: tensor<32xf32>, %arg3: tensor<131072xf32>,
       %arg4: index) -> (tensor<131072xf32>, f32) {
@@ -280,8 +282,8 @@ func.func @multiple(%arg0: tensor<131072xf32>, %arg1: tensor<4096xbf16>,
   }
   return %0#0, %0#1 : tensor<131072xf32>, f32
 }
-// CHECK-DAG: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0, d1) -> (d0 * 2 + d1 * 512), domain: d0 in [0, 255], d1 in [0, 7], is_simplified: true>
-// CHECK-DAG: #[[$MAP1:.*]] = #xla_gpu.indexing_map<(d0, d1, d2) -> (d0 * 32 + d1 * 2 + d2 * 512), domain: d0 in [0, 7], d1 in [0, 255], d2 in [0, 7], is_simplified: true>
+// CHECK-DAG: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0, d1) -> (d0 * 2 + d1 * 512), domain: d0 in [0, 255], d1 in [0, 7], is_simplified: true">
+// CHECK-DAG: #[[$MAP1:.*]] = #xla_gpu.indexing_map<"(d0, d1, d2) -> (d0 * 32 + d1 * 2 + d2 * 512), domain: d0 in [0, 7], d1 in [0, 255], d2 in [0, 7], is_simplified: true">
 // CHECK-LABEL: @multiple
 // CHECK-SAME: (%[[ARG0:.*]]: tensor{{.*}}, %[[ARG1:.*]]: tensor{{.*}}, %[[ARG2:.*]]: tensor{{.*}}, %[[ARG3:.*]]: tensor{{.*}}, %[[ARG4:.*]]: index)
 // CHECK:      %[[C0:.*]] = arith.constant 0 : index
@@ -304,8 +306,8 @@ func.func @multiple(%arg0: tensor<131072xf32>, %arg1: tensor<4096xbf16>,
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0)[s0] -> ((d0 * 4) mod 64 + s0),
-                             domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true>
+#map = #xla_gpu.indexing_map<"(d0)[s0] -> ((d0 * 4) mod 64 + s0),"
+  "domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true">
 func.func @remainder_with_modulo(%arg0: tensor<128xf32>) -> (f32) {
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
@@ -323,7 +325,7 @@ func.func @remainder_with_modulo(%arg0: tensor<128xf32>) -> (f32) {
   }
   return %outer : f32
 }
-// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<(d0) -> ((d0 mod 16) * 4),
+// CHECK: #[[$MAP:.*]] = #xla_gpu.indexing_map<"(d0) -> ((d0 mod 16) * 4),
 // CHECK-LABEL: @remainder_with_modulo
 // CHECK: %[[C0:.*]] = arith.constant 0 : index
 // CHECK: scf.for %[[I:.*]] = %[[C0]]
@@ -332,8 +334,8 @@ func.func @remainder_with_modulo(%arg0: tensor<128xf32>) -> (f32) {
 
 // -----
 
-#map = #xla_gpu.indexing_map<(d0)[s0] -> ((d0 * 4) mod 65 + s0),
-                             domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true>
+#map = #xla_gpu.indexing_map<"(d0)[s0] -> ((d0 * 4) mod 65 + s0),"
+  "domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true">
 func.func @remainder_with_modulo_misaligned(%arg0: tensor<128xf32>) -> (f32) {
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
@@ -356,10 +358,10 @@ func.func @remainder_with_modulo_misaligned(%arg0: tensor<128xf32>) -> (f32) {
 
 // -----
 
-#map0 = #xla_gpu.indexing_map<(d0) -> (d0 + 5),
-                              domain: d0 in [0, 63], is_simplified: true>
-#map1 = #xla_gpu.indexing_map<(d0)[s0] -> (d0 * 2 + s0),
-                              domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true>
+#map0 = #xla_gpu.indexing_map<"(d0) -> (d0 + 5),"
+  "domain: d0 in [0, 63], is_simplified: true">
+#map1 = #xla_gpu.indexing_map<"(d0)[s0] -> (d0 * 2 + s0),"
+  "domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true">
 module {
   func.func @apply_indexing_sequence(%arg0: tensor<128xf32>) -> (f32) {
     %c0 = arith.constant 0 : index
@@ -381,8 +383,8 @@ module {
   }
 }
 
-// CHECK: #[[$MAP0:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 * 2 + 10),
-// CHECK-SAME:                                  domain: d0 in [0, 63], is_simplified: true>
+// CHECK: #[[$MAP0:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 * 2 + 10),
+// CHECK-SAME:                                  domain: d0 in [0, 63], is_simplified: true">
 // CHECK-LABEL: @apply_indexing_sequence
 // CHECK: %[[BASE:.*]] = xla_gpu.apply_indexing #[[$MAP0]]
 // CHECK: vector.transfer_read {{.*}}[%[[BASE]]]
@@ -390,10 +392,10 @@ module {
 // -----
 
 
-#map0 = #xla_gpu.indexing_map<(d0) -> (d0 + 5),
-                              domain: d0 in [0, 63], is_simplified: true>
-#map1 = #xla_gpu.indexing_map<(d0)[s0] -> (d0 * 2 + s0),
-                              domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true>
+#map0 = #xla_gpu.indexing_map<"(d0) -> (d0 + 5),"
+  "domain: d0 in [0, 63], is_simplified: true">
+#map1 = #xla_gpu.indexing_map<"(d0)[s0] -> (d0 * 2 + s0),"
+  "domain: d0 in [0, 63], s0 in [0, 1], is_simplified: true">
 module {
   func.func @apply_indexing_sequence_same_block(%arg0: tensor<128xf32>) -> (f32) {
     %c0 = arith.constant 0 : index
@@ -418,4 +420,4 @@ module {
 }
 
 // CHECK-LABEL: @apply_indexing_sequence_same_block
-// CHECK-NOT: vector.transfer_read
\ No newline at end of file
+// CHECK-NOT: vector.transfer_read
diff --git a/xla/service/gpu/fusions/triton/triton_fusion_emitter_device_test.cc b/xla/service/gpu/fusions/triton/triton_fusion_emitter_device_test.cc
index a2de97c39bfe0..f136f7190d1a6 100644
--- a/xla/service/gpu/fusions/triton/triton_fusion_emitter_device_test.cc
+++ b/xla/service/gpu/fusions/triton/triton_fusion_emitter_device_test.cc
@@ -213,7 +213,7 @@ ENTRY main {
                                    "num_warps":"1"}}}})";
   TF_EXPECT_OK(CreateTritonIrAndFileCheck(this, kHloText,
                                           "triton_softmax_computation", R"(
-CHECK:        #indexing_map = #xla_gpu.indexing_map<(d0) -> (d0 * 127), domain: d0 in [0, 124], is_simplified: true>
+CHECK:        #indexing_map = #xla_gpu.indexing_map<"(d0) -> (d0 * 127), domain: d0 in [0, 124], is_simplified: true">
 CHECK:        tt.func @triton_fn(%[[P0:[^:]*]]: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %[[P1:[^:]*]]: !tt.ptr<f32> {tt.divisibility = 16 : i32}) {
 CHECK-DAG:        %[[ZERO:.*]] = arith.constant 0 : i32
 CHECK-DAG:        %[[C125:.*]] = arith.constant 125 : i64
@@ -278,7 +278,7 @@ ENTRY main {
                                    "num_warps":"1"}}}})";
   TF_EXPECT_OK(CreateTritonIrAndFileCheck(this, kHloText,
                                           "triton_softmax_computation", R"(
-CHECK:         #indexing_map = #xla_gpu.indexing_map<(d0) -> (d0 * 127), domain: d0 in [0, 124], is_simplified: true>
+CHECK:         #indexing_map = #xla_gpu.indexing_map<"(d0) -> (d0 * 127), domain: d0 in [0, 124], is_simplified: true">
 CHECK:         tt.func @triton_fn(
 CHECK-SAME:                      %[[P0:[A-Za-z0-9_]*]]: !tt.ptr<f32>
 CHECK-SAME:                      %[[P1:[A-Za-z0-9_]*]]: !tt.ptr<f32>
@@ -349,9 +349,9 @@ ENTRY main {
 
   TF_EXPECT_OK(CreateTritonIrAndFileCheck(this, kHloText,
                                           "triton_softmax_computation", R"(
-CHECK:        #[[MAP:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 floordiv 125), domain: d0 in [0, 1249], is_simplified: true>
-CHECK:        #[[MAP1:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 mod 125), domain: d0 in [0, 1249], is_simplified: true>
-CHECK:        #[[MAP2:.*]] = #xla_gpu.indexing_map<(d0) -> (d0 * 127), domain: d0 in [0, 1249], is_simplified: true>
+CHECK:        #[[MAP:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 floordiv 125), domain: d0 in [0, 1249], is_simplified: true">
+CHECK:        #[[MAP1:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 mod 125), domain: d0 in [0, 1249], is_simplified: true">
+CHECK:        #[[MAP2:.*]] = #xla_gpu.indexing_map<"(d0) -> (d0 * 127), domain: d0 in [0, 1249], is_simplified: true">
 CHECK:        tt.func @triton_fn(%[[P0:[^:]*]]: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %[[P1:[^:]*]]: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %[[P2:[^:]*]]: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %[[P3:[^:]*]]: !tt.ptr<f32> {tt.divisibility = 16 : i32}) {
 CHECK-DAG:        %[[ZERO:.*]] = arith.constant 0 : i32
 CHECK-DAG:        %[[ZERO_64:.*]] = arith.constant 0 : i64
@@ -542,8 +542,8 @@ ENTRY main {
 
   TF_ASSERT_OK(CreateTritonIrAndFileCheck(this, kHloText,
                                           "triton_softmax_computation", R"(
-// CHECK:         #xla_gpu.indexing_map<(d0) -> (d0 floordiv 32), domain: d0 in [0, 2047], is_simplified: true>
-// CHECK:         #xla_gpu.indexing_map<(d0) -> (d0 mod 32), domain: d0 in [0, 2047], is_simplified: true>
+// CHECK:         #xla_gpu.indexing_map<"(d0) -> (d0 floordiv 32), domain: d0 in [0, 2047], is_simplified: true">
+// CHECK:         #xla_gpu.indexing_map<"(d0) -> (d0 mod 32), domain: d0 in [0, 2047], is_simplified: true">
 // CHECK-LABEL:   tt.func @triton_fn(
 // CHECK-SAME:                       %[[P0:[A-Za-z0-9_]*]]: !tt.ptr<f32>
 // CHECK-SAME:                       %[[P1:[A-Za-z0-9_]*]]: !tt.ptr<f32>