ROCm · dhernandez0 · Apr 7, 2026 · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026
diff --git a/docs/dev/triage-rocmlir.rst b/docs/dev/triage-rocmlir.rst
@@ -44,7 +44,7 @@ If error message like above is present then proceed with following steps.
 ::
 
    module {
-     func.func @mlir_convolution(%arg0: !migraphx.shaped<2x8x3x3xf32, 72x9x3x1>, %arg1: !migraphx.shaped<1x8x4x4xf32, 128x16x4x1>) -> !migraphx.shaped<1x2x2x2xf32, 8x4x2x1> attributes {arch = "gfx90a:sramecc+:xnack-", enable_splitk_for_tuning = true, kernel = "mixr", num_cu = 110 : i64} {
+     func.func @mlir_convolution(%arg0: !migraphx.shaped<2x8x3x3xf32, 72x9x3x1>, %arg1: !migraphx.shaped<1x8x4x4xf32, 128x16x4x1>) -> !migraphx.shaped<1x2x2x2xf32, 8x4x2x1> attributes {rock.arch = "gfx90a:sramecc+:xnack-", rock.enable_splitk_for_tuning = true, rock.kernel = "mixr", rock.num_cu = 110 : i64, rock.num_chiplets = 1 : i64} {
        %0 = migraphx.convolution %arg1, %arg0 {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : <1x8x4x4xf32, 128x16x4x1>, <2x8x3x3xf32, 72x9x3x1> -> <1x2x2x2xf32, 8x4x2x1>
        return %0 : !migraphx.shaped<1x2x2x2xf32, 8x4x2x1>
      }
@@ -132,7 +132,7 @@ Step 4 See if its a ``B3`` - accuracy issue of MLIR-generated kernel
 ::
 
  module {
-     func.func @mlir_dot_add(%arg0: !migraphx.shaped<1x5x4xf32, 20x4x1>, %arg1: !migraphx.shaped<1x4x3xf32, 12x3x1>, %arg2: !migraphx.shaped<1x5x3xf32, 15x3x1>) -> !migraphx.shaped<1x5x3xf32, 15x3x1> attributes {arch = "gfx90a:sramecc+:xnack-", enable_splitk_for_tuning = true, kernel = "mixr", num_cu = 110 : i64} {
+     func.func @mlir_dot_add(%arg0: !migraphx.shaped<1x5x4xf32, 20x4x1>, %arg1: !migraphx.shaped<1x4x3xf32, 12x3x1>, %arg2: !migraphx.shaped<1x5x3xf32, 15x3x1>) -> !migraphx.shaped<1x5x3xf32, 15x3x1> attributes {rock.arch = "gfx90a:sramecc+:xnack-", rock.enable_splitk_for_tuning = true, rock.kernel = "mixr", rock.num_cu = 110 : i64, rock.num_chiplets = 1 : i64} {
        %0 = migraphx.dot %arg0, %arg1 : <1x5x4xf32, 20x4x1>, <1x4x3xf32, 12x3x1> -> <1x5x3xf32, 15x3x1>
        %1 = migraphx.add %0, %arg2 : <1x5x3xf32, 15x3x1>, <1x5x3xf32, 15x3x1> -> <1x5x3xf32, 15x3x1>
        return %1 : !migraphx.shaped<1x5x3xf32, 15x3x1>

@@ -36,6 +36,7 @@
 #include <mlir-c/Dialect/RockEnums.h>
 #include <numeric>
 #include <ostream>
+#include <tuple>
 
 #ifdef MIGRAPHX_MLIR
 #include <mlir-c/IR.h>
@@ -48,8 +49,8 @@
 #include <mlir-c/Pass.h>
 #include <mlir-c/Support.h>
 #include <mutex>
-#if !defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) || MLIR_MIGRAPHX_DIALECT_API_VERSION != 4
+#if !defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) || MLIR_MIGRAPHX_DIALECT_API_VERSION != 5
 #warning "Incompatible version of rocMLIR library used, disabling"
 // Only undefine when not using cppcheck
 #ifndef CPPCHECK
 #undef MIGRAPHX_MLIR
@@ -639,13 +640,13 @@
         auto ops = create_operation_state("func.func");
         ops.add_attributes({{"function_type", make_function_type(input_shapes, outputs)},
                             {"sym_name", sym_name},
-                            {"kernel", std::string("mixr")},
-                            {"arch", target_arch},
-                            {"num_cu", num_cu},
-                            {"num_chiplets", num_chiplets}});
+                            {"rock.kernel", std::string("mixr")},
+                            {"rock.arch", target_arch},
+                            {"rock.num_cu", num_cu},
+                            {"rock.num_chiplets", num_chiplets}});
         if(enabled(MIGRAPHX_MLIR_ENABLE_SPLITK{}))
         {
-            ops.add_attributes({{"enable_splitk_for_tuning", mlirUnitAttrGet(ctx.get())}});
+            ops.add_attributes({{"rock.enable_splitk_for_tuning", mlirUnitAttrGet(ctx.get())}});
         }
         ops.add_region(std::move(region));
         insert(body, std::move(ops));
@@ -912,10 +913,14 @@
         }
     }
 
-    void run_backend_pipeline()
+    void run_backend_pipeline(const std::string& solution)
     {
         mlir_pass_manager pm_back{mlirPassManagerCreate(ctx.get())};
-        mlirMIGraphXAddBackendPipeline(pm_back.get(), target_arch.c_str());
+        MlirMIGraphXBackendOptions opts{};
+        opts.arch       = target_arch.c_str();
+        opts.perfConfig = solution.c_str();
-        opts.perfConfig = solution.c_str();
+        opts.perfConfig = solution.empty() ? nullptr : solution.c_str();
-        opts.perfConfig = solution.c_str();
+        opts.perfConfig = solution.empty() ? nullptr : solution.c_str();
+        opts.optLevel   = 3;
+        mlirMIGraphXAddBackendPipeline(pm_back.get(), &opts);
         logger.clear();
         const size_t trace = value_of(MIGRAPHX_TRACE_MLIR{});
         static std::mutex mutex;
@@ -944,15 +949,17 @@
         std::string tuning_cfg_path = string_value_of(MIGRAPHX_MLIR_TUNING_CFG{});
         if(not tuning_cfg_path.empty())
             get_module_tuned();
-        if(not solution.is_null())
-            set_tuning(solution);
+        if(solution.is_null())
+            MIGRAPHX_THROW("MLIR backend pipeline requires a tuning solution");
+        set_tuning(solution);
         // 2nd pipeline to call
-        run_backend_pipeline();
+        run_backend_pipeline(solution.to<std::string>());
 
         code_object_op op{};
         op.symbol_name                = sym_name;
         op.code_object                = get_binary();
-        std::tie(op.global, op.local) = get_launch_params();
+        // TODO: update code_object_op to use cluster size
+        std::tie(std::ignore, op.global, op.local) = get_launch_params();
         return op;
     }
 
@@ -964,14 +971,15 @@
         num_chiplets       = device.get_chiplet_count();
     }
 
-    std::pair<std::size_t, std::size_t> get_launch_params() const
+    std::tuple<std::size_t, std::size_t, std::size_t> get_launch_params() const
     {
-        uint32_t attrs[2];
-        // returns block and grid sizes
+        uint32_t attrs[3];
+        // returns block, grid and cluster sizes
         mlirGetKernelAttrs(mmodule.get(), attrs);
-        std::size_t local  = attrs[0];
-        std::size_t global = local * attrs[1];
-        return {global, local};
+        std::size_t local   = attrs[0];
+        std::size_t global  = local * attrs[1];
+        std::size_t cluster = attrs[2];
+        return {cluster, global, local};
     }
 
     value::binary get_binary() const
@@ -1391,11 +1399,11 @@
 #else

 template <class T>
 void use(T&)
 {
 }

 std::string dump_mlir(module) { return {}; }

 std::string dump_mlir(module m, const std::vector<shape>& inputs)
 {

diff --git a/test/gpu/mlir.cpp b/test/gpu/mlir.cpp
@@ -161,9 +161,9 @@ static std::string get_attrs()
 {
     if(migraphx::enabled(MIGRAPHX_MLIR_ENABLE_SPLITK{}))
     {
-        return R"({arch = "", enable_splitk_for_tuning, kernel = "mixr", num_chiplets = 0 : i64, num_cu = 0 : i64})";
+        return R"({rock.arch = "", rock.enable_splitk_for_tuning, rock.kernel = "mixr", rock.num_chiplets = 0 : i64, rock.num_cu = 0 : i64})";
     }
-    return R"({arch = "", kernel = "mixr", num_chiplets = 0 : i64, num_cu = 0 : i64})";
+    return R"({rock.arch = "", rock.kernel = "mixr", rock.num_chiplets = 0 : i64, rock.num_cu = 0 : i64})";
 }
 
 TEST_CASE(conv)