From 7ea87ed0fed90e59b73e6c6e8e632aa95edf44e4 Mon Sep 17 00:00:00 2001 From: Vincent Date: Wed, 18 Feb 2026 21:47:50 +0000 Subject: [PATCH 01/16] Lower `migraphx.conv` into `linalg.generic` and other forms --- .../MIGraphXToTosa/MIGraphXToTosa.h | 3 + .../MIGraphXToLinalg/MIGraphXToLinalg.cpp | 307 +++++++++++++++++- .../MIGraphXToLinalg/MIGraphXToLinalgPass.cpp | 7 + .../MIGraphXToTosa/MIGraphXToTosa.cpp | 4 + .../e2e/migraphx-to-linalg-conv-cpu.e2e.mlir | 44 +++ .../migraphx-to-linalg-conv.mlir | 44 +++ .../migraphx-to-linalg-not-implemented.mlir | 6 - mlir/tools/rocmlir-driver/rocmlir-driver.cpp | 20 +- 8 files changed, 419 insertions(+), 16 deletions(-) create mode 100644 mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv-cpu.e2e.mlir create mode 100644 mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir diff --git a/mlir/include/mlir/Conversion/MIGraphXToTosa/MIGraphXToTosa.h b/mlir/include/mlir/Conversion/MIGraphXToTosa/MIGraphXToTosa.h index 2c2912a1d083..6ef5d42e0a95 100644 --- a/mlir/include/mlir/Conversion/MIGraphXToTosa/MIGraphXToTosa.h +++ b/mlir/include/mlir/Conversion/MIGraphXToTosa/MIGraphXToTosa.h @@ -66,6 +66,9 @@ void populateMIGraphXFuncBoundaryToTosaConversionPatterns( RewritePatternSet &patterns, TypeConverter &typeConverter); void addMIGraphXToTosaPasses(OpPassManager &pm); + +void populateMIGraphXToLinalgMHALLauncherConversion( + RewritePatternSet &target, TypeConverter &typeConverter); } // namespace migraphx } // namespace mlir diff --git a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp index e7c6083c78e5..e1183a80815a 100644 --- a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp +++ b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp @@ -104,6 +104,307 @@ LogicalResult AsUnderlyingShapeConverter::matchAndRewrite( "input shape is non standard or broadcast; cannot convert this shape"); } +namespace { +struct ConvConverter 
final + : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + using OpConversionPattern::getTypeConverter; + using OpAdaptor = + typename OpConversionPattern::OpAdaptor; + + LogicalResult + matchAndRewrite(migraphx::ConvolutionOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override; + +private: + LogicalResult emitConv(ConversionPatternRewriter &rewriter, + migraphx::ConvolutionOp op, Value input, + Value filter) const; +}; +} // namespace + +// Helper function that builds the linalg.generic op region (multiply-accumulate body) +static void convBodyBuilder(OpBuilder &b, Location loc, ValueRange blockArgs) { + Value inputVal = blockArgs[0]; + Value filterVal = blockArgs[1]; + Value outputVal = blockArgs[2]; + Value mul = arith::MulFOp::create(b, loc, inputVal, filterVal); + Value add = arith::AddFOp::create(b, loc, outputVal, mul); + linalg::YieldOp::create(b, loc, add); +} + +/// Emit Conv1D expect input shape to be (batch, group, channel, height), +/// filter to be (group, filter, channel, height) +static Value emitGroupedConv1D(ConversionPatternRewriter &rewriter, + Location loc, RankedTensorType resultType, + Value input, Value filter, Value zero, + Attribute strides, Attribute dilation) { + MLIRContext *ctx = rewriter.getContext(); + auto strideArr = cast(strides); + auto dilationArr = cast(dilation); + int64_t strideVal = cast(strideArr[0]).getInt(); + int64_t dilationVal = cast(dilationArr[0]).getInt(); + + // Iteration domain: (batch, group, filter, oh, channel, kh) + AffineExpr batch, group, filterExpr, oh, channel, kh; + bindDims(ctx, batch, group, filterExpr, oh, channel, kh); + + AffineMap inputMap = AffineMap::get( + 6, 0, {batch, group, channel, oh * strideVal + kh * dilationVal}, ctx); + AffineMap filterMap = + AffineMap::get(6, 0, {group, filterExpr, channel, kh}, ctx); + AffineMap outputMap = + AffineMap::get(6, 0, {batch, group, filterExpr, oh}, ctx); + + SmallVector indexingMaps = {inputMap, filterMap, outputMap}; + 
SmallVector iteratorTypes = { + utils::IteratorType::parallel, // n + utils::IteratorType::parallel, // g + utils::IteratorType::parallel, // f + utils::IteratorType::parallel, // oh + utils::IteratorType::reduction, // c + utils::IteratorType::reduction, // kh + }; + + return linalg::GenericOp::create(rewriter, loc, resultType, + ValueRange{input, filter}, zero, + indexingMaps, iteratorTypes, convBodyBuilder) + .getResult(0); +} + +/// Emit Conv3D expect input shape to be (batch, group, channel, h, w, d), +// filter to be (group, filter, channel, kh, kw, kd) +static Value emitGroupedConv3D(ConversionPatternRewriter &rewriter, + Location loc, RankedTensorType resultType, + Value input, Value filter, Value zero, + Attribute strides, Attribute dilation) { + MLIRContext *ctx = rewriter.getContext(); + auto strideArr = cast(strides); + auto dilationArr = cast(dilation); + int64_t strideH = cast(strideArr[0]).getInt(); + int64_t strideW = cast(strideArr[1]).getInt(); + int64_t strideD = cast(strideArr[2]).getInt(); + int64_t dilationH = cast(dilationArr[0]).getInt(); + int64_t dilationW = cast(dilationArr[1]).getInt(); + int64_t dilationD = cast(dilationArr[2]).getInt(); + + // Iteration domain: + // (batch, group, filter, oh, ow, od, channel, kh, kw, kd) + AffineExpr batch, group, filterExpr, oh, ow, od, channel, kh, kw, kd; + bindDims(ctx, batch, group, filterExpr, oh, ow, od, channel, kh, kw, kd); + + AffineMap inputMap = AffineMap::get( + 10, 0, + {batch, group, channel, oh * strideH + kh * dilationH, + ow * strideW + kw * dilationW, od * strideD + kd * dilationD}, + ctx); + AffineMap filterMap = + AffineMap::get(10, 0, {group, filterExpr, channel, kh, kw, kd}, ctx); + AffineMap outputMap = + AffineMap::get(10, 0, {batch, group, filterExpr, oh, ow, od}, ctx); + + SmallVector indexingMaps = {inputMap, filterMap, outputMap}; + SmallVector iteratorTypes = { + utils::IteratorType::parallel, // batch + utils::IteratorType::parallel, // group + 
utils::IteratorType::parallel, // filter + utils::IteratorType::parallel, // oh + utils::IteratorType::parallel, // ow + utils::IteratorType::parallel, // od + utils::IteratorType::reduction, // channel + utils::IteratorType::reduction, // kh + utils::IteratorType::reduction, // kw + utils::IteratorType::reduction, // kd + }; + + return linalg::GenericOp::create(rewriter, loc, resultType, + ValueRange{input, filter}, zero, + indexingMaps, iteratorTypes, convBodyBuilder) + .getResult(0); +} + +LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter, + migraphx::ConvolutionOp op, Value input, + Value filter) const { + // Input and filter are already in NGC* and GFC* form (group dimension + // expanded). Build the result type as NGF* (with explicit G), emit the + // grouped linalg conv (1D/2D/3D), then collapse back to NF* for the type + // converter. + Location loc = op.getLoc(); + int64_t group = op.getGroupAttr().getInt(); + int64_t dim = cast(input.getType()).getRank() - + 3; // exclude batch (N), group (G), channel (C) + + // Result type from the op is NF*; expand to NGF* for the linalg conv. 
+ RankedTensorType resultType = + cast(getTypeConverter()->convertType(op.getResult())); + ArrayRef resultShape = resultType.getShape(); + SmallVector newShape; + int64_t n = resultType.getDimSize(0); + int64_t newF = resultType.getDimSize(1) / group; + assert(resultType.getDimSize(1) % group == 0 && + "output channel must be divisible"); + newShape.push_back(n); + newShape.push_back(group); + newShape.push_back(newF); + newShape.insert(newShape.end(), std::next(resultShape.begin(), 2), + resultShape.end()); + auto newResultType = + RankedTensorType::get(newShape, resultType.getElementType()); + Value zero = arith::ConstantOp::create(rewriter, loc, newResultType, + rewriter.getZeroAttr(newResultType)); + Attribute strides = op.getStride(); + Attribute dilation = op.getDilation(); + + Value result; + switch (dim) { + case 1: { + result = emitGroupedConv1D(rewriter, loc, newResultType, input, filter, + zero, strides, dilation); + break; + } + case 2: { + // linalg provides us we named op we can use so we use those instead + result = linalg::Conv2DNgchwGfchwOp::create(rewriter, loc, {newResultType}, + {input, filter}, {zero}, + strides, dilation) + .getResult(0); + break; + } + case 3: { + result = emitGroupedConv3D(rewriter, loc, newResultType, input, filter, + zero, strides, dilation); + break; + } + default: { + op.emitError("unsupported convolution dimensions"); + return failure(); + } + } + + // we must reshape the operand to what the type converter expects + SmallVector reassociation{{0}, {1, 2}}; + llvm::for_each(llvm::seq(3, dim + 3), + [&](int64_t index) { reassociation.push_back({index}); }); + auto finalResult = + tensor::CollapseShapeOp::create(rewriter, loc, result, reassociation); + + if (auto attr = (*op).template getAttrOfType("perf_config")) { + finalResult->setAttr("perf_config", attr); + } + + rewriter.replaceOp(op, finalResult); + return success(); +} + +LogicalResult +ConvConverter::matchAndRewrite(migraphx::ConvolutionOp op, OpAdaptor adaptor, + 
ConversionPatternRewriter &rewriter) const { + // Forward convolution is lowered in three steps: + // 1. Apply padding to the input when the op has non-zero padding. + // 2. Expand the channel dimension into (group, channel_per_group), + // introducing + // a group dimension G. Input becomes NGC* (e.g. NGCL, NGCHW, NGCDHW) and + // filter becomes GFC* (e.g. GFCL, GFCHW, GFCDHW), matching the group attr. + // 3. Emit the grouped linalg convolution (1D/2D/3D), then collapse the + // result back to the original NFHW/NFDHW shape for the type converter. + Location loc = op.getLoc(); + Value input = adaptor.getInput(); + Value filter = adaptor.getFilter(); + ArrayAttr padAttr = adaptor.getPaddingAttr(); + RankedTensorType inputType = cast(input.getType()); + int64_t dim = inputType.getRank() - 2; + int64_t group = op.getGroupAttr().getInt(); + // Step 1: apply padding when any padding value is non-zero. + if (!llvm::all_of(padAttr, [](Attribute pad) { + return cast(pad).getValue() == 0; + })) { + // Apply symmetric padding to spatial dimensions. + SmallVector low(inputType.getRank(), + rewriter.getIndexAttr(0)); + SmallVector high(inputType.getRank(), + rewriter.getIndexAttr(0)); + // insert padding to inputs + assert(2 * dim == (int64_t)padAttr.size() && "padding is symmetric"); + + // MIGraphX padAttr is [hlow, wlow, hhigh, whigh] + SmallVector newShape(inputType.getShape()); + auto lowAttrs = padAttr.getValue().drop_back(dim); + auto highAttrs = padAttr.getValue().drop_front(dim); + // Dim H is always located at the second index regardless of dimension of + // the convolution. 
+ int64_t dimHOffset = 2; + llvm::for_each(llvm::seq(dim), [&](int64_t index) { + int64_t lowPad = cast(lowAttrs[index]).getInt(); + int64_t highPad = cast(highAttrs[index]).getInt(); + newShape[dimHOffset + index] += lowPad + highPad; + low[dimHOffset + index] = rewriter.getIndexAttr(lowPad); + high[dimHOffset + index] = rewriter.getIndexAttr(highPad); + }); + + RankedTensorType newInputType = + RankedTensorType::get(newShape, inputType.getElementType()); + Value padValue = arith::ConstantOp::create( + rewriter, loc, rewriter.getZeroAttr(inputType.getElementType())); + input = tensor::PadOp::create(rewriter, loc, newInputType, input, low, high, + padValue) + .getResult(); + } + + auto expandGroupDim = [&](Value input, bool isFilter) -> Value { + RankedTensorType originalType = cast(input.getType()); + ArrayRef originalShape = originalType.getShape(); + SmallVector newShape; + + if (isFilter) { + // FCHW into GFCHW + int64_t newF = originalType.getDimSize(0) / group; + assert(originalType.getDimSize(0) % group == 0 && + "output channel must be divisible by group"); + newShape.push_back(group); + newShape.push_back(newF); + newShape.push_back(originalType.getDimSize(1)); + newShape.insert(newShape.end(), std::next(originalShape.begin(), 2), + originalShape.end()); + RankedTensorType newType = + RankedTensorType::get(newShape, originalType.getElementType()); + + SmallVector reassociation; + reassociation.push_back({0, 1}); + llvm::for_each(llvm::seq(2, dim + 3), + [&](int64_t i) { reassociation.push_back({i}); }); + return tensor::ExpandShapeOp::create(rewriter, loc, newType, input, + reassociation); + } else { + // Convert NCHW into NGCHW + int64_t newC = originalType.getDimSize(1) / group; + assert(originalType.getDimSize(1) % group == 0 && + "input channel must be divisible by group"); + newShape.push_back(originalType.getDimSize(0)); + newShape.push_back(group); + newShape.push_back(newC); + newShape.insert(newShape.end(), std::next(originalShape.begin(), 2), + 
originalShape.end()); + + RankedTensorType newType = + RankedTensorType::get(newShape, originalType.getElementType()); + SmallVector reassociation; + reassociation.push_back({0}); + reassociation.push_back({1, 2}); + llvm::for_each(llvm::seq(3, dim + 3), + [&](int64_t i) { reassociation.push_back({i}); }); + return tensor::ExpandShapeOp::create(rewriter, loc, newType, input, + reassociation); + } + }; + + // Step 2: expand group dimension (NCHW -> NGCHW, FCHW -> GFCHW). + input = expandGroupDim(input, false); + filter = expandGroupDim(filter, true); + // Step 3: emit linalg conv and collapse result to match type converter. + return emitConv(rewriter, op, input, filter); +} + // TODO: add support for scaled gemms, and migraphx::DeQuantizeLinearConverter //===----------------------------------------------------------------------===// // Base kernels (gemm) @@ -396,13 +697,17 @@ void mlir::migraphx::populateMIGraphXToLinalgConversionPatterns( ElementwiseConverter, ElementwiseConverter, ElementwiseConverter, - ReluConverter, ClipConverter>(converter, patterns.getContext()); + ReluConverter, ClipConverter, ConvConverter>(converter, patterns.getContext()); } void mlir::migraphx::populateMIGraphXFuncBoundaryToLinalgConversionPatterns( RewritePatternSet &patterns, TypeConverter &typeConverter) { patterns.add( typeConverter, patterns.getContext()); + + // mhal.launch can be generated through rocmlir-gen, so we need a way to + // legalize it + populateMIGraphXToLinalgMHALLauncherConversion(patterns, typeConverter); populateAnyFunctionOpInterfaceTypeConversionPattern(patterns, typeConverter); populateReturnOpTypeConversionPattern(patterns, typeConverter); populateCallOpTypeConversionPattern(patterns, typeConverter); diff --git a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalgPass.cpp b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalgPass.cpp index 8206d2d98be1..75f5588d2024 100644 --- a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalgPass.cpp +++ 
b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalgPass.cpp @@ -52,6 +52,11 @@ void mlir::migraphx::populateMIGraphXToLinalgBoundaryDialectConversion( target.addDynamicallyLegalOp([&](func::FuncOp op) { return typeConverter.isSignatureLegal(op.getFunctionType()); }); + target.addDynamicallyLegalOp( + [=](mhal::LaunchOp op) -> std::optional { + return typeConverter.isLegal(op.getResultTypes()) && + typeConverter.isLegal(op.getOperandTypes()); + }); target.addDynamicallyLegalOp( [&](func::ReturnOp op) { return typeConverter.isLegal(op); }); target.addDynamicallyLegalOp( @@ -91,6 +96,8 @@ void MIGraphXToLinalgPass::runOnOperation() { boundaryConversionTarget, boundaryTypeConverter); migraphx::populateMIGraphXFuncBoundaryToLinalgConversionPatterns( boundaryPattern, boundaryTypeConverter); + migraphx::populateMIGraphXToLinalgMHALLauncherConversion( + boundaryPattern, boundaryTypeConverter); if (failed(applyPartialConversion(func, boundaryConversionTarget, std::move(boundaryPattern)))) { return signalPassFailure(); diff --git a/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp b/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp index 7f38dc70c9e3..0866c080383c 100644 --- a/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp +++ b/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp @@ -1527,3 +1527,7 @@ void mlir::migraphx::populateMIGraphXFuncBoundaryToTosaConversionPatterns( populateAnyFunctionOpInterfaceTypeConversionPattern(patterns, typeConverter); populateCallOpTypeConversionPattern(patterns, typeConverter); } +void mlir::migraphx::populateMIGraphXToLinalgMHALLauncherConversion( + RewritePatternSet &patterns, TypeConverter &typeConverter) { + patterns.add(typeConverter, patterns.getContext()); +} diff --git a/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv-cpu.e2e.mlir b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv-cpu.e2e.mlir new file mode 100644 index 000000000000..562c6fb20a90 --- /dev/null +++ 
b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv-cpu.e2e.mlir @@ -0,0 +1,44 @@ +// RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand_type float -rand_min 0 -rand_max 0 -fut conv_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=GOLD +// RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner 
--shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH + +/// README - There are essentially two test (BOTH, and GOLD). +/// BOTH checks if the tosa pipeline gives the same value (given the +/// same seed) as the linalg pipeline. They will pass if both of the them +/// returns the same value. GOLD checks if the output for the linalg pipeline +/// matches an equivalent pytorch implementation. + +/// Gold value computed as the following: +/// +/// # These pattern are from the rocmlir-gen +/// pattern = torch.tensor([0.5, -1.0, 0.75], dtype=torch.float32) +/// flat_x = torch.tensor([pattern[i % 3].item() for i in range(750)], dtype=torch.float32) +/// x_nchwd = flat_x.reshape(2, 3, 5, 5, 5) # N, C, H, W, D +/// x = x_nchwd.permute(0, 1, 4, 2, 3) # -> (N, C, D, H, W) +/// +/// flat_w = torch.tensor([pattern[i % 3].item() for i in range(96)], dtype=torch.float32) +/// w_fchwd = flat_w.reshape(4, 3, 2, 2, 2) # F, C, H, W, D +/// weight = w_fchwd.permute(0, 1, 4, 2, 3) # -> (F, C, kD, kH, kW) +/// out = torch.nn.functional.conv3d( +/// x, weight, +/// stride=(2, 2, 2), +/// dilation=(2, 2, 2), +/// padding=0, +/// groups=1, +/// ) +/// out_nfhwd = out.permute(0, 1, 3, 4, 2) +/// flat_out = out_nfhwd.reshape(-1) +/// +/// print("Full 64:", flat_out.tolist()) +/// Outputs: +/// Full 64: [1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 
1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625] + + +module{ + // BOTH: [6.09101, 7.06269, 5.96599, 7.63177, 5.83172, 5.96893, 5.16868, 6.0204, 6.80761, 6.78844, 5.75672, 7.33505, 5.417{{.*}}, 6.04153, 5.14715, 6.728{{.*}}, 7.30343, 7.90745, 6.73162, 8.21738, 5.65554, 7.37453, 6.6329, 6.6093, 5.2816, 6.17693, 5.19904, 6.38292, 4.55713, 4.62921, 4.72307, 5.47466, 4.551, 6.15787, 4.97358, 5.89798, 5.10684, 6.01542, 5.18933, 5.58596, 5.22862, 7.13881, 4.88134, 5.56315, 5.52007, 6.27824, 4.93779, 5.71044, 6.27934, 7.51976, 5.23159, 7.17014, 6.74235, 5.59631, 5.33666, 6.20902, 4.95302, 5.26817, 4.50571, 5.17464, 4.49137, 4.80133, 3.39298, 4.92709] + // GOLD: [1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625] + func.func @conv(%arg1: !migraphx.shaped<2x3x5x5x5xf32, 375x125x25x5x1>, %arg2: !migraphx.shaped<4x3x2x2x2xf32, 24x8x4x2x1>) -> !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> { + %0 = migraphx.convolution %arg1, %arg2 {dilation = [2, 2, 2], group = 1 : i64, padding = [0, 0, 0, 0, 0, 0], padding_mode = 0 : i64, stride = [2, 2, 2]} : <2x3x5x5x5xf32, 375x125x25x5x1>, <4x3x2x2x2xf32, 24x8x4x2x1> -> <2x4x2x2x2xf32, 32x8x4x2x1> + return %0 : !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> + } +} diff --git a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir new file mode 100644 index 000000000000..c931f5d1c4db --- 
/dev/null +++ b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir @@ -0,0 +1,44 @@ +// RUN: rocmlir-opt -split-input-file --migraphx-to-linalg -verify-diagnostics %s | FileCheck %s + +// CHECK: #[[map:.*]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 * 2 + d7 * 2, d4 * 2 + d8 * 2, d5 * 2 + d9 * 2)> +// CHECK: #[[map1:.*]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +// CHECK: #[[map2:.*]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-LABEL: func.func @conv_3d( +// CHECK: linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins +// CHECK-DAG: ^bb0(%[[in:.*]]: f32, %[[in_5:.*]]: f32, %[[out:.*]]: f32) +// CHECK-DAG: %[[three:.*]] = arith.mulf %[[in]], %[[in_5]] +// CHECK-DAG: %[[four:.*]] = arith.addf %[[out]], %[[three]] +// CHECK-DAG: linalg.yield %[[four]] +func.func @conv_3d(%arg0: !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1>, %arg1: !migraphx.shaped<2x3x5x5x5xf32, 375x125x25x5x1>, %arg2: !migraphx.shaped<4x3x2x2x2xf32, 24x8x4x2x1>) -> !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> { + %0 = migraphx.convolution %arg1, %arg2 {dilation = [2, 2, 2], group = 1 : i64, padding = [0, 0, 0, 0, 0, 0], padding_mode = 0 : i64, stride = [2, 2, 2]} : <2x3x5x5x5xf32, 375x125x25x5x1>, <4x3x2x2x2xf32, 24x8x4x2x1> -> <2x4x2x2x2xf32, 32x8x4x2x1> + %1 = migraphx.add %0, %arg0 : <2x4x2x2x2xf32, 32x8x4x2x1>, <2x4x2x2x2xf32, 32x8x4x2x1> -> <2x4x2x2x2xf32, 32x8x4x2x1> + return %1 : !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> +} + +// ----- + +// CHECK-LABEL: func.func @conv_2d( +// CHECK: linalg.conv_2d_ngchw_gfchw +func.func @conv_2d(%arg0: !migraphx.shaped<1x128x28x28xf32, 100352x784x28x1>, %arg1: !migraphx.shaped<1x128x56x56xf32, 401408x3136x56x1>, %arg2: !migraphx.shaped<128x128x3x3xf32, 1152x9x3x1>) -> 
!migraphx.shaped<1x128x28x28xf32, 100352x784x28x1> { + %1 = migraphx.convolution %arg1, %arg2 {dilation = [1, 1], group = 1 : i64, padding = [1, 1, 1, 1], padding_mode = 0 : i64, stride = [2, 2]} : <1x128x56x56xf32, 401408x3136x56x1>, <128x128x3x3xf32, 1152x9x3x1> -> <1x128x28x28xf32, 100352x784x28x1> + %2 = migraphx.add %1, %arg0 : <1x128x28x28xf32, 100352x784x28x1>, <1x128x28x28xf32, 100352x784x28x1> -> <1x128x28x28xf32, 100352x784x28x1> + %3 = migraphx.relu %2 : <1x128x28x28xf32, 100352x784x28x1> -> <1x128x28x28xf32, 100352x784x28x1> + return %3 : !migraphx.shaped<1x128x28x28xf32, 100352x784x28x1> +} + +// ----- +// CHECK: #[[map:.*]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5)> +// CHECK: #[[map1:.*]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> +// CHECK: #[[map2:.*]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> + +// CHECK-LABEL: func.func @conv_1d( +// CHECK: linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} +// CHECK-DAG: ^bb0(%[[in:.*]]: f32, %[[in_5:.*]]: f32, %[[out:.*]]: f32) +// CHECK-DAG: %[[three:.*]] = arith.mulf %[[in]], %[[in_5]] +// CHECK-DAG: %[[four:.*]] = arith.addf %[[out]], %[[three]] +// CHECK-DAG: linalg.yield %[[four]] +func.func @conv_1d(%arg0: !migraphx.shaped<1x64x224xf32, 14336x224x1>, %arg1: !migraphx.shaped<1x3x224xf32, 672x224x1>, %arg2: !migraphx.shaped<64x3x7xf32, 21x7x1>) -> !migraphx.shaped<1x64x224xf32, 14336x224x1> { + %0 = migraphx.convolution %arg1, %arg2 {dilation = [1], group = 1 : i64, padding = [3, 3], padding_mode = 0 : i64, stride = [1]} : <1x3x224xf32, 672x224x1>, <64x3x7xf32, 21x7x1> -> <1x64x224xf32, 14336x224x1> + return %0 : !migraphx.shaped<1x64x224xf32, 14336x224x1> +} + diff --git a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-not-implemented.mlir b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-not-implemented.mlir index 
88abbb598b3a..aaa661e6ccbb 100644 --- a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-not-implemented.mlir +++ b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-not-implemented.mlir @@ -60,12 +60,6 @@ func.func @func_quant_convolution(%arg0: !migraphx.shaped<1x1xi8, 1x1>, %arg1: ! func.return } -func.func @func_convolution(%arg0: !migraphx.shaped<1x1xf32, 1x1>, %arg1: !migraphx.shaped<1x1xf32, 1x1>) { - // expected-error @+1{{failed to legalize operation 'migraphx.convolution'}} - migraphx.convolution %arg0, %arg1 {dilation = [1, 1], group = 1 : i64, padding = [0, 0], stride = [1, 1]}: <1x1xf32, 1x1>, <1x1xf32, 1x1> -> <1x1xf32, 1x1> - func.return -} - func.func @func_backwards_data_convolution(%arg0: !migraphx.shaped<1x1xf32, 1x1>, %arg1: !migraphx.shaped<1x1xf32, 1x1>) { // expected-error @+1{{failed to legalize operation 'migraphx.backwards_data_convolution'}} migraphx.backwards_data_convolution %arg0, %arg1 {dilation = [1, 1], group = 1 : i64, padding = [0, 0], stride = [1, 1]}: <1x1xf32, 1x1>, <1x1xf32, 1x1> -> <1x1xf32, 1x1> diff --git a/mlir/tools/rocmlir-driver/rocmlir-driver.cpp b/mlir/tools/rocmlir-driver/rocmlir-driver.cpp index db74b31758b0..f2abfc6cb254 100644 --- a/mlir/tools/rocmlir-driver/rocmlir-driver.cpp +++ b/mlir/tools/rocmlir-driver/rocmlir-driver.cpp @@ -57,11 +57,11 @@ static cl::opt kernelPipeline( "gpu,rocdl,binary or full"), cl::init("")); -static cl::opt - hostPipeline("host-pipeline", cl::desc("rocmlir-driver host pipeline list"), - cl::value_desc("comma separated list of rock pipelines: " - "migraphx,highlevel,mhal,runner or full"), - cl::init("")); +static cl::opt hostPipeline( + "host-pipeline", cl::desc("rocmlir-driver host pipeline list"), + cl::value_desc("comma separated list of rock pipelines: " + "migraphx,migraphx-linalg,highlevel,mhal,runner or full"), + cl::init("")); static cl::opt legacyRockPipeline("c", cl::Hidden, cl::init(false), cl::Optional, @@ -279,8 +279,8 @@ static LogicalResult 
runMLIRPasses(ModuleOp &module, } } - llvm::SmallDenseSet hostPipelineOptions{"migraphx", "highlevel", - "mhal", "runner"}; + llvm::SmallDenseSet hostPipelineOptions{ + "migraphx", "highlevel", "mhal", "runner", "migraphx-linalg"}; llvm::SmallDenseSet hostPipelineSet; std::string hostPipelineStr = hostPipeline.getValue(); if (failed(parsePipeline(hostPipelineStr, hostPipelineSet, @@ -288,9 +288,11 @@ static LogicalResult runMLIRPasses(ModuleOp &module, return failure(); } - if (hostPipelineSet.contains("migraphx")) { + if (hostPipelineSet.contains("migraphx") || + hostPipelineSet.contains("migraphx-linalg")) { PassManager pm(module->getName(), PassManager::Nesting::Implicit); - migraphx::addHighLevelPipeline(pm); + bool lowerFromLinalg = hostPipelineSet.contains("migraphx-linalg"); + migraphx::addHighLevelPipeline(pm, lowerFromLinalg); if (failed(pm.run(module))) { return failure(); } From 6df9db298ff342ecd93af9049b83a5fa22b02065 Mon Sep 17 00:00:00 2001 From: Vincent Date: Fri, 20 Feb 2026 18:06:29 +0000 Subject: [PATCH 02/16] smave work --- .../MIGraphXToLinalg/MIGraphXToLinalg.cpp | 32 ++++++++++++++----- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp index e1183a80815a..a154b16bbe29 100644 --- a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp +++ b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp @@ -133,6 +133,19 @@ static void convBodyBuilder(OpBuilder &b, Location loc, ValueRange blockArgs) { linalg::YieldOp::create(b, loc, add); } +/// Emit attributes for +static void emitConvAttributes(migraphx::ConvolutionOp op, Value convOp) { + Operation *newOp = convOp.getDefiningOp(); + newOp->setAttr("pad", op.getPaddingAttr()); + newOp->setAttr("group", op.getGroupAttr()); + newOp->setAttr("stride", op.getStrideAttr()); + newOp->setAttr("dilation", op.getDilation()); + + // Convert optional attributes + if (auto attr 
= (*op).template getAttrOfType("perf_config")) + newOp->setAttr("perf_config", attr); +} + /// Emit Conv1D expect input shape to be (batch, group, channel, height), /// filter to be (group, filter, channel, height) static Value emitGroupedConv1D(ConversionPatternRewriter &rewriter, @@ -194,14 +207,16 @@ static Value emitGroupedConv3D(ConversionPatternRewriter &rewriter, bindDims(ctx, batch, group, filterExpr, oh, ow, od, channel, kh, kw, kd); AffineMap inputMap = AffineMap::get( - 10, 0, + /*dimCount=*/10, /*symbolCount=*/0, {batch, group, channel, oh * strideH + kh * dilationH, ow * strideW + kw * dilationW, od * strideD + kd * dilationD}, ctx); AffineMap filterMap = - AffineMap::get(10, 0, {group, filterExpr, channel, kh, kw, kd}, ctx); + AffineMap::get(/*dimCount=*/10, /*symbolCount=*/0, + {group, filterExpr, channel, kh, kw, kd}, ctx); AffineMap outputMap = - AffineMap::get(10, 0, {batch, group, filterExpr, oh, ow, od}, ctx); + AffineMap::get(/*dimCount=*/10, /*symbolCount=*/0, + {batch, group, filterExpr, oh, ow, od}, ctx); SmallVector indexingMaps = {inputMap, filterMap, outputMap}; SmallVector iteratorTypes = { @@ -282,6 +297,8 @@ LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter, } } + emitConvAttributes(op, result); + // we must reshape the operand to what the type converter expects SmallVector reassociation{{0}, {1, 2}}; llvm::for_each(llvm::seq(3, dim + 3), @@ -289,10 +306,6 @@ LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter, auto finalResult = tensor::CollapseShapeOp::create(rewriter, loc, result, reassociation); - if (auto attr = (*op).template getAttrOfType("perf_config")) { - finalResult->setAttr("perf_config", attr); - } - rewriter.replaceOp(op, finalResult); return success(); } @@ -398,7 +411,10 @@ ConvConverter::matchAndRewrite(migraphx::ConvolutionOp op, OpAdaptor adaptor, } }; - // Step 2: expand group dimension (NCHW -> NGCHW, FCHW -> GFCHW). 
+ // Step 2: expand group dimension (NCHW -> NGCHW, FCHW -> GFCHW). We + // want expand in group dimension because linalg.conv2d_ngchw_gfchw + // expects the layout to have the group dimension. It also makes for + // a nicer linalg.generic loop input = expandGroupDim(input, false); filter = expandGroupDim(filter, true); // Step 3: emit linalg conv and collapse result to match type converter. From c0709d31d35a36951916be1d037b1d64f86b52a7 Mon Sep 17 00:00:00 2001 From: Vincent Date: Fri, 20 Feb 2026 19:30:21 +0000 Subject: [PATCH 03/16] Added test case for conv2d group --- .../e2e/migraphx-to-linalg-conv2d-cpu.e2e.mlir | 9 +++++++++ .../e2e/migraphx-to-linalg-conv2d-group-cpu.e2e.mlir | 6 ++++++ 2 files changed, 15 insertions(+) create mode 100644 mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-cpu.e2e.mlir create mode 100644 mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-group-cpu.e2e.mlir diff --git a/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-cpu.e2e.mlir b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-cpu.e2e.mlir new file mode 100644 index 000000000000..5512a8dcb88f --- /dev/null +++ b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-cpu.e2e.mlir @@ -0,0 +1,9 @@ +// RUN: rocmlir-gen -fut conv_2d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_2d_group_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv_2d_group -arch %arch 
--clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_2d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH + +// BOTH: [1123, 1231312] +func.func @conv_2d_group(%in: !migraphx.shaped<2x4x123x124xf32, 61008x15252x124x1>, %fil: !migraphx.shaped<8x2x4x5xf32, 40x20x5x1>) -> !migraphx.shaped<2x8x27x19xf32, 4104x513x19x1> { + %out = migraphx.convolution %in, %fil {dilation = [2, 3], group = 2 : i64, padding = [2, 2, 2, 2], padding_mode = 0 : i64, stride = [4, 5]} : + <2x4x123x124xf32, 61008x15252x124x1>, <8x2x4x5xf32, 40x20x5x1> -> <2x8x27x19xf32, 4104x513x19x1> + func.return %out : !migraphx.shaped<2x8x27x19xf32, 4104x513x19x1> +} diff --git a/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-group-cpu.e2e.mlir b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-group-cpu.e2e.mlir new file mode 100644 index 000000000000..924021181901 --- /dev/null +++ b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-group-cpu.e2e.mlir @@ -0,0 +1,6 @@ + +func.func @conv_2d_group(%in: !migraphx.shaped<2x4x123x124xf32, 61008x15252x124x1>, %fil: !migraphx.shaped<8x2x4x5xf32, >) -> !migraphx.shaped<2x8x27x19xf32, > { + %out = migraphx.convolution %in, %fil {dilation = [2, 3], group = 2 : i64, padding = [2, 2, 2, 2], padding_mode = 0 : i64, stride = [4, 5]} : + <1x4x5x5xf32, 100x1x20x4>, <8x4x3x3xf32, 36x1x12x4> -> <1x8x3x3xf32, 63x1x21x7> + func.return %out : 
!migraphx.shaped<1x8x3x3xf32, 63x1x21x7> +} From 85179afd19616d10b57e6c1f9d5720a33a45b3bd Mon Sep 17 00:00:00 2001 From: Vincent Date: Fri, 20 Feb 2026 22:07:04 +0000 Subject: [PATCH 04/16] Added more testcase --- .../MIGraphXToLinalg/MIGraphXToLinalg.cpp | 44 ++++++++++++------- ...graphx-to-linalg-conv1d-group-cpu.e2e.mlir | 11 +++++ .../migraphx-to-linalg-conv2d-cpu.e2e.mlir | 9 ---- ...graphx-to-linalg-conv2d-group-cpu.e2e.mlir | 12 +++-- ...graphx-to-linalg-conv3d-group-cpu.e2e.mlir | 9 ++++ .../migraphx-to-linalg-conv.mlir | 9 ++-- 6 files changed, 61 insertions(+), 33 deletions(-) create mode 100644 mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv1d-group-cpu.e2e.mlir delete mode 100644 mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-cpu.e2e.mlir create mode 100644 mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv3d-group-cpu.e2e.mlir diff --git a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp index a154b16bbe29..f33330abdac8 100644 --- a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp +++ b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp @@ -135,6 +135,10 @@ static void convBodyBuilder(OpBuilder &b, Location loc, ValueRange blockArgs) { /// Emit attributes for static void emitConvAttributes(migraphx::ConvolutionOp op, Value convOp) { + if(isa(convOp.getDefiningOp())){ + return; + } + Operation *newOp = convOp.getDefiningOp(); newOp->setAttr("pad", op.getPaddingAttr()); newOp->setAttr("group", op.getGroupAttr()); @@ -151,12 +155,10 @@ static void emitConvAttributes(migraphx::ConvolutionOp op, Value convOp) { static Value emitGroupedConv1D(ConversionPatternRewriter &rewriter, Location loc, RankedTensorType resultType, Value input, Value filter, Value zero, - Attribute strides, Attribute dilation) { + DenseIntElementsAttr strides, DenseIntElementsAttr dilation) { MLIRContext *ctx = rewriter.getContext(); - auto 
strideArr = cast(strides); - auto dilationArr = cast(dilation); - int64_t strideVal = cast(strideArr[0]).getInt(); - int64_t dilationVal = cast(dilationArr[0]).getInt(); + int64_t strideVal = strides.getValues()[0]; + int64_t dilationVal = dilation.getValues()[0]; // Iteration domain: (batch, group, filter, oh, channel, kh) AffineExpr batch, group, filterExpr, oh, channel, kh; @@ -190,16 +192,16 @@ static Value emitGroupedConv1D(ConversionPatternRewriter &rewriter, static Value emitGroupedConv3D(ConversionPatternRewriter &rewriter, Location loc, RankedTensorType resultType, Value input, Value filter, Value zero, - Attribute strides, Attribute dilation) { + DenseIntElementsAttr strides, DenseIntElementsAttr dilation) { MLIRContext *ctx = rewriter.getContext(); - auto strideArr = cast(strides); - auto dilationArr = cast(dilation); - int64_t strideH = cast(strideArr[0]).getInt(); - int64_t strideW = cast(strideArr[1]).getInt(); - int64_t strideD = cast(strideArr[2]).getInt(); - int64_t dilationH = cast(dilationArr[0]).getInt(); - int64_t dilationW = cast(dilationArr[1]).getInt(); - int64_t dilationD = cast(dilationArr[2]).getInt(); + auto strideVals = strides.getValues(); + int64_t strideH = strideVals[0]; + int64_t strideW = strideVals[1]; + int64_t strideD = strideVals[2]; + auto dilationVals = dilation.getValues(); + int64_t dilationH = dilationVals[0]; + int64_t dilationW = dilationVals[1]; + int64_t dilationD = dilationVals[2]; // Iteration domain: // (batch, group, filter, oh, ow, od, channel, kh, kw, kd) @@ -268,8 +270,18 @@ LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter, RankedTensorType::get(newShape, resultType.getElementType()); Value zero = arith::ConstantOp::create(rewriter, loc, newResultType, rewriter.getZeroAttr(newResultType)); - Attribute strides = op.getStride(); - Attribute dilation = op.getDilation(); + + // linalg.* expects attribute to be in tensor and not DenseI64Array. 
Convert the ArrayAttr into + // a one to one tensor attribute + auto convertAtttributeToLinalg = [&](ArrayAttr attr){ + SmallVector value; + llvm::for_each(attr.getValue(), [&](Attribute current){ + value.push_back(cast(current).getInt()); + }); + return rewriter.getI64TensorAttr(value); + }; + DenseIntElementsAttr strides = convertAtttributeToLinalg(op.getStride()); + DenseIntElementsAttr dilation = convertAtttributeToLinalg(op.getDilation()); Value result; switch (dim) { diff --git a/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv1d-group-cpu.e2e.mlir b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv1d-group-cpu.e2e.mlir new file mode 100644 index 000000000000..a9658468d92f --- /dev/null +++ b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv1d-group-cpu.e2e.mlir @@ -0,0 +1,11 @@ +// RUN: rocmlir-gen -fut conv_1d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_1d_group_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv_1d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_1d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner 
--shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH + +// Only a small subset of the array is checked because it is quite huge + +// BOTH: [2.94651, 3.09122, 3.86495, 4.54138, 3.18018, 4.06578, 2.97265, 4.05155, 2.35716, 4.26762, 3.49153, 4.14329, 3.82529, 5.43395, 4.66598, 4.98826, 4.41554, 5.15631, 3.91766, 4.79236, 4.52993, 4.25152, 4.87812, 5.10546, 4.19679, 5.1306, 4.2836, 3.7857, 5.21429, 4.6504, 4.83997, 3.91648, 5.86651, 4.76546, 5.00734, 5.18668, 5.38386, 4.1707, 5.43972, 5.57541, 5.33734, 5.14293, 4.10719, 5.32505, 4.39825 +func.func @conv_1d_group(%in: !migraphx.shaped<10x8x123xf32, 984x123x1>, %fil: !migraphx.shaped<12x2x7xf32, 14x7x1>) -> !migraphx.shaped<10x12x53xf32, 636x53x1> { + %out = migraphx.convolution %in, %fil {dilation = [4], group = 4 : i64, padding = [3,3], padding_mode = 0 : i64, stride = [2]} : + <10x8x123xf32, 984x123x1>, <12x2x7xf32, 14x7x1> -> <10x12x53xf32, 636x53x1> + func.return %out : !migraphx.shaped<10x12x53xf32, 636x53x1> +} diff --git a/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-cpu.e2e.mlir b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-cpu.e2e.mlir deleted file mode 100644 index 5512a8dcb88f..000000000000 --- a/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-cpu.e2e.mlir +++ /dev/null @@ -1,9 +0,0 @@ -// RUN: rocmlir-gen -fut conv_2d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_2d_group_wrapper --verifier clone - | xmir-runner 
--shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH -// RUN: rocmlir-gen -fut conv_2d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_2d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH - -// BOTH: [1123, 1231312] -func.func @conv_2d_group(%in: !migraphx.shaped<2x4x123x124xf32, 61008x15252x124x1>, %fil: !migraphx.shaped<8x2x4x5xf32, 40x20x5x1>) -> !migraphx.shaped<2x8x27x19xf32, 4104x513x19x1> { - %out = migraphx.convolution %in, %fil {dilation = [2, 3], group = 2 : i64, padding = [2, 2, 2, 2], padding_mode = 0 : i64, stride = [4, 5]} : - <2x4x123x124xf32, 61008x15252x124x1>, <8x2x4x5xf32, 40x20x5x1> -> <2x8x27x19xf32, 4104x513x19x1> - func.return %out : !migraphx.shaped<2x8x27x19xf32, 4104x513x19x1> -} diff --git a/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-group-cpu.e2e.mlir b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-group-cpu.e2e.mlir index 924021181901..bc36d3956044 100644 --- 
a/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-group-cpu.e2e.mlir +++ b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-group-cpu.e2e.mlir @@ -1,6 +1,12 @@ +// RUN: rocmlir-gen -fut conv_2d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_2d_group_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv_2d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_2d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH -func.func @conv_2d_group(%in: !migraphx.shaped<2x4x123x124xf32, 61008x15252x124x1>, %fil: !migraphx.shaped<8x2x4x5xf32, >) -> !migraphx.shaped<2x8x27x19xf32, > { +// Here we are checking to see if conv_2d with non standard stride, dilation, and a group parameter matches the existing tosa pipeline +// Note - this array is quite large, so we are only checking a small subset + +// BOTH: [5.83007, 
7.83374, 8.46274, 9.03237, 6.51391, 7.75809, 9.73003, 8.48013, 8.15419, 9.9975, 7.50244, 7.11982, 6.58057, 7.40089, 7.71545, 9.73616, 7.74541, 8.08335, 7.91827, 8.001{{.*}}, 9.33702, 11.0582, 9.34619, 10.305, 8.82474, 10.8324, 10.3826, 9.73949, 11.7825, 9.81817, 8.47468, 8.90449, 9.19788, 10.373, 10.2517, 9.64079, 9.87895, 11.9531, 8.59595, 8.78564, 9.26618, 9.2312, 8.38519, 8.64322, 9.76614, 8.41956, 8.74126, 9.29434, 9.50276, 8.11855, 9.82343, 10.0092, 10.0752, 9.29225, 11.1891, 9.088{{.*}}, 9.75943, 8.79682, 9.60196, 8.71861, 9.83224, 9.29888, 8.44989, 8.82743, 10.4409, 8.31476, 9.59674, 8.74762, 10.2553, 9.95829, 10.0612, 9.25078, 9.32061, 10.5277, 8.74543, 9.62819, 8.38384, 9.35403, 9.30592, 9.60566, 10.4934 +func.func @conv_2d_group(%in: !migraphx.shaped<2x4x123x124xf32, 61008x15252x124x1>, %fil: !migraphx.shaped<8x2x4x5xf32, 40x20x5x1>) -> !migraphx.shaped<2x8x27x19xf32, 4104x513x19x1> { %out = migraphx.convolution %in, %fil {dilation = [2, 3], group = 2 : i64, padding = [2, 2, 2, 2], padding_mode = 0 : i64, stride = [4, 5]} : - <1x4x5x5xf32, 100x1x20x4>, <8x4x3x3xf32, 36x1x12x4> -> <1x8x3x3xf32, 63x1x21x7> - func.return %out : !migraphx.shaped<1x8x3x3xf32, 63x1x21x7> + <2x4x123x124xf32, 61008x15252x124x1>, <8x2x4x5xf32, 40x20x5x1> -> <2x8x27x19xf32, 4104x513x19x1> + func.return %out : !migraphx.shaped<2x8x27x19xf32, 4104x513x19x1> } diff --git a/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv3d-group-cpu.e2e.mlir b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv3d-group-cpu.e2e.mlir new file mode 100644 index 000000000000..6e48bc6214c2 --- /dev/null +++ b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv3d-group-cpu.e2e.mlir @@ -0,0 +1,9 @@ +// RUN: rocmlir-gen -fut conv_3d -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_3d_wrapper --verifier clone - | xmir-runner 
--shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv_3d -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_3d_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH + +// BOTH: [9.78569, 8.8887, 12.9401, 10.9686, 8.19386, 11.9315, 17.5043, 11.5946, 18.9063, 16.2578, 13.171, 17.1096, 11.1845, 8.23256, 12.6896, 11.3629, 7.74514, 11.8208, 9.7909, 5.80301, 12.0013, 10.5879, 6.19064, 10.4416, 15.567, 10.1144, 17.6427, 15.261, 11.4259, 15.8709, 11.25, 8.493{{.*}}, 14.3779, 10.9948, 7.25571, 13.0338, 21.26{{.*}}, 15.4558, 21.4179, 22.3508, 16.92, 26.6663, 35.4665, 25.8853, 34.06{{.*}}, 34.5179, 26.577, 37.9682, 25.5063, 15.7656, 26.3552, 21.6613, 18.2474 +func.func @conv_3d(%in: !migraphx.shaped<10x8x12x13x14xf32, 17472x2184x182x14x1>, %fil: !migraphx.shaped<12x8x2x3x4xf32, 192x24x12x4x1>) -> !migraphx.shaped<10x12x13x6x3xf32, 2808x234x18x3x1> { + %out = migraphx.convolution %in, %fil {dilation = [3, 4, 5], group = 1 : i64, padding = [2, 3, 4, 2, 3, 4], padding_mode = 0 : i64, stride = [1, 2, 3]} : + 
<10x8x12x13x14xf32, 17472x2184x182x14x1>, <12x8x2x3x4xf32, 192x24x12x4x1> -> <10x12x13x6x3xf32, 2808x234x18x3x1> + func.return %out : !migraphx.shaped<10x12x13x6x3xf32, 2808x234x18x3x1> +} diff --git a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir index c931f5d1c4db..ffb45f5d6138 100644 --- a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir +++ b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir @@ -19,11 +19,10 @@ func.func @conv_3d(%arg0: !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1>, %arg1: !m // CHECK-LABEL: func.func @conv_2d( // CHECK: linalg.conv_2d_ngchw_gfchw -func.func @conv_2d(%arg0: !migraphx.shaped<1x128x28x28xf32, 100352x784x28x1>, %arg1: !migraphx.shaped<1x128x56x56xf32, 401408x3136x56x1>, %arg2: !migraphx.shaped<128x128x3x3xf32, 1152x9x3x1>) -> !migraphx.shaped<1x128x28x28xf32, 100352x784x28x1> { - %1 = migraphx.convolution %arg1, %arg2 {dilation = [1, 1], group = 1 : i64, padding = [1, 1, 1, 1], padding_mode = 0 : i64, stride = [2, 2]} : <1x128x56x56xf32, 401408x3136x56x1>, <128x128x3x3xf32, 1152x9x3x1> -> <1x128x28x28xf32, 100352x784x28x1> - %2 = migraphx.add %1, %arg0 : <1x128x28x28xf32, 100352x784x28x1>, <1x128x28x28xf32, 100352x784x28x1> -> <1x128x28x28xf32, 100352x784x28x1> - %3 = migraphx.relu %2 : <1x128x28x28xf32, 100352x784x28x1> -> <1x128x28x28xf32, 100352x784x28x1> - return %3 : !migraphx.shaped<1x128x28x28xf32, 100352x784x28x1> +func.func @conv_2d(%in: !migraphx.shaped<2x4x123x124xf32, 61008x15252x124x1>, %fil: !migraphx.shaped<8x2x4x5xf32, 40x20x5x1>) -> !migraphx.shaped<2x8x27x19xf32, 4104x513x19x1> { + %out = migraphx.convolution %in, %fil {dilation = [2, 3], group = 2 : i64, padding = [2, 2, 2, 2], padding_mode = 0 : i64, stride = [4, 5]} : + <2x4x123x124xf32, 61008x15252x124x1>, <8x2x4x5xf32, 40x20x5x1> -> <2x8x27x19xf32, 4104x513x19x1> + func.return %out : !migraphx.shaped<2x8x27x19xf32, 4104x513x19x1> } // 
----- From 1256298257cb1e576a6f4c6a3beafffc61fd8eae Mon Sep 17 00:00:00 2001 From: Vincent Date: Fri, 20 Feb 2026 22:20:52 +0000 Subject: [PATCH 05/16] Rename testfile to have .cpu extension --- ...-linalg-conv-cpu.e2e.mlir => migraphx-to-linalg-conv.cpu.mlir} | 0 ...roup-cpu.e2e.mlir => migraphx-to-linalg-conv1d-group.cpu.mlir} | 0 ...roup-cpu.e2e.mlir => migraphx-to-linalg-conv2d-group.cpu.mlir} | 0 ...roup-cpu.e2e.mlir => migraphx-to-linalg-conv3d-group.cpu.mlir} | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename mlir/test/Conversion/MIGraphXToLinalg/e2e/{migraphx-to-linalg-conv-cpu.e2e.mlir => migraphx-to-linalg-conv.cpu.mlir} (100%) rename mlir/test/Conversion/MIGraphXToLinalg/e2e/{migraphx-to-linalg-conv1d-group-cpu.e2e.mlir => migraphx-to-linalg-conv1d-group.cpu.mlir} (100%) rename mlir/test/Conversion/MIGraphXToLinalg/e2e/{migraphx-to-linalg-conv2d-group-cpu.e2e.mlir => migraphx-to-linalg-conv2d-group.cpu.mlir} (100%) rename mlir/test/Conversion/MIGraphXToLinalg/e2e/{migraphx-to-linalg-conv3d-group-cpu.e2e.mlir => migraphx-to-linalg-conv3d-group.cpu.mlir} (100%) diff --git a/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv-cpu.e2e.mlir b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv.cpu.mlir similarity index 100% rename from mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv-cpu.e2e.mlir rename to mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv.cpu.mlir diff --git a/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv1d-group-cpu.e2e.mlir b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv1d-group.cpu.mlir similarity index 100% rename from mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv1d-group-cpu.e2e.mlir rename to mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv1d-group.cpu.mlir diff --git a/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-group-cpu.e2e.mlir 
b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-group.cpu.mlir similarity index 100% rename from mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-group-cpu.e2e.mlir rename to mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-group.cpu.mlir diff --git a/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv3d-group-cpu.e2e.mlir b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv3d-group.cpu.mlir similarity index 100% rename from mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv3d-group-cpu.e2e.mlir rename to mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv3d-group.cpu.mlir From 3bd564d22812abe7a385de1ed634ae714b9ce147 Mon Sep 17 00:00:00 2001 From: Vincent Date: Fri, 20 Feb 2026 22:41:25 +0000 Subject: [PATCH 06/16] clang-format --- .../MIGraphXToLinalg/MIGraphXToLinalg.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp index f33330abdac8..d9d57493cdad 100644 --- a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp +++ b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp @@ -135,7 +135,7 @@ static void convBodyBuilder(OpBuilder &b, Location loc, ValueRange blockArgs) { /// Emit attributes for static void emitConvAttributes(migraphx::ConvolutionOp op, Value convOp) { - if(isa(convOp.getDefiningOp())){ + if (isa(convOp.getDefiningOp())) { return; } @@ -155,7 +155,8 @@ static void emitConvAttributes(migraphx::ConvolutionOp op, Value convOp) { static Value emitGroupedConv1D(ConversionPatternRewriter &rewriter, Location loc, RankedTensorType resultType, Value input, Value filter, Value zero, - DenseIntElementsAttr strides, DenseIntElementsAttr dilation) { + DenseIntElementsAttr strides, + DenseIntElementsAttr dilation) { MLIRContext *ctx = rewriter.getContext(); int64_t strideVal = 
strides.getValues()[0]; int64_t dilationVal = dilation.getValues()[0]; @@ -192,7 +193,8 @@ static Value emitGroupedConv1D(ConversionPatternRewriter &rewriter, static Value emitGroupedConv3D(ConversionPatternRewriter &rewriter, Location loc, RankedTensorType resultType, Value input, Value filter, Value zero, - DenseIntElementsAttr strides, DenseIntElementsAttr dilation) { + DenseIntElementsAttr strides, + DenseIntElementsAttr dilation) { MLIRContext *ctx = rewriter.getContext(); auto strideVals = strides.getValues(); int64_t strideH = strideVals[0]; @@ -271,12 +273,12 @@ LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter, Value zero = arith::ConstantOp::create(rewriter, loc, newResultType, rewriter.getZeroAttr(newResultType)); - // linalg.* expects attribute to be in tensor and not DenseI64Array. Convert the ArrayAttr into - // a one to one tensor attribute - auto convertAtttributeToLinalg = [&](ArrayAttr attr){ + // linalg.* expects attribute to be in tensor and not DenseI64Array. 
Convert + // the ArrayAttr into a one to one tensor attribute + auto convertAtttributeToLinalg = [&](ArrayAttr attr) { SmallVector value; - llvm::for_each(attr.getValue(), [&](Attribute current){ - value.push_back(cast(current).getInt()); + llvm::for_each(attr.getValue(), [&](Attribute current) { + value.push_back(cast(current).getInt()); }); return rewriter.getI64TensorAttr(value); }; From c3b6277efb41aa07ac17c07e650851799bd9ca2d Mon Sep 17 00:00:00 2001 From: Vincent Date: Sat, 21 Feb 2026 01:56:00 +0000 Subject: [PATCH 07/16] Added attribute testcase --- .../MIGraphXToLinalg/MIGraphXToLinalg.cpp | 14 +++++--------- .../migraphx-to-linalg-conv.mlir | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp index d9d57493cdad..98bacfe68de5 100644 --- a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp +++ b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp @@ -134,16 +134,12 @@ static void convBodyBuilder(OpBuilder &b, Location loc, ValueRange blockArgs) { } /// Emit attributes for -static void emitConvAttributes(migraphx::ConvolutionOp op, Value convOp) { - if (isa(convOp.getDefiningOp())) { - return; - } - +static void emitConvAttributes(migraphx::ConvolutionOp op, Value convOp, Attribute strides, Attribute dilation, Attribute pad) { Operation *newOp = convOp.getDefiningOp(); - newOp->setAttr("pad", op.getPaddingAttr()); + newOp->setAttr("pad", pad); newOp->setAttr("group", op.getGroupAttr()); - newOp->setAttr("stride", op.getStrideAttr()); - newOp->setAttr("dilation", op.getDilation()); + newOp->setAttr("stride", strides); + newOp->setAttr("dilation", dilation); // Convert optional attributes if (auto attr = (*op).template getAttrOfType("perf_config")) @@ -311,7 +307,7 @@ LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter, } } - emitConvAttributes(op, result); + 
emitConvAttributes(op, result, strides, dilation, convertAtttributeToLinalg(op.getPaddingAttr())); // we must reshape the operand to what the type converter expects SmallVector reassociation{{0}, {1, 2}}; diff --git a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir index ffb45f5d6138..7e33bd696319 100644 --- a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir +++ b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir @@ -41,3 +41,22 @@ func.func @conv_1d(%arg0: !migraphx.shaped<1x64x224xf32, 14336x224x1>, %arg1: !m return %0 : !migraphx.shaped<1x64x224xf32, 14336x224x1> } +// ----- + +// Checking for the perf_config, dilation, strides, and pad attributes + +// CHECK-LABEL: func.func @mlir_convolution_add( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor{{.*}} +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[cst:.*]] = arith.constant +// CHECK-DAG: %[[zero:.*]] = linalg.generic {{.*}} ins(%[[expanded_1]], %[[expanded_2]] : tensor{{.*}}) outs(%[[cst]] : tensor{{.*}}) attrs = {dilation = dense<2> : tensor<3xi64>, group = 1 : i64, pad = dense<0> : tensor<6xi64>, perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", stride = dense<2> : tensor<3xi64>} +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[zero]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +func.func @mlir_convolution_add(%arg1: !migraphx.shaped<2x3x5x5x5xf32, 375x125x25x5x1>, %arg2: !migraphx.shaped<4x3x2x2x2xf32, 24x8x4x2x1>) -> !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> attributes {kernel, arch="gfx950"}{ + %0 = migraphx.convolution %arg1, %arg2 
{perf_config="v3:16,32,4,16,16,4,4,1,2,1,1", dilation = [2, 2, 2], group = 1 : i64, padding = [0, 0, 0, 0, 0, 0], padding_mode = 0 : i64, stride = [2, 2, 2]} : <2x3x5x5x5xf32, 375x125x25x5x1>, <4x3x2x2x2xf32, 24x8x4x2x1> -> <2x4x2x2x2xf32, 32x8x4x2x1> + return %0 : !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> +} From 6f67460acc04daa60328af45bd6bc0552a22ee9c Mon Sep 17 00:00:00 2001 From: Vincent Date: Sat, 21 Feb 2026 02:10:11 +0000 Subject: [PATCH 08/16] Emit conv op name attribute --- .../Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp | 12 ++++++++++-- .../MIGraphXToLinalg/migraphx-to-linalg-conv.mlir | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp index 98bacfe68de5..0baa7928311f 100644 --- a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp +++ b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp @@ -134,7 +134,9 @@ static void convBodyBuilder(OpBuilder &b, Location loc, ValueRange blockArgs) { } /// Emit attributes for -static void emitConvAttributes(migraphx::ConvolutionOp op, Value convOp, Attribute strides, Attribute dilation, Attribute pad) { +static void emitConvAttributes(migraphx::ConvolutionOp op, Value convOp, + Attribute strides, Attribute dilation, + Attribute pad, Attribute convOpName) { Operation *newOp = convOp.getDefiningOp(); newOp->setAttr("pad", pad); newOp->setAttr("group", op.getGroupAttr()); @@ -144,6 +146,7 @@ static void emitConvAttributes(migraphx::ConvolutionOp op, Value convOp, Attribu // Convert optional attributes if (auto attr = (*op).template getAttrOfType("perf_config")) newOp->setAttr("perf_config", attr); + newOp->setAttr("conv_op", convOpName); } /// Emit Conv1D expect input shape to be (batch, group, channel, height), @@ -282,10 +285,12 @@ LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter, DenseIntElementsAttr dilation = 
convertAtttributeToLinalg(op.getDilation()); Value result; + Attribute resultConvOpName; switch (dim) { case 1: { result = emitGroupedConv1D(rewriter, loc, newResultType, input, filter, zero, strides, dilation); + resultConvOpName = rewriter.getStringAttr("ngch_gfch"); break; } case 2: { @@ -294,11 +299,13 @@ LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter, {input, filter}, {zero}, strides, dilation) .getResult(0); + resultConvOpName = rewriter.getStringAttr("ngchw_gfchw"); break; } case 3: { result = emitGroupedConv3D(rewriter, loc, newResultType, input, filter, zero, strides, dilation); + resultConvOpName = rewriter.getStringAttr("ngchwd_gfchwd"); break; } default: { @@ -307,7 +314,8 @@ LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter, } } - emitConvAttributes(op, result, strides, dilation, convertAtttributeToLinalg(op.getPaddingAttr())); + emitConvAttributes(op, result, strides, dilation, + convertAtttributeToLinalg(op.getPaddingAttr()), resultConvOpName); // we must reshape the operand to what the type converter expects SmallVector reassociation{{0}, {1, 2}}; diff --git a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir index 7e33bd696319..b3931bf4318a 100644 --- a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir +++ b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir @@ -52,7 +52,7 @@ func.func @conv_1d(%arg0: !migraphx.shaped<1x64x224xf32, 14336x224x1>, %arg1: !m // CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] // CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] // CHECK-DAG: %[[cst:.*]] = arith.constant -// CHECK-DAG: %[[zero:.*]] = linalg.generic {{.*}} ins(%[[expanded_1]], %[[expanded_2]] : tensor{{.*}}) outs(%[[cst]] : tensor{{.*}}) attrs = {dilation = dense<2> : tensor<3xi64>, group = 1 : i64, pad = dense<0> : tensor<6xi64>, perf_config = 
"v3:16,32,4,16,16,4,4,1,2,1,1", stride = dense<2> : tensor<3xi64>} +// CHECK-DAG: %[[zero:.*]] = linalg.generic {{.*}} ins(%[[expanded_1]], %[[expanded_2]] : tensor{{.*}}) outs(%[[cst]] : tensor{{.*}}) attrs = {conv_op = "ngchwd_gfchwd", dilation = dense<2> : tensor<3xi64>, group = 1 : i64, pad = dense<0> : tensor<6xi64>, perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", stride = dense<2> : tensor<3xi64>} // CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[zero]] // CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] // CHECK-DAG: return %[[collapsed_3]] From 4c1a2823296c84f523a4e40046bdef42e0a41369 Mon Sep 17 00:00:00 2001 From: Vincent Date: Mon, 23 Feb 2026 15:40:31 +0000 Subject: [PATCH 09/16] Address copilot comments --- .../MIGraphXToLinalg/MIGraphXToLinalg.cpp | 53 ++++++++++++------- .../e2e/migraphx-to-linalg-conv.cpu.mlir | 6 +-- 2 files changed, 36 insertions(+), 23 deletions(-) diff --git a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp index 0baa7928311f..3434c153de3d 100644 --- a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp +++ b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp @@ -123,7 +123,7 @@ struct ConvConverter final }; } // namespace -// Nice helpder function for the linalg.generic op region +// Nice helper function for the linalg.generic op region static void convBodyBuilder(OpBuilder &b, Location loc, ValueRange blockArgs) { Value inputVal = blockArgs[0]; Value filterVal = blockArgs[1]; @@ -133,7 +133,7 @@ static void convBodyBuilder(OpBuilder &b, Location loc, ValueRange blockArgs) { linalg::YieldOp::create(b, loc, add); } -/// Emit attributes for +/// Emit convolution attributes on the newly created operation. 
static void emitConvAttributes(migraphx::ConvolutionOp op, Value convOp, Attribute strides, Attribute dilation, Attribute pad, Attribute convOpName) { @@ -149,7 +149,7 @@ static void emitConvAttributes(migraphx::ConvolutionOp op, Value convOp, newOp->setAttr("conv_op", convOpName); } -/// Emit Conv1D expect input shape to be (batch, group, channel, height), +/// Emit Conv1D expects input shape to be (batch, group, channel, height), /// filter to be (group, filter, channel, height) static Value emitGroupedConv1D(ConversionPatternRewriter &rewriter, Location loc, RankedTensorType resultType, @@ -165,11 +165,12 @@ static Value emitGroupedConv1D(ConversionPatternRewriter &rewriter, bindDims(ctx, batch, group, filterExpr, oh, channel, kh); AffineMap inputMap = AffineMap::get( - 6, 0, {batch, group, channel, oh * strideVal + kh * dilationVal}, ctx); - AffineMap filterMap = - AffineMap::get(6, 0, {group, filterExpr, channel, kh}, ctx); - AffineMap outputMap = - AffineMap::get(6, 0, {batch, group, filterExpr, oh}, ctx); + /*dimCount=*/6, /*symbolCount=*/0, + {batch, group, channel, oh * strideVal + kh * dilationVal}, ctx); + AffineMap filterMap = AffineMap::get(/*dimCount=*/6, /*symbolCount=*/0, + {group, filterExpr, channel, kh}, ctx); + AffineMap outputMap = AffineMap::get(/*dimCount=*/6, /*symbolCount=*/0, + {batch, group, filterExpr, oh}, ctx); SmallVector indexingMaps = {inputMap, filterMap, outputMap}; SmallVector iteratorTypes = { @@ -187,8 +188,8 @@ static Value emitGroupedConv1D(ConversionPatternRewriter &rewriter, .getResult(0); } -/// Emit Conv3D expect input shape to be (batch, group, channel, h, w, d), -// filter to be (group, filter, channel, kh, kw, kd) +/// Emit Conv3D expects input shape to be (batch, group, channel, h, w, d), +/// filter to be (group, filter, channel, kh, kw, kd) static Value emitGroupedConv3D(ConversionPatternRewriter &rewriter, Location loc, RankedTensorType resultType, Value input, Value filter, Value zero, @@ -261,7 +262,7 @@ 
LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter, int64_t n = resultType.getDimSize(0); int64_t newF = resultType.getDimSize(1) / group; assert(resultType.getDimSize(1) % group == 0 && - "output channel must be divisible"); + "output channel must be divisible by group"); newShape.push_back(n); newShape.push_back(group); newShape.push_back(newF); @@ -274,15 +275,15 @@ LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter, // linalg.* expects attribute to be in tensor and not DenseI64Array. Convert // the ArrayAttr into a one to one tensor attribute - auto convertAtttributeToLinalg = [&](ArrayAttr attr) { + auto convertAttributeToLinalg = [&](ArrayAttr attr) { SmallVector value; llvm::for_each(attr.getValue(), [&](Attribute current) { value.push_back(cast(current).getInt()); }); return rewriter.getI64TensorAttr(value); }; - DenseIntElementsAttr strides = convertAtttributeToLinalg(op.getStride()); - DenseIntElementsAttr dilation = convertAtttributeToLinalg(op.getDilation()); + DenseIntElementsAttr strides = convertAttributeToLinalg(op.getStride()); + DenseIntElementsAttr dilation = convertAttributeToLinalg(op.getDilation()); Value result; Attribute resultConvOpName; @@ -294,7 +295,7 @@ LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter, break; } case 2: { - // linalg provides us we named op we can use so we use those instead + // linalg provides us with a named op we can use, so we use that instead result = linalg::Conv2DNgchwGfchwOp::create(rewriter, loc, {newResultType}, {input, filter}, {zero}, strides, dilation) @@ -315,7 +316,8 @@ LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter, } emitConvAttributes(op, result, strides, dilation, - convertAtttributeToLinalg(op.getPaddingAttr()), resultConvOpName); + convertAtttributeToLinalg(op.getPaddingAttr()), + resultConvOpName); // we must reshape the operand to what the type converter expects SmallVector reassociation{{0}, {1, 2}}; 
@@ -346,6 +348,16 @@ ConvConverter::matchAndRewrite(migraphx::ConvolutionOp op, OpAdaptor adaptor, RankedTensorType inputType = cast(input.getType()); int64_t dim = inputType.getRank() - 2; int64_t group = op.getGroupAttr().getInt(); + + // For now, the linalg.generic region doesn't support type casting, + // so we emit an error for now + + if (inputType.getElementType() != op.getFilter().getType().getElementType() || + inputType.getElementType() != op.getResult().getType().getElementType()) { + return op.emitError( + "type casting between operands and result is unsupported for now"); + } + // Step 1: apply padding when any padding value is non-zero. if (!llvm::all_of(padAttr, [](Attribute pad) { return cast(pad).getValue() == 0; @@ -358,12 +370,12 @@ ConvConverter::matchAndRewrite(migraphx::ConvolutionOp op, OpAdaptor adaptor, // insert padding to inputs assert(2 * dim == (int64_t)padAttr.size() && "padding is symmetric"); - // MIGraphX padAttr is [hlow, wlow, hhigh, whigh] + // MIGraphX padAttr is [dim0_low, dim1_low,..., dim0_high, dim1_high, ...] SmallVector newShape(inputType.getShape()); auto lowAttrs = padAttr.getValue().drop_back(dim); auto highAttrs = padAttr.getValue().drop_front(dim); - // Dim H is always located at the second index regardless of dimension of - // the convolution. + // The first spatial dimension (H) is always located at index 2 in the + // NC* layout (after batch and channel), regardless of convolution rank. 
int64_t dimHOffset = 2; llvm::for_each(llvm::seq(dim), [&](int64_t index) { int64_t lowPad = cast(lowAttrs[index]).getInt(); @@ -731,7 +743,8 @@ void mlir::migraphx::populateMIGraphXToLinalgConversionPatterns( ElementwiseConverter, ElementwiseConverter, ElementwiseConverter, - ReluConverter, ClipConverter, ConvConverter>(converter, patterns.getContext()); + ReluConverter, ClipConverter, ConvConverter>(converter, + patterns.getContext()); } void mlir::migraphx::populateMIGraphXFuncBoundaryToLinalgConversionPatterns( diff --git a/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv.cpu.mlir b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv.cpu.mlir index 562c6fb20a90..849a2aa7bee4 100644 --- a/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv.cpu.mlir +++ b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv.cpu.mlir @@ -2,15 +2,15 @@ // RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH // RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner 
--shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH -/// README - There are essentially two test (BOTH, and GOLD). +/// README - There are essentially two tests (BOTH, and GOLD). /// BOTH checks if the tosa pipeline gives the same value (given the -/// same seed) as the linalg pipeline. They will pass if both of the them +/// same seed) as the linalg pipeline. They will pass if both of them /// returns the same value. GOLD checks if the output for the linalg pipeline /// matches an equivalent pytorch implementation. /// Gold value computed as the following: /// -/// # These pattern are from the rocmlir-gen +/// # These patterns are from the rocmlir-gen /// pattern = torch.tensor([0.5, -1.0, 0.75], dtype=torch.float32) /// flat_x = torch.tensor([pattern[i % 3].item() for i in range(750)], dtype=torch.float32) /// x_nchwd = flat_x.reshape(2, 3, 5, 5, 5) # N, C, H, W, D From b0584243ffc69fa30ae08e27ce70962ff2fd36db Mon Sep 17 00:00:00 2001 From: Vincent Date: Mon, 23 Feb 2026 16:12:58 +0000 Subject: [PATCH 10/16] Fix compilation --- .../mlir/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.h | 4 ++++ mlir/include/mlir/Conversion/MIGraphXToTosa/MIGraphXToTosa.h | 3 --- mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp | 2 +- mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/mlir/include/mlir/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.h b/mlir/include/mlir/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.h index 11e94aa9fe75..27f0d396bd0d 100644 --- a/mlir/include/mlir/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.h +++ 
b/mlir/include/mlir/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.h @@ -39,6 +39,10 @@ void populateMIGraphXToLinalgBoundaryDialectConversion( /// migraphx.mlir.as_logical_shape and migraphx.mlir.as_underlying_shape. void populateMIGraphXFuncBoundaryToLinalgConversionPatterns( RewritePatternSet &target, TypeConverter &typeConverter); + +/// Populates conversion patterns for function boundaries mhal.launcher +void populateMIGraphXToLinalgMHALLauncherConversion( + RewritePatternSet &target, TypeConverter &typeConverter); } // namespace migraphx } // namespace mlir diff --git a/mlir/include/mlir/Conversion/MIGraphXToTosa/MIGraphXToTosa.h b/mlir/include/mlir/Conversion/MIGraphXToTosa/MIGraphXToTosa.h index 6ef5d42e0a95..2c2912a1d083 100644 --- a/mlir/include/mlir/Conversion/MIGraphXToTosa/MIGraphXToTosa.h +++ b/mlir/include/mlir/Conversion/MIGraphXToTosa/MIGraphXToTosa.h @@ -66,9 +66,6 @@ void populateMIGraphXFuncBoundaryToTosaConversionPatterns( RewritePatternSet &patterns, TypeConverter &typeConverter); void addMIGraphXToTosaPasses(OpPassManager &pm); - -void populateMIGraphXToLinalgMHALLauncherConversion( - RewritePatternSet &target, TypeConverter &typeConverter); } // namespace migraphx } // namespace mlir diff --git a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp index 3434c153de3d..c1e853f05a3d 100644 --- a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp +++ b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp @@ -316,7 +316,7 @@ LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter, } emitConvAttributes(op, result, strides, dilation, - convertAtttributeToLinalg(op.getPaddingAttr()), + convertAttributeToLinalg(op.getPaddingAttr()), resultConvOpName); // we must reshape the operand to what the type converter expects diff --git a/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp b/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp index 
0866c080383c..2fc73dde6368 100644 --- a/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp +++ b/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp @@ -10,7 +10,7 @@ // These rewriters lower from the MIGraphX to the Tos dialect. // //===----------------------------------------------------------------------===// - +#include "mlir/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.h" #include "mlir/Conversion/MIGraphXToTosa/MIGraphXToTosa.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Func/IR/FuncOps.h" From 06aa357fdced273a808c6adfcf0513c23b01cca7 Mon Sep 17 00:00:00 2001 From: Vincent Date: Mon, 23 Feb 2026 16:31:56 +0000 Subject: [PATCH 11/16] Added type casting test --- .../MIGraphXToLinalg/migraphx-to-linalg-conv.mlir | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir index b3931bf4318a..d62e5c2f68cf 100644 --- a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir +++ b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir @@ -43,6 +43,16 @@ func.func @conv_1d(%arg0: !migraphx.shaped<1x64x224xf32, 14336x224x1>, %arg1: !m // ----- +// Currently, we don't support type casting +func.func @conv_1d_different_types(%arg1: !migraphx.shaped<1x3x224xf16, 672x224x1>, %arg2: !migraphx.shaped<64x3x7xf16, 21x7x1>) -> !migraphx.shaped<1x64x224xf32, 14336x224x1> { + // expected-error @+2 {{type casting between operands and result is unsupported for now}} + // expected-error @+1 {{failed to legalize operation}} + %0 = migraphx.convolution %arg1, %arg2 {dilation = [1], group = 1 : i64, padding = [3, 3], padding_mode = 0 : i64, stride = [1]} : <1x3x224xf16, 672x224x1>, <64x3x7xf16, 21x7x1> -> <1x64x224xf32, 14336x224x1> + return %0 : !migraphx.shaped<1x64x224xf32, 14336x224x1> +} + +// ----- + // Checking for the perf_config, dilation, strides, and pad attributes // CHECK-LABEL: func.func 
@mlir_convolution_add( From cbdd1420d881b7a28f51fd581380caeca1f97ef9 Mon Sep 17 00:00:00 2001 From: Vincent Date: Mon, 23 Feb 2026 20:26:44 +0000 Subject: [PATCH 12/16] Emits `linalg.generic` instead of conv2d --- .../MIGraphXToLinalg/MIGraphXToLinalg.cpp | 54 +++++++++++++++++-- .../migraphx-to-linalg-conv.mlir | 13 ++++- 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp index c1e853f05a3d..dc83a3234ca7 100644 --- a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp +++ b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp @@ -188,6 +188,54 @@ static Value emitGroupedConv1D(ConversionPatternRewriter &rewriter, .getResult(0); } +/// Emit Conv2D +static Value emitGroupedConv2D(ConversionPatternRewriter &rewriter, + Location loc, RankedTensorType resultType, + Value input, Value filter, Value zero, + DenseIntElementsAttr strides, + DenseIntElementsAttr dilation) { + MLIRContext *ctx = rewriter.getContext(); + auto strideVals = strides.getValues(); + int64_t strideH = strideVals[0]; + int64_t strideW = strideVals[1]; + auto dilationVals = dilation.getValues(); + int64_t dilationH = dilationVals[0]; + int64_t dilationW = dilationVals[1]; + + // Iteration domain: + // (batch, group, filter, oh, ow, channel, kh, kw) + AffineExpr batch, group, filterExpr, oh, ow, channel, kh, kw; + bindDims(ctx, batch, group, filterExpr, oh, ow, channel, kh, kw); + + AffineMap inputMap = AffineMap::get( + /*dimCount=*/8, /*symbolCount=*/0, + {batch, group, channel, oh * strideH + kh * dilationH, + ow * strideW + kw * dilationW}, + ctx); + AffineMap filterMap = + AffineMap::get(/*dimCount=*/8, /*symbolCount=*/0, + {group, filterExpr, channel, kh, kw}, ctx); + AffineMap outputMap = + AffineMap::get(/*dimCount=*/8, /*symbolCount=*/0, + {batch, group, filterExpr, oh, ow}, ctx); + + SmallVector indexingMaps = {inputMap, filterMap, outputMap}; + 
SmallVector iteratorTypes = { + utils::IteratorType::parallel, // batch + utils::IteratorType::parallel, // group + utils::IteratorType::parallel, // filter + utils::IteratorType::parallel, // oh + utils::IteratorType::parallel, // ow + utils::IteratorType::reduction, // channel + utils::IteratorType::reduction, // kh + utils::IteratorType::reduction // kw + }; + + return linalg::GenericOp::create(rewriter, loc, resultType, + ValueRange{input, filter}, zero, + indexingMaps, iteratorTypes, convBodyBuilder) + .getResult(0); +} /// Emit Conv3D expects input shape to be (batch, group, channel, h, w, d), /// filter to be (group, filter, channel, kh, kw, kd) static Value emitGroupedConv3D(ConversionPatternRewriter &rewriter, @@ -296,10 +344,8 @@ LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter, } case 2: { // linalg provides us with a named op we can use, so we use that instead - result = linalg::Conv2DNgchwGfchwOp::create(rewriter, loc, {newResultType}, - {input, filter}, {zero}, - strides, dilation) - .getResult(0); + result = emitGroupedConv2D(rewriter, loc, newResultType, input, filter, + zero, strides, dilation); resultConvOpName = rewriter.getStringAttr("ngchw_gfchw"); break; } diff --git a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir index d62e5c2f68cf..2cdd5aebdceb 100644 --- a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir +++ b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir @@ -5,6 +5,7 @@ // CHECK: #[[map2:.*]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> // CHECK-LABEL: func.func @conv_3d( // CHECK: linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins +// CHECK-SAME: attrs = {conv_op = "ngchwd_gfchwd", dilation = dense<2> : 
tensor<3xi64>, group = 1 : i64, pad = dense<0> : tensor<6xi64>, stride = dense<2> : tensor<3xi64>} // CHECK-DAG: ^bb0(%[[in:.*]]: f32, %[[in_5:.*]]: f32, %[[out:.*]]: f32) // CHECK-DAG: %[[three:.*]] = arith.mulf %[[in]], %[[in_5]] // CHECK-DAG: %[[four:.*]] = arith.addf %[[out]], %[[three]] @@ -17,8 +18,17 @@ func.func @conv_3d(%arg0: !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1>, %arg1: !m // ----- +// CHECK: #map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 * 4 + d6 * 2, d4 * 5 + d7 * 3)> +// CHECK: #map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> +// CHECK: #map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> + // CHECK-LABEL: func.func @conv_2d( -// CHECK: linalg.conv_2d_ngchw_gfchw +// CHECK: linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} +// CHECK-SAME: attrs = {conv_op = "ngchw_gfchw", dilation = dense<[2, 3]> : tensor<2xi64>, group = 2 : i64, pad = dense<2> : tensor<4xi64>, stride = dense<[4, 5]> : tensor<2xi64>} +// CHECK-DAG: ^bb0(%[[in:.*]]: f32, %[[in_5:.*]]: f32, %[[out:.*]]: f32) +// CHECK-DAG: %[[three:.*]] = arith.mulf %[[in]], %[[in_5]] +// CHECK-DAG: %[[four:.*]] = arith.addf %[[out]], %[[three]] +// CHECK-DAG: linalg.yield %[[four]] func.func @conv_2d(%in: !migraphx.shaped<2x4x123x124xf32, 61008x15252x124x1>, %fil: !migraphx.shaped<8x2x4x5xf32, 40x20x5x1>) -> !migraphx.shaped<2x8x27x19xf32, 4104x513x19x1> { %out = migraphx.convolution %in, %fil {dilation = [2, 3], group = 2 : i64, padding = [2, 2, 2, 2], padding_mode = 0 : i64, stride = [4, 5]} : <2x4x123x124xf32, 61008x15252x124x1>, <8x2x4x5xf32, 40x20x5x1> -> <2x8x27x19xf32, 4104x513x19x1> @@ -32,6 +42,7 @@ func.func @conv_2d(%in: !migraphx.shaped<2x4x123x124xf32, 61008x15252x124x1>, %f // CHECK-LABEL: func.func @conv_1d( // CHECK: linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = 
["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} +// CHECK-SAME: attrs = {conv_op = "ngch_gfch", dilation = dense<1> : tensor<1xi64>, group = 1 : i64, pad = dense<3> : tensor<2xi64>, stride = dense<1> : tensor<1xi64>} // CHECK-DAG: ^bb0(%[[in:.*]]: f32, %[[in_5:.*]]: f32, %[[out:.*]]: f32) // CHECK-DAG: %[[three:.*]] = arith.mulf %[[in]], %[[in_5]] // CHECK-DAG: %[[four:.*]] = arith.addf %[[out]], %[[three]] From 17d427db88b618bbdb1fb2596fdeeb95a717faa7 Mon Sep 17 00:00:00 2001 From: Vincent Date: Mon, 23 Feb 2026 20:34:41 +0000 Subject: [PATCH 13/16] Rename op_name to have convnd prefix --- .../MIGraphXToLinalg/MIGraphXToLinalg.cpp | 15 +++++++-------- .../Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp | 2 +- .../MIGraphXToLinalg/migraphx-to-linalg-conv.mlir | 8 ++++---- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp index dc83a3234ca7..742d2b29056d 100644 --- a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp +++ b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp @@ -215,9 +215,8 @@ static Value emitGroupedConv2D(ConversionPatternRewriter &rewriter, AffineMap filterMap = AffineMap::get(/*dimCount=*/8, /*symbolCount=*/0, {group, filterExpr, channel, kh, kw}, ctx); - AffineMap outputMap = - AffineMap::get(/*dimCount=*/8, /*symbolCount=*/0, - {batch, group, filterExpr, oh, ow}, ctx); + AffineMap outputMap = AffineMap::get(/*dimCount=*/8, /*symbolCount=*/0, + {batch, group, filterExpr, oh, ow}, ctx); SmallVector indexingMaps = {inputMap, filterMap, outputMap}; SmallVector iteratorTypes = { @@ -228,7 +227,7 @@ static Value emitGroupedConv2D(ConversionPatternRewriter &rewriter, utils::IteratorType::parallel, // ow utils::IteratorType::reduction, // channel utils::IteratorType::reduction, // kh - utils::IteratorType::reduction // kw + utils::IteratorType::reduction // kw }; return 
linalg::GenericOp::create(rewriter, loc, resultType, @@ -339,20 +338,20 @@ LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter, case 1: { result = emitGroupedConv1D(rewriter, loc, newResultType, input, filter, zero, strides, dilation); - resultConvOpName = rewriter.getStringAttr("ngch_gfch"); + resultConvOpName = rewriter.getStringAttr("conv1d_ngch_gfch"); break; } case 2: { // linalg provides us with a named op we can use, so we use that instead result = emitGroupedConv2D(rewriter, loc, newResultType, input, filter, - zero, strides, dilation); - resultConvOpName = rewriter.getStringAttr("ngchw_gfchw"); + zero, strides, dilation); + resultConvOpName = rewriter.getStringAttr("conv2d_ngchw_gfchw"); break; } case 3: { result = emitGroupedConv3D(rewriter, loc, newResultType, input, filter, zero, strides, dilation); - resultConvOpName = rewriter.getStringAttr("ngchwd_gfchwd"); + resultConvOpName = rewriter.getStringAttr("conv3d_ngchwd_gfchwd"); break; } default: { diff --git a/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp b/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp index 2fc73dde6368..5c9aeb564a41 100644 --- a/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp +++ b/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp @@ -10,8 +10,8 @@ // These rewriters lower from the MIGraphX to the Tos dialect. 
// //===----------------------------------------------------------------------===// -#include "mlir/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.h" #include "mlir/Conversion/MIGraphXToTosa/MIGraphXToTosa.h" +#include "mlir/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Func/Transforms/FuncConversions.h" diff --git a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir index 2cdd5aebdceb..952622bc2fa7 100644 --- a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir +++ b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir @@ -5,7 +5,7 @@ // CHECK: #[[map2:.*]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> // CHECK-LABEL: func.func @conv_3d( // CHECK: linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins -// CHECK-SAME: attrs = {conv_op = "ngchwd_gfchwd", dilation = dense<2> : tensor<3xi64>, group = 1 : i64, pad = dense<0> : tensor<6xi64>, stride = dense<2> : tensor<3xi64>} +// CHECK-SAME: attrs = {conv_op = "conv3d_ngchwd_gfchwd", dilation = dense<2> : tensor<3xi64>, group = 1 : i64, pad = dense<0> : tensor<6xi64>, stride = dense<2> : tensor<3xi64>} // CHECK-DAG: ^bb0(%[[in:.*]]: f32, %[[in_5:.*]]: f32, %[[out:.*]]: f32) // CHECK-DAG: %[[three:.*]] = arith.mulf %[[in]], %[[in_5]] // CHECK-DAG: %[[four:.*]] = arith.addf %[[out]], %[[three]] @@ -24,7 +24,7 @@ func.func @conv_3d(%arg0: !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1>, %arg1: !m // CHECK-LABEL: func.func @conv_2d( // CHECK: linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} -// CHECK-SAME: 
attrs = {conv_op = "ngchw_gfchw", dilation = dense<[2, 3]> : tensor<2xi64>, group = 2 : i64, pad = dense<2> : tensor<4xi64>, stride = dense<[4, 5]> : tensor<2xi64>} +// CHECK-SAME: attrs = {conv_op = "conv2d_ngchw_gfchw", dilation = dense<[2, 3]> : tensor<2xi64>, group = 2 : i64, pad = dense<2> : tensor<4xi64>, stride = dense<[4, 5]> : tensor<2xi64>} // CHECK-DAG: ^bb0(%[[in:.*]]: f32, %[[in_5:.*]]: f32, %[[out:.*]]: f32) // CHECK-DAG: %[[three:.*]] = arith.mulf %[[in]], %[[in_5]] // CHECK-DAG: %[[four:.*]] = arith.addf %[[out]], %[[three]] @@ -42,7 +42,7 @@ func.func @conv_2d(%in: !migraphx.shaped<2x4x123x124xf32, 61008x15252x124x1>, %f // CHECK-LABEL: func.func @conv_1d( // CHECK: linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} -// CHECK-SAME: attrs = {conv_op = "ngch_gfch", dilation = dense<1> : tensor<1xi64>, group = 1 : i64, pad = dense<3> : tensor<2xi64>, stride = dense<1> : tensor<1xi64>} +// CHECK-SAME: attrs = {conv_op = "conv1d_ngch_gfch", dilation = dense<1> : tensor<1xi64>, group = 1 : i64, pad = dense<3> : tensor<2xi64>, stride = dense<1> : tensor<1xi64>} // CHECK-DAG: ^bb0(%[[in:.*]]: f32, %[[in_5:.*]]: f32, %[[out:.*]]: f32) // CHECK-DAG: %[[three:.*]] = arith.mulf %[[in]], %[[in_5]] // CHECK-DAG: %[[four:.*]] = arith.addf %[[out]], %[[three]] @@ -73,7 +73,7 @@ func.func @conv_1d_different_types(%arg1: !migraphx.shaped<1x3x224xf16, 672x224x // CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] // CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] // CHECK-DAG: %[[cst:.*]] = arith.constant -// CHECK-DAG: %[[zero:.*]] = linalg.generic {{.*}} ins(%[[expanded_1]], %[[expanded_2]] : tensor{{.*}}) outs(%[[cst]] : tensor{{.*}}) attrs = {conv_op = "ngchwd_gfchwd", dilation = dense<2> : tensor<3xi64>, group = 1 : i64, pad = dense<0> : tensor<6xi64>, perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", stride = dense<2> : tensor<3xi64>} 
+// CHECK-DAG: %[[zero:.*]] = linalg.generic {{.*}} ins(%[[expanded_1]], %[[expanded_2]] : tensor{{.*}}) outs(%[[cst]] : tensor{{.*}}) attrs = {conv_op = "conv3d_ngchwd_gfchwd", dilation = dense<2> : tensor<3xi64>, group = 1 : i64, pad = dense<0> : tensor<6xi64>, perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", stride = dense<2> : tensor<3xi64>} // CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[zero]] // CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] // CHECK-DAG: return %[[collapsed_3]] From d64aeb364a0cc179b10a09c2a100949baec5efe3 Mon Sep 17 00:00:00 2001 From: Vincent Date: Tue, 24 Feb 2026 20:15:41 +0000 Subject: [PATCH 14/16] Code cleanup --- .../mlir/Dialect/Rock/IR/RockAttrDefs.td | 9 + .../MIGraphXToLinalg/MIGraphXToLinalg.cpp | 223 ++++++------------ 2 files changed, 76 insertions(+), 156 deletions(-) diff --git a/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td b/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td index 796e6d6dae9c..280890af7d9f 100644 --- a/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td +++ b/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td @@ -59,6 +59,15 @@ def ConvOpBwdWeightType : I32EnumAttrCase<"BwdWeight", 2, "conv_bwd_weight">; def ConvOpTypes : Rock_I32Enum<"ConvOpType", "The type of a convolution operation", [ConvOpType, ConvOpBwdDataType, ConvOpBwdWeightType]>; +/// LinalgConvType +def LinalgConv_1D : I32EnumAttrCase<"Conv1dNgchGfch", 0, "conv1d_ngch_gfch">; +def LinalgConv_2D : I32EnumAttrCase<"Conv2dNgchwGfchw", 1, "conv2d_ngchw_gfchw">; +def LinalgConv_3D : I32EnumAttrCase<"Conv3dNgchwdGfchwd", 2, "conv3d_ngchwd_gfchwd">; + +def LinalgConvType : Rock_I32Enum<"LinalgConvType", + "The layout of a grouped convolution operation", + [LinalgConv_1D, LinalgConv_2D, LinalgConv_3D]>; + /// Kerneltype def KernelTypeConv : I32EnumAttrCase<"Conv", 0>; def KernelTypeConvBwdData : I32EnumAttrCase<"ConvBwdData", 1>; diff --git a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp 
b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp index 742d2b29056d..d5314ec7c29d 100644 --- a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp +++ b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp @@ -15,6 +15,7 @@ #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Func/Transforms/FuncConversions.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/Rock/IR/RockTypes.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" using namespace mlir; @@ -149,139 +150,65 @@ static void emitConvAttributes(migraphx::ConvolutionOp op, Value convOp, newOp->setAttr("conv_op", convOpName); } -/// Emit Conv1D expects input shape to be (batch, group, channel, height), -/// filter to be (group, filter, channel, height) -static Value emitGroupedConv1D(ConversionPatternRewriter &rewriter, - Location loc, RankedTensorType resultType, - Value input, Value filter, Value zero, - DenseIntElementsAttr strides, - DenseIntElementsAttr dilation) { - MLIRContext *ctx = rewriter.getContext(); - int64_t strideVal = strides.getValues()[0]; - int64_t dilationVal = dilation.getValues()[0]; - - // Iteration domain: (batch, group, filter, oh, channel, kh) - AffineExpr batch, group, filterExpr, oh, channel, kh; - bindDims(ctx, batch, group, filterExpr, oh, channel, kh); - - AffineMap inputMap = AffineMap::get( - /*dimCount=*/6, /*symbolCount=*/0, - {batch, group, channel, oh * strideVal + kh * dilationVal}, ctx); - AffineMap filterMap = AffineMap::get(/*dimCount=*/6, /*symbolCount=*/0, - {group, filterExpr, channel, kh}, ctx); - AffineMap outputMap = AffineMap::get(/*dimCount=*/6, /*symbolCount=*/0, - {batch, group, filterExpr, oh}, ctx); - - SmallVector indexingMaps = {inputMap, filterMap, outputMap}; - SmallVector iteratorTypes = { - utils::IteratorType::parallel, // n - utils::IteratorType::parallel, // g - utils::IteratorType::parallel, // f - utils::IteratorType::parallel, // oh - utils::IteratorType::reduction, // c - 
utils::IteratorType::reduction, // kh - }; - - return linalg::GenericOp::create(rewriter, loc, resultType, - ValueRange{input, filter}, zero, - indexingMaps, iteratorTypes, convBodyBuilder) - .getResult(0); -} - -/// Emit Conv2D -static Value emitGroupedConv2D(ConversionPatternRewriter &rewriter, - Location loc, RankedTensorType resultType, - Value input, Value filter, Value zero, - DenseIntElementsAttr strides, - DenseIntElementsAttr dilation) { +/// Emit a grouped convolution of any spatial rank (1D, 2D, or 3D). +/// Input shape: (batch, group, channel, spatial...), +/// filter shape: (group, filter, channel, kernel_spatial...) +/// +/// clang-format off +/// for n in batch: +/// for g in group: +/// for f in filters: +/// for oh_0 in output_spatial_0: +/// for oh_1 in output_spatial_1: +/// // ... +/// for oh_{dim-1} in output_spatial_{dim-1}: +/// for c in channels: // reduction +/// for kh_0 in kernel_spatial_0: // reduction +/// for kh_1 in kernel_spatial_1: // reduction +/// // ... 
+/// clang-format on +static Value emitGroupedConv(ConversionPatternRewriter &rewriter, Location loc, + RankedTensorType resultType, Value input, + Value filter, Value zero, + DenseIntElementsAttr strides, + DenseIntElementsAttr dilation) { MLIRContext *ctx = rewriter.getContext(); + int64_t dim = cast(input.getType()).getRank() - 3; auto strideVals = strides.getValues(); - int64_t strideH = strideVals[0]; - int64_t strideW = strideVals[1]; auto dilationVals = dilation.getValues(); - int64_t dilationH = dilationVals[0]; - int64_t dilationW = dilationVals[1]; - - // Iteration domain: - // (batch, group, filter, oh, ow, channel, kh, kw) - AffineExpr batch, group, filterExpr, oh, ow, channel, kh, kw; - bindDims(ctx, batch, group, filterExpr, oh, ow, channel, kh, kw); - - AffineMap inputMap = AffineMap::get( - /*dimCount=*/8, /*symbolCount=*/0, - {batch, group, channel, oh * strideH + kh * dilationH, - ow * strideW + kw * dilationW}, - ctx); - AffineMap filterMap = - AffineMap::get(/*dimCount=*/8, /*symbolCount=*/0, - {group, filterExpr, channel, kh, kw}, ctx); - AffineMap outputMap = AffineMap::get(/*dimCount=*/8, /*symbolCount=*/0, - {batch, group, filterExpr, oh, ow}, ctx); - - SmallVector indexingMaps = {inputMap, filterMap, outputMap}; - SmallVector iteratorTypes = { - utils::IteratorType::parallel, // batch - utils::IteratorType::parallel, // group - utils::IteratorType::parallel, // filter - utils::IteratorType::parallel, // oh - utils::IteratorType::parallel, // ow - utils::IteratorType::reduction, // channel - utils::IteratorType::reduction, // kh - utils::IteratorType::reduction // kw - }; - return linalg::GenericOp::create(rewriter, loc, resultType, - ValueRange{input, filter}, zero, - indexingMaps, iteratorTypes, convBodyBuilder) - .getResult(0); -} -/// Emit Conv3D expects input shape to be (batch, group, channel, h, w, d), -/// filter to be (group, filter, channel, kh, kw, kd) -static Value emitGroupedConv3D(ConversionPatternRewriter &rewriter, - Location 
loc, RankedTensorType resultType, - Value input, Value filter, Value zero, - DenseIntElementsAttr strides, - DenseIntElementsAttr dilation) { - MLIRContext *ctx = rewriter.getContext(); - auto strideVals = strides.getValues(); - int64_t strideH = strideVals[0]; - int64_t strideW = strideVals[1]; - int64_t strideD = strideVals[2]; - auto dilationVals = dilation.getValues(); - int64_t dilationH = dilationVals[0]; - int64_t dilationW = dilationVals[1]; - int64_t dilationD = dilationVals[2]; - - // Iteration domain: - // (batch, group, filter, oh, ow, od, channel, kh, kw, kd) - AffineExpr batch, group, filterExpr, oh, ow, od, channel, kh, kw, kd; - bindDims(ctx, batch, group, filterExpr, oh, ow, od, channel, kh, kw, kd); - - AffineMap inputMap = AffineMap::get( - /*dimCount=*/10, /*symbolCount=*/0, - {batch, group, channel, oh * strideH + kh * dilationH, - ow * strideW + kw * dilationW, od * strideD + kd * dilationD}, - ctx); - AffineMap filterMap = - AffineMap::get(/*dimCount=*/10, /*symbolCount=*/0, - {group, filterExpr, channel, kh, kw, kd}, ctx); - AffineMap outputMap = - AffineMap::get(/*dimCount=*/10, /*symbolCount=*/0, - {batch, group, filterExpr, oh, ow, od}, ctx); - - SmallVector indexingMaps = {inputMap, filterMap, outputMap}; - SmallVector iteratorTypes = { - utils::IteratorType::parallel, // batch - utils::IteratorType::parallel, // group - utils::IteratorType::parallel, // filter - utils::IteratorType::parallel, // oh - utils::IteratorType::parallel, // ow - utils::IteratorType::parallel, // od - utils::IteratorType::reduction, // channel - utils::IteratorType::reduction, // kh - utils::IteratorType::reduction, // kw - utils::IteratorType::reduction, // kd - }; + // Iteration domain layout: + // parallel: batch, group, filter, oh_0 .. oh_{dim-1} + // reduction: channel, kh_0 .. 
kh_{dim-1} + int64_t totalDims = 4 + 2 * dim; + SmallVector d; + for (int64_t i = 0; i < totalDims; ++i) + d.push_back(getAffineDimExpr(i, ctx)); + + AffineExpr batch = d[0], group = d[1], filterExpr = d[2]; + AffineExpr channel = d[3 + dim]; + + SmallVector inputExprs = {batch, group, channel}; + for (int64_t i = 0; i < dim; ++i) + inputExprs.push_back(d[3 + i] * strideVals[i] + + d[4 + dim + i] * dilationVals[i]); + + SmallVector filterExprs = {group, filterExpr, channel}; + for (int64_t i = 0; i < dim; ++i) + filterExprs.push_back(d[4 + dim + i]); + + SmallVector outputExprs = {batch, group, filterExpr}; + for (int64_t i = 0; i < dim; ++i) + outputExprs.push_back(d[3 + i]); + + SmallVector indexingMaps = { + AffineMap::get(totalDims, 0, inputExprs, ctx), + AffineMap::get(totalDims, 0, filterExprs, ctx), + AffineMap::get(totalDims, 0, outputExprs, ctx)}; + + SmallVector iteratorTypes(3 + dim, + utils::IteratorType::parallel); + iteratorTypes.append(1 + dim, utils::IteratorType::reduction); return linalg::GenericOp::create(rewriter, loc, resultType, ValueRange{input, filter}, zero, @@ -300,6 +227,7 @@ LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter, int64_t group = op.getGroupAttr().getInt(); int64_t dim = cast(input.getType()).getRank() - 3; // exclude batch (N), group (G), channel (C) + assert(dim >= 1 && dim <= 3 && "this should be checked at matchAndRewrite"); // Result type from the op is NF*; expand to NGF* for the linalg conv. 
RankedTensorType resultType = @@ -332,33 +260,12 @@ LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter, DenseIntElementsAttr strides = convertAttributeToLinalg(op.getStride()); DenseIntElementsAttr dilation = convertAttributeToLinalg(op.getDilation()); - Value result; - Attribute resultConvOpName; - switch (dim) { - case 1: { - result = emitGroupedConv1D(rewriter, loc, newResultType, input, filter, - zero, strides, dilation); - resultConvOpName = rewriter.getStringAttr("conv1d_ngch_gfch"); - break; - } - case 2: { - // linalg provides us with a named op we can use, so we use that instead - result = emitGroupedConv2D(rewriter, loc, newResultType, input, filter, - zero, strides, dilation); - resultConvOpName = rewriter.getStringAttr("conv2d_ngchw_gfchw"); - break; - } - case 3: { - result = emitGroupedConv3D(rewriter, loc, newResultType, input, filter, - zero, strides, dilation); - resultConvOpName = rewriter.getStringAttr("conv3d_ngchwd_gfchwd"); - break; - } - default: { - op.emitError("unsupported convolution dimensions"); - return failure(); - } - } + rock::LinalgConvType convLayout = (dim == 1) ? rock::LinalgConvType::Conv1dNgchGfch: + (dim == 2) ? 
rock::LinalgConvType::Conv2dNgchwGfchw : rock::LinalgConvType::Conv3dNgchwdGfchwd; + auto resultConvOpName = rewriter.getStringAttr( + rock::getNameForLinalgConvType(convLayout)); + Value result = emitGroupedConv(rewriter, loc, newResultType, input, filter, + zero, strides, dilation); emitConvAttributes(op, result, strides, dilation, convertAttributeToLinalg(op.getPaddingAttr()), @@ -394,6 +301,10 @@ ConvConverter::matchAndRewrite(migraphx::ConvolutionOp op, OpAdaptor adaptor, int64_t dim = inputType.getRank() - 2; int64_t group = op.getGroupAttr().getInt(); + if(dim > 3 || dim < 1) { + return op.emitError(Twine(dim) + "D conv is not supported for now"); + } + // For now, the linalg.generic region doesn't support type casting, // so we emit an error for now From 5ea29107dcae1145608d4ddef36e57e5300e6ada Mon Sep 17 00:00:00 2001 From: Vincent Date: Tue, 24 Feb 2026 20:25:34 +0000 Subject: [PATCH 15/16] Fixed testcase after changing to LinalgConv --- .../mlir/Dialect/Rock/IR/RockAttrDefs.td | 15 ++++--- .../MIGraphXToLinalg/MIGraphXToLinalg.cpp | 41 +++++++++---------- .../migraphx-to-linalg-conv.mlir | 9 ++-- 3 files changed, 34 insertions(+), 31 deletions(-) diff --git a/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td b/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td index 280890af7d9f..ab99bef06809 100644 --- a/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td +++ b/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td @@ -61,12 +61,15 @@ def ConvOpTypes : Rock_I32Enum<"ConvOpType", "The type of a convolution operatio /// LinalgConvType def LinalgConv_1D : I32EnumAttrCase<"Conv1dNgchGfch", 0, "conv1d_ngch_gfch">; -def LinalgConv_2D : I32EnumAttrCase<"Conv2dNgchwGfchw", 1, "conv2d_ngchw_gfchw">; -def LinalgConv_3D : I32EnumAttrCase<"Conv3dNgchwdGfchwd", 2, "conv3d_ngchwd_gfchwd">; - -def LinalgConvType : Rock_I32Enum<"LinalgConvType", - "The layout of a grouped convolution operation", - [LinalgConv_1D, LinalgConv_2D, LinalgConv_3D]>; +def LinalgConv_2D + : 
I32EnumAttrCase<"Conv2dNgchwGfchw", 1, "conv2d_ngchw_gfchw">; +def LinalgConv_3D + : I32EnumAttrCase<"Conv3dNgchwdGfchwd", 2, "conv3d_ngchwd_gfchwd">; + +def LinalgConvType + : Rock_I32Enum<"LinalgConvType", + "The layout of a grouped convolution operation", + [LinalgConv_1D, LinalgConv_2D, LinalgConv_3D]>; /// Kerneltype def KernelTypeConv : I32EnumAttrCase<"Conv", 0>; diff --git a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp index d5314ec7c29d..0b9da8c7e66f 100644 --- a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp +++ b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp @@ -170,12 +170,18 @@ static void emitConvAttributes(migraphx::ConvolutionOp op, Value convOp, static Value emitGroupedConv(ConversionPatternRewriter &rewriter, Location loc, RankedTensorType resultType, Value input, Value filter, Value zero, - DenseIntElementsAttr strides, - DenseIntElementsAttr dilation) { + ArrayAttr strides, + ArrayAttr dilation) { MLIRContext *ctx = rewriter.getContext(); int64_t dim = cast(input.getType()).getRank() - 3; - auto strideVals = strides.getValues(); - auto dilationVals = dilation.getValues(); + SmallVector strideVals; + SmallVector dilationVals; + llvm::transform(strides.getValue(), std::back_inserter(strideVals), [](Attribute attr){ + return cast(attr).getInt(); + }); + llvm::transform(dilation.getValue(), std::back_inserter(dilationVals), [](Attribute attr){ + return cast(attr).getInt(); + }); // Iteration domain layout: // parallel: batch, group, filter, oh_0 .. oh_{dim-1} @@ -248,27 +254,20 @@ LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter, Value zero = arith::ConstantOp::create(rewriter, loc, newResultType, rewriter.getZeroAttr(newResultType)); - // linalg.* expects attribute to be in tensor and not DenseI64Array. 
Convert - // the ArrayAttr into a one to one tensor attribute - auto convertAttributeToLinalg = [&](ArrayAttr attr) { - SmallVector value; - llvm::for_each(attr.getValue(), [&](Attribute current) { - value.push_back(cast(current).getInt()); - }); - return rewriter.getI64TensorAttr(value); - }; - DenseIntElementsAttr strides = convertAttributeToLinalg(op.getStride()); - DenseIntElementsAttr dilation = convertAttributeToLinalg(op.getDilation()); + ArrayAttr strides = op.getStride(); + ArrayAttr dilation =op.getDilation(); - rock::LinalgConvType convLayout = (dim == 1) ? rock::LinalgConvType::Conv1dNgchGfch: - (dim == 2) ? rock::LinalgConvType::Conv2dNgchwGfchw : rock::LinalgConvType::Conv3dNgchwdGfchwd; - auto resultConvOpName = rewriter.getStringAttr( - rock::getNameForLinalgConvType(convLayout)); + rock::LinalgConvType convLayout = + (dim == 1) ? rock::LinalgConvType::Conv1dNgchGfch + : (dim == 2) ? rock::LinalgConvType::Conv2dNgchwGfchw + : rock::LinalgConvType::Conv3dNgchwdGfchwd; + auto resultConvOpName = + rewriter.getStringAttr(rock::getNameForLinalgConvType(convLayout)); Value result = emitGroupedConv(rewriter, loc, newResultType, input, filter, zero, strides, dilation); emitConvAttributes(op, result, strides, dilation, - convertAttributeToLinalg(op.getPaddingAttr()), + op.getPaddingAttr(), resultConvOpName); // we must reshape the operand to what the type converter expects @@ -301,7 +300,7 @@ ConvConverter::matchAndRewrite(migraphx::ConvolutionOp op, OpAdaptor adaptor, int64_t dim = inputType.getRank() - 2; int64_t group = op.getGroupAttr().getInt(); - if(dim > 3 || dim < 1) { + if (dim > 3 || dim < 1) { return op.emitError(Twine(dim) + "D conv is not supported for now"); } diff --git a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir index 952622bc2fa7..136c1c00cc56 100644 --- a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir +++ 
b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir @@ -5,7 +5,7 @@ // CHECK: #[[map2:.*]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> // CHECK-LABEL: func.func @conv_3d( // CHECK: linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins -// CHECK-SAME: attrs = {conv_op = "conv3d_ngchwd_gfchwd", dilation = dense<2> : tensor<3xi64>, group = 1 : i64, pad = dense<0> : tensor<6xi64>, stride = dense<2> : tensor<3xi64>} +// CHECK-SAME: attrs = {conv_op = "conv3d_ngchwd_gfchwd", dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [2, 2, 2]} // CHECK-DAG: ^bb0(%[[in:.*]]: f32, %[[in_5:.*]]: f32, %[[out:.*]]: f32) // CHECK-DAG: %[[three:.*]] = arith.mulf %[[in]], %[[in_5]] // CHECK-DAG: %[[four:.*]] = arith.addf %[[out]], %[[three]] @@ -24,7 +24,7 @@ func.func @conv_3d(%arg0: !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1>, %arg1: !m // CHECK-LABEL: func.func @conv_2d( // CHECK: linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} -// CHECK-SAME: attrs = {conv_op = "conv2d_ngchw_gfchw", dilation = dense<[2, 3]> : tensor<2xi64>, group = 2 : i64, pad = dense<2> : tensor<4xi64>, stride = dense<[4, 5]> : tensor<2xi64>} +// CHECK-SAME: attrs = {conv_op = "conv2d_ngchw_gfchw", dilation = [2, 3], group = 2 : i64, pad = [2, 2, 2, 2], stride = [4, 5]} // CHECK-DAG: ^bb0(%[[in:.*]]: f32, %[[in_5:.*]]: f32, %[[out:.*]]: f32) // CHECK-DAG: %[[three:.*]] = arith.mulf %[[in]], %[[in_5]] // CHECK-DAG: %[[four:.*]] = arith.addf %[[out]], %[[three]] @@ -42,7 +42,7 @@ func.func @conv_2d(%in: !migraphx.shaped<2x4x123x124xf32, 61008x15252x124x1>, %f // CHECK-LABEL: func.func @conv_1d( // CHECK: linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types 
= ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} -// CHECK-SAME: attrs = {conv_op = "conv1d_ngch_gfch", dilation = dense<1> : tensor<1xi64>, group = 1 : i64, pad = dense<3> : tensor<2xi64>, stride = dense<1> : tensor<1xi64>} +// CHECK-SAME: attrs = {conv_op = "conv1d_ngch_gfch", dilation = [1], group = 1 : i64, pad = [3, 3], stride = [1]} // CHECK-DAG: ^bb0(%[[in:.*]]: f32, %[[in_5:.*]]: f32, %[[out:.*]]: f32) // CHECK-DAG: %[[three:.*]] = arith.mulf %[[in]], %[[in_5]] // CHECK-DAG: %[[four:.*]] = arith.addf %[[out]], %[[three]] @@ -73,7 +73,8 @@ func.func @conv_1d_different_types(%arg1: !migraphx.shaped<1x3x224xf16, 672x224x // CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] // CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] // CHECK-DAG: %[[cst:.*]] = arith.constant -// CHECK-DAG: %[[zero:.*]] = linalg.generic {{.*}} ins(%[[expanded_1]], %[[expanded_2]] : tensor{{.*}}) outs(%[[cst]] : tensor{{.*}}) attrs = {conv_op = "conv3d_ngchwd_gfchwd", dilation = dense<2> : tensor<3xi64>, group = 1 : i64, pad = dense<0> : tensor<6xi64>, perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", stride = dense<2> : tensor<3xi64>} +// CHECK-DAG: %[[zero:.*]] = linalg.generic {{.*}} ins(%[[expanded_1]], %[[expanded_2]] : tensor{{.*}}) outs(%[[cst]] : tensor{{.*}}) +// CHECK-SAME: attrs = {conv_op = "conv3d_ngchwd_gfchwd", dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", stride = [2, 2, 2]} // CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[zero]] // CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] // CHECK-DAG: return %[[collapsed_3]] From 319e4047896bef8b6289f37580248622195fbeeb Mon Sep 17 00:00:00 2001 From: Vincent Date: Wed, 25 Feb 2026 03:50:02 +0000 Subject: [PATCH 16/16] Emits rock attribute instead of string now --- mlir/include/mlir/Conversion/RocMLIRPasses.td | 2 +- mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td | 2 ++ 
mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp | 4 ++-- .../Conversion/MIGraphXToLinalg/MIGraphXToLinalgPass.cpp | 1 + .../MIGraphXToLinalg/migraphx-to-linalg-conv.mlir | 8 ++++---- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/Conversion/RocMLIRPasses.td b/mlir/include/mlir/Conversion/RocMLIRPasses.td index 5bf65d40c9d1..a93b8e1a1ac6 100644 --- a/mlir/include/mlir/Conversion/RocMLIRPasses.td +++ b/mlir/include/mlir/Conversion/RocMLIRPasses.td @@ -144,7 +144,7 @@ def MIGraphXToLinalgPass : Pass<"migraphx-to-linalg", "::mlir::func::FuncOp"> { }]; let dependentDialects = ["arith::ArithDialect", "tensor::TensorDialect", - "linalg::LinalgDialect"]; + "linalg::LinalgDialect", "rock::RockDialect"]; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td b/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td index ab99bef06809..c2d7fab4de73 100644 --- a/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td +++ b/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td @@ -71,6 +71,8 @@ def LinalgConvType "The layout of a grouped convolution operation", [LinalgConv_1D, LinalgConv_2D, LinalgConv_3D]>; +def LinalgConvTypeAttr : EnumAttr; + /// Kerneltype def KernelTypeConv : I32EnumAttrCase<"Conv", 0>; def KernelTypeConvBwdData : I32EnumAttrCase<"ConvBwdData", 1>; diff --git a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp index 0b9da8c7e66f..cee2096754fc 100644 --- a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp +++ b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp @@ -15,7 +15,7 @@ #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Func/Transforms/FuncConversions.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" -#include "mlir/Dialect/Rock/IR/RockTypes.h" +#include "mlir/Dialect/Rock/IR/Rock.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" using 
namespace mlir; @@ -262,7 +262,7 @@ LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter, : (dim == 2) ? rock::LinalgConvType::Conv2dNgchwGfchw : rock::LinalgConvType::Conv3dNgchwdGfchwd; auto resultConvOpName = - rewriter.getStringAttr(rock::getNameForLinalgConvType(convLayout)); + rock::LinalgConvTypeAttr::get(rewriter.getContext(), convLayout); Value result = emitGroupedConv(rewriter, loc, newResultType, input, filter, zero, strides, dilation); diff --git a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalgPass.cpp b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalgPass.cpp index 75f5588d2024..abb44768253b 100644 --- a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalgPass.cpp +++ b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalgPass.cpp @@ -16,6 +16,7 @@ #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/Rock/IR/Rock.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Pass/Pass.h" diff --git a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir index 136c1c00cc56..b1918ae6aff9 100644 --- a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir +++ b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir @@ -5,7 +5,7 @@ // CHECK: #[[map2:.*]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> // CHECK-LABEL: func.func @conv_3d( // CHECK: linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins -// CHECK-SAME: attrs = {conv_op = "conv3d_ngchwd_gfchwd", dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [2, 2, 2]} +// CHECK-SAME: attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : 
i64, pad = [0, 0, 0, 0, 0, 0], stride = [2, 2, 2]} // CHECK-DAG: ^bb0(%[[in:.*]]: f32, %[[in_5:.*]]: f32, %[[out:.*]]: f32) // CHECK-DAG: %[[three:.*]] = arith.mulf %[[in]], %[[in_5]] // CHECK-DAG: %[[four:.*]] = arith.addf %[[out]], %[[three]] @@ -24,7 +24,7 @@ func.func @conv_3d(%arg0: !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1>, %arg1: !m // CHECK-LABEL: func.func @conv_2d( // CHECK: linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} -// CHECK-SAME: attrs = {conv_op = "conv2d_ngchw_gfchw", dilation = [2, 3], group = 2 : i64, pad = [2, 2, 2, 2], stride = [4, 5]} +// CHECK-SAME: attrs = {conv_op = #rock, dilation = [2, 3], group = 2 : i64, pad = [2, 2, 2, 2], stride = [4, 5]} // CHECK-DAG: ^bb0(%[[in:.*]]: f32, %[[in_5:.*]]: f32, %[[out:.*]]: f32) // CHECK-DAG: %[[three:.*]] = arith.mulf %[[in]], %[[in_5]] // CHECK-DAG: %[[four:.*]] = arith.addf %[[out]], %[[three]] @@ -42,7 +42,7 @@ func.func @conv_2d(%in: !migraphx.shaped<2x4x123x124xf32, 61008x15252x124x1>, %f // CHECK-LABEL: func.func @conv_1d( // CHECK: linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} -// CHECK-SAME: attrs = {conv_op = "conv1d_ngch_gfch", dilation = [1], group = 1 : i64, pad = [3, 3], stride = [1]} +// CHECK-SAME: attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [3, 3], stride = [1]} // CHECK-DAG: ^bb0(%[[in:.*]]: f32, %[[in_5:.*]]: f32, %[[out:.*]]: f32) // CHECK-DAG: %[[three:.*]] = arith.mulf %[[in]], %[[in_5]] // CHECK-DAG: %[[four:.*]] = arith.addf %[[out]], %[[three]] @@ -74,7 +74,7 @@ func.func @conv_1d_different_types(%arg1: !migraphx.shaped<1x3x224xf16, 672x224x // CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] // CHECK-DAG: %[[cst:.*]] = arith.constant // CHECK-DAG: %[[zero:.*]] = linalg.generic {{.*}} ins(%[[expanded_1]], 
%[[expanded_2]] : tensor{{.*}}) outs(%[[cst]] : tensor{{.*}}) -// CHECK-SAME: attrs = {conv_op = "conv3d_ngchwd_gfchwd", dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", stride = [2, 2, 2]} +// CHECK-SAME: attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", stride = [2, 2, 2]} // CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[zero]] // CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] // CHECK-DAG: return %[[collapsed_3]]