diff --git a/mlir/include/mlir/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.h b/mlir/include/mlir/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.h
index 11e94aa9fe75..27f0d396bd0d 100644
--- a/mlir/include/mlir/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.h
+++ b/mlir/include/mlir/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.h
@@ -39,6 +39,10 @@ void populateMIGraphXToLinalgBoundaryDialectConversion(
 /// migraphx.mlir.as_logical_shape and migraphx.mlir.as_underlying_shape.
 void populateMIGraphXFuncBoundaryToLinalgConversionPatterns(
     RewritePatternSet &target, TypeConverter &typeConverter);
+
+/// Populates conversion patterns that legalize mhal.launch ops at function boundaries.
+void populateMIGraphXToLinalgMHALLauncherConversion(
+    RewritePatternSet &target, TypeConverter &typeConverter);
 } // namespace migraphx
 } // namespace mlir
diff --git a/mlir/include/mlir/Conversion/RocMLIRPasses.td b/mlir/include/mlir/Conversion/RocMLIRPasses.td
index 5bf65d40c9d1..a93b8e1a1ac6 100644
--- a/mlir/include/mlir/Conversion/RocMLIRPasses.td
+++ b/mlir/include/mlir/Conversion/RocMLIRPasses.td
@@ -144,7 +144,7 @@ def MIGraphXToLinalgPass : Pass<"migraphx-to-linalg", "::mlir::func::FuncOp"> {
   }];

   let dependentDialects = ["arith::ArithDialect", "tensor::TensorDialect",
-                           "linalg::LinalgDialect"];
+                           "linalg::LinalgDialect", "rock::RockDialect"];
 }

 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td b/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td
index 796e6d6dae9c..c2d7fab4de73 100644
--- a/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td
+++ b/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td
@@ -59,6 +59,20 @@ def ConvOpBwdWeightType : I32EnumAttrCase<"BwdWeight", 2, "conv_bwd_weight">;
 def ConvOpTypes : Rock_I32Enum<"ConvOpType", "The type of a convolution operation",
                                [ConvOpType, ConvOpBwdDataType, ConvOpBwdWeightType]>;

+/// LinalgConvType
+def LinalgConv_1D : I32EnumAttrCase<"Conv1dNgchGfch", 0,
"conv1d_ngch_gfch">;
+def LinalgConv_2D
+    : I32EnumAttrCase<"Conv2dNgchwGfchw", 1, "conv2d_ngchw_gfchw">;
+def LinalgConv_3D
+    : I32EnumAttrCase<"Conv3dNgchwdGfchwd", 2, "conv3d_ngchwd_gfchwd">;
+
+def LinalgConvType
+    : Rock_I32Enum<"LinalgConvType",
+                   "The layout of a grouped convolution operation",
+                   [LinalgConv_1D, LinalgConv_2D, LinalgConv_3D]>;
+
+def LinalgConvTypeAttr : EnumAttr<Rock_Dialect, LinalgConvType>;
+
 /// Kerneltype
 def KernelTypeConv : I32EnumAttrCase<"Conv", 0>;
 def KernelTypeConvBwdData : I32EnumAttrCase<"ConvBwdData", 1>;
diff --git a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp
index e7c6083c78e5..cee2096754fc 100644
--- a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp
+++ b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp
@@ -15,6 +15,7 @@
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Func/Transforms/FuncConversions.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/Rock/IR/Rock.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"

 using namespace mlir;
@@ -104,6 +105,307 @@ LogicalResult AsUnderlyingShapeConverter::matchAndRewrite(
       "input shape is non standard or broadcast; cannot convert this shape");
 }

+namespace {
+struct ConvConverter final
+    : public OpConversionPattern<migraphx::ConvolutionOp> {
+  using OpConversionPattern::OpConversionPattern;
+  using OpConversionPattern::getTypeConverter;
+  using OpAdaptor =
+      typename OpConversionPattern<migraphx::ConvolutionOp>::OpAdaptor;
+
+  LogicalResult
+  matchAndRewrite(migraphx::ConvolutionOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override;
+
+private:
+  LogicalResult emitConv(ConversionPatternRewriter &rewriter,
+                         migraphx::ConvolutionOp op, Value input,
+                         Value filter) const;
+};
+} // namespace
+
+// Nice helper function for the linalg.generic op region
+static void convBodyBuilder(OpBuilder &b, Location loc, ValueRange blockArgs) {
+  Value inputVal = blockArgs[0];
+  Value filterVal = blockArgs[1];
+  Value outputVal =
blockArgs[2]; + Value mul = arith::MulFOp::create(b, loc, inputVal, filterVal); + Value add = arith::AddFOp::create(b, loc, outputVal, mul); + linalg::YieldOp::create(b, loc, add); +} + +/// Emit convolution attributes on the newly created operation. +static void emitConvAttributes(migraphx::ConvolutionOp op, Value convOp, + Attribute strides, Attribute dilation, + Attribute pad, Attribute convOpName) { + Operation *newOp = convOp.getDefiningOp(); + newOp->setAttr("pad", pad); + newOp->setAttr("group", op.getGroupAttr()); + newOp->setAttr("stride", strides); + newOp->setAttr("dilation", dilation); + + // Convert optional attributes + if (auto attr = (*op).template getAttrOfType("perf_config")) + newOp->setAttr("perf_config", attr); + newOp->setAttr("conv_op", convOpName); +} + +/// Emit a grouped convolution of any spatial rank (1D, 2D, or 3D). +/// Input shape: (batch, group, channel, spatial...), +/// filter shape: (group, filter, channel, kernel_spatial...) +/// +/// clang-format off +/// for n in batch: +/// for g in group: +/// for f in filters: +/// for oh_0 in output_spatial_0: +/// for oh_1 in output_spatial_1: +/// // ... +/// for oh_{dim-1} in output_spatial_{dim-1}: +/// for c in channels: // reduction +/// for kh_0 in kernel_spatial_0: // reduction +/// for kh_1 in kernel_spatial_1: // reduction +/// // ... 
+/// clang-format on
+static Value emitGroupedConv(ConversionPatternRewriter &rewriter, Location loc,
+                             RankedTensorType resultType, Value input,
+                             Value filter, Value zero, ArrayAttr strides,
+                             ArrayAttr dilation) {
+  MLIRContext *ctx = rewriter.getContext();
+  int64_t dim = cast<RankedTensorType>(input.getType()).getRank() - 3;
+  SmallVector<int64_t> strideVals;
+  SmallVector<int64_t> dilationVals;
+  llvm::transform(strides.getValue(), std::back_inserter(strideVals),
+                  [](Attribute attr) {
+                    return cast<IntegerAttr>(attr).getInt();
+                  });
+  llvm::transform(dilation.getValue(), std::back_inserter(dilationVals),
+                  [](Attribute attr) {
+                    return cast<IntegerAttr>(attr).getInt();
+                  });
+
+  // Iteration domain layout:
+  //   parallel: batch, group, filter, oh_0 .. oh_{dim-1}
+  //   reduction: channel, kh_0 .. kh_{dim-1}
+  int64_t totalDims = 4 + 2 * dim;
+  SmallVector<AffineExpr> d;
+  for (int64_t i = 0; i < totalDims; ++i)
+    d.push_back(getAffineDimExpr(i, ctx));
+
+  AffineExpr batch = d[0], group = d[1], filterExpr = d[2];
+  AffineExpr channel = d[3 + dim];
+
+  SmallVector<AffineExpr> inputExprs = {batch, group, channel};
+  for (int64_t i = 0; i < dim; ++i)
+    inputExprs.push_back(d[3 + i] * strideVals[i] +
+                         d[4 + dim + i] * dilationVals[i]);
+
+  SmallVector<AffineExpr> filterExprs = {group, filterExpr, channel};
+  for (int64_t i = 0; i < dim; ++i)
+    filterExprs.push_back(d[4 + dim + i]);
+
+  SmallVector<AffineExpr> outputExprs = {batch, group, filterExpr};
+  for (int64_t i = 0; i < dim; ++i)
+    outputExprs.push_back(d[3 + i]);
+
+  SmallVector<AffineMap> indexingMaps = {
+      AffineMap::get(totalDims, 0, inputExprs, ctx),
+      AffineMap::get(totalDims, 0, filterExprs, ctx),
+      AffineMap::get(totalDims, 0, outputExprs, ctx)};
+
+  SmallVector<utils::IteratorType> iteratorTypes(
+      3 + dim, utils::IteratorType::parallel);
+  iteratorTypes.append(1 + dim, utils::IteratorType::reduction);
+
+  return linalg::GenericOp::create(rewriter, loc, resultType,
+                                   ValueRange{input, filter}, zero,
+                                   indexingMaps, iteratorTypes, convBodyBuilder)
+      .getResult(0);
+}
+
+LogicalResult ConvConverter::emitConv(ConversionPatternRewriter &rewriter,
+
migraphx::ConvolutionOp op, Value input, + Value filter) const { + // Input and filter are already in NGC* and GFC* form (group dimension + // expanded). Build the result type as NGF* (with explicit G), emit the + // grouped linalg conv (1D/2D/3D), then collapse back to NF* for the type + // converter. + Location loc = op.getLoc(); + int64_t group = op.getGroupAttr().getInt(); + int64_t dim = cast(input.getType()).getRank() - + 3; // exclude batch (N), group (G), channel (C) + assert(dim >= 1 && dim <= 3 && "this should be checked at matchAndRewrite"); + + // Result type from the op is NF*; expand to NGF* for the linalg conv. + RankedTensorType resultType = + cast(getTypeConverter()->convertType(op.getResult())); + ArrayRef resultShape = resultType.getShape(); + SmallVector newShape; + int64_t n = resultType.getDimSize(0); + int64_t newF = resultType.getDimSize(1) / group; + assert(resultType.getDimSize(1) % group == 0 && + "output channel must be divisible by group"); + newShape.push_back(n); + newShape.push_back(group); + newShape.push_back(newF); + newShape.insert(newShape.end(), std::next(resultShape.begin(), 2), + resultShape.end()); + auto newResultType = + RankedTensorType::get(newShape, resultType.getElementType()); + Value zero = arith::ConstantOp::create(rewriter, loc, newResultType, + rewriter.getZeroAttr(newResultType)); + + ArrayAttr strides = op.getStride(); + ArrayAttr dilation =op.getDilation(); + + rock::LinalgConvType convLayout = + (dim == 1) ? rock::LinalgConvType::Conv1dNgchGfch + : (dim == 2) ? 
rock::LinalgConvType::Conv2dNgchwGfchw + : rock::LinalgConvType::Conv3dNgchwdGfchwd; + auto resultConvOpName = + rock::LinalgConvTypeAttr::get(rewriter.getContext(), convLayout); + Value result = emitGroupedConv(rewriter, loc, newResultType, input, filter, + zero, strides, dilation); + + emitConvAttributes(op, result, strides, dilation, + op.getPaddingAttr(), + resultConvOpName); + + // we must reshape the operand to what the type converter expects + SmallVector reassociation{{0}, {1, 2}}; + llvm::for_each(llvm::seq(3, dim + 3), + [&](int64_t index) { reassociation.push_back({index}); }); + auto finalResult = + tensor::CollapseShapeOp::create(rewriter, loc, result, reassociation); + + rewriter.replaceOp(op, finalResult); + return success(); +} + +LogicalResult +ConvConverter::matchAndRewrite(migraphx::ConvolutionOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const { + // Forward convolution is lowered in three steps: + // 1. Apply padding to the input when the op has non-zero padding. + // 2. Expand the channel dimension into (group, channel_per_group), + // introducing + // a group dimension G. Input becomes NGC* (e.g. NGCL, NGCHW, NGCDHW) and + // filter becomes GFC* (e.g. GFCL, GFCHW, GFCDHW), matching the group attr. + // 3. Emit the grouped linalg convolution (1D/2D/3D), then collapse the + // result back to the original NFHW/NFDHW shape for the type converter. 
+ Location loc = op.getLoc(); + Value input = adaptor.getInput(); + Value filter = adaptor.getFilter(); + ArrayAttr padAttr = adaptor.getPaddingAttr(); + RankedTensorType inputType = cast(input.getType()); + int64_t dim = inputType.getRank() - 2; + int64_t group = op.getGroupAttr().getInt(); + + if (dim > 3 || dim < 1) { + return op.emitError(Twine(dim) + "D conv is not supported for now"); + } + + // For now, the linalg.generic region doesn't support type casting, + // so we emit an error for now + + if (inputType.getElementType() != op.getFilter().getType().getElementType() || + inputType.getElementType() != op.getResult().getType().getElementType()) { + return op.emitError( + "type casting between operands and result is unsupported for now"); + } + + // Step 1: apply padding when any padding value is non-zero. + if (!llvm::all_of(padAttr, [](Attribute pad) { + return cast(pad).getValue() == 0; + })) { + // Apply symmetric padding to spatial dimensions. + SmallVector low(inputType.getRank(), + rewriter.getIndexAttr(0)); + SmallVector high(inputType.getRank(), + rewriter.getIndexAttr(0)); + // insert padding to inputs + assert(2 * dim == (int64_t)padAttr.size() && "padding is symmetric"); + + // MIGraphX padAttr is [dim0_low, dim1_low,..., dim0_high, dim1_high, ...] + SmallVector newShape(inputType.getShape()); + auto lowAttrs = padAttr.getValue().drop_back(dim); + auto highAttrs = padAttr.getValue().drop_front(dim); + // The first spatial dimension (H) is always located at index 2 in the + // NC* layout (after batch and channel), regardless of convolution rank. 
+ int64_t dimHOffset = 2; + llvm::for_each(llvm::seq(dim), [&](int64_t index) { + int64_t lowPad = cast(lowAttrs[index]).getInt(); + int64_t highPad = cast(highAttrs[index]).getInt(); + newShape[dimHOffset + index] += lowPad + highPad; + low[dimHOffset + index] = rewriter.getIndexAttr(lowPad); + high[dimHOffset + index] = rewriter.getIndexAttr(highPad); + }); + + RankedTensorType newInputType = + RankedTensorType::get(newShape, inputType.getElementType()); + Value padValue = arith::ConstantOp::create( + rewriter, loc, rewriter.getZeroAttr(inputType.getElementType())); + input = tensor::PadOp::create(rewriter, loc, newInputType, input, low, high, + padValue) + .getResult(); + } + + auto expandGroupDim = [&](Value input, bool isFilter) -> Value { + RankedTensorType originalType = cast(input.getType()); + ArrayRef originalShape = originalType.getShape(); + SmallVector newShape; + + if (isFilter) { + // FCHW into GFCHW + int64_t newF = originalType.getDimSize(0) / group; + assert(originalType.getDimSize(0) % group == 0 && + "output channel must be divisible by group"); + newShape.push_back(group); + newShape.push_back(newF); + newShape.push_back(originalType.getDimSize(1)); + newShape.insert(newShape.end(), std::next(originalShape.begin(), 2), + originalShape.end()); + RankedTensorType newType = + RankedTensorType::get(newShape, originalType.getElementType()); + + SmallVector reassociation; + reassociation.push_back({0, 1}); + llvm::for_each(llvm::seq(2, dim + 3), + [&](int64_t i) { reassociation.push_back({i}); }); + return tensor::ExpandShapeOp::create(rewriter, loc, newType, input, + reassociation); + } else { + // Convert NCHW into NGCHW + int64_t newC = originalType.getDimSize(1) / group; + assert(originalType.getDimSize(1) % group == 0 && + "input channel must be divisible by group"); + newShape.push_back(originalType.getDimSize(0)); + newShape.push_back(group); + newShape.push_back(newC); + newShape.insert(newShape.end(), std::next(originalShape.begin(), 2), + 
originalShape.end()); + + RankedTensorType newType = + RankedTensorType::get(newShape, originalType.getElementType()); + SmallVector reassociation; + reassociation.push_back({0}); + reassociation.push_back({1, 2}); + llvm::for_each(llvm::seq(3, dim + 3), + [&](int64_t i) { reassociation.push_back({i}); }); + return tensor::ExpandShapeOp::create(rewriter, loc, newType, input, + reassociation); + } + }; + + // Step 2: expand group dimension (NCHW -> NGCHW, FCHW -> GFCHW). We + // want expand in group dimension because linalg.conv2d_ngchw_gfchw + // expects the layout to have the group dimension. It also makes for + // a nicer linalg.generic loop + input = expandGroupDim(input, false); + filter = expandGroupDim(filter, true); + // Step 3: emit linalg conv and collapse result to match type converter. + return emitConv(rewriter, op, input, filter); +} + // TODO: add support for scaled gemms, and migraphx::DeQuantizeLinearConverter //===----------------------------------------------------------------------===// // Base kernels (gemm) @@ -396,13 +698,18 @@ void mlir::migraphx::populateMIGraphXToLinalgConversionPatterns( ElementwiseConverter, ElementwiseConverter, ElementwiseConverter, - ReluConverter, ClipConverter>(converter, patterns.getContext()); + ReluConverter, ClipConverter, ConvConverter>(converter, + patterns.getContext()); } void mlir::migraphx::populateMIGraphXFuncBoundaryToLinalgConversionPatterns( RewritePatternSet &patterns, TypeConverter &typeConverter) { patterns.add( typeConverter, patterns.getContext()); + + // mhal.launch can be generated through rocmlir-gen, so we need a way to + // legalize it + populateMIGraphXToLinalgMHALLauncherConversion(patterns, typeConverter); populateAnyFunctionOpInterfaceTypeConversionPattern(patterns, typeConverter); populateReturnOpTypeConversionPattern(patterns, typeConverter); populateCallOpTypeConversionPattern(patterns, typeConverter); diff --git a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalgPass.cpp 
b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalgPass.cpp
index 8206d2d98be1..abb44768253b 100644
--- a/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalgPass.cpp
+++ b/mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalgPass.cpp
@@ -16,6 +16,7 @@
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Math/IR/Math.h"
+#include "mlir/Dialect/Rock/IR/Rock.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/Pass/Pass.h"
@@ -52,6 +53,11 @@
   target.addDynamicallyLegalOp<func::FuncOp>([&](func::FuncOp op) {
     return typeConverter.isSignatureLegal(op.getFunctionType());
   });
+  target.addDynamicallyLegalOp<mhal::LaunchOp>(
+      [=](mhal::LaunchOp op) -> std::optional<bool> {
+        return typeConverter.isLegal(op.getResultTypes()) &&
+               typeConverter.isLegal(op.getOperandTypes());
+      });
   target.addDynamicallyLegalOp<func::ReturnOp>(
       [&](func::ReturnOp op) { return typeConverter.isLegal(op); });
   target.addDynamicallyLegalOp<func::CallOp>(
@@ -91,6 +97,8 @@
       boundaryConversionTarget, boundaryTypeConverter);
   migraphx::populateMIGraphXFuncBoundaryToLinalgConversionPatterns(
       boundaryPattern, boundaryTypeConverter);
+  migraphx::populateMIGraphXToLinalgMHALLauncherConversion(
+      boundaryPattern, boundaryTypeConverter);
   if (failed(applyPartialConversion(func, boundaryConversionTarget,
                                     std::move(boundaryPattern)))) {
     return signalPassFailure();
diff --git a/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp b/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp
index 7f38dc70c9e3..5c9aeb564a41 100644
--- a/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp
+++ b/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp
@@ -10,8 +10,8 @@
 // These rewriters lower from the MIGraphX to the Tos dialect.
// //===----------------------------------------------------------------------===// - #include "mlir/Conversion/MIGraphXToTosa/MIGraphXToTosa.h" +#include "mlir/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Func/Transforms/FuncConversions.h" @@ -1527,3 +1527,7 @@ void mlir::migraphx::populateMIGraphXFuncBoundaryToTosaConversionPatterns( populateAnyFunctionOpInterfaceTypeConversionPattern(patterns, typeConverter); populateCallOpTypeConversionPattern(patterns, typeConverter); } +void mlir::migraphx::populateMIGraphXToLinalgMHALLauncherConversion( + RewritePatternSet &patterns, TypeConverter &typeConverter) { + patterns.add(typeConverter, patterns.getContext()); +} diff --git a/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv.cpu.mlir b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv.cpu.mlir new file mode 100644 index 000000000000..849a2aa7bee4 --- /dev/null +++ b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv.cpu.mlir @@ -0,0 +1,44 @@ +// RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand_type float -rand_min 0 -rand_max 0 -fut conv_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=GOLD +// RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_wrapper --verifier clone - | xmir-runner 
--shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH + +/// README - There are essentially two tests (BOTH, and GOLD). +/// BOTH checks if the tosa pipeline gives the same value (given the +/// same seed) as the linalg pipeline. They will pass if both of them +/// returns the same value. GOLD checks if the output for the linalg pipeline +/// matches an equivalent pytorch implementation. 
+ +/// Gold value computed as the following: +/// +/// # These patterns are from the rocmlir-gen +/// pattern = torch.tensor([0.5, -1.0, 0.75], dtype=torch.float32) +/// flat_x = torch.tensor([pattern[i % 3].item() for i in range(750)], dtype=torch.float32) +/// x_nchwd = flat_x.reshape(2, 3, 5, 5, 5) # N, C, H, W, D +/// x = x_nchwd.permute(0, 1, 4, 2, 3) # -> (N, C, D, H, W) +/// +/// flat_w = torch.tensor([pattern[i % 3].item() for i in range(96)], dtype=torch.float32) +/// w_fchwd = flat_w.reshape(4, 3, 2, 2, 2) # F, C, H, W, D +/// weight = w_fchwd.permute(0, 1, 4, 2, 3) # -> (F, C, kD, kH, kW) +/// out = torch.nn.functional.conv3d( +/// x, weight, +/// stride=(2, 2, 2), +/// dilation=(2, 2, 2), +/// padding=0, +/// groups=1, +/// ) +/// out_nfhwd = out.permute(0, 1, 3, 4, 2) +/// flat_out = out_nfhwd.reshape(-1) +/// +/// print("Full 64:", flat_out.tolist()) +/// Outputs: +/// Full 64: [1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625] + + +module{ + // BOTH: [6.09101, 7.06269, 5.96599, 7.63177, 5.83172, 5.96893, 5.16868, 6.0204, 6.80761, 6.78844, 5.75672, 7.33505, 5.417{{.*}}, 6.04153, 5.14715, 6.728{{.*}}, 7.30343, 7.90745, 6.73162, 8.21738, 5.65554, 7.37453, 6.6329, 6.6093, 5.2816, 6.17693, 5.19904, 6.38292, 4.55713, 4.62921, 4.72307, 5.47466, 4.551, 6.15787, 4.97358, 5.89798, 5.10684, 6.01542, 5.18933, 5.58596, 5.22862, 7.13881, 4.88134, 5.56315, 5.52007, 6.27824, 4.93779, 5.71044, 6.27934, 7.51976, 5.23159, 7.17014, 6.74235, 5.59631, 5.33666, 6.20902, 4.95302, 5.26817, 4.50571, 5.17464, 4.49137, 
4.80133, 3.39298, 4.92709] + // GOLD: [1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625] + func.func @conv(%arg1: !migraphx.shaped<2x3x5x5x5xf32, 375x125x25x5x1>, %arg2: !migraphx.shaped<4x3x2x2x2xf32, 24x8x4x2x1>) -> !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> { + %0 = migraphx.convolution %arg1, %arg2 {dilation = [2, 2, 2], group = 1 : i64, padding = [0, 0, 0, 0, 0, 0], padding_mode = 0 : i64, stride = [2, 2, 2]} : <2x3x5x5x5xf32, 375x125x25x5x1>, <4x3x2x2x2xf32, 24x8x4x2x1> -> <2x4x2x2x2xf32, 32x8x4x2x1> + return %0 : !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> + } +} diff --git a/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv1d-group.cpu.mlir b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv1d-group.cpu.mlir new file mode 100644 index 000000000000..a9658468d92f --- /dev/null +++ b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv1d-group.cpu.mlir @@ -0,0 +1,11 @@ +// RUN: rocmlir-gen -fut conv_1d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_1d_group_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext 
--entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv_1d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_1d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH + +// Only a small subset of the array is checked because it is quite huge + +// BOTH: [2.94651, 3.09122, 3.86495, 4.54138, 3.18018, 4.06578, 2.97265, 4.05155, 2.35716, 4.26762, 3.49153, 4.14329, 3.82529, 5.43395, 4.66598, 4.98826, 4.41554, 5.15631, 3.91766, 4.79236, 4.52993, 4.25152, 4.87812, 5.10546, 4.19679, 5.1306, 4.2836, 3.7857, 5.21429, 4.6504, 4.83997, 3.91648, 5.86651, 4.76546, 5.00734, 5.18668, 5.38386, 4.1707, 5.43972, 5.57541, 5.33734, 5.14293, 4.10719, 5.32505, 4.39825 +func.func @conv_1d_group(%in: !migraphx.shaped<10x8x123xf32, 984x123x1>, %fil: !migraphx.shaped<12x2x7xf32, 14x7x1>) -> !migraphx.shaped<10x12x53xf32, 636x53x1> { + %out = migraphx.convolution %in, %fil {dilation = [4], group = 4 : i64, padding = [3,3], padding_mode = 0 : i64, stride = [2]} : + <10x8x123xf32, 984x123x1>, <12x2x7xf32, 14x7x1> -> <10x12x53xf32, 636x53x1> + func.return %out : !migraphx.shaped<10x12x53xf32, 636x53x1> +} diff --git a/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-group.cpu.mlir b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-group.cpu.mlir new file mode 100644 index 000000000000..bc36d3956044 --- /dev/null +++ 
b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv2d-group.cpu.mlir @@ -0,0 +1,12 @@ +// RUN: rocmlir-gen -fut conv_2d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_2d_group_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv_2d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_2d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH + +// Here we are checking to see if conv_2d with non standard stride, dilation, and a group parameter matches the existing tosa pipeline +// Note - this array is quite large, so we are only checking a small subset + +// BOTH: [5.83007, 7.83374, 8.46274, 9.03237, 6.51391, 7.75809, 9.73003, 8.48013, 8.15419, 9.9975, 7.50244, 7.11982, 6.58057, 7.40089, 7.71545, 9.73616, 7.74541, 8.08335, 7.91827, 8.001{{.*}}, 9.33702, 11.0582, 9.34619, 10.305, 8.82474, 10.8324, 10.3826, 9.73949, 11.7825, 
9.81817, 8.47468, 8.90449, 9.19788, 10.373, 10.2517, 9.64079, 9.87895, 11.9531, 8.59595, 8.78564, 9.26618, 9.2312, 8.38519, 8.64322, 9.76614, 8.41956, 8.74126, 9.29434, 9.50276, 8.11855, 9.82343, 10.0092, 10.0752, 9.29225, 11.1891, 9.088{{.*}}, 9.75943, 8.79682, 9.60196, 8.71861, 9.83224, 9.29888, 8.44989, 8.82743, 10.4409, 8.31476, 9.59674, 8.74762, 10.2553, 9.95829, 10.0612, 9.25078, 9.32061, 10.5277, 8.74543, 9.62819, 8.38384, 9.35403, 9.30592, 9.60566, 10.4934 +func.func @conv_2d_group(%in: !migraphx.shaped<2x4x123x124xf32, 61008x15252x124x1>, %fil: !migraphx.shaped<8x2x4x5xf32, 40x20x5x1>) -> !migraphx.shaped<2x8x27x19xf32, 4104x513x19x1> { + %out = migraphx.convolution %in, %fil {dilation = [2, 3], group = 2 : i64, padding = [2, 2, 2, 2], padding_mode = 0 : i64, stride = [4, 5]} : + <2x4x123x124xf32, 61008x15252x124x1>, <8x2x4x5xf32, 40x20x5x1> -> <2x8x27x19xf32, 4104x513x19x1> + func.return %out : !migraphx.shaped<2x8x27x19xf32, 4104x513x19x1> +} diff --git a/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv3d-group.cpu.mlir b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv3d-group.cpu.mlir new file mode 100644 index 000000000000..6e48bc6214c2 --- /dev/null +++ b/mlir/test/Conversion/MIGraphXToLinalg/e2e/migraphx-to-linalg-conv3d-group.cpu.mlir @@ -0,0 +1,9 @@ +// RUN: rocmlir-gen -fut conv_3d -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_3d_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv_3d 
-arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_3d_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH + +// BOTH: [9.78569, 8.8887, 12.9401, 10.9686, 8.19386, 11.9315, 17.5043, 11.5946, 18.9063, 16.2578, 13.171, 17.1096, 11.1845, 8.23256, 12.6896, 11.3629, 7.74514, 11.8208, 9.7909, 5.80301, 12.0013, 10.5879, 6.19064, 10.4416, 15.567, 10.1144, 17.6427, 15.261, 11.4259, 15.8709, 11.25, 8.493{{.*}}, 14.3779, 10.9948, 7.25571, 13.0338, 21.26{{.*}}, 15.4558, 21.4179, 22.3508, 16.92, 26.6663, 35.4665, 25.8853, 34.06{{.*}}, 34.5179, 26.577, 37.9682, 25.5063, 15.7656, 26.3552, 21.6613, 18.2474 +func.func @conv_3d(%in: !migraphx.shaped<10x8x12x13x14xf32, 17472x2184x182x14x1>, %fil: !migraphx.shaped<12x8x2x3x4xf32, 192x24x12x4x1>) -> !migraphx.shaped<10x12x13x6x3xf32, 2808x234x18x3x1> { + %out = migraphx.convolution %in, %fil {dilation = [3, 4, 5], group = 1 : i64, padding = [2, 3, 4, 2, 3, 4], padding_mode = 0 : i64, stride = [1, 2, 3]} : + <10x8x12x13x14xf32, 17472x2184x182x14x1>, <12x8x2x3x4xf32, 192x24x12x4x1> -> <10x12x13x6x3xf32, 2808x234x18x3x1> + func.return %out : !migraphx.shaped<10x12x13x6x3xf32, 2808x234x18x3x1> +} diff --git a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir new file mode 100644 index 000000000000..b1918ae6aff9 --- /dev/null +++ 
b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-conv.mlir @@ -0,0 +1,84 @@ +// RUN: rocmlir-opt -split-input-file --migraphx-to-linalg -verify-diagnostics %s | FileCheck %s + +// CHECK: #[[map:.*]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 * 2 + d7 * 2, d4 * 2 + d8 * 2, d5 * 2 + d9 * 2)> +// CHECK: #[[map1:.*]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +// CHECK: #[[map2:.*]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-LABEL: func.func @conv_3d( +// CHECK: linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins +// CHECK-SAME: attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [2, 2, 2]} +// CHECK-DAG: ^bb0(%[[in:.*]]: f32, %[[in_5:.*]]: f32, %[[out:.*]]: f32) +// CHECK-DAG: %[[three:.*]] = arith.mulf %[[in]], %[[in_5]] +// CHECK-DAG: %[[four:.*]] = arith.addf %[[out]], %[[three]] +// CHECK-DAG: linalg.yield %[[four]] +func.func @conv_3d(%arg0: !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1>, %arg1: !migraphx.shaped<2x3x5x5x5xf32, 375x125x25x5x1>, %arg2: !migraphx.shaped<4x3x2x2x2xf32, 24x8x4x2x1>) -> !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> { + %0 = migraphx.convolution %arg1, %arg2 {dilation = [2, 2, 2], group = 1 : i64, padding = [0, 0, 0, 0, 0, 0], padding_mode = 0 : i64, stride = [2, 2, 2]} : <2x3x5x5x5xf32, 375x125x25x5x1>, <4x3x2x2x2xf32, 24x8x4x2x1> -> <2x4x2x2x2xf32, 32x8x4x2x1> + %1 = migraphx.add %0, %arg0 : <2x4x2x2x2xf32, 32x8x4x2x1>, <2x4x2x2x2xf32, 32x8x4x2x1> -> <2x4x2x2x2xf32, 32x8x4x2x1> + return %1 : !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> +} + +// ----- + +// CHECK: #map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 * 4 + d6 * 2, d4 * 5 + d7 * 3)> +// CHECK: #map1 = affine_map<(d0, d1, d2, d3, d4, d5, 
d6, d7) -> (d1, d2, d5, d6, d7)> +// CHECK: #map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> + +// CHECK-LABEL: func.func @conv_2d( +// CHECK: linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} +// CHECK-SAME: attrs = {conv_op = #rock, dilation = [2, 3], group = 2 : i64, pad = [2, 2, 2, 2], stride = [4, 5]} +// CHECK-DAG: ^bb0(%[[in:.*]]: f32, %[[in_5:.*]]: f32, %[[out:.*]]: f32) +// CHECK-DAG: %[[three:.*]] = arith.mulf %[[in]], %[[in_5]] +// CHECK-DAG: %[[four:.*]] = arith.addf %[[out]], %[[three]] +// CHECK-DAG: linalg.yield %[[four]] +func.func @conv_2d(%in: !migraphx.shaped<2x4x123x124xf32, 61008x15252x124x1>, %fil: !migraphx.shaped<8x2x4x5xf32, 40x20x5x1>) -> !migraphx.shaped<2x8x27x19xf32, 4104x513x19x1> { + %out = migraphx.convolution %in, %fil {dilation = [2, 3], group = 2 : i64, padding = [2, 2, 2, 2], padding_mode = 0 : i64, stride = [4, 5]} : + <2x4x123x124xf32, 61008x15252x124x1>, <8x2x4x5xf32, 40x20x5x1> -> <2x8x27x19xf32, 4104x513x19x1> + func.return %out : !migraphx.shaped<2x8x27x19xf32, 4104x513x19x1> +} + +// ----- +// CHECK: #[[map:.*]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5)> +// CHECK: #[[map1:.*]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> +// CHECK: #[[map2:.*]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> + +// CHECK-LABEL: func.func @conv_1d( +// CHECK: linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} +// CHECK-SAME: attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [3, 3], stride = [1]} +// CHECK-DAG: ^bb0(%[[in:.*]]: f32, %[[in_5:.*]]: f32, %[[out:.*]]: f32) +// CHECK-DAG: %[[three:.*]] = arith.mulf %[[in]], %[[in_5]] +// CHECK-DAG: %[[four:.*]] = arith.addf %[[out]], %[[three]] +// CHECK-DAG: linalg.yield %[[four]] +func.func 
@conv_1d(%arg0: !migraphx.shaped<1x64x224xf32, 14336x224x1>, %arg1: !migraphx.shaped<1x3x224xf32, 672x224x1>, %arg2: !migraphx.shaped<64x3x7xf32, 21x7x1>) -> !migraphx.shaped<1x64x224xf32, 14336x224x1> { + %0 = migraphx.convolution %arg1, %arg2 {dilation = [1], group = 1 : i64, padding = [3, 3], padding_mode = 0 : i64, stride = [1]} : <1x3x224xf32, 672x224x1>, <64x3x7xf32, 21x7x1> -> <1x64x224xf32, 14336x224x1> + return %0 : !migraphx.shaped<1x64x224xf32, 14336x224x1> +} + +// ----- + +// Currently, we don't support type casting +func.func @conv_1d_different_types(%arg1: !migraphx.shaped<1x3x224xf16, 672x224x1>, %arg2: !migraphx.shaped<64x3x7xf16, 21x7x1>) -> !migraphx.shaped<1x64x224xf32, 14336x224x1> { + // expected-error @+2 {{type casting between operands and result is unsupported for now}} + // expected-error @+1 {{failed to legalize operation}} + %0 = migraphx.convolution %arg1, %arg2 {dilation = [1], group = 1 : i64, padding = [3, 3], padding_mode = 0 : i64, stride = [1]} : <1x3x224xf16, 672x224x1>, <64x3x7xf16, 21x7x1> -> <1x64x224xf32, 14336x224x1> + return %0 : !migraphx.shaped<1x64x224xf32, 14336x224x1> +} + +// ----- + +// Checking for the perf_config, dilation, strides, and pad attributes + +// CHECK-LABEL: func.func @mlir_convolution_add( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor{{.*}} +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[cst:.*]] = arith.constant +// CHECK-DAG: %[[zero:.*]] = linalg.generic {{.*}} ins(%[[expanded_1]], %[[expanded_2]] : tensor{{.*}}) outs(%[[cst]] : tensor{{.*}}) +// CHECK-SAME: attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", stride = [2, 2, 2]} +// CHECK-DAG: 
%[[collapsed:.*]] = tensor.collapse_shape %[[zero]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +func.func @mlir_convolution_add(%arg1: !migraphx.shaped<2x3x5x5x5xf32, 375x125x25x5x1>, %arg2: !migraphx.shaped<4x3x2x2x2xf32, 24x8x4x2x1>) -> !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> attributes {kernel, arch="gfx950"}{ + %0 = migraphx.convolution %arg1, %arg2 {perf_config="v3:16,32,4,16,16,4,4,1,2,1,1", dilation = [2, 2, 2], group = 1 : i64, padding = [0, 0, 0, 0, 0, 0], padding_mode = 0 : i64, stride = [2, 2, 2]} : <2x3x5x5x5xf32, 375x125x25x5x1>, <4x3x2x2x2xf32, 24x8x4x2x1> -> <2x4x2x2x2xf32, 32x8x4x2x1> + return %0 : !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> +} diff --git a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-not-implemented.mlir b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-not-implemented.mlir index 88abbb598b3a..aaa661e6ccbb 100644 --- a/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-not-implemented.mlir +++ b/mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-not-implemented.mlir @@ -60,12 +60,6 @@ func.func @func_quant_convolution(%arg0: !migraphx.shaped<1x1xi8, 1x1>, %arg1: ! 
func.return } -func.func @func_convolution(%arg0: !migraphx.shaped<1x1xf32, 1x1>, %arg1: !migraphx.shaped<1x1xf32, 1x1>) { - // expected-error @+1{{failed to legalize operation 'migraphx.convolution'}} - migraphx.convolution %arg0, %arg1 {dilation = [1, 1], group = 1 : i64, padding = [0, 0], stride = [1, 1]}: <1x1xf32, 1x1>, <1x1xf32, 1x1> -> <1x1xf32, 1x1> - func.return -} - func.func @func_backwards_data_convolution(%arg0: !migraphx.shaped<1x1xf32, 1x1>, %arg1: !migraphx.shaped<1x1xf32, 1x1>) { // expected-error @+1{{failed to legalize operation 'migraphx.backwards_data_convolution'}} migraphx.backwards_data_convolution %arg0, %arg1 {dilation = [1, 1], group = 1 : i64, padding = [0, 0], stride = [1, 1]}: <1x1xf32, 1x1>, <1x1xf32, 1x1> -> <1x1xf32, 1x1> diff --git a/mlir/tools/rocmlir-driver/rocmlir-driver.cpp b/mlir/tools/rocmlir-driver/rocmlir-driver.cpp index db74b31758b0..f2abfc6cb254 100644 --- a/mlir/tools/rocmlir-driver/rocmlir-driver.cpp +++ b/mlir/tools/rocmlir-driver/rocmlir-driver.cpp @@ -57,11 +57,11 @@ static cl::opt kernelPipeline( "gpu,rocdl,binary or full"), cl::init("")); -static cl::opt - hostPipeline("host-pipeline", cl::desc("rocmlir-driver host pipeline list"), - cl::value_desc("comma separated list of rock pipelines: " - "migraphx,highlevel,mhal,runner or full"), - cl::init("")); +static cl::opt hostPipeline( + "host-pipeline", cl::desc("rocmlir-driver host pipeline list"), + cl::value_desc("comma separated list of rock pipelines: " + "migraphx,migraphx-linalg,highlevel,mhal,runner or full"), + cl::init("")); static cl::opt legacyRockPipeline("c", cl::Hidden, cl::init(false), cl::Optional, @@ -279,8 +279,8 @@ static LogicalResult runMLIRPasses(ModuleOp &module, } } - llvm::SmallDenseSet hostPipelineOptions{"migraphx", "highlevel", - "mhal", "runner"}; + llvm::SmallDenseSet hostPipelineOptions{ + "migraphx", "highlevel", "mhal", "runner", "migraphx-linalg"}; llvm::SmallDenseSet hostPipelineSet; std::string hostPipelineStr = 
hostPipeline.getValue(); if (failed(parsePipeline(hostPipelineStr, hostPipelineSet, @@ -288,9 +288,11 @@ static LogicalResult runMLIRPasses(ModuleOp &module, return failure(); } - if (hostPipelineSet.contains("migraphx")) { + if (hostPipelineSet.contains("migraphx") || + hostPipelineSet.contains("migraphx-linalg")) { PassManager pm(module->getName(), PassManager::Nesting::Implicit); - migraphx::addHighLevelPipeline(pm); + bool lowerFromLinalg = hostPipelineSet.contains("migraphx-linalg"); + migraphx::addHighLevelPipeline(pm, lowerFromLinalg); if (failed(pm.run(module))) { return failure(); }