diff --git a/include/Conversion/AffineToNeura/LoopNestAnalysis.h b/include/Conversion/AffineToNeura/LoopNestAnalysis.h
new file mode 100644
index 00000000..67517371
--- /dev/null
+++ b/include/Conversion/AffineToNeura/LoopNestAnalysis.h
@@ -0,0 +1,70 @@
+#ifndef CONVERSION_AFFINE_TO_NEURA_LOOP_NEST_ANALYSIS_H
+#define CONVERSION_AFFINE_TO_NEURA_LOOP_NEST_ANALYSIS_H
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/IR/Operation.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include <memory>
+
+namespace mlir {
+namespace neura {
+
+/// Loop information structure - stores all analysis information for a single loop.
+struct LoopInfo {
+  affine::AffineForOp loop;                // The loop operation itself.
+  LoopInfo *parent = nullptr;              // Parent loop (nullptr if top-level).
+  llvm::SmallVector<LoopInfo *> children;  // Child loops.
+  unsigned depth = 0;                      // Nesting depth (0 = top-level).
+  bool is_perfect_nest = true;             // Whether this loop is a perfect nest.
+
+  // Operation lists for imperfect nests.
+  llvm::SmallVector<Operation *> operations_before_child; // Operations before the child loops.
+  llvm::SmallVector<Operation *> operations_after_child;  // Operations after the child loops.
+
+  LoopInfo(affine::AffineForOp loop) : loop(loop) {}
+};
+
+/// Loop nest analysis class.
+///
+/// Purpose: Provides loop hierarchy information to the AffineToNeura pass to support optimization decisions.
+///
+/// Usage example:
+///   LoopNestAnalysis analysis(func_op);
+///   analysis.dump(); // Prints analysis results.
+///   LoopInfo *info = analysis.getLoopInfo(loop);
+///   if (info && info->parent) {
+///     // This is a nested loop; it can reuse the parent's valid signal.
+///   }
+class LoopNestAnalysis {
+public:
+  /// Constructor - performs loop nest analysis on the given function.
+  explicit LoopNestAnalysis(func::FuncOp func);
+
+  /// Query interfaces.
+  LoopInfo *getLoopInfo(affine::AffineForOp loop) const; // Gets loop information.
+  llvm::ArrayRef<LoopInfo *> getTopLevelLoops() const { return topLevelLoops; } // Gets top-level loops.
+  llvm::ArrayRef<std::unique_ptr<LoopInfo>> getAllLoops() const { return allLoops; } // Gets all loops.
+  bool isPerfectNest(affine::AffineForOp loop) const;      // Checks whether the loop is a perfect nest.
+  LoopInfo *getParentLoop(affine::AffineForOp loop) const; // Gets the parent loop.
+  llvm::ArrayRef<LoopInfo *> getChildLoops(affine::AffineForOp loop) const; // Gets the child loops.
+
+  /// Debug interface - prints analysis results.
+  void dump() const;
+
+private:
+  /// Internal analysis methods.
+  void buildLoopNestTree(func::FuncOp func); // Builds the loop hierarchy tree.
+  void analyzePerfectNests();                // Analyzes perfect-nest characteristics.
+
+  /// Data members.
+  llvm::DenseMap<Operation *, LoopInfo *> loopMap;          // Fast loop lookup table.
+  llvm::SmallVector<std::unique_ptr<LoopInfo>, 8> allLoops; // All loops (owns the LoopInfo objects).
+  llvm::SmallVector<LoopInfo *> topLevelLoops;              // Top-level loop pointers.
+};
+
+} // namespace neura
+} // namespace mlir
+
+#endif
diff --git a/include/Conversion/ConversionPasses.h b/include/Conversion/ConversionPasses.h
index 30cbf0e8..15f9b2d6 100644
--- a/include/Conversion/ConversionPasses.h
+++ b/include/Conversion/ConversionPasses.h
@@ -18,6 +18,7 @@ std::unique_ptr<Pass> createLowerArithToNeuraPass();
 std::unique_ptr<Pass> createLowerLlvmToNeuraPass();
 std::unique_ptr<Pass> createLowerMemRefToNeuraPass();
 std::unique_ptr<Pass> createLowerBuiltinToNeuraPass();
+std::unique_ptr<Pass> createLowerAffineToNeuraPass();
 
 #define GEN_PASS_REGISTRATION
 #include "Conversion/ConversionPasses.h.inc"
diff --git a/include/Conversion/ConversionPasses.td b/include/Conversion/ConversionPasses.td
index 2e79dd96..7044b9ad 100644
--- a/include/Conversion/ConversionPasses.td
+++ b/include/Conversion/ConversionPasses.td
@@ -32,4 +32,16 @@ def LowerBuiltinToNeura : Pass<"lower-builtin-to-neura", "ModuleOp">{
   let constructor = "mlir::createLowerBuiltinToNeuraPass()";
 }
 
+def LowerAffineToNeura : Pass<"lower-affine-to-neura", "func::FuncOp">{
+  let summary = "Lower perfectly nested affine loops to Neura loop_control operations";
+  let description = [{
+    Converts perfectly nested affine.for loops directly to the Neura dialect using
+    loop_control operations, avoiding the need to flatten to LLVM IR first.
+    This preserves loop structure information for better optimization on
+    dataflow architectures.
+  }];
+  let constructor = "mlir::createLowerAffineToNeuraPass()";
+  let dependentDialects = ["mlir::neura::NeuraDialect", "mlir::affine::AffineDialect"];
+}
+
 #endif // CONVERSION_PASSES_TD
\ No newline at end of file
diff --git a/include/NeuraDialect/Architecture/Architecture.h b/include/NeuraDialect/Architecture/Architecture.h
index ca7a4951..bae2db90 100644
--- a/include/NeuraDialect/Architecture/Architecture.h
+++ b/include/NeuraDialect/Architecture/Architecture.h
@@ -57,7 +57,9 @@ enum OperationKind {
   // Loop control operations.
   ILoopControl = 34,
   // Constant operations.
-  IConstant = 35
+  IConstant = 35,
+  // Steering control fused operations.
+  ICarryInvariant = 36, IConditionalSelect = 37, IInvariantGroup = 38
 };
 
 //===----------------------------------------------------------------------===//
diff --git a/include/NeuraDialect/Mapping/mapping_util.h b/include/NeuraDialect/Mapping/mapping_util.h
index 8c301aa1..cf85d2a2 100644
--- a/include/NeuraDialect/Mapping/mapping_util.h
+++ b/include/NeuraDialect/Mapping/mapping_util.h
@@ -12,6 +12,10 @@ OperationKind getOperationKindFromMlirOp(Operation *op);
 // Returns true if the operation does not need CGRA tile placement.
 bool is_non_materialized(Operation *op);
 
+// Returns true if the operation is a steering-mode operation that does not
+// require DataMovOp wrapping (e.g., constants, carry, invariant).
+bool is_steering_unwrapped_op(Operation *op);
+
 // Returns true if the operation is a materialized reserve user, i.e.,
 // phi, invariant, carry.
bool isMaterializedReserveUser(Operation *op); diff --git a/include/NeuraDialect/NeuraPasses.td b/include/NeuraDialect/NeuraPasses.td index 3d70af2c..d7f4974a 100644 --- a/include/NeuraDialect/NeuraPasses.td +++ b/include/NeuraDialect/NeuraPasses.td @@ -134,4 +134,5 @@ def RemovePredicatedType : Pass<"remove-predicated-type", "ModuleOp"> { }]; let constructor = "neura::createRemovePredicatedTypePass()"; } + #endif // NEURA_PASSES_TD \ No newline at end of file diff --git a/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp b/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp new file mode 100644 index 00000000..39051720 --- /dev/null +++ b/lib/Conversion/AffineToNeura/AffineToNeuraPass.cpp @@ -0,0 +1,593 @@ +#include "Common/AcceleratorAttrs.h" +#include "Conversion/ConversionPasses.h" +#include "Conversion/AffineToNeura/LoopNestAnalysis.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/IR/AffineExpr.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/Region.h" +#include "mlir/IR/ValueRange.h" +#include "mlir/IR/Visitors.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Transforms/DialectConversion.h" + +#include "NeuraDialect/NeuraDialect.h" +#include "NeuraDialect/NeuraOps.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace mlir; +using namespace mlir::neura; +using namespace mlir::func; + +#define GEN_PASS_DEF_LOWERAFFINETONEURA +#include "Conversion/ConversionPasses.h.inc" + +namespace { +// Converts an AffineMap to explicit index computations using Neura operations. +// This function handles the expansion of affine expressions into arithmetic ops. +// +// Example 1 - Simple dimension access: +// Before: affine_map<(d0, d1) -> (d0, d1)> with operands (%i, %j) +// After: Returns [%i, %j] directly +// +// Example 2 - Constant offset: +// Before: affine_map<(d0) -> (d0 + 5)> with operand %i +// After: %c5 = neura.constant 5 : index +// %result = neura.add %i, %c5 : index +// Returns [%result] +// +// Example 3 - Complex expression: +// Before: affine_map<(d0, d1) -> (d0 * 2 + d1)> with operands (%i, %j) +// After: %c2 = neura.constant 2 : index +// %mul = neura.mul %i, %c2 : index +// %result = neura.add %mul, %j : index +// Returns [%result] +LogicalResult convertAffineMapToIndices(AffineMap map, ValueRange map_operands, + Location loc, PatternRewriter &rewriter, + SmallVector &new_indices) { + new_indices.clear(); + new_indices.reserve(map.getNumResults()); + for (AffineExpr expr : map.getResults()) { + if (AffineConstantExpr const_expr = dyn_cast(expr)) { + IndexType index_type = rewriter.getIndexType(); + IntegerAttr value_attr = + rewriter.getIntegerAttr(index_type, const_expr.getValue()); + new_indices.push_back(rewriter.create( + loc, index_type, value_attr)); + } else if (AffineDimExpr dim_expr = dyn_cast(expr)) { + if (dim_expr.getPosition() >= map.getNumDims() || + dim_expr.getPosition() >= + map_operands + .size()) { // Checks against mapOperands size for safety. 
+ return failure(); + } + new_indices.push_back(map_operands[dim_expr.getPosition()]); + } else if (AffineSymbolExpr sym_expr = dyn_cast(expr)) { + unsigned symbol_operand_index = map.getNumDims() + sym_expr.getPosition(); + if (symbol_operand_index >= map_operands.size()) { + return failure(); + } + new_indices.push_back(map_operands[symbol_operand_index]); + } else { + // For more complex affine expressions (e.g., d0 + c1), + // expands them into explicit Neura arithmetic operations. + // Supports: Add, Mul, Mod, FloorDiv, CeilDiv. + llvm::errs() << "[affine2neura] Expanding complex affine expression: " + << expr << "\n"; + + // Helper lambda: recursively expands AffineExpr to Value. + std::function expandExpr = + [&](AffineExpr e) -> Value { + // Constant expression. + if (auto const_expr = dyn_cast(e)) { + return rewriter.create( + loc, rewriter.getIndexType(), + rewriter.getIntegerAttr(rewriter.getIndexType(), + const_expr.getValue())); + } + // Dimension expression. + else if (auto dim_expr = dyn_cast(e)) { + return map_operands[dim_expr.getPosition()]; + } + // Symbol expression. + else if (auto sym_expr = dyn_cast(e)) { + unsigned symbol_operand_index = + map.getNumDims() + sym_expr.getPosition(); + return map_operands[symbol_operand_index]; + } + // Binary operation expression. + else if (auto bin_expr = dyn_cast(e)) { + Value lhs = expandExpr(bin_expr.getLHS()); + Value rhs = expandExpr(bin_expr.getRHS()); + + switch (bin_expr.getKind()) { + case AffineExprKind::Add: + return rewriter.create( + loc, rewriter.getIndexType(), lhs, rhs).getResult(); + case AffineExprKind::Mul: + return rewriter.create( + loc, rewriter.getIndexType(), lhs, rhs).getResult(); + case AffineExprKind::Mod: + return rewriter.create( + loc, rewriter.getIndexType(), lhs, rhs).getResult(); + case AffineExprKind::FloorDiv: + return rewriter.create( + loc, rewriter.getIndexType(), lhs, rhs).getResult(); + case AffineExprKind::CeilDiv: { + // ceildiv(a, b) = floordiv(a + b - 1, b). + Value one = rewriter.create( + loc, rewriter.getIndexType(), + rewriter.getIntegerAttr(rewriter.getIndexType(), 1)); + Value b_minus_1 = rewriter.create( + loc, rewriter.getIndexType(), rhs, one).getResult(); + Value numerator = rewriter.create( + loc, rewriter.getIndexType(), lhs, b_minus_1).getResult(); + return rewriter.create( + loc, rewriter.getIndexType(), numerator, rhs).getResult(); + } + default: + llvm::errs() << "[affine2neura] Unsupported binary op kind: " + << static_cast(bin_expr.getKind()) << "\n"; + return Value(); + } + } + + llvm::errs() << "[affine2neura] Unsupported affine expression type\n"; + return Value(); + }; + + Value expanded = expandExpr(expr); + if (!expanded) { + // Fallback: if expansion fails, use affine.apply (ensures correctness). + llvm::errs() << "[affine2neura] Failed to expand, using affine.apply\n"; + AffineMap single_result_map = AffineMap::get( + map.getNumDims(), map.getNumSymbols(), expr, rewriter.getContext()); + expanded = rewriter.create( + loc, single_result_map, map_operands); + } + new_indices.push_back(expanded); + } + } + return success(); +} + +// Converts affine.load to neura.load_indexed. +// Expands the affine map into explicit index computations. 
+// +// Example 1 - Simple 2D array access: +// Before: %val = affine.load %A[%i, %j] : memref<10x20xf32> +// After: %val = neura.load_indexed %A[%i, %j : index, index] memref<10x20xf32> : f32 +// +// Example 2 - With affine expression: +// Before: %val = affine.load %A[%i * 2 + 1, %j] : memref<100x100xf32> +// After: %c2 = neura.constant 2 : index +// %c1 = neura.constant 1 : index +// %mul = neura.mul %i, %c2 : index +// %idx0 = neura.add %mul, %c1 : index +// %val = neura.load_indexed %A[%idx0, %j : index, index] memref<100x100xf32> : f32 +struct AffineLoadLowering : public OpRewritePattern { + AffineLoadLowering(MLIRContext *context) + : OpRewritePattern(context, /*benefit=*/1) {} + + LogicalResult matchAndRewrite(affine::AffineLoadOp load_op, + PatternRewriter &rewriter) const override { + Location loc = load_op.getLoc(); + auto memref = load_op.getMemref(); + AffineMap map = load_op.getAffineMap(); + ValueRange map_operands = load_op.getMapOperands(); + // Gets the indices for the load operation. + SmallVector new_indices; + if (failed(convertAffineMapToIndices(map, map_operands, loc, rewriter, + new_indices))) { + return load_op.emitError( + "[affine2neura] Failed to convert affine map to indices"); + } + + MemRefType memref_type = dyn_cast(memref.getType()); + if (!memref_type) { + return load_op.emitError( + "[affine2neura] Base of load is not a MemRefType"); + } + if (new_indices.size() != static_cast(memref_type.getRank())) { + return load_op.emitError( + "[affine2neura] Number of indices from affine map (") + << new_indices.size() << ") does not match memref rank (" + << memref_type.getRank() << ")"; + } + + // NOTE: No explicit dimension limit is enforced here. The lowering supports + // arbitrary dimensions theoretically. For CGRA hardware with limited address + // generation units, dimension constraints should be handled at a later stage + // (e.g., during mapping or hardware-specific lowering passes). + + // Creates the neura.load_indexed operation. + LoadIndexedOp new_load_op = rewriter.create( + loc, load_op.getType(), memref, ValueRange{new_indices}); + + rewriter.replaceOp(load_op, new_load_op.getResult()); + return success(); + } +}; + +// Converts affine.store to neura.store_indexed. +// Similar to AffineLoadLowering, expands affine maps into explicit indices. 
+// +// Example 1 - Simple store: +// Before: affine.store %val, %A[%i, %j] : memref<10x20xf32> +// After: neura.store_indexed %val to %A[%i, %j : index, index] memref<10x20xf32> : f32 +// +// Example 2 - With affine expression: +// Before: affine.store %val, %A[%i + 1, %j * 2] : memref<100x100xf32> +// After: %c1 = neura.constant 1 : index +// %c2 = neura.constant 2 : index +// %idx0 = neura.add %i, %c1 : index +// %idx1 = neura.mul %j, %c2 : index +// neura.store_indexed %val to %A[%idx0, %idx1 : index, index] memref<100x100xf32> : f32 +struct AffineStoreLowering : public OpRewritePattern { + AffineStoreLowering(MLIRContext *context) + : OpRewritePattern(context, /*benefit=*/1) {} + + LogicalResult matchAndRewrite(affine::AffineStoreOp store_op, + PatternRewriter &rewriter) const override { + Location loc = store_op.getLoc(); + auto memref = store_op.getMemref(); + Value value = store_op.getValueToStore(); + AffineMap map = store_op.getAffineMap(); + ValueRange mapOperands = store_op.getMapOperands(); + + SmallVector newIndices; + if (failed(convertAffineMapToIndices(map, mapOperands, loc, rewriter, + newIndices))) { + return store_op.emitError( + "[affine2neura] Failed to convert affine map to indices"); + } + + MemRefType memRefType = dyn_cast(memref.getType()); + if (!memRefType) { + return store_op.emitError( + "[affine2neura] Base of store is not a MemRefType"); + } + if (newIndices.size() != static_cast(memRefType.getRank())) { + return store_op.emitError( + "[affine2neura] Number of indices from affine map (") + << newIndices.size() << ") does not match memref rank (" + << memRefType.getRank() << ")"; + } + + rewriter.create(loc, value, memref, + ValueRange{newIndices}); + rewriter.eraseOp(store_op); + return success(); + } +}; + +// Converts affine.apply to explicit Neura arithmetic operations. +// Recursively expands the affine expression tree into primitive operations. +// +// Example 1 - Linear expression: +// Before: %result = affine.apply affine_map<(d0) -> (d0 + 5)>(%i) +// After: %c5 = neura.constant 5 : index +// %result = neura.add %i, %c5 : index +// +// Example 2 - Multiply-add: +// Before: %result = affine.apply affine_map<(d0, d1) -> (d0 * 2 + d1)>(%i, %j) +// After: %c2 = neura.constant 2 : index +// %mul = neura.mul %i, %c2 : index +// %result = neura.add %mul, %j : index +// +// Example 3 - Modulo operation: +// Before: %result = affine.apply affine_map<(d0) -> (d0 mod 8)>(%i) +// After: %c8 = neura.constant 8 : index +// %result = neura.rem %i, %c8 : index +// +// Example 4 - Complex nested expression: +// Before: %result = affine.apply affine_map<(d0, d1) -> ((d0 + 1) * d1)>(%i, %j) +// After: %c1 = neura.constant 1 : index +// %add = neura.add %i, %c1 : index +// %result = neura.mul %add, %j : index +struct AffineApplyLowering : public OpRewritePattern { + AffineApplyLowering(MLIRContext *context) + : OpRewritePattern(context, /*benefit=*/1) {} + + LogicalResult matchAndRewrite(affine::AffineApplyOp apply_op, + PatternRewriter &rewriter) const override { + AffineMap map = apply_op.getAffineMap(); + ValueRange operands = apply_op.getMapOperands(); + Location loc = apply_op.getLoc(); + + // Note: AffineMap can have multiple results in general MLIR contexts + // (e.g., affine_map<(d0, d1) -> (d0 + 1, d1 * 2)> returns two values). + // However, AffineApplyOp specifically enforces single-result maps at + // construction time. This check serves as a safety guard. 
+ // + // Example transformation: + // Before: %result = affine.apply affine_map<(d0, d1) -> (d0 * 2 + d1)>(%i, %j) + // After: %c2 = arith.constant 2 : index + // %mul = arith.muli %i, %c2 : index + // %result = arith.addi %mul, %j : index + if (map.getNumResults() != 1) { + return apply_op.emitError( + "[affine2neura] AffineApplyOp must have a single result"); + } + + AffineExpr expr = map.getResult(0); + llvm::errs() << "[affine2neura] Expanding affine.apply expression: " + << expr << "\n"; + + // Helper lambda: recursively expands AffineExpr to Value. + std::function expandExpr = + [&](AffineExpr e) -> Value { + // Constant expression. + if (auto const_expr = dyn_cast(e)) { + return rewriter.create( + loc, rewriter.getIndexType(), + rewriter.getIntegerAttr(rewriter.getIndexType(), + const_expr.getValue())); + } + // Dimension expression. + else if (auto dim_expr = dyn_cast(e)) { + return operands[dim_expr.getPosition()]; + } + // Symbol expression. + else if (auto sym_expr = dyn_cast(e)) { + unsigned symbol_operand_index = + map.getNumDims() + sym_expr.getPosition(); + return operands[symbol_operand_index]; + } + // Binary operation expression. + else if (auto bin_expr = dyn_cast(e)) { + Value lhs = expandExpr(bin_expr.getLHS()); + Value rhs = expandExpr(bin_expr.getRHS()); + + if (!lhs || !rhs) { + return Value(); + } + + switch (bin_expr.getKind()) { + case AffineExprKind::Add: + return rewriter.create( + loc, rewriter.getIndexType(), lhs, rhs).getResult(); + case AffineExprKind::Mul: + return rewriter.create( + loc, rewriter.getIndexType(), lhs, rhs).getResult(); + case AffineExprKind::Mod: + return rewriter.create( + loc, rewriter.getIndexType(), lhs, rhs).getResult(); + case AffineExprKind::FloorDiv: + return rewriter.create( + loc, rewriter.getIndexType(), lhs, rhs).getResult(); + case AffineExprKind::CeilDiv: { + // ceildiv(a, b) = floordiv(a + b - 1, b). + Value one = rewriter.create( + loc, rewriter.getIndexType(), + rewriter.getIntegerAttr(rewriter.getIndexType(), 1)); + Value b_minus_1 = rewriter.create( + loc, rewriter.getIndexType(), rhs, one).getResult(); + Value numerator = rewriter.create( + loc, rewriter.getIndexType(), lhs, b_minus_1).getResult(); + return rewriter.create( + loc, rewriter.getIndexType(), numerator, rhs).getResult(); + } + default: + llvm::errs() << "[affine2neura] Unsupported binary op kind: " + << static_cast(bin_expr.getKind()) << "\n"; + return Value(); + } + } + + llvm::errs() << "[affine2neura] Unsupported affine expression type\n"; + return Value(); + }; + + Value expanded = expandExpr(expr); + if (!expanded) { + return apply_op.emitError("[affine2neura] Failed to expand affine.apply expression"); + } + + rewriter.replaceOp(apply_op, expanded); + return success(); + } +}; + +// Converts affine.for loops to neura.loop_control with dataflow semantics. +// Creates constant true for top-level loops, reuses parent's valid signal for nested loops. 
+// +// Example 1 - Simple single loop: +// Before: affine.for %i = 0 to 10 { +// %val = affine.load %A[%i] : memref<10xf32> +// } +// After: %c_true = neura.constant 1 : i1 +// %i, %valid1 = "neura.loop_control"(%c_true) <{end = 10, start = 0, step = 1}> : (i1) -> (index, i1) +// %val = neura.load_indexed %A[%i : index] memref<10xf32> : f32 +// +// Example 2 - Nested loops (demonstrates valid signal reuse): +// Before: affine.for %i = 0 to 10 { +// affine.for %j = 0 to 20 { +// %val = affine.load %A[%i, %j] : memref<10x20xf32> +// } +// } +// After: %c_true = neura.constant 1 : i1 +// %i, %valid_i = "neura.loop_control"(%c_true) <{end = 10, start = 0, step = 1}> : (i1) -> (index, i1) +// %j, %valid_j = "neura.loop_control"(%valid_i) <{end = 20, start = 0, step = 1}> : (i1) -> (index, i1) +// %val = neura.load_indexed %A[%i, %j : index, index] memref<10x20xf32> : f32 +// (Note: Inner loop reuses outer loop's valid_i signal, no second constant) +// +// Example 3 - Non-zero bounds and step: +// Before: affine.for %i = 5 to 100 step 2 { +// %val = affine.load %A[%i] : memref<100xf32> +// } +// After: %c_true = neura.constant 1 : i1 +// %i, %valid1 = "neura.loop_control"(%c_true) <{end = 100, start = 5, step = 2}> : (i1) -> (index, i1) +// %val = neura.load_indexed %A[%i : index] memref<100xf32> : f32 +struct AffineForLowering : public OpRewritePattern { + const LoopNestAnalysis &analysis; + llvm::DenseMap &loopValidSignals; + + AffineForLowering(MLIRContext *context, const LoopNestAnalysis &analysis, + llvm::DenseMap &loopValidSignals) + : OpRewritePattern(context, /*benefit=*/1), + analysis(analysis), loopValidSignals(loopValidSignals) {} + + LogicalResult matchAndRewrite(affine::AffineForOp for_op, + PatternRewriter &rewriter) const override { + Location loc = for_op.getLoc(); + + // Extracts loop bounds - must be constant. + // Dynamic bounds are not supported as neura.loop_control requires + // compile-time constant attributes for hardware configuration. + if (!for_op.hasConstantLowerBound() || !for_op.hasConstantUpperBound()) { + return for_op.emitError( + "[affine2neura] Non-constant loop bounds not supported. " + "Loop bounds must be compile-time constants for CGRA configuration"); + } + + int64_t lower_bound = for_op.getConstantLowerBound(); + int64_t upper_bound = for_op.getConstantUpperBound(); + int64_t step = for_op.getStepAsInt(); + + // Get loop nesting information + LoopInfo *loopInfo = analysis.getLoopInfo(for_op); + Type i1_type = rewriter.getI1Type(); + Value parent_valid; + + // Optimization: Reuse parent loop's valid signal for nested loops. + // This avoids creating redundant initialization for each nested loop. 
+ if (loopInfo && loopInfo->parent) { + // This is a nested loop - try to reuse parent's loop_valid signal + auto it = loopValidSignals.find(loopInfo->parent->loop.getOperation()); + if (it != loopValidSignals.end()) { + parent_valid = it->second; + llvm::errs() << "[affine2neura] Reusing parent valid signal for " + << "nested loop (depth=" << loopInfo->depth << ")\n"; + } else { + // Fallback: parent not yet converted, create constant true + IntegerAttr true_attr = rewriter.getIntegerAttr(i1_type, 1); + parent_valid = rewriter.create(loc, i1_type, true_attr); + llvm::errs() << "[affine2neura] Parent valid not available, " + << "creating constant true for nested loop\n"; + } + } else { + // Top-level loop - create constant true to ensure it's always valid + IntegerAttr true_attr = rewriter.getIntegerAttr(i1_type, 1); + parent_valid = rewriter.create(loc, i1_type, true_attr); + if (loopInfo) { + llvm::errs() << "[affine2neura] Created constant true for top-level loop " + << "(depth=" << loopInfo->depth << ")\n"; + } + } + + // Creates loop_control operation. + auto index_type = rewriter.getIndexType(); + + auto loop_control = rewriter.create( + loc, + /*resultTypes=*/TypeRange{index_type, i1_type}, + /*parentValid=*/parent_valid, + /*iterationType=*/rewriter.getStringAttr("increment"), + /*start=*/rewriter.getI64IntegerAttr(lower_bound), + /*end=*/rewriter.getI64IntegerAttr(upper_bound), + /*step=*/rewriter.getI64IntegerAttr(step)); + + Value loop_index = loop_control.getResult(0); + Value loop_valid = loop_control.getResult(1); + + // Store the loop_valid signal for child loops to use. + // This enables the optimization for nested loops. + loopValidSignals[for_op.getOperation()] = loop_valid; + + // Inlines the body operations before the for_op. + Block &body_block = for_op.getRegion().front(); + Operation *terminator = body_block.getTerminator(); + rewriter.eraseOp(terminator); // Removes affine.yield first. + + // Merge the loop body into the parent block before the for_op. + // Pass the loop_index as replacement for the induction variable block argument. + rewriter.inlineBlockBefore(&body_block, for_op.getOperation(), {loop_index}); + + // Erases the for_op. + rewriter.eraseOp(for_op); + + return success(); + } +}; + +struct LowerAffineToNeuraPass + : public PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LowerAffineToNeuraPass) + + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } + + StringRef getArgument() const override { return "lower-affine-to-neura"; } + StringRef getDescription() const override { + return "Lower affine operations to Neura dialect operations"; + } + + void runOnOperation() override { + ModuleOp module_op = getOperation(); + MLIRContext *context = module_op.getContext(); + + module_op.walk([&](func::FuncOp func_op) { + // Checks if function targets neura accelerator, or applies to all if no attribute. + if (func_op->hasAttr(mlir::accel::kAcceleratorAttr)) { + auto target = func_op->getAttrOfType( + mlir::accel::kAcceleratorAttr); + if (!target || target.getValue() != mlir::accel::kNeuraTarget) { + return; // Skips this function. + } + } + // If no accelerator attribute, applies the pass anyway (for testing). 
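+      // Note: kAcceleratorAttr and kNeuraTarget come from Common/AcceleratorAttrs.h
+      // (included above); functions tagged for other accelerators are skipped.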
+ + // Step 1: Perform loop nest analysis + // This builds the loop hierarchy and identifies perfect/imperfect nests + llvm::errs() << "[affine2neura] Analyzing loop nests in function: " + << func_op.getName() << "\n"; + LoopNestAnalysis analysis(func_op); + analysis.dump(); // Print analysis results for debugging + + // Step 2: Create a map to store loop_valid signals + // This allows nested loops to reuse parent's valid signal + llvm::DenseMap loopValidSignals; + + // Step 3: Set up dialect conversion + // We use Dialect Conversion instead of Greedy Pattern Rewriter because: + // 1. It provides better error reporting when conversion fails + // 2. It explicitly defines which operations are legal/illegal + // 3. It's the standard approach for dialect lowering passes + ConversionTarget target(*context); + target.addLegalDialect(); + target.addIllegalDialect(); + + // Step 4: Register rewrite patterns with analysis + RewritePatternSet patterns(context); + patterns.add(context); + // Pass references to the analysis and loopValidSignals map + patterns.add(context, std::cref(analysis), + std::ref(loopValidSignals)); + + if (failed(applyPartialConversion(func_op, target, std::move(patterns)))) { + func_op.emitError("[affine2neura] Failed to lower affine " + "operations to Neura dialect"); + signalPassFailure(); + } + }); + } +}; +} // namespace + +std::unique_ptr mlir::createLowerAffineToNeuraPass() { + return std::make_unique(); +} \ No newline at end of file diff --git a/lib/Conversion/AffineToNeura/CMakeLists.txt b/lib/Conversion/AffineToNeura/CMakeLists.txt new file mode 100644 index 00000000..285099f3 --- /dev/null +++ b/lib/Conversion/AffineToNeura/CMakeLists.txt @@ -0,0 +1,19 @@ +add_mlir_dialect_library(MLIRNeuraAffineToNeuraPass + AffineToNeuraPass.cpp + LoopNestAnalysis.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/Conversion + + DEPENDS + MLIRConversionIncGen + + LINK_LIBS PUBLIC + MLIRAffineDialect + MLIRNeura + MLIRIR + MLIRPass + MLIRSupport + MLIRTransforms + MLIRFuncDialect +) diff --git a/lib/Conversion/AffineToNeura/LoopNestAnalysis.cpp b/lib/Conversion/AffineToNeura/LoopNestAnalysis.cpp new file mode 100644 index 00000000..e7410994 --- /dev/null +++ b/lib/Conversion/AffineToNeura/LoopNestAnalysis.cpp @@ -0,0 +1,191 @@ +#include "Conversion/AffineToNeura/LoopNestAnalysis.h" +#include "llvm/Support/raw_ostream.h" + +using namespace mlir; +using namespace mlir::neura; + +/// Constructor - Performs complete loop nest analysis. +LoopNestAnalysis::LoopNestAnalysis(func::FuncOp func) { + llvm::errs() << "[LoopNestAnalysis] Starting analysis for function: " + << func.getName() << "\n"; + buildLoopNestTree(func); + llvm::errs() << "[LoopNestAnalysis] Found " << allLoops.size() << " loops\n"; + analyzePerfectNests(); + llvm::errs() << "[LoopNestAnalysis] Analysis complete\n"; +} + +// Builds the loop hierarchy tree. +void LoopNestAnalysis::buildLoopNestTree(func::FuncOp func) { + // Step 1: Collects all loops. + func.walk([&](affine::AffineForOp loop) { + auto loopInfo = std::make_unique(loop); + loopMap[loop.getOperation()] = loopInfo.get(); + allLoops.push_back(std::move(loopInfo)); + }); + + // Step 2: Establishes parent-child relationships. + for (auto &loopInfoPtr : allLoops) { + LoopInfo *loopInfo = loopInfoPtr.get(); + affine::AffineForOp loop = loopInfo->loop; + + // Searches upward for parent loop. 
+    Operation *parentOp = loop->getParentOp();
+    while (parentOp && !isa<func::FuncOp>(parentOp)) {
+      if (auto parentLoop = dyn_cast<affine::AffineForOp>(parentOp)) {
+        auto it = loopMap.find(parentLoop.getOperation());
+        if (it != loopMap.end()) {
+          loopInfo->parent = it->second;
+          loopInfo->depth = loopInfo->parent->depth + 1; // depth = parent_depth + 1.
+          it->second->children.push_back(loopInfo);
+        }
+        break;
+      }
+      parentOp = parentOp->getParentOp();
+    }
+
+    // If there is no parent loop, this is a top-level loop.
+    if (!loopInfo->parent) {
+      topLevelLoops.push_back(loopInfo);
+    }
+  }
+}
+
+// Analyzes perfect nesting characteristics.
+void LoopNestAnalysis::analyzePerfectNests() {
+  for (auto &loopInfoPtr : allLoops) {
+    LoopInfo *info = loopInfoPtr.get();
+
+    // Leaf loops are automatically perfect.
+    if (info->children.empty()) {
+      info->is_perfect_nest = true;
+      continue;
+    }
+
+    Block &body = info->loop.getRegion().front();
+
+    // Builds a child-loop operation set for fast lookup.
+    llvm::DenseSet<Operation *> childLoopOps;
+    for (LoopInfo *child : info->children) {
+      childLoopOps.insert(child->loop.getOperation());
+    }
+
+    Operation *firstChild = info->children.front()->loop.getOperation();
+    Operation *lastChild = info->children.back()->loop.getOperation();
+
+    // Checks whether operations exist before the first child loop.
+    for (Operation &op : body.getOperations()) {
+      if (&op == firstChild) break;
+      if (isa<affine::AffineYieldOp>(&op)) continue;
+      info->operations_before_child.push_back(&op);
+      info->is_perfect_nest = false; // Operations before a child → imperfect.
+    }
+
+    // Checks whether operations exist after the last child loop.
+    bool afterLastChild = false;
+    for (Operation &op : body.getOperations()) {
+      if (&op == lastChild) {
+        afterLastChild = true;
+        continue;
+      }
+      if (afterLastChild && !isa<affine::AffineYieldOp>(&op)) {
+        info->operations_after_child.push_back(&op);
+        info->is_perfect_nest = false; // Operations after a child → imperfect.
+      }
+    }
+
+    // Checks whether operations exist between sibling child loops.
+    // Example: affine.for i { affine.for j1; op; affine.for j2 }
+    if (info->children.size() > 1) {
+      bool betweenChildren = false;
+      Operation *prevChild = nullptr;
+
+      for (Operation &op : body.getOperations()) {
+        if (childLoopOps.contains(&op)) {
+          if (prevChild && betweenChildren) {
+            info->is_perfect_nest = false; // Operations between siblings → imperfect.
+            break;
+          }
+          prevChild = &op;
+          betweenChildren = false;
+        } else if (prevChild && !isa<affine::AffineYieldOp>(&op)) {
+          betweenChildren = true;
+        }
+      }
+    }
+  }
+}
+
+
+// Query Interface Implementation.
+
+// Queries LoopInfo by loop operation.
+LoopInfo *LoopNestAnalysis::getLoopInfo(affine::AffineForOp loop) const {
+  auto it = loopMap.find(loop.getOperation());
+  return it != loopMap.end() ? it->second : nullptr;
+}
+
+// Checks whether the loop is a perfect nest.
+bool LoopNestAnalysis::isPerfectNest(affine::AffineForOp loop) const {
+  LoopInfo *info = getLoopInfo(loop);
+  return info ? info->is_perfect_nest : false;
+}
+
+// Gets the parent loop.
+LoopInfo *LoopNestAnalysis::getParentLoop(affine::AffineForOp loop) const {
+  LoopInfo *info = getLoopInfo(loop);
+  return info ? info->parent : nullptr;
+}
+
+// Gets the list of child loops.
+llvm::ArrayRef<LoopInfo *>
+LoopNestAnalysis::getChildLoops(affine::AffineForOp loop) const {
+  LoopInfo *info = getLoopInfo(loop);
+  return info ? llvm::ArrayRef<LoopInfo *>(info->children)
+              : llvm::ArrayRef<LoopInfo *>();
+}
+
+
+// Debug Output Implementation.
+void LoopNestAnalysis::dump() const {
+  llvm::errs() << "=== Loop Nest Analysis ===\n";
+  llvm::errs() << "Total loops: " << allLoops.size() << "\n";
+  llvm::errs() << "Top-level loops: " << topLevelLoops.size() << "\n\n";
+
+  // Recursive print function.
+  std::function<void(LoopInfo *, unsigned)> printLoop;
+  printLoop = [&](LoopInfo *info, unsigned indent) {
+    // Prints indentation.
+    for (unsigned i = 0; i < indent; ++i) llvm::errs() << "  ";
+
+    // Prints basic loop information.
+    llvm::errs() << "Loop (depth=" << info->depth
+                 << ", perfect=" << (info->is_perfect_nest ? "yes" : "no")
+                 << ", children=" << info->children.size() << ")";
+
+    // If the nest is imperfect, prints detailed information.
+    if (!info->is_perfect_nest) {
+      llvm::errs() << " [IMPERFECT: "
+                   << "ops_before=" << info->operations_before_child.size()
+                   << ", ops_after=" << info->operations_after_child.size()
+                   << "]";
+    }
+    llvm::errs() << "\n";
+
+    // Prints location information.
+    for (unsigned i = 0; i < indent; ++i) llvm::errs() << "  ";
+    llvm::errs() << "  at: ";
+    info->loop.getLoc().print(llvm::errs());
+    llvm::errs() << "\n";
+
+    // Recursively prints child loops.
+    for (LoopInfo *child : info->children) {
+      printLoop(child, indent + 1);
+    }
+  };
+
+  for (LoopInfo *topLoop : topLevelLoops) {
+    printLoop(topLoop, 0);
+  }
+
+  llvm::errs() << "=== End Loop Nest Analysis ===\n\n";
+}
diff --git a/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp b/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp
index dc6f4532..8328eb61 100644
--- a/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp
+++ b/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp
@@ -343,8 +343,9 @@ struct LowerArithToNeuraPass
                  ArithFDivToNeuraFDiv, ArithExtfToNeuraCast, ArithMulFToNeuraFMul,
                  ArithSubIToNeuraSub, ArithSubFToNeuraFSub, ArithMulIToNeuraMul,
                  ArithDivSIToNeuraDiv, ArithRemSIToNeuraOp>(context);
+    // Applies patterns to the function, not the entire module.
     if (failed(
-            applyPatternsGreedily(getOperation(), std::move(patterns)))) {
+            applyPatternsGreedily(func_op, std::move(patterns)))) {
       signalPassFailure();
     }
   }
diff --git a/lib/Conversion/CMakeLists.txt b/lib/Conversion/CMakeLists.txt
index 98f5dac2..bb6ccd5a 100644
--- a/lib/Conversion/CMakeLists.txt
+++ b/lib/Conversion/CMakeLists.txt
@@ -1,6 +1,7 @@
 get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
 
 add_subdirectory(ArithToNeura)
+add_subdirectory(AffineToNeura)
 add_subdirectory(LlvmToNeura)
 add_subdirectory(MemRefToNeura)
 add_subdirectory(BuiltinToNeura)
@@ -16,6 +17,7 @@ target_link_libraries(MLIRConversion INTERFACE
   MLIRTransforms
   MLIRNeura
   MLIRNeuraArithToNeuraPass
+  MLIRNeuraAffineToNeuraPass
   MLIRNeuraLlvmToNeuraPass
   MLIRNeuraMemRefToNeuraPass
   MLIRNeuraBuiltinToNeuraPass
diff --git a/lib/NeuraDialect/Mapping/mapping_util.cpp b/lib/NeuraDialect/Mapping/mapping_util.cpp
index 18b2a899..c3b3696d 100644
--- a/lib/NeuraDialect/Mapping/mapping_util.cpp
+++ b/lib/NeuraDialect/Mapping/mapping_util.cpp
@@ -625,9 +625,16 @@ bool mlir::neura::tryRouteDataMove(Operation *mov_op, MappingLoc src_loc,
 
 Operation *mlir::neura::getMaterializedProducer(Value operand) {
   Operation *producer = operand.getDefiningOp();
+
+  // ReserveOp is not wrapped by DataMovOp (see InsertDataMovPass).
+  // Returns it directly, as it represents the loop-carried dependency placeholder.
+  if (isa<neura::ReserveOp>(producer)) {
+    return producer;
+  }
+
+  // For operations wrapped by DataMovOp, finds the actual producer.
assert(isa(producer) && - "Expected operand to be defined by a DataMovOp"); - // Finds the actual producer. + "Expected a DataMovOp as operand producer for non-ReserveOp operations"); auto mov_op = dyn_cast(producer); auto materialized_producer = mov_op.getOperand().getDefiningOp(); return materialized_producer; @@ -957,12 +964,22 @@ bool mlir::neura::placeAndRoute(Operation *op, const MappingLoc &target_loc, for (Value operand : op->getOperands()) { llvm::errs() << "Processing operand: " << operand << "\n"; if (isa(operand.getDefiningOp())) { - // Skips Reserve ops (backward ctrl move) when estimate cost. + // Skips Reserve ops (backward ctrl move) when routing. continue; } Operation *data_move = operand.getDefiningOp(); + + // ReserveOp is not wrapped by DataMovOp (see InsertDataMovPass). + // Skip routing for ReserveOp as it represents loop-carried dependency. + if (isa(data_move)) { + llvm::errs() << "Skipping unwrapped operand: " << *data_move + << "\n"; + continue; + } + assert(isa(data_move) && - "Expected a DataMovOp as operand producer"); + "Expected a DataMovOp as operand for non-ReserveOp operations"); + Operation *producer = getMaterializedProducer(operand); MappingLoc src_loc = mapping_state.getAllLocsOfOp(producer).back(); diff --git a/test/Conversion/AffineToNeura/complex-affine-expressions.mlir b/test/Conversion/AffineToNeura/complex-affine-expressions.mlir new file mode 100644 index 00000000..612b1328 --- /dev/null +++ b/test/Conversion/AffineToNeura/complex-affine-expressions.mlir @@ -0,0 +1,106 @@ +// RUN: mlir-neura-opt %s --lower-affine-to-neura | FileCheck %s + +// This test verifies that complex affine expressions are correctly expanded +// into explicit Neura arithmetic operations. + +module { + // Test 1: Multiplication expression (d0 * 2) + func.func @mul_expression(%arg0: memref<10xf32>) { + affine.for %i = 0 to 10 { + %0 = affine.load %arg0[2 * %i] : memref<10xf32> + } + return + } + + // Test 2: Addition and multiplication (d0 * 3 + 1) + func.func @complex_expression(%arg0: memref<10xf32>) { + affine.for %i = 0 to 10 { + %0 = affine.load %arg0[3 * %i + 1] : memref<10xf32> + } + return + } + + // Test 3: Modulo operation (d0 % 4) + func.func @modulo_expression(%arg0: memref<10xf32>) { + affine.for %i = 0 to 10 { + %0 = affine.load %arg0[%i mod 4] : memref<10xf32> + } + return + } + + // Test 4: Floor division (d0 floordiv 2) + func.func @floordiv_expression(%arg0: memref<10xf32>) { + affine.for %i = 0 to 10 { + %0 = affine.load %arg0[%i floordiv 2] : memref<10xf32> + } + return + } + + // Test 5: Multiple dimensions with complex expressions + func.func @multi_dim_complex(%arg0: memref<10x20xf32>) { + affine.for %i = 0 to 10 { + affine.for %j = 0 to 20 { + %0 = affine.load %arg0[%i, 2 * %i + 3 * %j + 1] : memref<10x20xf32> + } + } + return + } +} + +// CHECK-LABEL: func.func @mul_expression +// CHECK-NEXT: %0 = "neura.constant"() <{value = true}> : () -> i1 +// CHECK-NEXT: %{{.*}}, %{{.*}} = "neura.loop_control"(%0) <{end = 10 : i64, iterationType = "increment", start = 0 : i64, step = 1 : i64}> : (i1) -> (index, i1) +// +// CHECK-NEXT: %{{.*}} = "neura.constant"() <{value = 2 : index}> : () -> index +// CHECK-NEXT: %{{.*}} = "neura.mul"(%{{.*}}, %{{.*}}) : (index, index) -> index +// CHECK-NEXT: %{{.*}} = neura.load_indexed %arg0[%{{.*}} : index] memref<10xf32> : f32 +// CHECK-NEXT: return +// CHECK-NEXT: } + +// CHECK-LABEL: func.func @complex_expression +// CHECK-NEXT: %0 = "neura.constant"() <{value = true}> : () -> i1 +// CHECK-NEXT: %{{.*}}, %{{.*}} = 
"neura.loop_control"(%0) <{end = 10 : i64, iterationType = "increment", start = 0 : i64, step = 1 : i64}> : (i1) -> (index, i1) +// +// CHECK-NEXT: %{{.*}} = "neura.constant"() <{value = 3 : index}> : () -> index +// CHECK-NEXT: %{{.*}} = "neura.mul"(%{{.*}}, %{{.*}}) : (index, index) -> index +// CHECK-NEXT: %{{.*}} = "neura.constant"() <{value = 1 : index}> : () -> index +// CHECK-NEXT: %{{.*}} = "neura.add"(%{{.*}}, %{{.*}}) : (index, index) -> index +// CHECK-NEXT: %{{.*}} = neura.load_indexed %arg0[%{{.*}} : index] memref<10xf32> : f32 +// CHECK-NEXT: return +// CHECK-NEXT: } + +// CHECK-LABEL: func.func @modulo_expression +// CHECK-NEXT: %0 = "neura.constant"() <{value = true}> : () -> i1 +// CHECK-NEXT: %{{.*}}, %{{.*}} = "neura.loop_control"(%0) <{end = 10 : i64, iterationType = "increment", start = 0 : i64, step = 1 : i64}> : (i1) -> (index, i1) +// +// CHECK-NEXT: %{{.*}} = "neura.constant"() <{value = 4 : index}> : () -> index +// CHECK-NEXT: %{{.*}} = "neura.rem"(%{{.*}}, %{{.*}}) : (index, index) -> index +// CHECK-NEXT: %{{.*}} = neura.load_indexed %arg0[%{{.*}} : index] memref<10xf32> : f32 +// CHECK-NEXT: return +// CHECK-NEXT: } + +// CHECK-LABEL: func.func @floordiv_expression +// CHECK-NEXT: %0 = "neura.constant"() <{value = true}> : () -> i1 +// CHECK-NEXT: %{{.*}}, %{{.*}} = "neura.loop_control"(%0) <{end = 10 : i64, iterationType = "increment", start = 0 : i64, step = 1 : i64}> : (i1) -> (index, i1) +// +// CHECK-NEXT: %{{.*}} = "neura.constant"() <{value = 2 : index}> : () -> index +// CHECK-NEXT: %{{.*}} = "neura.div"(%{{.*}}, %{{.*}}) : (index, index) -> index +// CHECK-NEXT: %{{.*}} = neura.load_indexed %arg0[%{{.*}} : index] memref<10xf32> : f32 +// CHECK-NEXT: return +// CHECK-NEXT: } + +// CHECK-LABEL: func.func @multi_dim_complex +// CHECK-NEXT: %0 = "neura.constant"() <{value = true}> : () -> i1 +// CHECK-NEXT: %{{.*}}, %{{.*}} = "neura.loop_control"(%0) <{end = 10 : i64, iterationType = "increment", start = 0 : i64, step = 1 : i64}> : (i1) -> (index, i1) +// CHECK-NEXT: %{{.*}}, %{{.*}} = "neura.loop_control"(%{{.*}}) <{end = 20 : i64, iterationType = "increment", start = 0 : i64, step = 1 : i64}> : (i1) -> (index, i1) +// +// CHECK-NEXT: %{{.*}} = "neura.constant"() <{value = 2 : index}> : () -> index +// CHECK-NEXT: %{{.*}} = "neura.mul"(%{{.*}}, %{{.*}}) : (index, index) -> index +// CHECK-NEXT: %{{.*}} = "neura.constant"() <{value = 3 : index}> : () -> index +// CHECK-NEXT: %{{.*}} = "neura.mul"(%{{.*}}, %{{.*}}) : (index, index) -> index +// CHECK-NEXT: %{{.*}} = "neura.add"(%{{.*}}, %{{.*}}) : (index, index) -> index +// CHECK-NEXT: %{{.*}} = "neura.constant"() <{value = 1 : index}> : () -> index +// CHECK-NEXT: %{{.*}} = "neura.add"(%{{.*}}, %{{.*}}) : (index, index) -> index +// CHECK-NEXT: %{{.*}} = neura.load_indexed %arg0[%{{.*}}, %{{.*}} : index, index] memref<10x20xf32> : f32 +// CHECK-NEXT: return +// CHECK-NEXT: } diff --git a/test/Conversion/AffineToNeura/deep-nesting.mlir b/test/Conversion/AffineToNeura/deep-nesting.mlir new file mode 100644 index 00000000..8189c100 --- /dev/null +++ b/test/Conversion/AffineToNeura/deep-nesting.mlir @@ -0,0 +1,29 @@ +// RUN: mlir-neura-opt %s --lower-affine-to-neura | FileCheck %s + +// Corner Case: Deeply nested loops (4 levels) - tests perfect nesting with 4D +module { + func.func @deep_nesting_4d(%arg0: memref<5x5x5x5xf32>) { + affine.for %i = 0 to 5 { + affine.for %j = 0 to 5 { + affine.for %k = 0 to 5 { + affine.for %l = 0 to 5 { + %0 = affine.load %arg0[%i, %j, %k, %l] : memref<5x5x5x5xf32> + } + } + } 
+ } + return + } +} + +// CHECK-LABEL: func.func @deep_nesting_4d(%arg0: memref<5x5x5x5xf32>) +// CHECK-NEXT: %0 = "neura.constant"() <{value = true}> : () -> i1 +// CHECK-NEXT: %{{.*}}, %{{.*}} = "neura.loop_control"(%0) <{end = 5 : i64, iterationType = "increment", start = 0 : i64, step = 1 : i64}> : (i1) -> (index, i1) +// CHECK-NEXT: %{{.*}}, %{{.*}} = "neura.loop_control"(%{{.*}}) <{end = 5 : i64, iterationType = "increment", start = 0 : i64, step = 1 : i64}> : (i1) -> (index, i1) +// CHECK-NEXT: %{{.*}}, %{{.*}} = "neura.loop_control"(%{{.*}}) <{end = 5 : i64, iterationType = "increment", start = 0 : i64, step = 1 : i64}> : (i1) -> (index, i1) +// CHECK-NEXT: %{{.*}}, %{{.*}} = "neura.loop_control"(%{{.*}}) <{end = 5 : i64, iterationType = "increment", start = 0 : i64, step = 1 : i64}> : (i1) -> (index, i1) +// +// CHECK-NEXT: %{{.*}} = neura.load_indexed %arg0[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : index, index, index, index] memref<5x5x5x5xf32> : f32 +// CHECK-NEXT: return +// CHECK-NEXT: } +// CHECK-NOT: affine. diff --git a/test/Conversion/AffineToNeura/imperfect-ops-after.mlir b/test/Conversion/AffineToNeura/imperfect-ops-after.mlir new file mode 100644 index 00000000..fd9aad1c --- /dev/null +++ b/test/Conversion/AffineToNeura/imperfect-ops-after.mlir @@ -0,0 +1,31 @@ +// RUN: mlir-neura-opt %s --lower-affine-to-neura | FileCheck %s + +// Imperfect Nesting: Operations after child loop +// This tests that inner loop results can be used by outer loop operations +module { + func.func @imperfect_ops_after(%arg0: memref<10x20xf32>, %arg1: memref<10xf32>) { + affine.for %i = 0 to 10 { + // Inner loop: compute sum of row elements + affine.for %j = 0 to 20 { + %elem = affine.load %arg0[%i, %j] : memref<10x20xf32> + // In real code, %elem would be accumulated or used + } + // Operations after inner loop - uses outer loop index + %cst = arith.constant 1.0 : f32 + affine.store %cst, %arg1[%i] : memref<10xf32> + } + return + } +} + +// CHECK-LABEL: func.func @imperfect_ops_after(%arg0: memref<10x20xf32>, %arg1: memref<10xf32>) +// CHECK-NEXT: %0 = "neura.constant"() <{value = true}> : () -> i1 +// CHECK-NEXT: %{{.*}}, %{{.*}} = "neura.loop_control"(%0) <{end = 10 : i64, iterationType = "increment", start = 0 : i64, step = 1 : i64}> : (i1) -> (index, i1) +// CHECK-NEXT: %{{.*}}, %{{.*}} = "neura.loop_control"(%{{.*}}) <{end = 20 : i64, iterationType = "increment", start = 0 : i64, step = 1 : i64}> : (i1) -> (index, i1) +// CHECK-NEXT: %{{.*}} = neura.load_indexed %arg0[%{{.*}}, %{{.*}} : index, index] memref<10x20xf32> : f32 +// +// CHECK-NEXT: %{{.*}} = arith.constant 1.000000e+00 : f32 +// CHECK-NEXT: neura.store_indexed %{{.*}} to %arg1[%{{.*}} : index] memref<10xf32> : f32 +// CHECK-NEXT: return +// CHECK-NEXT: } +// CHECK-NOT: affine. diff --git a/test/Conversion/AffineToNeura/loop-nest-optimization.mlir b/test/Conversion/AffineToNeura/loop-nest-optimization.mlir new file mode 100644 index 00000000..c2ca0b9e --- /dev/null +++ b/test/Conversion/AffineToNeura/loop-nest-optimization.mlir @@ -0,0 +1,44 @@ +// RUN: mlir-neura-opt %s --lower-affine-to-neura | FileCheck %s + +// This test verifies proper handling of various loop nest patterns. 
+ +module { + func.func @perfect_nest_2d(%arg0: memref<10x20xf32>) { + affine.for %i = 0 to 10 { + affine.for %j = 0 to 20 { + %v = affine.load %arg0[%i, %j] : memref<10x20xf32> + } + } + return + } + + func.func @two_top_level_loops(%arg0: memref<10xf32>, %arg1: memref<20xf32>) { + affine.for %i = 0 to 10 { + %v = affine.load %arg0[%i] : memref<10xf32> + } + affine.for %j = 0 to 20 { + %w = affine.load %arg1[%j] : memref<20xf32> + } + return + } +} + +// CHECK-LABEL: func.func @perfect_nest_2d(%arg0: memref<10x20xf32>) +// CHECK-NEXT: %0 = "neura.constant"() <{value = true}> : () -> i1 +// CHECK-NEXT: %{{.*}}, %{{.*}} = "neura.loop_control"(%0) <{end = 10 : i64, iterationType = "increment", start = 0 : i64, step = 1 : i64}> : (i1) -> (index, i1) +// CHECK-NEXT: %{{.*}}, %{{.*}} = "neura.loop_control"(%{{.*}}) <{end = 20 : i64, iterationType = "increment", start = 0 : i64, step = 1 : i64}> : (i1) -> (index, i1) +// +// CHECK-NEXT: %{{.*}} = neura.load_indexed %arg0[%{{.*}}, %{{.*}} : index, index] memref<10x20xf32> : f32 +// CHECK-NEXT: return +// CHECK-NEXT: } + +// CHECK-LABEL: func.func @two_top_level_loops(%arg0: memref<10xf32>, %arg1: memref<20xf32>) +// CHECK-NEXT: %0 = "neura.constant"() <{value = true}> : () -> i1 +// CHECK-NEXT: %{{.*}}, %{{.*}} = "neura.loop_control"(%0) <{end = 10 : i64, iterationType = "increment", start = 0 : i64, step = 1 : i64}> : (i1) -> (index, i1) +// CHECK-NEXT: %{{.*}} = neura.load_indexed %arg0[%{{.*}} : index] memref<10xf32> : f32 +// +// CHECK-NEXT: %{{.*}} = "neura.constant"() <{value = true}> : () -> i1 +// CHECK-NEXT: %{{.*}}, %{{.*}} = "neura.loop_control"(%{{.*}}) <{end = 20 : i64, iterationType = "increment", start = 0 : i64, step = 1 : i64}> : (i1) -> (index, i1) +// CHECK-NEXT: %{{.*}} = neura.load_indexed %arg1[%{{.*}} : index] memref<20xf32> : f32 +// CHECK-NEXT: return +// CHECK-NEXT: } diff --git a/test/Conversion/AffineToNeura/single-iteration.mlir b/test/Conversion/AffineToNeura/single-iteration.mlir new file mode 100644 index 00000000..3e2bed79 --- /dev/null +++ b/test/Conversion/AffineToNeura/single-iteration.mlir @@ -0,0 +1,20 @@ +// RUN: mlir-neura-opt %s --lower-affine-to-neura | FileCheck %s + +// Corner Case: Single iteration loop +module { + func.func @single_iteration(%arg0: memref<1xf32>) { + affine.for %i = 0 to 1 { + %0 = affine.load %arg0[%i] : memref<1xf32> + } + return + } +} + +// CHECK-LABEL: func.func @single_iteration(%arg0: memref<1xf32>) +// CHECK-NEXT: %0 = "neura.constant"() <{value = true}> : () -> i1 +// CHECK-NEXT: %{{.*}}, %{{.*}} = "neura.loop_control"(%0) <{end = 1 : i64, iterationType = "increment", start = 0 : i64, step = 1 : i64}> : (i1) -> (index, i1) +// +// CHECK-NEXT: %{{.*}} = neura.load_indexed %arg0[%{{.*}} : index] memref<1xf32> : f32 +// CHECK-NEXT: return +// CHECK-NEXT: } +// CHECK-NOT: affine. 
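+
+// Note: this test only exercises --lower-affine-to-neura (see the RUN line above).
+// An illustrative follow-on (not checked here) would pipe the result through the
+// other existing *-to-neura lowering passes, e.g.:
+//   mlir-neura-opt single-iteration.mlir --lower-affine-to-neura --lower-arith-to-neura
+// The exact downstream pipeline ordering is illustrative, not prescriptive.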
diff --git a/test/Conversion/AffineToNeura/unsupported-affine-if.mlir b/test/Conversion/AffineToNeura/unsupported-affine-if.mlir new file mode 100644 index 00000000..1095a239 --- /dev/null +++ b/test/Conversion/AffineToNeura/unsupported-affine-if.mlir @@ -0,0 +1,70 @@ +// RUN: mlir-neura-opt %s --lower-affine | FileCheck %s --check-prefix=CHECK-SCF +// RUN: mlir-neura-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm --convert-arith-to-llvm --convert-func-to-llvm | FileCheck %s --check-prefix=CHECK-LLVM +// RUN: mlir-neura-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm --convert-arith-to-llvm --convert-func-to-llvm --lower-llvm-to-neura | FileCheck %s --check-prefix=CHECK-NEURA-BR + +// This test demonstrates the complete multi-stage lowering chain for conditionals. +// Note: Direct lowering affine.if to Neura is not supported. +// +// The complete transformation chain: +// affine.if → scf.if → cf.cond_br → llvm.cond_br → neura.cond_br +// +// While neura.cond_br operations are generated, they cannot be mapped to CGRA +// hardware because CGRAs are spatial dataflow architectures without program +// counters or branch prediction units. + +module { + func.func @affine_if_example(%arg0: memref<10xf32>) { + affine.for %i = 0 to 10 { + affine.if affine_set<(d0) : (d0 - 5 >= 0)>(%i) { + %val = affine.load %arg0[%i] : memref<10xf32> + } + } + return + } +} + +// CHECK-SCF-LABEL: func.func @affine_if_example(%arg0: memref<10xf32>) +// CHECK-SCF-NEXT: %c0 = arith.constant 0 : index +// CHECK-SCF-NEXT: %c10 = arith.constant 10 : index +// CHECK-SCF-NEXT: %c1 = arith.constant 1 : index +// CHECK-SCF-NEXT: scf.for %arg1 = %c0 to %c10 step %c1 +// CHECK-SCF-NEXT: %c0_0 = arith.constant 0 : index +// CHECK-SCF-NEXT: %c-5 = arith.constant -5 : index +// CHECK-SCF-NEXT: %0 = arith.addi %arg1, %c-5 : index +// CHECK-SCF-NEXT: %1 = arith.cmpi sge, %0, %c0_0 : index +// CHECK-SCF-NEXT: scf.if %1 +// CHECK-SCF-NEXT: %2 = memref.load %arg0[%arg1] : memref<10xf32> +// CHECK-SCF-NEXT: } +// CHECK-SCF-NEXT: } +// CHECK-SCF-NEXT: return + +// CHECK-LLVM-LABEL: llvm.func @affine_if_example +// CHECK-LLVM: %{{.*}} = llvm.mlir.constant(0 : index) : i64 +// CHECK-LLVM: %{{.*}} = llvm.mlir.constant(10 : index) : i64 +// CHECK-LLVM: %{{.*}} = llvm.mlir.constant(1 : index) : i64 +// CHECK-LLVM: llvm.br ^bb1(%{{.*}} : i64) +// CHECK-LLVM: ^bb1(%{{.*}}: i64): +// CHECK-LLVM: %{{.*}} = llvm.icmp "slt" %{{.*}}, %{{.*}} : i64 +// CHECK-LLVM: llvm.cond_br %{{.*}}, ^bb2, ^bb5 +// +// CHECK-LLVM: ^bb2: +// CHECK-LLVM: %{{.*}} = llvm.mlir.constant(0 : index) : i64 +// CHECK-LLVM: %{{.*}} = llvm.mlir.constant(-5 : index) : i64 +// CHECK-LLVM: %{{.*}} = llvm.add %{{.*}}, %{{.*}} : i64 +// CHECK-LLVM: %{{.*}} = llvm.icmp "sge" %{{.*}}, %{{.*}} : i64 +// CHECK-LLVM: llvm.cond_br %{{.*}}, ^bb3, ^bb4 + +// CHECK-NEURA-BR-LABEL: llvm.func @affine_if_example +// CHECK-NEURA-BR: %{{.*}} = "neura.constant"() <{value = -5 : index}> : () -> i64 +// CHECK-NEURA-BR: %{{.*}} = "neura.constant"() <{value = 1 : index}> : () -> i64 +// CHECK-NEURA-BR: %{{.*}} = "neura.constant"() <{value = 10 : index}> : () -> i64 +// CHECK-NEURA-BR: %{{.*}} = "neura.constant"() <{value = 0 : index}> : () -> i64 +// CHECK-NEURA-BR: neura.br %{{.*}} : i64 to ^bb1 +// CHECK-NEURA-BR: ^bb1(%{{.*}}: i64): +// CHECK-NEURA-BR: %{{.*}} = "neura.icmp"(%{{.*}}, %{{.*}}) <{cmpType = "slt"}> : (i64, i64) -> i1 +// CHECK-NEURA-BR: neura.cond_br %{{.*}} : i1 then to ^bb2 else to ^bb5 +// +// CHECK-NEURA-BR: ^bb2: +// CHECK-NEURA-BR: %{{.*}} = 
"neura.add"(%{{.*}}, %{{.*}}) : (i64, i64) -> i64 +// CHECK-NEURA-BR: %{{.*}} = "neura.icmp"(%{{.*}}, %{{.*}}) <{cmpType = "sge"}> : (i64, i64) -> i1 +// CHECK-NEURA-BR: neura.cond_br %{{.*}} : i1 then to ^bb3 else to ^bb4 diff --git a/tools/mlir-neura-opt/mlir-neura-opt.cpp b/tools/mlir-neura-opt/mlir-neura-opt.cpp index 8969fa56..7edea6b5 100644 --- a/tools/mlir-neura-opt/mlir-neura-opt.cpp +++ b/tools/mlir-neura-opt/mlir-neura-opt.cpp @@ -1,9 +1,13 @@ // tools/mlir-neura-opt/mlir-neura-opt.cpp +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h" #include "mlir/Dialect/DLTI/DLTI.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/InitAllDialects.h" #include "mlir/InitAllPasses.h" +#include "mlir/Conversion/Passes.h" #include "mlir/Support/FileUtilities.h" #include "mlir/Support/LogicalResult.h" #include "mlir/Tools/mlir-opt/MlirOptMain.h" @@ -57,6 +61,9 @@ int main(int argc, char **argv) { registry.insert(); registry.insert(); registry.insert(); + registry.insert(); + registry.insert(); + registry.insert(); registry.insert(); registry.insert(); registry.insert(); @@ -64,6 +71,9 @@ int main(int argc, char **argv) { mlir::neura::registerPasses(); mlir::registerPasses(); mlir::registerViewOpGraphPass(); + + // Register all standard conversion passes + mlir::registerConversionPasses(); // Print architecture spec file info if (!architecture_spec_file.empty()) {