diff --git a/.github/workflows/common-build.yml b/.github/workflows/common-build.yml index 41c565b..9cf16ed 100644 --- a/.github/workflows/common-build.yml +++ b/.github/workflows/common-build.yml @@ -44,12 +44,6 @@ jobs: git -C third_party clone --depth 1 --branch v2.5.0 https://github.com/CLIUtils/CLI11.git || true git -C third_party clone --depth 1 --branch v1.17.0 https://github.com/google/googletest.git || true - - name: Save third_party cache - uses: actions/cache@v3 - with: - path: third_party - key: ${{ inputs.os }}-third_party - - name: ccache uses: hendrikmuhs/ccache-action@v1.2 diff --git a/.gitignore b/.gitignore index 781ff97..dd6bc96 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,6 @@ output/ third_party/ # Default names -output.o +*.o a.out .python-version \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index b799685..767ee45 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,7 +47,6 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(boost) - # CLI11 FetchContent_Declare( cli11 @@ -132,7 +131,7 @@ target_link_libraries(rcc PRIVATE enable_testing() set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) -add_executable(abi_test unittests/CodeGen/ABITest.cpp) +add_executable(abi_test unittests/CodeGen/X86_64ABITest.cpp) target_link_libraries(abi_test PRIVATE GTest::gtest_main CodeGen ${llvm_libs}) include(GoogleTest) diff --git a/README.md b/README.md index 8d2786d..6835a14 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,6 @@ Reaver C Compiler (`rcc`) is a C99 compiler. It features a lexer, parser, preprocessor, AST, and compiles to LLVM IR. It supports both x86 and AArch64. - ## Why? For fun! 
We started [`v1.0`](https://github.com/saturn691/ReaverCompiler/tree/v1.0) @@ -97,6 +96,23 @@ cd build ctest ``` +### Running Integration Tests + +To run the provided integration tests, run the following command: + +```bash +./test.py +``` + +To run the additional integration tests, run the following commands: + +```bash +# Only run this once +git submodule update --init --recursive +# More options available. Does not pass all tests yet +./writing-a-c-compiler-tests/test_compiler --chapter 18 --skip-invalid build/rcc +``` + ## Credits - [William Huynh](https://www.linkedin.com/in/wh691/) diff --git a/include/AST/Type.hpp b/include/AST/Type.hpp index ca09aaa..a31a669 100644 --- a/include/AST/Type.hpp +++ b/include/AST/Type.hpp @@ -234,6 +234,8 @@ class FnType final : public Type bool operator<(const BaseType &other) const override; bool isComplete() const noexcept override; + std::string getParamName(size_t i) const noexcept; + const BaseType *getParamType(size_t i) const noexcept; Ptr params_; Ptr retType_; @@ -255,7 +257,7 @@ class ParamType final : public Type bool isComplete() const noexcept override; - size_t size() const; + size_t size() const noexcept; const BaseType *at(size_t i) const; Params types_; diff --git a/include/CodeGen/AArch64ABI.hpp b/include/CodeGen/AArch64ABI.hpp new file mode 100644 index 0000000..3fffc61 --- /dev/null +++ b/include/CodeGen/AArch64ABI.hpp @@ -0,0 +1,31 @@ +#pragma once + +#include "CodeGen/ABI.hpp" + +namespace CodeGen +{ + +class AArch64ABI : public ABI +{ +public: + AArch64ABI(llvm::Module &module); + + FunctionParamsInfo getFunctionParams( + llvm::Type *retType, + std::vector &paramTypes) const override; + llvm::FunctionType *getFunctionType( + llvm::Type *retType, + std::vector &paramTypes) const override; + std::vector getParamType(llvm::Type *type) const override; + llvm::Align getTypeAlign(llvm::Type *type) const override; + unsigned getTypeSize(AST::Types type) const override; + + bool useByVal() const override + { + return 
false; + } + +private: + llvm::Module &module_; +}; +} // namespace CodeGen diff --git a/include/CodeGen/ABI.hpp b/include/CodeGen/ABI.hpp index d230960..fe3331e 100644 --- a/include/CodeGen/ABI.hpp +++ b/include/CodeGen/ABI.hpp @@ -38,47 +38,8 @@ class ABI virtual std::vector getParamType(llvm::Type *type) const = 0; virtual llvm::Align getTypeAlign(llvm::Type *type) const = 0; virtual unsigned getTypeSize(AST::Types type) const = 0; + virtual bool useByVal() const = 0; }; -class X86_64ABI : public ABI -{ -public: - enum class ArgClass - { - INTEGER, // Integral types that fit in one of the GP registers - SSE, // Types that fit into a vector register - SSEUP, // Tail part of SSE - X87, // 80-bit (rarely used in modern code), returned via x87 FPU - X87UP, // Tail part of X87 - COMPLEX_X87, // Complex long double - MEMORY, // Passed by memory via the stack - NO_CLASS // Types that don't fit into any of the above - }; - - struct ArgClassInfo - { - ArgClass cls = ArgClass::NO_CLASS; - unsigned size = 0; - unsigned align = 0; - bool multiple = false; - }; - using ArgClasses = std::vector; - - X86_64ABI(llvm::Module &module); - - FunctionParamsInfo getFunctionParams( - llvm::Type *retType, - std::vector &paramTypes) const override; - llvm::FunctionType *getFunctionType( - llvm::Type *retType, - std::vector &paramTypes) const override; - std::vector getParamType(llvm::Type *type) const override; - llvm::Align getTypeAlign(llvm::Type *type) const override; - unsigned getTypeSize(AST::Types type) const override; -private: - ArgClasses getArgClassification(llvm::Type *type) const; - ArgClassInfo mergeClassifications(ArgClassInfo lhs, ArgClassInfo rhs) const; - llvm::Module &module_; -}; } // namespace CodeGen \ No newline at end of file diff --git a/include/CodeGen/CodeGenModule.hpp b/include/CodeGen/CodeGenModule.hpp index c446215..e1442dd 100644 --- a/include/CodeGen/CodeGenModule.hpp +++ b/include/CodeGen/CodeGenModule.hpp @@ -5,8 +5,9 @@ #include #include "AST/Visitor.hpp" 
-#include "CodeGen/ABI.hpp" +#include "CodeGen/AArch64ABI.hpp" #include "CodeGen/TypeChecker.hpp" +#include "CodeGen/X86_64ABI.hpp" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" @@ -31,7 +32,8 @@ class CodeGenModule : public Visitor CodeGenModule( std::string sourceFile, std::string outputFile, - TypeMap &typeMap); + TypeMap &typeMap, + std::string targetTriple); void emitLLVM(); void emitObject(); void optimize(); diff --git a/include/CodeGen/X86_64ABI.hpp b/include/CodeGen/X86_64ABI.hpp new file mode 100644 index 0000000..9a2d3bf --- /dev/null +++ b/include/CodeGen/X86_64ABI.hpp @@ -0,0 +1,54 @@ +#pragma once + +#include "CodeGen/ABI.hpp" + +namespace CodeGen +{ + +class X86_64ABI : public ABI +{ +public: + enum class ArgClass + { + INTEGER, // Integral types that fit in one of the GP registers + SSE, // Types that fit into a vector register + SSEUP, // Tail part of SSE + X87, // 80-bit (rarely used in modern code), returned via x87 FPU + X87UP, // Tail part of X87 + COMPLEX_X87, // Complex long double + MEMORY, // Passed by memory via the stack + NO_CLASS // Types that don't fit into any of the above + }; + + struct ArgClassInfo + { + ArgClass cls = ArgClass::NO_CLASS; + unsigned size = 0; + unsigned align = 0; + bool multiple = false; + }; + using ArgClasses = std::vector; + + X86_64ABI(llvm::Module &module); + + FunctionParamsInfo getFunctionParams( + llvm::Type *retType, + std::vector &paramTypes) const override; + llvm::FunctionType *getFunctionType( + llvm::Type *retType, + std::vector &paramTypes) const override; + std::vector getParamType(llvm::Type *type) const override; + llvm::Align getTypeAlign(llvm::Type *type) const override; + unsigned getTypeSize(AST::Types type) const override; + + bool useByVal() const override + { + return true; + } + +private: + ArgClasses getArgClassification(llvm::Type *type) const; + ArgClassInfo mergeClassifications(ArgClassInfo lhs, ArgClassInfo rhs) const; + llvm::Module &module_; +}; +} // namespace CodeGen 
diff --git a/src/AST/Type.cpp b/src/AST/Type.cpp index 1a678b9..eacac3b 100644 --- a/src/AST/Type.cpp +++ b/src/AST/Type.cpp @@ -199,6 +199,16 @@ bool FnType::isComplete() const noexcept return retType_->isComplete() && params_->isComplete(); } +std::string FnType::getParamName(size_t i) const noexcept +{ + return params_->types_.at(i).first; +} + +const BaseType *FnType::getParamType(size_t i) const noexcept +{ + return params_->types_.at(i).second.get(); +} + ParamType::ParamType(Params types) : types_(std::move(types)) { } @@ -267,7 +277,7 @@ bool ParamType::isComplete() const noexcept return true; } -size_t ParamType::size() const +size_t ParamType::size() const noexcept { return types_.size(); } diff --git a/src/CodeGen/AArch64ABI.cpp b/src/CodeGen/AArch64ABI.cpp new file mode 100644 index 0000000..d86fb43 --- /dev/null +++ b/src/CodeGen/AArch64ABI.cpp @@ -0,0 +1,248 @@ +#include "CodeGen/AArch64ABI.hpp" + +// Refer to +// https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst +// Unlike x86_64, LLVM handles register allocation of AArch64 mostly in the +// backend. Therefore the rules are just rough. 
+ +namespace CodeGen +{ +/****************************************************************************** + * Private methods * + *****************************************************************************/ + +namespace +{ + +/// Returns 0 for error +size_t getAggregateNumElements(llvm::Type *type) +{ + if (type->isStructTy()) + { + return type->getStructNumElements(); + } + else if (type->isArrayTy()) + { + return type->getArrayNumElements(); + } + else + { + return 0; + } +} + +std::vector flattenHFA(llvm::Type *t) +{ + // The test for homogeneity is applied after data layout is completed + std::vector flatElements; + + if (auto *structType = llvm::dyn_cast(t)) + { + for (size_t i = 0; i < structType->getNumElements(); i++) + { + auto v = flattenHFA(structType->getElementType(i)); + flatElements.insert(flatElements.end(), v.begin(), v.end()); + } + } + else if (auto *arrayType = llvm::dyn_cast(t)) + { + for (size_t i = 0; i < arrayType->getNumElements(); i++) + { + auto v = flattenHFA(arrayType->getElementType()); + flatElements.insert(flatElements.end(), v.begin(), v.end()); + } + } + else + { + flatElements.push_back(t); + } + + return flatElements; +} + +/// Homogeneous Floating-point Aggregates +bool isHFA(llvm::Type *type) +{ + if (!type->isAggregateType()) + { + return false; + } + + std::vector flatElements = flattenHFA(type); + if (flatElements.size() < 1 || flatElements.size() > 4) + { + return false; + } + + return flatElements[0]->isFloatingPointTy() && std::equal( + flatElements.begin() + 1, + flatElements.end(), + flatElements.begin()); +} + +/// Homogeneous Short-Vector Aggregates +constexpr bool isHVA(llvm::Type *type) +{ + // e.g. 
float32x4_t + return false; +} + +} // namespace + +/****************************************************************************** + * Public methods * + *****************************************************************************/ + +AArch64ABI::AArch64ABI(llvm::Module &module) : module_(module) +{ +} + +ABI::FunctionParamsInfo AArch64ABI::getFunctionParams( + llvm::Type *retType, + std::vector &paramTypes) const +{ + // ABI 6.8.2: Parameter passing rules + std::vector> actualParamTypes; + bool structReturnInMemory = false; + + // ABI 6.9: Result return + if (retType->isAggregateType()) + { + auto paramType = getParamType(retType); + if (paramType[0]->isPointerTy()) + { + actualParamTypes.push_back(paramType); + structReturnInMemory = true; + retType = llvm::Type::getVoidTy(module_.getContext()); + } + else + { + // structs in LLVM are pointers. This is to pass by value. + retType = paramType[0]; + } + } + + // Stage A: Initialization (N/A, reg allocation handled in backend) + // In fact, we can handle them one at a time, without previous context + for (auto *paramType : paramTypes) + { + actualParamTypes.push_back(getParamType(paramType)); + } + + return {retType, actualParamTypes, structReturnInMemory}; +} + +llvm::FunctionType *AArch64ABI::getFunctionType( + llvm::Type *retType, + std::vector &paramTypes) const +{ + auto actualParamTypes = getFunctionParams(retType, paramTypes); + + std::vector flatParamTypes; + for (const auto &paramTypes : actualParamTypes.paramTypes) + { + flatParamTypes.insert( + flatParamTypes.end(), paramTypes.begin(), paramTypes.end()); + } + + return llvm::FunctionType::get( + actualParamTypes.retType, flatParamTypes, false); +} + +std::vector AArch64ABI::getParamType(llvm::Type *type) const +{ + size_t typeSize = module_.getDataLayout().getTypeAllocSize(type); + + // Stage B: Pre-padding and extension of arguments + if (!isHFA(type) && !isHVA(type) && typeSize > 16) + { + // B.4. 
Passed via memory + return {llvm::PointerType::get(type, 0)}; + } + + if (type->isAggregateType()) + { + // B.5. Align to nearest multiple of 8 + typeSize = llvm::alignTo(typeSize, 8); + } + + // Stage C: Assignment of arguments to register and stack + // A lot of this happens in the backend. We only need aggregate + // handling. + if (isHFA(type)) + { + // Decompose the HFA + auto v = flattenHFA(type); + return {llvm::ArrayType::get(v[0], v.size())}; + } + else if (type->isAggregateType()) + { + // LLVM takes care of everything else in the backend + // C.11. This could be { int, float } or { int, int } -> use GP regs + size_t regsRequired = typeSize / 8; + auto i64 = llvm::Type::getInt64Ty(module_.getContext()); + if (regsRequired == 1) + { + return {i64}; + } + + return {llvm::ArrayType::get(i64, regsRequired)}; + } + + return {type}; +} + +llvm::Align AArch64ABI::getTypeAlign(llvm::Type *type) const +{ + // Opaque structs have no size + if (type->isStructTy() && type->getStructNumElements() == 0) + { + return llvm::Align(1); + } + + // _Alignof returns minimum align (e.g. _Alignof(int[4]) = 4, but LLVM + // defaults to giving 16, as int[4] >= 16 bytes) + int align = module_.getDataLayout().getPrefTypeAlign(type).value(); + + return llvm::Align(align); +} + +unsigned AArch64ABI::getTypeSize(AST::Types ty) const +{ + using Types = AST::Types; + + switch (ty) + { + case Types::VOID: + return 0; + case Types::BOOL: + return 1; + case Types::CHAR: + case Types::UNSIGNED_CHAR: + return 8; + case Types::SHORT: + case Types::UNSIGNED_SHORT: + return 16; + case Types::INT: + case Types::UNSIGNED_INT: + return 32; + case Types::LONG: + case Types::UNSIGNED_LONG: + // ABI 10.1.2: This depends on ILP32/LP64/LLP64. 
+ // We don't support Windows, so use LP64 + case Types::LONG_LONG: + case Types::UNSIGNED_LONG_LONG: + return 64; + + case Types::FLOAT: + return 32; + case Types::DOUBLE: + return 64; + case Types::LONG_DOUBLE: + return 128; + }; + + throw std::runtime_error("Unknown type"); +} + +} // namespace CodeGen \ No newline at end of file diff --git a/src/CodeGen/CodeGenModule.cpp b/src/CodeGen/CodeGenModule.cpp index 3e9efdb..e169fd7 100644 --- a/src/CodeGen/CodeGenModule.cpp +++ b/src/CodeGen/CodeGenModule.cpp @@ -34,7 +34,8 @@ namespace CodeGen CodeGenModule::CodeGenModule( std::string sourceFile, std::string outputFile, - TypeMap &typeMap) + TypeMap &typeMap, + std::string targetTriple) : outputFile_(std::move(outputFile)), typeMap_(typeMap), context_(std::make_unique()), builder_(std::make_unique>(*context_)), @@ -47,7 +48,11 @@ CodeGenModule::CodeGenModule( llvm::InitializeAllAsmPrinters(); llvm::InitializeAllAsmParsers(); - auto targetTriple = llvm::sys::getDefaultTargetTriple(); + if (targetTriple.empty()) + { + targetTriple = llvm::sys::getDefaultTargetTriple(); + } + auto CPU = "generic"; auto features = ""; @@ -68,7 +73,22 @@ CodeGenModule::CodeGenModule( module_->setSourceFileName(sourceFile); module_->setModuleIdentifier(sourceFile); - abi_ = std::make_unique(*module_); + llvm::Triple triple = llvm::Triple(targetTriple); + + if (triple.isAArch64()) + { + abi_ = std::make_unique(*module_); + } + else if (triple.isX86()) + { + abi_ = std::make_unique(*module_); + } + else + { + llvm::errs() << "Warning: Target architecture not recognised, " + "defaulting to x86-64\n"; + abi_ = std::make_unique(*module_); + } } void CodeGenModule::emitLLVM() @@ -251,16 +271,17 @@ void CodeGenModule::visit(const FnDef &node) // arguments pushScope(); - std::vector paramTypes = getParamTypes(type); - auto fnParams = abi_->getFunctionParams(retType, paramTypes); + // Make distinction of RAW paramType (before ABI modifications) + std::vector rawParamTypes = getParamTypes(type); + 
auto fnParams = abi_->getFunctionParams(retType, rawParamTypes); unsigned argPtr = fnParams.structReturnInMemory ? 1 : 0; - for (size_t j = 0; j < type->params_->types_.size(); j++) + for (size_t j = 0; j < type->params_->size(); j++) { - std::string paramName = type->params_->types_.at(j).first; - llvm::Type *paramType = paramTypes.at(j); + std::string paramName = type->getParamName(j); + llvm::Type *rawParamType = rawParamTypes.at(j); - if (paramType->isStructTy() && fn->getArg(argPtr)->hasByValAttr()) + if (rawParamType->isStructTy() && fn->getArg(argPtr)->getType()->isPointerTy()) { symbolTablePush(paramName, fn->getArg(argPtr)); argPtr++; @@ -269,14 +290,22 @@ void CodeGenModule::visit(const FnDef &node) { llvm::AllocaInst *allocaInst; - if (paramType->isStructTy() && !fn->getArg(argPtr)->hasByValAttr()) + if (rawParamType->isStructTy() && + !fn->getArg(argPtr)->getType()->isPointerTy()) { std::vector tys = fnParams.paramTypes.at(j + fnParams.structReturnInMemory); - auto *assignedType = llvm::StructType::get(*context_, tys); + + // AArch64 uses ArrayType for HFA e.g. [2 x i64] + // x86-64 uses exploded structs e.g. [ i64, i64 ], pack them + auto *assignedType = + (tys[0]->isArrayTy()) + ? 
tys[0] + : llvm::StructType::get(*context_, tys); + llvm::AllocaInst *tempAlloca = createAlignedAlloca(assignedType); - allocaInst = createAlignedAlloca(paramType, paramName); + allocaInst = createAlignedAlloca(rawParamType, paramName); // Copy the params to an alloca of the (anonymous) struct type for (size_t i = 0; i < tys.size(); i++) @@ -292,14 +321,14 @@ void CodeGenModule::visit(const FnDef &node) // Copy the alloca to the actual alloca builder_->CreateMemCpy( allocaInst, - getAlign(paramType), + getAlign(rawParamType), tempAlloca, getAlign(assignedType), - module_->getDataLayout().getTypeAllocSize(paramType)); + module_->getDataLayout().getTypeAllocSize(rawParamType)); } else { - allocaInst = createAlignedAlloca(paramType, paramName); + allocaInst = createAlignedAlloca(rawParamType, paramName); builder_->CreateStore(fn->getArg(argPtr), allocaInst); argPtr++; } @@ -1105,6 +1134,18 @@ void CodeGenModule::visit(const FnCall &node) builder_->CreateLoad(types[j], gep)); } } + else if (types[0]->isArrayTy()) + { + auto zero = builder_->getInt32(0); + llvm::Value *gep = builder_->CreateInBoundsGEP( + ty, + argL, + {zero, zero} + ); + args.push_back( + builder_->CreateLoad(types[0], gep) + ); + } else if (fn->getArg(args.size())->hasByValAttr()) { // Struct passed by value @@ -1128,6 +1169,19 @@ void CodeGenModule::visit(const FnCall &node) args.push_back(argL); } } + else if (fn->getArg(args.size())->getType()->isPointerTy()) + { + // Struct passed by value, but not explicitly using ByVal + llvm::AllocaInst *tempAlloca = createAlignedAlloca(ty); + builder_->CreateMemCpy( + tempAlloca, + getAlign(tempAlloca->getType()), + argL, + getAlign(argL->getType()), + module_->getDataLayout().getTypeAllocSize(ty) + ); + args.push_back(tempAlloca); + } else { // Struct with 1 element @@ -1173,6 +1227,14 @@ void CodeGenModule::visit(const Identifier &node) if (auto **alloca = std::get_if(&symbol)) { currentValue_ = *alloca; + + // Only for automatic arrays, for alignment 
purposes + if ((*alloca)->getAllocatedType()->isArrayTy()) + { + auto zero = builder_->getInt32(0); + currentValue_ = builder_->CreateInBoundsGEP( + (*alloca)->getAllocatedType(), currentValue_, {zero, zero}); + } } else if (auto **arg = std::get_if(&symbol)) { @@ -1263,13 +1325,15 @@ void CodeGenModule::visit(const Init &node) { if (i < str.size()) { - values.push_back(llvm::ConstantInt::get( - llvm::Type::getInt8Ty(*context_), str[i])); + values.push_back( + llvm::ConstantInt::get( + llvm::Type::getInt8Ty(*context_), str[i])); } else { - values.push_back(llvm::ConstantInt::get( - llvm::Type::getInt8Ty(*context_), 0)); + values.push_back( + llvm::ConstantInt::get( + llvm::Type::getInt8Ty(*context_), 0)); } } currentValue_ = llvm::ConstantArray::get( @@ -1442,8 +1506,9 @@ llvm::Constant *CodeGenModule::visitRecursiveConst(const InitList &node) // Fill the array with zeroes while (values.size() < type->getArrayNumElements()) { - values.push_back(llvm::Constant::getNullValue( - static_cast(type)->getElementType())); + values.push_back( + llvm::Constant::getNullValue( + static_cast(type)->getElementType())); } return llvm::ConstantArray::get( @@ -1452,8 +1517,9 @@ llvm::Constant *CodeGenModule::visitRecursiveConst(const InitList &node) for (size_t i = values.size(); i < type->getStructNumElements(); i++) { - values.push_back(llvm::Constant::getNullValue( - static_cast(type)->getElementType(i))); + values.push_back( + llvm::Constant::getNullValue( + static_cast(type)->getElementType(i))); } return llvm::ConstantStruct::get( @@ -1575,7 +1641,6 @@ void CodeGenModule::visit(const TernaryOp &node) bool isVoidType = getLLVMType(&node)->isVoidTy(); llvm::Value *cond = visitAsRValue(*node.cond_); - llvm::Value *zero = llvm::ConstantInt::get(cond->getType(), 0); llvm::Value *condBool = isNotZero(cond); // True - evaluate LHS. False - evaluate RHS. 
@@ -1639,9 +1704,8 @@ void CodeGenModule::visit(const UnaryOp &node) auto *expectedType = typeMap_[&node].get(); llvm::Type *type = getLLVMType(node.expr_.get()); bool isFloat = type->isFloatingPointTy(); - llvm::Value *one = - (isFloat) ? llvm::ConstantFP::get(type, 1.0) - : builder_->getInt32(1); + llvm::Value *one = (isFloat) ? llvm::ConstantFP::get(type, 1.0) + : builder_->getInt32(1); llvm::Value *zero = builder_->getInt32(0); switch (node.op_) @@ -1698,7 +1762,7 @@ void CodeGenModule::visit(const UnaryOp &node) ? builder_->CreateICmpEQ( expr, llvm::ConstantPointerNull::get( - static_cast(type)), + llvm::PointerType::get(*context_, 0)), "lnot") : builder_->CreateICmpEQ( expr, @@ -2186,9 +2250,8 @@ llvm::Function *CodeGenModule::createFunction( for (size_t i = argPtr; i < fnParams.paramTypes.size(); i++) { size_t astIndex = i - fnParams.structReturnInMemory; - std::string paramName = fnType->params_->types_[astIndex].first; - llvm::Type *paramType = - getLLVMType(fnType->params_->types_[astIndex].second.get()); + std::string paramName = fnType->getParamName(astIndex); + llvm::Type *paramType = getLLVMType(fnType->getParamType(astIndex)); // Label the function arguments for (size_t j = 0; j < fnParams.paramTypes[i].size(); j++) @@ -2208,15 +2271,19 @@ llvm::Function *CodeGenModule::createFunction( else if (fnParams.paramTypes[i][j]->isPointerTy()) { // This is a struct that is passed by memory - fn->addParamAttr( - argPtr, - llvm::Attribute::getWithByValType( - *context_, paramType)); - fn->addParamAttr( - argPtr, - llvm::Attribute::getWithAlignment( - *context_, - getAlign(llvm::PointerType::get(paramType, 0)))); + if (abi_->useByVal()) + { + fn->addParamAttr( + argPtr, + llvm::Attribute::getWithByValType( + *context_, paramType)); + fn->addParamAttr( + argPtr, + llvm::Attribute::getWithAlignment( + *context_, + getAlign( + llvm::PointerType::get(paramType, 0)))); + } fn->getArg(argPtr)->setName(paramName); } } diff --git a/src/CodeGen/ABI.cpp 
b/src/CodeGen/X86_64ABI.cpp similarity index 99% rename from src/CodeGen/ABI.cpp rename to src/CodeGen/X86_64ABI.cpp index 3284fca..4d45e11 100644 --- a/src/CodeGen/ABI.cpp +++ b/src/CodeGen/X86_64ABI.cpp @@ -1,4 +1,4 @@ -#include "CodeGen/ABI.hpp" +#include "CodeGen/X86_64ABI.hpp" // Refer to // https://lafibre.info/images/doc/202402_intel_application_binary_interface.pdf @@ -14,7 +14,7 @@ X86_64ABI::X86_64ABI(llvm::Module &module) : module_(module) } ABI::FunctionParamsInfo X86_64ABI::getFunctionParams( - llvm::Type *retType, + llvm:: Type *retType, std::vector &paramTypes) const { // ABI 3.2.3: Parameter Passing diff --git a/src/main.cpp b/src/main.cpp index c4a40b9..f7f5a47 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,7 +10,7 @@ #define TCPP_IMPLEMENTATION #include "Preprocessor.hpp" -std::string preprocess(const std::string &sourcePath) +void preprocess(const std::string &sourcePath, const std::string &outputPath) { // Open sourcePath, spill contents as std::string std::ifstream ifs(sourcePath); @@ -29,7 +29,10 @@ std::string preprocess(const std::string &sourcePath) boost::filesystem::path(sourcePath).parent_path(); boost::filesystem::path includePath = sourceDir / path; - std::string processedCPath = preprocess(includePath.string()); + auto tempFile = boost::filesystem::temp_directory_path() / + boost::filesystem::unique_path(); + std::string processedCPath = tempFile.string(); + preprocess(includePath.string(), processedCPath); std::ifstream ifs(processedCPath); std::string processedC( (std::istreambuf_iterator(ifs)), @@ -43,57 +46,117 @@ std::string preprocess(const std::string &sourcePath) std::string processedC = preprocessor.Process(); - // Create a file with the preprocessed contents - auto tempFile = boost::filesystem::temp_directory_path() / - boost::filesystem::unique_path(); { - std::ofstream ofs(tempFile.string()); + std::ofstream ofs(outputPath); ofs << processedC; } +} + +void link(const std::string &sourcePath, const std::string &outputPath) 
+{ + FILE *pipe = popen("which clang", "r"); + if (!pipe) + { + std::cerr << "Error: failed to run which clang\n"; + return; + } + + char buffer[256]; + std::string clangPath; + if (fgets(buffer, sizeof(buffer), pipe)) + { + clangPath = buffer; + clangPath.erase( + std::remove(clangPath.begin(), clangPath.end(), '\n'), + clangPath.end()); + } + pclose(pipe); + + if (clangPath.empty()) + { + std::cerr << "Error: clang not found in PATH\n"; + return; + } + + std::cout << "Invoking: " << clangPath << std::endl; - return tempFile.string(); + // Calling std::system is not best practice, however, it works here + std::string cmd = clangPath + " " + sourcePath + " -o " + outputPath; + if (std::system(cmd.c_str()) != 0) + { + std::cerr << "Error: clang invocation failed\n"; + } } void compile( const std::string &sourcePath, - const std::string &out, - bool emitLLVM) + const std::string &outputPath, + const std::string &targetTriple, + bool emitLLVM, + bool useLinker, + bool print) { + bool needLinker = useLinker && !emitLLVM; + std::string outputPathCGM = outputPath; + + // Temporary file needed. 
*.c -> *.o -> a.out + if (needLinker) + { + auto tempFile = boost::filesystem::temp_directory_path() / + boost::filesystem::unique_path(); + outputPathCGM = tempFile.string(); + } + + auto tempFile = boost::filesystem::temp_directory_path() / + boost::filesystem::unique_path(); + std::string preprocessedPath = tempFile.string(); + // Preprocess the input - std::string preprocessedPath = preprocess(sourcePath); + preprocess(sourcePath, preprocessedPath); // Parse the AST const AST::TranslationUnit *tu = AST::parseAST(preprocessedPath); - // Print the AST - AST::Printer printer(std::cout); - tu->accept(printer); + if (print) + { + AST::Printer printer(std::cout); + tu->accept(printer); + } // Type check the AST CodeGen::TypeChecker typeChecker(std::cerr); tu->accept(typeChecker); // Code generation - CodeGen::CodeGenModule CGM(sourcePath, out, typeChecker.getTypeMap()); + CodeGen::CodeGenModule CGM( + sourcePath, outputPathCGM, typeChecker.getTypeMap(), targetTriple); tu->accept(CGM); if (emitLLVM) { - // Print the LLVM IR CGM.emitLLVM(); } else { CGM.emitObject(); } + + // Link (if possible) + if (needLinker) + { + link(outputPathCGM, outputPath); + } } int main(int argc, char **argv) { CLI::App app; std::string sourcePath; - std::string outputPath = "output.o"; + std::string outputPath; + std::string targetTriple; bool emitLLVM = false; + bool noLink = false; + bool print = false; // Options for the CLI @@ -101,14 +164,39 @@ int main(int argc, char **argv) app.add_option("source", sourcePath, "Source file path") ->required() ->check(CLI::ExistingFile); - app.add_option("-o", outputPath, "Output file path")->capture_default_str(); + app.add_option("-o", outputPath, "Output file path"); + app.add_flag( + "-c", noLink, "Only run preprocess, compile and assemble steps"); app.add_flag("-S", emitLLVM, "Emit LLVM IR instead of object code"); + app.add_flag("-v", print, "Show parser output"); + app.add_flag( + "--target", targetTriple, "Generate code for the given 
target"); CLI11_PARSE(app, argc, argv); - // Compile the input + // Follows conventions set by clang + if (outputPath.empty()) + { + std::filesystem::path p{sourcePath}; + std::string stem = p.stem().string(); + + // Descending order: assembly -> executable + if (emitLLVM) + { + outputPath = stem + ".ll"; + } + else if (noLink) + { + outputPath = stem + ".o"; + } + else + { + outputPath = "a.out"; + } + } + std::cout << "Compiling: " << sourcePath << std::endl; - compile(sourcePath, outputPath, emitLLVM); + compile(sourcePath, outputPath, targetTriple, emitLLVM, !noLink, print); std::cout << "Compiled to: " << outputPath << std::endl; return 0; diff --git a/test.py b/test.py index f53ea11..1ddc21b 100755 --- a/test.py +++ b/test.py @@ -474,9 +474,8 @@ def main(): Path(OUTPUT_FOLDER).mkdir(parents=True, exist_ok=True) Path(BUILD_FOLDER).mkdir(parents=True, exist_ok=True) - if not make(silent=args.short): - exit(3) - + # TODO: write function called setup()- make is a bit more complicated now + with JUnitXMLFile(J_UNIT_OUTPUT_FILE) as xml_file: run_tests(args, xml_file) diff --git a/unittests/CodeGen/ABITest.cpp b/unittests/CodeGen/X86_64ABITest.cpp similarity index 92% rename from unittests/CodeGen/ABITest.cpp rename to unittests/CodeGen/X86_64ABITest.cpp index f2a4bb1..45374fe 100644 --- a/unittests/CodeGen/ABITest.cpp +++ b/unittests/CodeGen/X86_64ABITest.cpp @@ -1,7 +1,7 @@ -#include "CodeGen/ABI.hpp" +#include "CodeGen/X86_64ABI.hpp" #include "gtest/gtest.h" -class ABITest : public ::testing::Test +class X86_64ABITest : public ::testing::Test { protected: void SetUp() override @@ -20,7 +20,7 @@ class ABITest : public ::testing::Test std::unique_ptr abi_; }; -TEST_F(ABITest, getFunctionType_Basic) +TEST_F(X86_64ABITest, getFunctionType_Basic) { auto retType = llvm::Type::getInt32Ty(*context_); auto paramTypes = @@ -31,7 +31,7 @@ TEST_F(ABITest, getFunctionType_Basic) EXPECT_EQ(type->getReturnType(), llvm::Type::getInt32Ty(*context_)); } -TEST_F(ABITest, 
getFunctionType_only6IntRegs) +TEST_F(X86_64ABITest, getFunctionType_only6IntRegs) { auto structType1Reg = llvm::StructType::create(*context_, "pair"); structType1Reg->setBody( @@ -119,7 +119,7 @@ TEST_F(ABITest, getFunctionType_only6IntRegs) } } -TEST_F(ABITest, getFunctionType_retval) +TEST_F(X86_64ABITest, getFunctionType_retval) { auto structType2Reg = llvm::StructType::create(*context_, "pair"); structType2Reg->setBody( @@ -139,7 +139,7 @@ TEST_F(ABITest, getFunctionType_retval) llvm::Type::getInt8Ty(*context_)})); } -TEST_F(ABITest, getParamType_Basic) +TEST_F(X86_64ABITest, getParamType_Basic) { auto type = abi_->getParamType(llvm::Type::getInt32Ty(*context_)); @@ -147,7 +147,7 @@ TEST_F(ABITest, getParamType_Basic) EXPECT_EQ(type[0], llvm::Type::getInt32Ty(*context_)); } -TEST_F(ABITest, getParamType_BasicStruct) +TEST_F(X86_64ABITest, getParamType_BasicStruct) { auto structType = llvm::StructType::create(*context_, "pair"); structType->setBody( @@ -160,7 +160,7 @@ TEST_F(ABITest, getParamType_BasicStruct) EXPECT_EQ(type[1], llvm::Type::getDoubleTy(*context_)); } -TEST_F(ABITest, getParamType_AlignStruct) +TEST_F(X86_64ABITest, getParamType_AlignStruct) { auto structType = llvm::StructType::create(*context_, "pair"); structType->setBody({ @@ -173,7 +173,7 @@ TEST_F(ABITest, getParamType_AlignStruct) EXPECT_EQ(type[0], llvm::Type::getInt64Ty(*context_)); } -TEST_F(ABITest, getParamType_ArrayStruct) +TEST_F(X86_64ABITest, getParamType_ArrayStruct) { auto structType = llvm::StructType::create(*context_, "pair"); structType->setBody( @@ -185,13 +185,13 @@ TEST_F(ABITest, getParamType_ArrayStruct) EXPECT_EQ(type[1], llvm::Type::getIntNTy(*context_, 24)); } -TEST_F(ABITest, getTypeAlign) +TEST_F(X86_64ABITest, getTypeAlign) { auto align = abi_->getTypeAlign(llvm::Type::getInt32Ty(*context_)); EXPECT_EQ(align, llvm::Align(4)); } -TEST_F(ABITest, getTypeSize) +TEST_F(X86_64ABITest, getTypeSize) { auto size = abi_->getTypeSize(AST::Types::INT); EXPECT_EQ(size, 32); 
diff --git a/writing-a-c-compiler-tests b/writing-a-c-compiler-tests index 01c77e8..ef1a0f0 160000 --- a/writing-a-c-compiler-tests +++ b/writing-a-c-compiler-tests @@ -1 +1 @@ -Subproject commit 01c77e82e7dd7101a8a82873e3e41d0357fb04d4 +Subproject commit ef1a0f0893331a42cd1579bf9ca37f653e33805f