diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e4c7adaaf..00b351391 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -19,13 +19,48 @@ if(NOT LLVM_IR2VEC) find_package(LLVM 17.0.0 REQUIRED CONFIG) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") - include_directories(SYSTEM ${LLVM_INCLUDE_DIRS}) + + find_package(Clang 17.0.0 REQUIRED CONFIG) + message(STATUS "Found Clang ${CLANG_PACKAGE_VERSION}") + message(STATUS "Using ClangConfig.cmake in: ${CLANG_DIR}") + include_directories(SYSTEM ${CLANG_INCLUDE_DIRS}) + # llvm_map_components_to_libnames(llvm_libs all) - llvm_map_components_to_libnames(llvm_libs support core irreader analysis TransformUtils) + # llvm_map_components_to_libnames(llvm_libs support core irreader analysis TransformUtils) add_executable(${PROJECT_NAME} ${binsrc}) - target_link_libraries (${PROJECT_NAME} ${llvm_libs} objlib) + target_link_libraries ( + ${PROJECT_NAME} + PRIVATE + clangHandleCXX + clangFormat + clangIndex + clangDirectoryWatcher + clangFrontendTool + clangRewrite + clangCrossTU + clangASTMatchers + clangTooling + clangBasic + clangFrontend + clangDriver + clangSema + clangAST + clangCodeGen + clangSerialization + clangParse + clangStaticAnalyzerFrontend + clangStaticAnalyzerCheckers + clangStaticAnalyzerCore + clangAnalysis + clangARCMigrate + clangRewriteFrontend + clangEdit + clangLex + LLVM + objlib + ) target_include_directories(${PROJECT_NAME} PRIVATE .) add_library(objlib OBJECT ${libsrc}) diff --git a/src/FlowAware.cpp b/src/FlowAware.cpp index 8e1e539cd..ba99aabb6 100644 --- a/src/FlowAware.cpp +++ b/src/FlowAware.cpp @@ -46,13 +46,11 @@ void IR2Vec_FA::getTransitiveUse( if (auto use = dyn_cast(U)) { if (std::find(visitedList.begin(), visitedList.end(), use) == visitedList.end()) { - IR2VEC_DEBUG(outs() << "\nDef " << /* def << */ " "; - def->print(outs(), true); outs() << "\n";); - IR2VEC_DEBUG(outs() << "Use " << /* use << */ " "; - use->print(outs(), true); outs() << "\n";); if (isMemOp(use->getOpcodeName(), operandNum, memWriteOps) && use->getOperand(operandNum) == def) { writeDefsMap[root].push_back(use); + std::cout << "Found dependency - " << use->getOpcodeName() << " ON " + << root->getOpcodeName() << std::endl; } else if (isMemOp(use->getOpcodeName(), operandNum, memAccessOps) && use->getOperand(operandNum) == def) { getTransitiveUse(root, use, visitedList, toAppend); @@ -77,8 +75,6 @@ void IR2Vec_FA::collectWriteDefsMap(Module &M) { std::find(visitedList.begin(), visitedList.end(), &I) == visitedList.end()) { if (I.getNumOperands() > 0) { - IR2VEC_DEBUG(I.print(outs()); outs() << "\n"); - IR2VEC_DEBUG(outs() << "operandnum = " << operandNum << "\n"); if (auto parent = dyn_cast(I.getOperand(operandNum))) { if (std::find(visitedList.begin(), visitedList.end(), parent) == diff --git a/src/IR2Vec.cpp b/src/IR2Vec.cpp index 3946b1b6d..8a62c6c6c 100644 --- a/src/IR2Vec.cpp +++ b/src/IR2Vec.cpp @@ -10,11 +10,23 @@ #include "CollectIR.h" #include "FlowAware.h" #include "Symbolic.h" +#include "utils.h" #include "version.h" -#include "llvm/Support/CommandLine.h" #include #include +#include "llvm/Support/CommandLine.h" +#include +#include +#include +#include +#include +#include +#include + +#include +#include // For BasicAA + using namespace llvm; using namespace IR2Vec; @@ -37,6 +49,14 @@ cl::opt cl_collectIR( cl::opt cl_iname(cl::Positional, cl::desc("Input file path"), cl::Required, cl::cat(category)); +cl::opt cl_cpp("cpp", cl::Optional, + cl::desc("Input file is a .cpp file?"), cl::init(false), + cl::cat(category)); + +cl::opt cl_memdep("memdep", cl::Optional, + cl::desc("Running mem dep analysis on input .ll file"), + cl::init(false), cl::cat(category)); + cl::opt cl_oname("o", cl::Required, cl::desc("Output file path"), cl::cat(category)); // for on demand generation of embeddings taking function name @@ -70,9 +90,102 @@ void printVersion(raw_ostream &ostream) { cl::PrintVersionMessage(); } -int main(int argc, char **argv) { - cl::SetVersionPrinter(printVersion); - cl::HideUnrelatedOptions(category); +void generateSymEncodingsFunction(std::string funcName) { + auto M = getLLVMIR(); + IR2Vec_Symbolic SYM(*M); + std::ofstream o; + o.open(oname, std::ios_base::app); + if (printTime) { + clock_t start = clock(); + SYM.generateSymbolicEncodingsForFunction(&o, funcName); + clock_t end = clock(); + double elapsed = double(end - start) / CLOCKS_PER_SEC; + printf("Time taken by on-demand generation of symbolic encodings " + "is: %.6f " + "seconds.\n", + elapsed); + } else { + SYM.generateSymbolicEncodingsForFunction(&o, funcName); + } + o.close(); +} + +void generateFAEncodingsFunction(std::string funcName) { + auto M = getLLVMIR(); + IR2Vec_FA FA(*M); + std::ofstream o, missCount, cyclicCount; + o.open(oname, std::ios_base::app); + missCount.open("missCount_" + oname, std::ios_base::app); + cyclicCount.open("cyclicCount_" + oname, std::ios_base::app); + if (printTime) { + clock_t start = clock(); + FA.generateFlowAwareEncodingsForFunction(&o, funcName, &missCount, + &cyclicCount); + clock_t end = clock(); + double elapsed = double(end - start) / CLOCKS_PER_SEC; + printf("Time taken by on-demand generation of flow-aware encodings " + "is: %.6f " + "seconds.\n", + elapsed); + } else { + FA.generateFlowAwareEncodingsForFunction(&o, funcName, &missCount, + &cyclicCount); + } + o.close(); +} + +void generateFAEncodings() { + auto M = getLLVMIR(); + IR2Vec_FA FA(*M); + std::ofstream o, missCount, cyclicCount; + o.open(oname, std::ios_base::app); + missCount.open("missCount_" + oname, std::ios_base::app); + cyclicCount.open("cyclicCount_" + oname, std::ios_base::app); + if (printTime) { + clock_t start = clock(); + FA.generateFlowAwareEncodings(&o, &missCount, &cyclicCount); + clock_t end = clock(); + double elapsed = double(end - start) / CLOCKS_PER_SEC; + printf("Time taken by normal generation of flow-aware encodings " + "is: %.6f " + "seconds.\n", + elapsed); + } else { + FA.generateFlowAwareEncodings(&o, &missCount, &cyclicCount); + } + o.close(); +} + +void generateSYMEncodings() { + auto M = getLLVMIR(); + IR2Vec_Symbolic SYM(*M); + std::ofstream o; + o.open(oname, std::ios_base::app); + if (printTime) { + clock_t start = clock(); + SYM.generateSymbolicEncodings(&o); + clock_t end = clock(); + double elapsed = double(end - start) / CLOCKS_PER_SEC; + printf("Time taken by normal generation of symbolic encodings is: " + "%.6f " + "seconds.\n", + elapsed); + } else { + SYM.generateSymbolicEncodings(&o); + } + o.close(); +} + +void collectIRfunc() { + auto M = getLLVMIR(); + CollectIR cir(M); + std::ofstream o; + o.open(oname, std::ios_base::app); + cir.generateTriplets(o); + o.close(); +} + +void setGlobalVars(int argc, char **argv) { cl::ParseCommandLineOptions(argc, argv); fa = cl_fa; @@ -89,111 +202,250 @@ int main(int argc, char **argv) { WT = cl_WT; debug = cl_debug; printTime = cl_printTime; + cpp_input = cl_cpp; + memdep = cl_memdep; +} +void checkFailureConditions() { bool failed = false; - if (!((sym ^ fa) ^ collectIR)) { - errs() << "Either of sym, fa or collectIR should be specified\n"; + + if (!(sym || fa || collectIR)) { + errs() << "Either of sym, fa, or collectIR should be specified\n"; failed = true; } + if (failed) + exit(1); + if (sym || fa) { if (level != 'p' && level != 'f') { errs() << "Invalid level specified: Use either p or f\n"; failed = true; } } else { - if (!collectIR) { - errs() << "Either of sym, fa or collectIR should be specified\n"; - failed = true; - } else if (level) + // assert collectIR is True. Else + assert(collectIR == true); + + if (collectIR && level) { errs() << "[WARNING] level would not be used in collectIR mode\n"; + } } if (failed) exit(1); +} + +void checkModuleFunctions(llvm::Module &M) { + + // std::cout << "MDA: Module loaded successfully " << (M.getName()).data() << + // std::endl; + + // std::cout << "Instruction Count " << M.getInstructionCount() << std::endl; + + int count = 0; + + PassBuilder PB; + FunctionAnalysisManager FAM; + + // We need to initialize the other pass managers even if we don't directly use + // them + LoopAnalysisManager LAM; + CGSCCAnalysisManager CGAM; + ModuleAnalysisManager MAM; + + // Register all the passes with the PassBuilder + PB.registerModuleAnalyses(MAM); + PB.registerCGSCCAnalyses(CGAM); + PB.registerLoopAnalyses(LAM); + PB.registerFunctionAnalyses(FAM); + + PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + // Register required alias analyses and memory dependence analysis + FAM.registerPass([] { return MemoryDependenceAnalysis(); }); + FAM.registerPass([] { return BasicAA(); }); // Basic Alias Analysis + + for (auto &F : M) { + count += 1; + if (!F.isDeclaration()) { + // std::cout << "ENTERING FOR MEMDEPRESULTS" << std::endl; + auto &MDR = FAM.getResult(F); + + // std::cout << "TESTING FOR MEMDEPRESULTS :: MDR ready" << std::endl; + // std::cout << "getDefaultBlockScanLimit() " << + // MDR.getDefaultBlockScanLimit() << std::endl; + + for (BasicBlock &BB : F) { + // std::cout << "TESTING FOR MEMDEPRESULTS :: BASIC BLOCK" << std::endl; + for (Instruction &I : BB) { + // std::cout << "TESTING FOR MEMDEPRESULTS" << std::endl; + // Get the memory dependence information for the instruction + MemDepResult memdep = MDR.getDependency(&I); + + if (!memdep.getInst()) { + // std::cout << "No memory dependence found for " << + // I.getOpcodeName() << std::endl; + continue; + } else { + std::cout << "Found Dependency - " << I.getOpcodeName() << " ON " + << memdep.getInst()->getOpcodeName() << std::endl; + } + } + } + } + } + // std::cout << "Total functions: " << count << std::endl; +} + +void runMDA() { auto M = getLLVMIR(); - // newly added + + // check if M is a vaid module or not + if (!M) { + std::cout << "Invalid module" << std::endl; + return; + } + + checkModuleFunctions(*M); +} + +// bool check_file(std::string filename) { +// std::ifstream file(filename); +// return file.good(); +// } + +// using namespace clang; +// void generateLLVMIR(const std::string &cppFilePath) { +// // Initialize targets +// InitializeNativeTarget(); +// InitializeNativeTargetAsmPrinter(); + +// // Create the compiler instance +// CompilerInstance compiler; +// llvm::LLVMContext context; +// // Diagnostics +// auto diagOpts = std::make_shared(); +// auto diagID = new DiagnosticIDs(); +// auto diagClient = new TextDiagnosticPrinter(llvm::errs(), &*diagOpts); +// DiagnosticsEngine diags(diagID, &*diagOpts, diagClient); + +// // Create the driver +// std::string tripleStr = llvm::sys::getDefaultTargetTriple(); +// driver::Driver driver("clang", tripleStr, diags); + +// // Build the compilation +// std::vector args = { +// "clang", // Dummy executable name +// "-emit-llvm", +// "-O0", +// "-c", +// cppFilePath.c_str() +// }; + +// std::unique_ptr +// compilation(driver.BuildCompilation(args)); if (!compilation) { +// std::cerr << "Error building compilation" << std::endl; +// return; +// } + +// const driver::JobList &jobs = compilation->getJobs(); +// if (jobs.size() != 1) { +// std::cerr << "Expected a single job, but got " << jobs.size() << +// std::endl; return; +// } + +// const driver::Command &cmd = llvm::cast(*jobs.begin()); + +// // Create compiler invocation from the job's arguments +// std::shared_ptr invocation = +// std::make_shared(); +// CompilerInvocation::CreateFromArgs(*invocation, cmd.getArguments(), +// diags); compiler.setInvocation(invocation); + +// // Set up the target options (this part can be expanded for +// cross-compilation) compiler.getTargetOpts().Triple = +// llvm::sys::getDefaultTargetTriple(); + +// // Create and execute the action (generating LLVM IR) +// auto codeGenAction = std::make_unique(&context); + +// if (!compiler.ExecuteAction(*codeGenAction)) { +// std::cerr << "Error generating LLVM IR" << std::endl; +// return; +// } + +// // Get the generated LLVM module +// std::unique_ptr module = codeGenAction->takeModule(); +// if (!module) { +// std::cerr << "Error: Failed to take LLVM module" << std::endl; +// return; +// } + +// // Output the LLVM IR to a file or stdout +// std::error_code EC; +// llvm::raw_fd_ostream output("output.ll", EC, llvm::sys::fs::OF_None); +// if (EC) { +// std::cerr << "Error: " << EC.message() << std::endl; +// return; +// } + +// module->print(output, nullptr); +// std::cout << "LLVM IR has been generated and saved to output.ll" << +// std::endl; +// } + +// void writeModuleToFile(llvm::Module *M, const std::string &filename) { +// std::error_code EC; +// llvm::raw_fd_ostream OS(filename, EC, llvm::sys::fs::OF_TextWithCRLF); + +// if (EC) { +// llvm::errs() << "Could not open file: " << EC.message() << "\n"; +// return; +// } + +// M->print(OS, nullptr); // Use the print function to write the LLVM IR in +// text form OS.flush(); +// } + +int main(int argc, char **argv) { + cl::SetVersionPrinter(printVersion); + cl::HideUnrelatedOptions(category); + + setGlobalVars(argc, argv); + + checkFailureConditions(); + + // return 0; + + if (memdep) { + runMDA(); + return 0; + } + // runMDA(); + // return 0; + + // generateLLVMIR(iname.c_str()); + + // std::cout << "Code reached beyond llvm ir output" << std::endl; + + // auto module = Act->getModule(); + + // if (module == NULL) { + // std::cout << "Error in getModule" << std::endl; + // return 0; + // } + + // // newly added if (sym && !(funcName.empty())) { - IR2Vec_Symbolic SYM(*M); - std::ofstream o; - o.open(oname, std::ios_base::app); - if (printTime) { - clock_t start = clock(); - SYM.generateSymbolicEncodingsForFunction(&o, funcName); - clock_t end = clock(); - double elapsed = double(end - start) / CLOCKS_PER_SEC; - printf("Time taken by on-demand generation of symbolic encodings " - "is: %.6f " - "seconds.\n", - elapsed); - } else { - SYM.generateSymbolicEncodingsForFunction(&o, funcName); - } - o.close(); + generateSymEncodingsFunction(funcName); } else if (fa && !(funcName.empty())) { - IR2Vec_FA FA(*M); - std::ofstream o, missCount, cyclicCount; - o.open(oname, std::ios_base::app); - missCount.open("missCount_" + oname, std::ios_base::app); - cyclicCount.open("cyclicCount_" + oname, std::ios_base::app); - if (printTime) { - clock_t start = clock(); - FA.generateFlowAwareEncodingsForFunction(&o, funcName, &missCount, - &cyclicCount); - clock_t end = clock(); - double elapsed = double(end - start) / CLOCKS_PER_SEC; - printf("Time taken by on-demand generation of flow-aware encodings " - "is: %.6f " - "seconds.\n", - elapsed); - } else { - FA.generateFlowAwareEncodingsForFunction(&o, funcName, &missCount, - &cyclicCount); - } - o.close(); + generateFAEncodingsFunction(funcName); } else if (fa) { - IR2Vec_FA FA(*M); - std::ofstream o, missCount, cyclicCount; - o.open(oname, std::ios_base::app); - missCount.open("missCount_" + oname, std::ios_base::app); - cyclicCount.open("cyclicCount_" + oname, std::ios_base::app); - if (printTime) { - clock_t start = clock(); - FA.generateFlowAwareEncodings(&o, &missCount, &cyclicCount); - clock_t end = clock(); - double elapsed = double(end - start) / CLOCKS_PER_SEC; - printf("Time taken by normal generation of flow-aware encodings " - "is: %.6f " - "seconds.\n", - elapsed); - } else { - FA.generateFlowAwareEncodings(&o, &missCount, &cyclicCount); - } - o.close(); + generateFAEncodings(); } else if (sym) { - IR2Vec_Symbolic SYM(*M); - std::ofstream o; - o.open(oname, std::ios_base::app); - if (printTime) { - clock_t start = clock(); - SYM.generateSymbolicEncodings(&o); - clock_t end = clock(); - double elapsed = double(end - start) / CLOCKS_PER_SEC; - printf("Time taken by normal generation of symbolic encodings is: " - "%.6f " - "seconds.\n", - elapsed); - } else { - SYM.generateSymbolicEncodings(&o); - } - o.close(); + generateSYMEncodings(); } else if (collectIR) { - CollectIR cir(M); - std::ofstream o; - o.open(oname, std::ios_base::app); - cir.generateTriplets(o); - o.close(); + collectIRfunc(); } - return 0; + // return 0; } diff --git a/src/include/utils.h b/src/include/utils.h index ab7921ddf..b216489f4 100644 --- a/src/include/utils.h +++ b/src/include/utils.h @@ -13,12 +13,40 @@ #include "llvm/Demangle/Demangle.h" //for getting function base name #include "llvm/IR/Module.h" #include "llvm/IRReader/IRReader.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include -#include +#include +#include +#include +#include +#include +#include +#include + +#include "clang/Driver/Compilation.h" +#include "clang/Driver/Driver.h" +#include "clang/Driver/Tool.h" + +#include +#include +#include +#include +#include + +#include +#include +#include #include +#include namespace IR2Vec { @@ -46,8 +74,13 @@ extern float WO; extern float WA; extern float WT; extern bool debug; +extern bool cpp_input; +extern bool memdep; extern std::map opcMap; +std::unique_ptr readCPPtoIR(const char *FileName); std::unique_ptr getLLVMIR(); +// std::unique_ptr readCPP(); +std::unique_ptr readIR(); void scaleVector(Vector &vec, float factor); // newly added std::string getDemagledName(const llvm::Function *function); diff --git a/src/test-suite/CMakeLists.txt b/src/test-suite/CMakeLists.txt index c1e5d114a..8a72f4660 100644 --- a/src/test-suite/CMakeLists.txt +++ b/src/test-suite/CMakeLists.txt @@ -69,12 +69,17 @@ endif() # sanity checks and lit configs configure_file(sanity_check.sh.cmake sanity_check.sh @ONLY) file(COPY PE-benchmarks-llfiles-llvm17 DESTINATION ./) +file(COPY PE-benchmarks DESTINATION ./) file(COPY sqlite3.ll DESTINATION ./) file(COPY oracle DESTINATION ./) file(COPY ../../vocabulary DESTINATION ./) file(COPY index-llvm17.files DESTINATION ./) +file(COPY index-llvm17-source.files DESTINATION ./) configure_file(lit.site.cfg.py.in lit.site.cfg.py @ONLY) file(COPY test-lit.py DESTINATION ./) -file(COPY test-ir2vec.lit DESTINATION ./) +file(COPY test-fa.lit DESTINATION ./) +file(COPY test-sym.lit DESTINATION ./) +# file(COPY test-cpp-sym.lit DESTINATION ./) +# file(COPY test-cpp-fa.lit DESTINATION ./) diff --git a/src/test-suite/index-llvm17-source.files b/src/test-suite/index-llvm17-source.files new file mode 100644 index 000000000..7a434920b --- /dev/null +++ b/src/test-suite/index-llvm17-source.files @@ -0,0 +1,118 @@ +./PE-benchmarks/channel-assignment.cpp +./PE-benchmarks/find-two-non-repeating-element.cpp +./PE-benchmarks/aho-corasick-algorithm.cpp +./PE-benchmarks/count-possible-ways-to-construct-buildings.cpp +./PE-benchmarks/little-and-big-endian-mystery.cpp +./PE-benchmarks/rat-in-a-maze.cpp +./PE-benchmarks/word-wrap.cpp +./PE-benchmarks/strongly-connected-components.cpp +./PE-benchmarks/z-algorithm-linear-time.cpp +./PE-benchmarks/n-queen-problem.cpp +./PE-benchmarks/shortest-common-supersequence.cpp +./PE-benchmarks/topological-sorting.cpp +./PE-benchmarks/binomial-coefficient.cpp +./PE-benchmarks/find-k-closest-elements-given-value.cpp +./PE-benchmarks/find-length-of-the-longest-consecutive-path-in-a-character-matrix.cpp +./PE-benchmarks/longest-bitonic-subsequence.cpp +./PE-benchmarks/rotate-bits-of-an-integer.cpp +./PE-benchmarks/graph-coloring.cpp +./PE-benchmarks/trie-suffixes.cpp +./PE-benchmarks/biconnectivity.cpp +./PE-benchmarks/weighted-job-scheduling.cpp +./PE-benchmarks/minimum-cost-polygon-triangulation.cpp +./PE-benchmarks/Find_the_closest_pair_from_two_sorted_arrays.cpp +./PE-benchmarks/binary-insertion-sort.cpp +./PE-benchmarks/count-of-n-digit-numbers-whose-sum-of-digits-equals-to-given-sum.cpp +./PE-benchmarks/longest-path-directed-acyclic-graph.cpp +./PE-benchmarks/find-common-elements-three-sorted-arrays.cpp +./PE-benchmarks/find-minimum-number-of-coins-that-make-a-change.cpp +./PE-benchmarks/naive-algorithm.cpp +./PE-benchmarks/sudoku.cpp +./PE-benchmarks/detect-cycle-undirected-graph.cpp +./PE-benchmarks/coin-change.cpp +./PE-benchmarks/longest-palindromic-subsequence.cpp +./PE-benchmarks/minimum-positive-points-to-reach-destination.cpp +./PE-benchmarks/karatsuba.cpp +./PE-benchmarks/kmp-algorithm.cpp +./PE-benchmarks/quicksort-for-linked-list.cpp +./PE-benchmarks/detect-cycle-in-a-graph.cpp +./PE-benchmarks/hamiltonian-cycle-backtracking.cpp +./PE-benchmarks/tug-of-war.cpp +./PE-benchmarks/Iterative_QuickSort.cpp +./PE-benchmarks/tower-of-hanoi.cpp +./PE-benchmarks/tarjan-algorithm.cpp +./PE-benchmarks/maximum-sum-increasing-subsequence.cpp +./PE-benchmarks/edit-distance.cpp +./PE-benchmarks/finite-automata-algorithm.cpp +./PE-benchmarks/snake-ladder.cpp +./PE-benchmarks/m-coloring-problem.cpp +./PE-benchmarks/boolean-parenthesization-problem.cpp +./PE-benchmarks/largest-sum-contiguous-subarray.cpp +./PE-benchmarks/minimum-cut-in-a-directed-graph.cpp +./PE-benchmarks/mobile-numeric-keypad-problem_space_optm.cpp +./PE-benchmarks/count-number-binary-strings-without-consecutive-1s.cpp +./PE-benchmarks/eulerian-path-and-circuit.cpp +./PE-benchmarks/vertex-cover-problem.cpp +./PE-benchmarks/largest-independent-set-problem.cpp +./PE-benchmarks/permutations-of-a-given-string.cpp +./PE-benchmarks/reservoir-sampling.cpp +./PE-benchmarks/mergeSort_LinkedList.cpp +./PE-benchmarks/subset-sum-problem.cpp +./PE-benchmarks/optimized-naive-algorithm.cpp +./PE-benchmarks/collect-maximum-points-in-a-grid-using-two-traversals.cpp +./PE-benchmarks/transitive-closure-of-a-graph.cpp +./PE-benchmarks/rabin-karp-algorithm.cpp +./PE-benchmarks/sort-array-wave-form-2.cpp +./PE-benchmarks/lexicographic-rank-of-a-string.cpp +./PE-benchmarks/the-knights-tour.cpp +./PE-benchmarks/maximum-size-sub-matrix-with-all-1s-in-a-binary-matrix.cpp +./PE-benchmarks/union-find.cpp +./PE-benchmarks/egg-dropping-puzzle.cpp +./PE-benchmarks/optimal-binary-search-tree.cpp +./PE-benchmarks/quicksort-on-singly-linked-list.cpp +./PE-benchmarks/insertion-sort-for-singly-linked-list.cpp +./PE-benchmarks/dfa-based-division.cpp +./PE-benchmarks/euler-circuit-directed-graph.cpp +./PE-benchmarks/kth-smallestlargest-element-unsorted-array-set-2-expected-linear-time.cpp +./PE-benchmarks/sorted-array-number-x-find-pair-array-whose-sum-closest-x.cpp +./PE-benchmarks/boyer-moore-algorithm.cpp +./PE-benchmarks/minimum-number-of-jumps-to-reach-end-of-a-given-array.cpp +./PE-benchmarks/ugly-numbers.cpp +./PE-benchmarks/min-cost-path.cpp +./PE-benchmarks/magic-square.cpp +./PE-benchmarks/box-stacking.cpp +./PE-benchmarks/longest-palindrome-substring.cpp +./PE-benchmarks/merge-sort-for-doubly-linked-list.cpp +./PE-benchmarks/floyd-warshall.cpp +./PE-benchmarks/construction-of-lcp-array-from-suffix-array.cpp +./PE-benchmarks/program-wish-womens-day.cpp +./PE-benchmarks/maximum-profit-by-buying-and-selling-a-share-at-most-twice.cpp +./PE-benchmarks/bfs.cpp +./PE-benchmarks/boruvkas-algorithm.cpp +./PE-benchmarks/kth-smallestlargest-element-unsorted-array-set-3-worst-case-linear-time.cpp +./PE-benchmarks/sieve-of-eratosthenes.cpp +./PE-benchmarks/find-parity.cpp +./PE-benchmarks/birthday-paradox.cpp +./PE-benchmarks/anagram-substring-search-search-permutations.cpp +./PE-benchmarks/dfs.cpp +./PE-benchmarks/program-for-nth-fibonacci-number.cpp +./PE-benchmarks/partition-problem.cpp +./PE-benchmarks/count-1s-sorted-binary-array.cpp +./PE-benchmarks/maximum-length-chain-of-pairs.cpp +./PE-benchmarks/mobile-numeric-keypad-problem.cpp +./PE-benchmarks/matrix-chain-multiplication.cpp +./PE-benchmarks/Nearly_sorted_Algo.cpp +./PE-benchmarks/bellman-ford-algorithm.cpp +./PE-benchmarks/subset-sum.cpp +./PE-benchmarks/maximum-sum-rectangle-in-a-2d-matrix.cpp +./PE-benchmarks/count-ways-reach-nth-stair.cpp +./PE-benchmarks/palindrome-partitioning.cpp +./PE-benchmarks/cut-vertices.cpp +./PE-benchmarks/longest-increasing-subsequence.cpp +./PE-benchmarks/minimum-adjacent-swaps-to-move-maximum-and-minimum-to-corners.cpp +./PE-benchmarks/longest-even-length-substring-sum-first-second-half.cpp +./PE-benchmarks/sort-n-numbers-range-0-n2-1-linear-time.cpp +./PE-benchmarks/total-number-of-non-decreasing-numbers-with-n-digits.cpp +./PE-benchmarks/cutting-a-rod.cpp +./PE-benchmarks/overlapping-subproblems-property.cpp +./PE-benchmarks/efficient-constructtion-of-finite-automata.cpp diff --git a/src/test-suite/sanity_check.sh.cmake b/src/test-suite/sanity_check.sh.cmake index db6e9b525..e03d7d4b2 100644 --- a/src/test-suite/sanity_check.sh.cmake +++ b/src/test-suite/sanity_check.sh.cmake @@ -35,35 +35,121 @@ functions=("main" "buildMatchingMachine" "search" "BellamFord" "BFS" "isBCUtil" "selectKItems" "getMinDiceThrows" "countSort" "subset_sum" "SolveSudoku" "SCC" "solveKTUtil" "topologicalSort" "transitiveClosure" "insertSuffix" "tugOfWar" "isUgly" "Union" "printVertexCover" "findMaxProfit" "solveWordWrap") -perform_vector_comparison() { - LEVEL=$1 - FILE_PREFIX=$2 +perform_program_vector_comparison_cpp() { + LEVEL="p" + FILE_PREFIX="p" echo -e "${BLUE}${BOLD}Running ir2vec on ${FILE_PREFIX}-level for ${EncodingType} encoding type" ORIG_FILE=oracle/${EncodingType}_${SEED_VERSION}_${FILE_PREFIX}/ir2vec.txt - VIR_FILE=ir2vec_${FILE_PREFIX}.txt + VIR_FILE_CPP=ir2vec_${FILE_PREFIX}_${PASS}_CPP.txt + + # Generate IR2Vec embeddings through c++ input + while IFS= read -r d; do + ${IR2VEC_PATH} -${PASS} -cpp -level ${LEVEL} -o ${VIR_FILE_CPP} ${d} &> /dev/null + done < index-${SEED_VERSION}-source.files + wait + + TEMP=temp_${EncodingType}_${SEED_VERSION}_${FILE_PREFIX}_${PASS}_CPP + if ls *${VIR_FILE_CPP} 1> /dev/null 2>&1; then + mkdir -p ${TEMP} + mv *${VIR_FILE_CPP} ${TEMP}/ + + d=$(diff <(sed -e 's/^ *#[0-9]* *//g' ${ORIG_FILE}) <(sed -e 's/^ *#[0-9]* *//g' ${TEMP}/${VIR_FILE_CPP})) + if [ "$d" == "" ]; then + echo -e "${GREEN}${BOLD}[Test Passed] Vectors of Oracle and Current version of CPP ${FILE_PREFIX}-level are Identical.${NC}" + else + echo -e "$(tput bold)${RED}[Test Failed] Vectors of Oracle and Current version of CPP ${FILE_PREFIX}-level are Different.${NC}" + exit 1 + fi + else + echo -e "$(tput bold)${RED}[Error] No CPP embeddings are generated.${NC}" + exit 1 + fi + rm -rf ${TEMP} +} + +perform_program_vector_comparison() { + LEVEL="p" + FILE_PREFIX="p" + + echo -e "${BLUE}${BOLD}Running ir2vec on ${FILE_PREFIX}-level for ${EncodingType} encoding type" + + ORIG_FILE=oracle/${EncodingType}_${SEED_VERSION}_${FILE_PREFIX}/ir2vec.txt + VIR_FILE=ir2vec_${FILE_PREFIX}_${PASS}.txt # SQLite specific variables. if [[ "$ENABLE_SQLITE" == "ON" ]]; then - SQLITE_VIR=sqlite3_${FILE_PREFIX}.txt + SQLITE_VIR=sqlite3_${FILE_PREFIX}_${PASS}.txt SQLITE_INPUT=./sqlite3.ll SQLITE_ORIG=oracle/${EncodingType}_${SEED_VERSION}_${FILE_PREFIX}/sqlite3.txt fi + # Generate IR2Vec embeddings through IR input + while IFS= read -r d; do + ${IR2VEC_PATH} -${PASS} -level ${LEVEL} -o ${VIR_FILE} ${d} &> /dev/null + done < index-${SEED_VERSION}.files + wait + + # SQLITE is currently only tested against the program (p) level + if [[ "$ENABLE_SQLITE" == "ON" && "$FILE_PREFIX" == "p" ]]; then + ${IR2VEC_PATH} -${PASS} -level ${LEVEL} -o ${SQLITE_VIR} ${SQLITE_INPUT} &> /dev/null + fi + + TEMP=temp_${EncodingType}_${SEED_VERSION}_${FILE_PREFIX} + if ls *${VIR_FILE} 1> /dev/null 2>&1; then + mkdir -p ${TEMP} + mv *${VIR_FILE} ${TEMP}/ + + d=$(diff <(sed -e 's/^ *#[0-9]* *//g' ${ORIG_FILE}) <(sed -e 's/^ *#[0-9]* *//g' ${TEMP}/${VIR_FILE})) + if [ "$d" == "" ]; then + echo -e "${GREEN}${BOLD}[Test Passed] Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Identical.${NC}" + else + echo -e "$(tput bold)${RED}[Test Failed] Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Different.${NC}" + exit 1 + fi + else + echo -e "$(tput bold)${RED}[Error] No embeddings are generated.${NC}" + exit 1 + fi + + # SQLite tests only if its enabled + if [[ "$ENABLE_SQLITE" == "ON" ]]; then + if [[ ! -e "$SQLITE_VIR" ]]; then + echo -e "$(tput bold)${RED}[Error] No embeddings are generated for SQLite benchmark.${NC}" + exit 1 + fi + mv ${SQLITE_VIR} ${TEMP}/ + + d=$(diff <(sed -e 's/^ *#[0-9]* *//g' ${SQLITE_ORIG}) <(sed -e 's/^ *#[0-9]* *//g' ${TEMP}/${SQLITE_VIR})) + + if [ "$d" == "" ]; then + echo -e "${GREEN}${BOLD}[Test Passed] SQLite Benchmark Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Identical.${NC}" + else + echo -e "$(tput bold)${RED}[Test Failed] SQLite Benchmark Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Different.${NC}" + exit 1 + fi + fi + rm -rf ${TEMP} +} + +perform_function_vector_comparison() { + LEVEL=$1 + FILE_PREFIX=$2 + + echo -e "${BLUE}${BOLD}Running ir2vec on ${FILE_PREFIX}-level for ${EncodingType} encoding type" + + ORIG_FILE=oracle/${EncodingType}_${SEED_VERSION}_${FILE_PREFIX}/ir2vec.txt + VIR_FILE=ir2vec_${FILE_PREFIX}_${PASS}.txt + # if file prefix is p or f, run the first while loop, else, run the second while loop - if [[ "$FILE_PREFIX" == "p" || "$FILE_PREFIX" == "f" ]]; then + if [[ "$FILE_PREFIX" == "f" ]]; then while IFS= read -r d; do ${IR2VEC_PATH} -${PASS} -level ${LEVEL} -o ${VIR_FILE} ${d} &> /dev/null done < index-${SEED_VERSION}.files wait - - # SQLITE is currently only tested against the program (p) level - if [[ "$ENABLE_SQLITE" == "ON" && "$FILE_PREFIX" == "p" ]]; then - ${IR2VEC_PATH} -${PASS} -level ${LEVEL} -o ${SQLITE_VIR} ${SQLITE_INPUT} &> /dev/null - fi else while IFS= read -r d_on do @@ -76,62 +162,24 @@ perform_vector_comparison() { fi TEMP=temp_${EncodingType}_${SEED_VERSION}_${FILE_PREFIX} - if [[ "$LEVEL" == "p" ]]; then - if ls *${VIR_FILE} 1> /dev/null 2>&1; then - mkdir -p ${TEMP} - mv *${VIR_FILE} ${TEMP}/ - - d=$(diff <(sed -e 's/^ *#[0-9]* *//g' ${ORIG_FILE}) <(sed -e 's/^ *#[0-9]* *//g' ${TEMP}/${VIR_FILE})) - if [ "$d" == "" ]; then - echo -e "${GREEN}${BOLD}[Test Passed] Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Identical.${NC}" - else - echo -e "$(tput bold)${RED}[Test Failed] Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Different.${NC}" - exit 1 - fi - else - echo -e "$(tput bold)${RED}[Error] No embeddings are generated.${NC}" - exit 1 - fi - - # SQLite tests only if its enabled - if [[ "$ENABLE_SQLITE" == "ON" ]]; then - if [[ ! -e "$SQLITE_VIR" ]]; then - echo -e "$(tput bold)${RED}[Error] No embeddings are generated for SQLite benchmark.${NC}" - exit 1 - fi - mv ${SQLITE_VIR} ${TEMP}/ - - d=$(diff <(sed -e 's/^ *#[0-9]* *//g' ${SQLITE_ORIG}) <(sed -e 's/^ *#[0-9]* *//g' ${TEMP}/${SQLITE_VIR})) - - if [ "$d" == "" ]; then - echo -e "${GREEN}${BOLD}[Test Passed] SQLite Benchmark Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Identical.${NC}" - else - echo -e "$(tput bold)${RED}[Test Failed] SQLite Benchmark Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Different.${NC}" - exit 1 - fi - fi - - else - if ls *${VIR_FILE} 1> /dev/null 2>&1 + if ls *${VIR_FILE} 1> /dev/null 2>&1; then + mkdir -p ${TEMP} + mv *${VIR_FILE} ${TEMP}/ + # removing demangled file and function names before '=' + sed 's/.*=//' ${ORIG_FILE} > orig_file_${FILE_PREFIX}.txt + sed 's/.*=//' ${TEMP}/${VIR_FILE}> vir_file_${FILE_PREFIX}.txt + d_f=$(diff orig_file_${FILE_PREFIX}.txt vir_file_${FILE_PREFIX}.txt ) + + if [ "$d_f" == "" ] then - mkdir -p ${TEMP} - mv *${VIR_FILE} ${TEMP}/ - # removing demangled file and function names before '=' - sed 's/.*=//' ${ORIG_FILE} > orig_file_${FILE_PREFIX}.txt - sed 's/.*=//' ${TEMP}/${VIR_FILE}> vir_file_${FILE_PREFIX}.txt - d_f=$(diff orig_file_${FILE_PREFIX}.txt vir_file_${FILE_PREFIX}.txt ) - - if [ "$d_f" == "" ] - then - echo -e "${GREEN}${BOLD}[Test Passed] Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Identical.${NC}" - - else - echo -e "$(tput bold)${RED}[Test Failed] Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Different.${NC}" - exit 1 - fi + echo -e "${GREEN}${BOLD}[Test Passed] Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Identical.${NC}" else - echo -e "$(tput bold)${RED}[Error] No embeddings are generated.${NC}" + echo -e "$(tput bold)${RED}[Test Failed] Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Different.${NC}" exit 1 fi + else + echo -e "$(tput bold)${RED}[Error] No embeddings are generated.${NC}" + exit 1 fi + rm -rf ${TEMP} } diff --git a/src/test-suite/test-cpp-fa.lit b/src/test-suite/test-cpp-fa.lit new file mode 100644 index 000000000..446c7ebf9 --- /dev/null +++ b/src/test-suite/test-cpp-fa.lit @@ -0,0 +1,5 @@ +// RUN: bash %s FA llvm17 + +source sanity_check.sh + +perform_program_vector_comparison_cpp diff --git a/src/test-suite/test-cpp-sym.lit b/src/test-suite/test-cpp-sym.lit new file mode 100644 index 000000000..dcef1ab55 --- /dev/null +++ b/src/test-suite/test-cpp-sym.lit @@ -0,0 +1,5 @@ +// RUN: bash %s SYM llvm17 + +source sanity_check.sh + +perform_program_vector_comparison_cpp diff --git a/src/test-suite/test-fa.lit b/src/test-suite/test-fa.lit new file mode 100644 index 000000000..8ae7f6b80 --- /dev/null +++ b/src/test-suite/test-fa.lit @@ -0,0 +1,7 @@ +// RUN: bash %s FA llvm17 + +source sanity_check.sh + +perform_program_vector_comparison +perform_function_vector_comparison "f" "f" +perform_function_vector_comparison "f" "onDemand" diff --git a/src/test-suite/test-ir2vec.lit b/src/test-suite/test-ir2vec.lit deleted file mode 100644 index bdae9a625..000000000 --- a/src/test-suite/test-ir2vec.lit +++ /dev/null @@ -1,8 +0,0 @@ -// RUN: bash %s FA llvm17 -// RUN: bash %s SYM llvm17 - -source sanity_check.sh - -perform_vector_comparison "p" "p" -perform_vector_comparison "f" "f" -perform_vector_comparison "f" "onDemand" diff --git a/src/test-suite/test-sym.lit b/src/test-suite/test-sym.lit new file mode 100644 index 000000000..647936fe8 --- /dev/null +++ b/src/test-suite/test-sym.lit @@ -0,0 +1,7 @@ +// RUN: bash %s SYM llvm17 + +source sanity_check.sh + +perform_program_vector_comparison +perform_function_vector_comparison "f" "f" +perform_function_vector_comparison "f" "onDemand" diff --git a/src/utils.cpp b/src/utils.cpp index 515afeb38..6550c8df8 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -10,7 +10,10 @@ #include "IR2Vec.h" #include "vocabulary.h" #include +#include +#include // for std::stringstream #include + using namespace llvm; using namespace IR2Vec; @@ -27,17 +30,37 @@ float IR2Vec::WO; float IR2Vec::WA; float IR2Vec::WT; bool IR2Vec::debug; +bool IR2Vec::cpp_input; +bool IR2Vec::memdep; + +// static std::string temp_ll_file = "/tmp/temp_ir.ll"; + std::map IR2Vec::opcMap = IR2Vec::Vocabulary::getVocabulary(); -std::unique_ptr IR2Vec::getLLVMIR() { + +std::unique_ptr IR2Vec::readIR() { + static llvm::LLVMContext context; SMDiagnostic err; - static LLVMContext context; auto M = parseIRFile(iname, err, context); if (!M) { err.print(iname.c_str(), outs()); exit(1); } + + return M; +} + +std::unique_ptr IR2Vec::getLLVMIR() { + + // auto M = cpp_input ? readCPP() : readIR(); + auto M = readIR(); + + if (!M) { + errs() << "Error generating LLVM IR. \n"; + exit(1); + } + return M; } @@ -95,3 +118,194 @@ std::string IR2Vec::updatedRes(IR2Vec::Vector tmp, llvm::Function *f, return res; } + +std::string GetExecutablePath(const char *Argv0, void *MainAddr) { + return llvm::sys::fs::getMainExecutable(Argv0, MainAddr); +} + +void testReturnAddrFunction() { return; } +llvm::ExitOnError ExitOnErr; + +using namespace clang; +std::unique_ptr IR2Vec::readCPPtoIR(const char *fileName) { + + llvm::LLVMContext llvmContext; + // This just needs to be some symbol in the binary; C++ doesn't + // allow taking the address of ::main however. + void *MainAddr = (void *)(intptr_t)testReturnAddrFunction; + // std::string Path = GetExecutablePath(fileName, MainAddr); + std::string Path = fileName; + std::cout << "ExecutablePath " << Path << std::endl; + + llvm::IntrusiveRefCntPtr DiagOpts( + new clang::DiagnosticOptions()); + DiagOpts->ShowColors = true; + DiagOpts->ShowCarets = true; + DiagOpts->ShowOptionNames = true; + DiagOpts->VerifyDiagnostics = true; + DiagOpts->ShowFixits = true; + + TextDiagnosticPrinter *DiagClient = + new TextDiagnosticPrinter(llvm::errs(), DiagOpts.get()); + + IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); + DiagnosticsEngine Diags(DiagID, DiagOpts.get(), DiagClient); + + const std::string TripleStr = llvm::sys::getProcessTriple(); + llvm::Triple T(TripleStr); + + ExitOnErr.setBanner("clang interpreter"); + + clang::driver::Driver TheDriver(Path, T.str(), Diags); + TheDriver.setTitle("clang interpreter"); + TheDriver.setCheckInputsExist(false); + + // FIXME: This is a hack to try to force the driver to do something we can + // recognize. We need to extend the driver library to support this use model + // (basically, exactly one input, and the operation mode is hard wired). + + const char *cmd_args[] = {"clang++-17", fileName, "-std=c++17", "-v", + "-fsanitize=address"}; + int cmd_size = sizeof(cmd_args) / sizeof(cmd_args[0]); + + SmallVector Args(cmd_args, cmd_args + cmd_size); + std::unique_ptr C( + TheDriver.BuildCompilation(Args)); + if (!C) { + std::cerr << "Unable to build compilation" << std::endl; + return nullptr; + } + + // FIXME: This is copied from ASTUnit.cpp; simplify and eliminate. + + // We expect to get back exactly one command job, if we didn't something + // failed. Extract that job from the compilation. + const driver::JobList &Jobs = C->getJobs(); + + auto actions = C->getActions(); + std::cout << "actions.size() : " << actions.size() << std::endl; + std::cout << "Jobs.size() : " << Jobs.size() << std::endl; + for (auto job : Jobs) { + bool isCommand = isa(job); + std::cout << "isCommand : " << isCommand << std::endl; + std::cout << "job : " << job.getCreator().getName() << std::endl; + } + + if (actions.size() != 1) { + std::cerr << "Expected a single action : " << actions.size() << std::endl; + return nullptr; + } + + if (Jobs.size() != 1 || !isa(*Jobs.begin())) { + + std::cerr << "is command driver : " << isa(*Jobs.begin()) + << std::endl; + + SmallString<256> Msg; + llvm::raw_svector_ostream OS(Msg); + Jobs.Print(OS, "; ", true); + + std::cerr << Msg.c_str() << std::endl; + + std::cerr << "Unable to get a single command job from the driver" + << std::endl; + return nullptr; + } + + const driver::Command &Cmd = cast(*Jobs.begin()); + if (llvm::StringRef(Cmd.getCreator().getName()) != "clang") { + std::cout << "Not a clang command: " << Cmd.getCreator().getName() + << std::endl; + return nullptr; + } + + // Initialize a compiler invocation object from the clang (-cc1) arguments. + const llvm::opt::ArgStringList &CCArgs = Cmd.getArguments(); + + for (const auto &arg : CCArgs) { + std::cout << "arg : " << arg << std::endl; + } + + std::unique_ptr CI(new CompilerInvocation); + CompilerInvocation::CreateFromArgs(*CI, CCArgs, Diags); + + std::cout << "Command Created" << std::endl; + + // Show the invocation, with -v. + if (CI->getHeaderSearchOpts().Verbose) { + llvm::errs() << "clang invocation:\n"; + Jobs.Print(llvm::errs(), "\n", true); + llvm::errs() << "\n"; + } + + std::cout << "Header invocation generated" << std::endl; + + // FIXME: This is copied from cc1_main.cpp; simplify and eliminate. + + // Create a compiler instance to handle the actual work. + CompilerInstance Clang; + Clang.setInvocation(std::move(CI)); + + std::cout << "Compiler instance created" << std::endl; + + // Create the compilers actual diagnostics engine. + Clang.createDiagnostics(); + if (!Clang.hasDiagnostics()) { + std::cerr << "Error in Clang Diagnostics" << std::endl; + return nullptr; + } + + std::cout << "Diagnostics created" << std::endl; + + // Infer the builtin include path if unspecified. + if (Clang.getHeaderSearchOpts().UseBuiltinIncludes && + Clang.getHeaderSearchOpts().ResourceDir.empty()) { + std::cout + << "Resource Directory empty. Reading from env. CLANG_RESOURCE_DIR" + << std::endl; + const char *CP = ::getenv("CLANG_RESOURCE_DIR"); + + if (!CP) { + std::cerr << "Error in getting CLANG_RESOURCE_DIR" << std::endl; + return nullptr; + } + Clang.getHeaderSearchOpts().ResourceDir = CP; + + std::cout << "Resource Directory set to " << CP << std::endl; + } + // Clang.getHeaderSearchOpts().ResourceDir = + // CompilerInvocation::GetResourcesPath(fileName, MainAddr); + + std::cout << "Header search options set" << std::endl; + + Clang.createTarget(); + if (!Clang.hasTarget()) { + llvm::errs() << "Failed to create target\n"; + return nullptr; + } + + Clang.createFileManager(); + Clang.createSourceManager(Clang.getFileManager()); + + // Create and execute the frontend to generate an LLVM bitcode module. + std::unique_ptr Act(new EmitLLVMOnlyAction(&llvmContext)); + std::cout << "CodeGenAction created" << std::endl; + + auto result = Clang.ExecuteAction(*Act); + std::cout << "CodeGenAction executed " << result << std::endl; + + if (!result) { + std::cerr << "Error generating LLVM IR" << std::endl; + return nullptr; + } + + std::cout << "LLVM IR generated" << std::endl; + + std::unique_ptr Module = Act->takeModule(); + if (!Module) { + std::cerr << "Error generating LLVM IR - Nullptr" << std::endl; + return nullptr; + } + + return Module; +}