diff --git a/XML/.gitignore b/XML/.gitignore index 7de31292..58760c33 100644 --- a/XML/.gitignore +++ b/XML/.gitignore @@ -5,3 +5,4 @@ *.o *.out *.exe +*.ll \ No newline at end of file diff --git a/XML/.zanuda b/XML/.zanuda index a5705ca5..b8d51fc1 100644 --- a/XML/.zanuda +++ b/XML/.zanuda @@ -1 +1,2 @@ -forward mutability_check ignore bin/XML.ml \ No newline at end of file +forward mutability_check ignore bin/XML.ml,bin/XML_llvm.ml,lib/middleend/infer.ml,lib/common/ast.ml,lib/common/ast.mli,lib/common/pprinter.ml +forward physical_equality ignore lib/middleend/infer.ml \ No newline at end of file diff --git a/XML/XML.opam b/XML/XML.opam index 3a82a71d..4b5db614 100644 --- a/XML/XML.opam +++ b/XML/XML.opam @@ -1,8 +1,10 @@ # This file is generated by dune, edit dune-project instead opam-version: "2.0" -synopsis: "A short LLVM demo" -maintainer: ["Mikhail Gavrilenko" "Danila Rudnev-Stepanyan"] -authors: ["Mikhail Gavrilenko" "Danila Rudnev-Stepanyan"] +synopsis: "A small ML compiler" +maintainer: [ + "Mikhail Gavrilenko" "Danila Rudnev-Stepanyan" "Daniel Vlasenko" +] +authors: ["Mikhail Gavrilenko" "Danila Rudnev-Stepanyan" "Daniel Vlasenko"] license: "LGPL-3.0-or-later WITH OCaml-LGPL-linking-exception" homepage: "https://github.com/Kakadu/comp24" bug-reports: "https://github.com/Kakadu/comp24/issues" @@ -33,7 +35,7 @@ build: [ ] dev-repo: "git+https://github.com/Kakadu/comp24.git" depexts: [ - [ "llvm-18-dev" "clang-18" "gcc-riscv64-linux-gnu" "g++-riscv64-linux-gnu" "qemu-user"] {os-distribution = "ubuntu"} + [ "llvm-18-dev" "clang-18" "llc-18" "gcc-riscv64-linux-gnu" "g++-riscv64-linux-gnu" "qemu-user"] {os-distribution = "ubuntu"} ] pin-depends: [ ["ppx_deriving_qcheck.0.6" "git+https://github.com/c-cube/qcheck.git#42429bf06ba12373cad02b1404f50d0ad6238af5"] diff --git a/XML/XML.opam.template b/XML/XML.opam.template index f4e537bf..c9653196 100644 --- a/XML/XML.opam.template +++ b/XML/XML.opam.template @@ -1,5 +1,5 @@ depexts: [ - [ "llvm-18-dev" "clang-18" "gcc-riscv64-linux-gnu" "g++-riscv64-linux-gnu" "qemu-user"] {os-distribution = "ubuntu"} + [ "llvm-18-dev" "clang-18" "llc-18" "gcc-riscv64-linux-gnu" "g++-riscv64-linux-gnu" "qemu-user"] {os-distribution = "ubuntu"} ] pin-depends: [ ["ppx_deriving_qcheck.0.6" "git+https://github.com/c-cube/qcheck.git#42429bf06ba12373cad02b1404f50d0ad6238af5"] diff --git a/XML/bin/XML.ml b/XML/bin/XML.ml index 95d69d5c..706ec3df 100644 --- a/XML/bin/XML.ml +++ b/XML/bin/XML.ml @@ -1,8 +1,9 @@ -(** Copyright 2024-2025, Mikhail Gavrilenko,Danila Rudnev-Stepanyan, Daniel Vlasenko *) +(** Copyright 2025-2026, Mikhail Gavrilenko,Danila Rudnev-Stepanyan, Daniel Vlasenko *) (** SPDX-License-Identifier: LGPL-3.0-or-later *) open Format +open Common.Pprinter (* ------------------------------- *) (* Command-line Options *) @@ -17,6 +18,8 @@ type options = ; mutable show_cc : bool ; mutable show_ll : bool ; mutable gc_stats : bool + ; mutable check_types : bool + ; mutable show_types : bool } (* ------------------------------- *) @@ -36,6 +39,17 @@ let to_asm ~gc_stats ast : string = let compile_and_write options source_code = let ast = Common.Parser.parse_str source_code in + (if options.check_types + then + let open Middleend.Infer in + match infer_program env_with_things ast with + | Ok (env, names) -> + if options.show_types + then ( + pprint_env env names; + exit 0) + else () + | Error err -> Format.printf "Type error: %a\n" Middleend.Infer.pprint_err err); if options.show_ast then ( printf "%a\n" Common.Pprinter.pprint_program ast; @@ -106,6 +120,8 @@ let () = ; show_cc = false ; show_ll = false ; gc_stats = false + ; check_types = true + ; show_types = false } in let usage_msg = @@ -136,6 +152,12 @@ let () = ; ( "--gc-stats" , Arg.Unit (fun () -> options.gc_stats <- true) , " Enable GC statistics and force a collection at program start/end" ) + ; ( "-notypes" + , Arg.Unit (fun () -> options.check_types <- false) + , " Do not typecheck the program before compilation" ) + ; ( "-typedtree" + , Arg.Unit (fun () -> options.show_types <- true) + , " Show all names with their types and exit" ) ] in let handle_anon_arg filename = diff --git a/XML/bin/XML.mli b/XML/bin/XML.mli new file mode 100644 index 00000000..26ede79c --- /dev/null +++ b/XML/bin/XML.mli @@ -0,0 +1,3 @@ +(** Copyright 2025-2026, Mikhail Gavrilenko,Danila Rudnev-Stepanyan, Daniel Vlasenko *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) diff --git a/XML/bin/XML_llvm.ml b/XML/bin/XML_llvm.ml new file mode 100644 index 00000000..873d47f4 --- /dev/null +++ b/XML/bin/XML_llvm.ml @@ -0,0 +1,187 @@ +(** Copyright 2025-2026, Mikhail Gavrilenko,Danila Rudnev-Stepanyan, Daniel Vlasenko *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Format +open Common.Pprinter + +(* ------------------------------- *) +(* Command-line Options *) +(* ------------------------------- *) + +type options = + { mutable input_file_name : string option + ; mutable from_file_name : string option + ; mutable output_file_name : string option + ; mutable optimization_lvl : string option + ; mutable target : string + ; mutable show_ast : bool + ; mutable show_anf : bool + ; mutable show_cc : bool + ; mutable show_ll : bool + ; mutable check_types : bool + ; mutable show_types : bool + } + +(* ------------------------------- *) +(* Compiler Entry Points *) +(* ------------------------------- *) + +let to_llvm_ir ast options = + let cc_program = Middleend.Cc.cc_program ast in + let anf_ast = Middleend.Anf.anf_program cc_program in + let ll_anf = Middleend.Ll.lambda_lift_program anf_ast in + (* let buf = Buffer.create 1024 in + let ppf = formatter_of_buffer buf in *) + (* let triple = "x86_64-pc-linux-gnu" in *) + let target = options.target in + let opt = options.optimization_lvl in + Backend.Codegen_llvm.gen_program_ir ll_anf target opt +;; + +let compile_and_write options source_code = + let ast = Common.Parser.parse_str source_code in + (if options.check_types + then + let open Middleend.Infer in + match infer_program env_with_things ast with + | Ok (env, names) -> + if options.show_types + then ( + pprint_env env names; + exit 0) + else () + | Error err -> Format.printf "Type error: %a\n" Middleend.Infer.pprint_err err); + if options.show_ast + then ( + (* printf "%a\n" Common.Pprinter.pprint_program ast; *) + printf "%s\n" (Common.Ast.show_program ast); + exit 0); + let cc_ast = Middleend.Cc.cc_program ast in + if options.show_cc + then ( + printf "%a\n" Common.Pprinter.pprint_program cc_ast; + exit 0); + let anf_ast = Middleend.Anf.anf_program cc_ast in + if options.show_anf + then ( + Middleend.Pprinter.print_anf_program std_formatter anf_ast; + exit 0); + let anf_after_ll = Middleend.Ll.lambda_lift_program anf_ast in + if options.show_ll + then ( + (* Middleend.Pprinter.print_anf_program std_formatter anf_after_ll; *) + printf "%s\n" (Middleend.Anf.show_aprogram anf_after_ll); + exit 0); + let llvm_ir_code = to_llvm_ir ast options in + match options.output_file_name with + | Some out_file -> + (try + let oc = open_out out_file in + output_string oc llvm_ir_code; + close_out oc + with + | Sys_error msg -> + eprintf "Error: Could not write to output file '%s': %s\n" out_file msg; + exit 1) + | None -> print_string llvm_ir_code +;; + +let read_channel_to_string ic = + let buf = Buffer.create 1024 in + try + while true do + Buffer.add_string buf (input_line ic ^ "\n") + done; + "" (* Недостижимо *) + with + | End_of_file -> Buffer.contents buf +;; + +let read_file path = + try + let ch = open_in path in + let s = really_input_string ch (in_channel_length ch) in + close_in ch; + s + with + | Sys_error msg -> + eprintf "Error: Could not read input file '%s': %s\n" path msg; + exit 1 +;; + +(* ------------------------------- *) +(* Main Driver *) +(* ------------------------------- *) + +let () = + let options = + { input_file_name = None + ; from_file_name = None + ; output_file_name = None + ; show_ast = false + ; show_anf = false + ; show_cc = false + ; show_ll = false + ; optimization_lvl = None + ; target = "riscv64-unknown-linux-gnu" + ; check_types = true + ; show_types = false + } + in + let usage_msg = + "MiniML Compiler\n\n" + ^ "Usage: dune exec ./bin/compile.exe -- [input_file.ml]\n" + ^ "If no input file is provided, reads from standard input.\n\n" + ^ "Options:" + in + let arg_specs = + [ ( "-o" + , Arg.String (fun fname -> options.output_file_name <- Some fname) + , " Set the output file name for the llvm ir code" ) + ; ( "--ast" + , Arg.Unit (fun () -> options.show_ast <- true) + , " Show the parsed Abstract Syntax Tree and exit" ) + ; ( "--anf" + , Arg.Unit (fun () -> options.show_anf <- true) + , " Show the ANF representation and exit" ) + ; ( "--cc" + , Arg.Unit (fun () -> options.show_cc <- true) + , " Show the representation after applying CC and exit" ) + ; ( "-fromfile" + , Arg.String (fun fname -> options.from_file_name <- Some fname) + , " Read source from file (preferred over positional arg)" ) + ; ( "--ll" + , Arg.Unit (fun () -> options.show_ll <- true) + , " Show ANF after lambda lifting and exit" ) + ; ( "-O" + , Arg.String (fun opt -> options.optimization_lvl <- Some opt) + , " Set IR optimization level, \"O0\" by default" ) + ; ( "-t" + , Arg.String (fun targ -> options.target <- targ) + , " Set target platform, \"riscv64-unknown-linux-gnu\" by default" ) + ; ( "-notypes" + , Arg.Unit (fun () -> options.check_types <- false) + , " Do not typecheck the program before compilation" ) + ; ( "-typedtree" + , Arg.Unit (fun () -> options.show_types <- true) + , " Show all names with their types and exit" ) + ] + in + let handle_anon_arg filename = + match options.input_file_name with + | None -> options.input_file_name <- Some filename + | Some _ -> + eprintf "Error: Only one input file is allowed.\n"; + Arg.usage arg_specs usage_msg; + exit 1 + in + Arg.parse arg_specs handle_anon_arg usage_msg; + let source_code = + match options.from_file_name, options.input_file_name with + | Some path, _ -> read_file path + | None, Some path -> read_file path + | None, None -> read_channel_to_string stdin + in + compile_and_write options source_code +;; diff --git a/XML/bin/XML_llvm.mli b/XML/bin/XML_llvm.mli new file mode 100644 index 00000000..26ede79c --- /dev/null +++ b/XML/bin/XML_llvm.mli @@ -0,0 +1,3 @@ +(** Copyright 2025-2026, Mikhail Gavrilenko,Danila Rudnev-Stepanyan, Daniel Vlasenko *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) diff --git a/XML/bin/dune b/XML/bin/dune index c264f986..49495b0b 100644 --- a/XML/bin/dune +++ b/XML/bin/dune @@ -2,3 +2,8 @@ (public_name XML) (name XML) (libraries XML.Backend XML.Common XML.Middleend)) + +(executable + (public_name XML_llvm) + (name XML_llvm) + (libraries XML.Backend XML.Common XML.Middleend)) diff --git a/XML/bin/runtime.c b/XML/bin/runtime.c index f8f351f3..35ebbe53 100644 --- a/XML/bin/runtime.c +++ b/XML/bin/runtime.c @@ -207,7 +207,9 @@ static void* gc_alloc_bytes(size_t n, GCType* vt) { if (alloc_ptr + n > from_end) { gc_collect(); - if (alloc_ptr + n > from_end) panic("GC: out of memory"); + char msg[100]; + sprintf(msg, "GC: out of memory: asked for %ld bytes)", n); + if (alloc_ptr + n > from_end) panic(msg); } uint8_t* p = alloc_ptr; alloc_ptr += n; @@ -238,6 +240,16 @@ value create_tuple(int64_t n) { return (value)alloc_block(n); } +value create_tuple_init(int64_t n, int64_t* init_arr) { + Block* tuple = (Block*)create_tuple(n); + + for (int i = 0; i < n; i++) { + tuple->elems[i] = init_arr[i]; + } + + return (value)tuple; +} + value field(value val, int64_t index) { if (IS_INT(val)) panic("field: attempt to access field of an integer"); if (val == 0) panic("field: null pointer dereference"); @@ -274,52 +286,43 @@ Closure* copy_closure(const Closure* src) { static value rv_call(void* fn, value* argv, int64_t n) { int64_t spill = (n > RV_GP_ARGS) ? (n - RV_GP_ARGS) : 0; size_t spill_bytes = (size_t)spill * WORD_SZ; - value* spill_ptr = (spill > 0) ? argv + RV_GP_ARGS : NULL; + size_t align_adj = (spill_bytes + 7) & ~7; value ret; - asm volatile( - "mv t0, %[sz]\n" - "sub sp, sp, t0\n" - - "beqz %[cnt], 2f\n" - "mv t1, sp\n" - "mv t2, %[spill]\n" - "mv t3, %[cnt]\n" - "li t4, 0\n" + __asm__ volatile( + "mv t6, %[fn]\n" + "sub sp, sp, %[adj]\n" + "mv t0, sp\n" + "addi t1, %[argv], 64\n" + "mv t2, %[spill]\n" + "li t3, 0\n" "1:\n" - "beq t4, t3, 2f\n" - "slli t5, t4, 3\n" - "add t6, t2, t5\n" - "ld t0, 0(t6)\n" - "sd t0, 0(t1)\n" + "beq t3, t2, 2f\n" + "ld t4, 0(t1)\n" + "sd t4, 0(t0)\n" "addi t1, t1, 8\n" - "addi t4, t4, 1\n" - "j 1b\n" + "addi t0, t0, 8\n" + "addi t3, t3, 1\n" + "j 1b\n" "2:\n" - "mv a0, %[a0]\n" - "mv a1, %[a1]\n" - "mv a2, %[a2]\n" - "mv a3, %[a3]\n" - "mv a4, %[a4]\n" - "mv a5, %[a5]\n" - "mv a6, %[a6]\n" - "mv a7, %[a7]\n" - - "mv t6, %[fn]\n" - "jalr ra, t6, 0\n" - - "mv t0, %[sz]\n" - "add sp, sp, t0\n" - - "mv %[ret], a0\n" - : [ret] "=r"(ret) - : [fn] "r"(fn), [a0] "r"((n > 0) ? argv[0] : 0), [a1] "r"((n > 1) ? argv[1] : 0), - [a2] "r"((n > 2) ? argv[2] : 0), [a3] "r"((n > 3) ? argv[3] : 0), - [a4] "r"((n > 4) ? argv[4] : 0), [a5] "r"((n > 5) ? argv[5] : 0), - [a6] "r"((n > 6) ? argv[6] : 0), [a7] "r"((n > 7) ? argv[7] : 0), [spill] "r"(spill_ptr), - [cnt] "r"(spill), [sz] "r"(spill_bytes) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", + "ld a0, 0(%[argv])\n" + "ld a1, 8(%[argv])\n" + "ld a2, 16(%[argv])\n" + "ld a3, 24(%[argv])\n" + "ld a4, 32(%[argv])\n" + "ld a5, 40(%[argv])\n" + "ld a6, 48(%[argv])\n" + "ld a7, 56(%[argv])\n" + + "jalr ra, t6\n" + + "add sp, sp, %[adj]\n" + + "mv %0, a0\n" + : "=r"(ret) + : [fn] "r"(fn), [argv] "r"(argv), [spill] "r"(spill), [adj] "r"(align_adj) + : "t0", "t1", "t2", "t3", "t4", "t6", "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "ra", "memory"); return ret; } @@ -344,7 +347,9 @@ value apply1(Closure* f, value arg) { return rv_call(f->code, argv, n); } - panic("apply1: too many arguments"); + char msg[100]; + sprintf(msg, "apply1: too many arguments, should be: %ld, arg is: %ld", n, arg); + panic(msg); __builtin_unreachable(); } diff --git a/XML/dune-project b/XML/dune-project index 82593997..900634c4 100644 --- a/XML/dune-project +++ b/XML/dune-project @@ -7,15 +7,18 @@ (source (github Kakadu/comp24)) -(authors "Mikhail Gavrilenko" "Danila Rudnev-Stepanyan") +(authors "Mikhail Gavrilenko" "Danila Rudnev-Stepanyan" "Daniel Vlasenko") -(maintainers "Mikhail Gavrilenko" "Danila Rudnev-Stepanyan") +(maintainers + "Mikhail Gavrilenko" + "Danila Rudnev-Stepanyan" + "Daniel Vlasenko") (license "LGPL-3.0-or-later WITH OCaml-LGPL-linking-exception") (package (name XML) - (synopsis "A short LLVM demo") + (synopsis "A small ML compiler") (depends ocaml (dune diff --git a/XML/lib/backend/codegen.ml b/XML/lib/backend/codegen.ml index b6c128c8..642ff5d0 100644 --- a/XML/lib/backend/codegen.ml +++ b/XML/lib/backend/codegen.ml @@ -76,7 +76,6 @@ type 'a r = ('a, cg_error) result let ok x = Ok x let err e = Error e let ( let* ) = Result.bind -let ( let+ ) x f = Result.map f x let fresh_label (prefix : string) (st : cg_state) : string * cg_state = let n = st.next_label in diff --git a/XML/lib/backend/codegen_llvm.ml b/XML/lib/backend/codegen_llvm.ml new file mode 100644 index 00000000..20241531 --- /dev/null +++ b/XML/lib/backend/codegen_llvm.ml @@ -0,0 +1,403 @@ +(** Copyright 2026, Mikhail Gavrilenko, Danila Rudnev-Stepanyan, Daniel Vlasenko*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Middleend.Anf +open Common.Ast +open Target + +let context = Llvm.global_context () +let i64_type = Llvm.i64_type context +let gl_align = Target.word_size +let void_type = Llvm.void_type context +let gcheader_type = Llvm.struct_type context [| i64_type |] +let block_elms_type = Llvm.array_type i64_type 0 +let block_type = Llvm.struct_type context [| gcheader_type; i64_type; block_elms_type |] +let ptr_type = Llvm.pointer_type context +let builder = Llvm.builder context +let subst_main = "__user_main" + +module ParamMap = Map.Make (String) + +module FuncMap = struct + module K = struct + type t = ident + + let compare = Stdlib.compare + end + + module M = Map.Make (K) + + type status = + | User + | Lib + + type t = (Llvm.llvalue * Llvm.lltype * status) M.t + + let empty () : t = M.empty + + let bind (t : t) (x : ident) (ll : Llvm.llvalue * Llvm.lltype * status) : t = + M.add x ll t + ;; + + let find (t : t) (x : ident) : (Llvm.llvalue * Llvm.lltype * status) option = + M.find_opt x t + ;; + + let find_exn (t : t) (x : ident) : Llvm.llvalue * Llvm.lltype * status = M.find x t + let keys = M.bindings + + let print_fmap (t : t) = + let _ = + List.map + (fun (id, (fval, _, s)) -> + let stat = + match s with + | User -> "user" + | Lib -> "lib" + in + Format.printf + "Id: %s Arity: %d Status: %s\n" + id + (Array.length (Llvm.params fval)) + stat) + (keys t) + in + () + ;; +end + +(* Return types from runtime.c *) +let initial_fmap the_mod = + let decl fmap id retty argtyps = + let ftyp = Llvm.function_type retty argtyps in + let fval = Llvm.declare_function id ftyp the_mod in + FuncMap.bind fmap id (fval, ftyp, FuncMap.Lib) + in + let fmap = FuncMap.empty () in + let fmap = decl fmap "print_int" void_type [| i64_type |] in + let fmap = decl fmap "alloc_block" i64_type (*ptr*) [| i64_type |] in + let fmap = + decl fmap "alloc_closure" i64_type (*ptr*) [| i64_type (*ptr*); i64_type |] + in + let fmap = + decl fmap "apply1" i64_type (*ptr or int*) [| i64_type (*ptr*); i64_type |] + in + let fmap = decl fmap "print_gc_status" void_type [||] in + let fmap = decl fmap "collect" void_type [||] in + let fmap = decl fmap "create_tuple" i64_type (*ptr*) [| i64_type |] in + let fmap = + decl fmap "create_tuple_init" i64_type (*ptr*) [| i64_type; i64_type (*ptr*) |] + in + let fmap = decl fmap "field" i64_type (*ptr or int*) [| i64_type (*ptr*); i64_type |] in + let fmap = decl fmap "rt_init" void_type [| i64_type |] in + fmap +;; + +let build_call_mb_void ftype fval argvs name = + match Llvm.return_type ftype with + | ty when ty = void_type -> + let _ = Llvm.build_call ftype fval argvs "" builder in + Llvm.const_int i64_type 0 + | _ -> Llvm.build_call ftype fval argvs name builder +;; + +let decl_and_bind fmap the_mod id retty argc = + let id = if id = "main" then subst_main else id in + match FuncMap.find fmap id with + | Some (_, _, FuncMap.Lib) -> fmap + | _ when argc = 0 -> fmap + | _ -> + let argtyps = Array.make argc i64_type in + let ftyp = Llvm.function_type retty argtyps in + let fval = Llvm.declare_function id ftyp the_mod in + FuncMap.bind fmap id (fval, ftyp, FuncMap.User) +;; + +let prefill_fmap (fmap0 : FuncMap.t) the_mod (program : aprogram) : FuncMap.t = + List.fold_left + (fun fm -> function + | Anf_str_value (_rf, name, anf_expr) -> + (match anf_expr with + | Anf_let (_, _, Comp_func (ps, _), _) | Anf_comp_expr (Comp_func (ps, _)) -> + decl_and_bind fm the_mod name i64_type (List.length ps) + | _ -> decl_and_bind fm the_mod name i64_type 0) + | _ -> fm) + fmap0 + program +;; + +let build_alloc_closure fmap func = + let acval, actyp, _ = FuncMap.find_exn fmap "alloc_closure" in + let argc = Array.length (Llvm.params func) in + let argc = Llvm.const_int i64_type argc in + let func_as_i64 = Llvm.build_pointercast func i64_type "func_as_i64" builder in + Llvm.build_call actyp acval [| func_as_i64; argc |] "closure_tmp" builder +;; + +let gen_im_expr_ir fmap env = function + | Imm_num n -> Llvm.const_int i64_type ((n lsl 1) lor 1) + | Imm_ident id -> + (match ParamMap.find_opt id env with + | Some v -> + let temp = Llvm.build_load i64_type v id builder in + Llvm.set_alignment gl_align temp; + temp + | None -> + let id = if id = "main" then subst_main else id in + (match FuncMap.find fmap id with + | Some (fval, ftyp, _) -> + if Array.length (Llvm.params fval) = 0 + then build_call_mb_void ftyp fval [||] "calltmp" + else build_alloc_closure fmap fval + | None -> invalid_arg ("Name not bound: " ^ id))) +;; + +let create_entry_alloca the_fun var_name = + let builder = Llvm.builder_at context (Llvm.instr_begin (Llvm.entry_block the_fun)) in + Llvm.build_alloca i64_type var_name builder +;; + +(* working with tagged integers *) +let gen_tagged_binop fmap env op lhs rhs = + let left = gen_im_expr_ir fmap env lhs in + let right = gen_im_expr_ir fmap env rhs in + let one = Llvm.const_int i64_type 1 in + match op with + | "+" -> + let temp = Llvm.build_add left right "addtmp1" builder in + Llvm.build_sub temp one "addtmp2" builder + | "-" -> + let temp = Llvm.build_sub left right "subtmp1" builder in + Llvm.build_add temp one "subtmp2" builder + | "*" -> + let left' = Llvm.build_lshr left one "multmp1" builder in + let right' = Llvm.build_sub right one "multmp2" builder in + let temp = Llvm.build_mul left' right' "multmp3" builder in + Llvm.build_add temp one "multmp4" builder + | "<" -> + let temp = Llvm.build_icmp Llvm.Icmp.Slt left right "slttmp" builder in + Llvm.build_zext temp i64_type "slttmp_as_i64" builder + | "<=" -> + let temp = Llvm.build_icmp Llvm.Icmp.Sle left right "sletmp" builder in + Llvm.build_zext temp i64_type "sletmp_as_i64" builder + | ">" -> + let temp = Llvm.build_icmp Llvm.Icmp.Sgt left right "sgttmp" builder in + Llvm.build_zext temp i64_type "sgttmp_as_i64" builder + | ">=" -> + let temp = Llvm.build_icmp Llvm.Icmp.Sge left right "sgetmp" builder in + Llvm.build_zext temp i64_type "sgetmp_as_i64" builder + | "=" -> + let temp = Llvm.build_icmp Llvm.Icmp.Eq left right "eqtmp" builder in + Llvm.build_zext temp i64_type "eqtmp_as_i64" builder + | "<>" -> + let temp = Llvm.build_icmp Llvm.Icmp.Ne left right "neqtmp" builder in + Llvm.build_zext temp i64_type "neqtmp_as_i64" builder + | _ -> invalid_arg ("Unsupported binary operator: " ^ op) +;; + +let build_apply_part fmap fclos args = + let apval, aptyp, _ = FuncMap.find_exn fmap "apply1" in + List.fold_left + (fun clos arg -> + let clos_as_i64 = Llvm.build_pointercast clos i64_type "clos_as_i64" builder in + build_call_mb_void aptyp apval [| clos_as_i64; arg |] "apptmp") + fclos + args +;; + +let rec gen_comp_expr_ir fmap env = function + | Comp_imm imm -> gen_im_expr_ir fmap env imm + | Comp_binop (op, lhs, rhs) -> gen_tagged_binop fmap env op lhs rhs + | Comp_app (Imm_ident f, args) -> + let f_map = if f = "main" then subst_main else f in + (match FuncMap.find fmap f_map with + | Some (fval, ftype, _) -> + let pvs = Llvm.params fval in + let argvs = List.map (fun arg -> gen_im_expr_ir fmap env arg) args in + if List.length args = Array.length pvs + then build_call_mb_void ftype fval (Array.of_list argvs) "calltmp" + else ( + let fclos = build_alloc_closure fmap fval in + build_apply_part fmap fclos argvs) + | None -> + (match ParamMap.find_opt f env with + | Some clos_ptr -> + let clos_val = Llvm.build_load i64_type clos_ptr (f ^ "_val") builder in + Llvm.set_alignment gl_align clos_val; + let argvs = List.map (fun arg -> gen_im_expr_ir fmap env arg) args in + build_apply_part fmap clos_val argvs + | None -> invalid_arg ("Id: " ^ f ^ " not found"))) + | Comp_app (Imm_num _, _) -> invalid_arg "cannot apply number as a function" + | Comp_branch (cond, br_then, br_else) -> + let cv = gen_im_expr_ir fmap env cond in + let zero = Llvm.const_int i64_type 0 in + let cond_val = Llvm.build_icmp Llvm.Icmp.Ne cv zero "cond" builder in + let start_bb = Llvm.insertion_block builder in + let the_fun = Llvm.block_parent start_bb in + let then_bb = Llvm.append_block context "then" the_fun in + Llvm.position_at_end then_bb builder; + let then_val, _ = gen_anf_expr fmap env br_then in + let new_then_bb = Llvm.insertion_block builder in + let else_bb = Llvm.append_block context "else" the_fun in + Llvm.position_at_end else_bb builder; + let else_val, _ = gen_anf_expr fmap env br_else in + let new_else_bb = Llvm.insertion_block builder in + let merge_bb = Llvm.append_block context "ifcont" the_fun in + Llvm.position_at_end merge_bb builder; + let incoming = [ then_val, new_then_bb; else_val, new_else_bb ] in + let phi = Llvm.build_phi incoming "iftmp" builder in + Llvm.position_at_end start_bb builder; + let _ = Llvm.build_cond_br cond_val then_bb else_bb builder in + Llvm.position_at_end new_then_bb builder; + let _ = Llvm.build_br merge_bb builder in + Llvm.position_at_end new_else_bb builder; + let _ = Llvm.build_br merge_bb builder in + Llvm.position_at_end merge_bb builder; + phi + | Comp_alloc imms | Comp_tuple imms -> + let ctval, cttyp, _ = FuncMap.find_exn fmap "create_tuple_init" in + let argc = Llvm.const_int i64_type (List.length imms) in + let argv = List.map (fun im -> gen_im_expr_ir fmap env im) imms in + let alloca = Llvm.build_array_alloca i64_type argc "tuple_vals_alloca" builder in + List.iteri + (fun i elem -> + let ptr_to_elem = + Llvm.build_gep + i64_type + alloca + [| Llvm.const_int i64_type i |] + "ptr_to_elem" + builder + in + let store = Llvm.build_store elem ptr_to_elem builder in + Llvm.set_alignment gl_align store; + ()) + argv; + let alloca_as_i64 = Llvm.build_pointercast alloca i64_type "alloca_as_i64" builder in + Llvm.build_call cttyp ctval [| argc; alloca_as_i64 |] "tuple_tmp" builder + | Comp_load (imexpr, offset) -> + let vbase = gen_im_expr_ir fmap env imexpr in + let voffst = Llvm.const_int i64_type (offset / Target.word_size) in + let fifn, fity, _ = FuncMap.find_exn fmap "field" in + Llvm.build_call fity fifn [| vbase; voffst |] "load_tmp" builder + | Comp_func (_, _) -> invalid_arg "anonymous functions should be lambda-lifted" + +and gen_anf_expr fmap env = function + | Anf_comp_expr comp -> + let v = gen_comp_expr_ir fmap env comp in + v, env + | Anf_let (_, name, comp_expr, body) -> + let init_val = gen_comp_expr_ir fmap env comp_expr in + let the_fun = Llvm.block_parent (Llvm.insertion_block builder) in + let alloca = create_entry_alloca the_fun name in + let store = Llvm.build_store init_val alloca builder in + Llvm.set_alignment gl_align store; + let new_env = ParamMap.add name alloca env in + gen_anf_expr fmap new_env body +;; + +let gen_function fmap the_mod name params body = + let name = if name = "main" then subst_main else name in + let param_types = Array.map (fun _ -> i64_type) (Array.of_list params) in + let f_type = Llvm.function_type i64_type param_types in + let the_fun = + match Llvm.lookup_function name the_mod with + | None -> Llvm.declare_function name f_type the_mod + | Some f -> + if Array.length (Llvm.basic_blocks f) = 0 + then () + else invalid_arg ("Redefinition of function: " ^ name); + if Array.length (Llvm.params f) = List.length params + then () + else invalid_arg ("Redefinition of function with different number of args: " ^ name); + f + in + let bb = Llvm.append_block context "entry" the_fun in + Llvm.position_at_end bb builder; + let env = + List.fold_left2 + (fun env name pval -> + Llvm.set_value_name name pval; + let alloca = create_entry_alloca the_fun name in + let store = Llvm.build_store pval alloca builder in + Llvm.set_alignment gl_align store; + ParamMap.add name alloca env) + ParamMap.empty + params + (Array.to_list (Llvm.params the_fun)) + in + let ret_val, _ = gen_anf_expr fmap env body in + let _ = Llvm.build_ret ret_val builder in + if Llvm_analysis.verify_function the_fun + then () + else ( + Stdlib.Format.printf + "invalid function generated\n%s\n" + (Llvm.string_of_llvalue the_fun); + Llvm_analysis.assert_valid_function the_fun); + the_fun +;; + +let gen_astructure_item fmap the_mod main_fn env = function + | Anf_str_eval expr -> + let _, new_env = gen_anf_expr fmap env expr in + new_env + | Anf_str_value (_, name, Anf_comp_expr (Comp_func (params, body))) -> + let _ = gen_function fmap the_mod name params body in + env + | Anf_str_value (_, name, expr) -> + Llvm.position_at_end (Llvm.entry_block main_fn) builder; + let value, _ = gen_anf_expr fmap env expr in + let alloca = create_entry_alloca main_fn name in + let store = Llvm.build_store value alloca builder in + Llvm.set_alignment gl_align store; + ParamMap.add name alloca env +;; + +let optimize_ir the_mod (triple : string) (opt : string option) = + let target = Llvm_target.Target.by_triple triple in + let machine = Llvm_target.TargetMachine.create ~triple target in + let opts = Llvm_passbuilder.create_passbuilder_options () in + let optflag = + match opt with + | Some opt -> opt + | _ -> "O0" + in + let optflag = "default<" ^ optflag ^ ">" in + (match Llvm_passbuilder.run_passes the_mod optflag machine opts with + | Error e -> invalid_arg e + | Ok () -> ()); + Llvm_passbuilder.dispose_passbuilder_options opts +;; + +let gen_program_ir (program : aprogram) (triple : string) (opt : string option) = + let the_module = Llvm.create_module context "main" in + Llvm_all_backends.initialize (); + Llvm.set_target_triple triple the_module; + assert (Llvm_executionengine.initialize ()); + let fmap = prefill_fmap (initial_fmap the_module) the_module program in + let main_ty = Llvm.function_type i64_type [||] in + let main_fn = Llvm.define_function "main" main_ty the_module in + Llvm.position_at_end (Llvm.entry_block main_fn) builder; + let initfn, initty, _ = FuncMap.find_exn fmap "rt_init" in + let _ = + build_call_mb_void initty initfn [| Llvm.const_int i64_type (5 * 1024) |] "inittmp" + in + let env = ParamMap.empty in + let _ = + List.fold_left + (fun env item -> gen_astructure_item fmap the_module main_fn env item) + env + program + in + let bbs = Llvm.basic_blocks main_fn in + Llvm.position_at_end bbs.(Array.length bbs - 1) builder; + let col_fn, col_ty, _ = FuncMap.find_exn fmap "collect" in + let _ = build_call_mb_void col_ty col_fn [||] "_" in + let _ = Llvm.build_ret (Llvm.const_int i64_type 0) builder in + optimize_ir the_module triple opt; + match Llvm_analysis.verify_module the_module with + | Some r -> invalid_arg r + | None -> Llvm.string_of_llmodule the_module +;; diff --git a/XML/lib/backend/codegen_llvm.mli b/XML/lib/backend/codegen_llvm.mli new file mode 100644 index 00000000..eeaac5c4 --- /dev/null +++ b/XML/lib/backend/codegen_llvm.mli @@ -0,0 +1,74 @@ +(** Copyright 2026, Mikhail Gavrilenko, Danila Rudnev-Stepanyan, Daniel Vlasenko*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +module FuncMap : sig + module K : sig + type t + + val compare : 'a -> 'a -> int + end + + type status = + | User + | Lib + + type t + + module M : sig + type key + type 'a t + + val empty : 'a t + val is_empty : 'a t -> bool + val mem : key -> 'a t -> bool + val add : key -> 'a -> 'a t -> 'a t + val update : key -> ('a option -> 'a option) -> 'a t -> 'a t + val singleton : key -> 'a -> 'a t + val remove : key -> 'a t -> 'a t + val merge : (key -> 'a option -> 'b option -> 'c option) -> 'a t -> 'b t -> 'c t + val union : (key -> 'a -> 'a -> 'a option) -> 'a t -> 'a t -> 'a t + val compare : ('a -> 'a -> int) -> 'a t -> 'a t -> int + val equal : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool + val iter : (key -> 'a -> unit) -> 'a t -> unit + val fold : (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b + val for_all : (key -> 'a -> bool) -> 'a t -> bool + val exists : (key -> 'a -> bool) -> 'a t -> bool + val filter : (key -> 'a -> bool) -> 'a t -> 'a t + val filter_map : (key -> 'a -> 'b option) -> 'a t -> 'b t + val partition : (key -> 'a -> bool) -> 'a t -> 'a t * 'a t + val cardinal : 'a t -> int + val bindings : 'a t -> (key * 'a) list + val min_binding : 'a t -> key * 'a + val min_binding_opt : 'a t -> (key * 'a) option + val max_binding : 'a t -> key * 'a + val max_binding_opt : 'a t -> (key * 'a) option + val choose : 'a t -> key * 'a + val choose_opt : 'a t -> (key * 'a) option + val split : key -> 'a t -> 'a t * 'a option * 'a t + val find : key -> 'a t -> 'a + val find_opt : key -> 'a t -> 'a option + val find_first : (key -> bool) -> 'a t -> key * 'a + val find_first_opt : (key -> bool) -> 'a t -> (key * 'a) option + val find_last : (key -> bool) -> 'a t -> key * 'a + val find_last_opt : (key -> bool) -> 'a t -> (key * 'a) option + val map : ('a -> 'b) -> 'a t -> 'b t + val mapi : (key -> 'a -> 'b) -> 'a t -> 'b t + val to_seq : 'a t -> (key * 'a) Seq.t + val to_rev_seq : 'a t -> (key * 'a) Seq.t + val to_seq_from : key -> 'a t -> (key * 'a) Seq.t + val add_seq : (key * 'a) Seq.t -> 'a t -> 'a t + val of_seq : (key * 'a) Seq.t -> 'a t + end + + val empty : unit -> t + val bind : t -> string -> Llvm.llvalue * Llvm.lltype * status -> t + val find : t -> string -> (Llvm.llvalue * Llvm.lltype * status) option + val find_exn : t -> string -> Llvm.llvalue * Llvm.lltype * status + val keys : 'a M.t -> (string * 'a) list + val print_fmap : t -> unit +end + +(** [gen_program_ir prog triple opt] generates a LLMV IR string from the program [prog] +for the target architecture specified by [triple] with optimization level [opt] if not None, O0 otherwise*) +val gen_program_ir : Middleend.Anf.aprogram -> string -> string option -> string diff --git a/XML/lib/backend/dune b/XML/lib/backend/dune index 76598ff6..9bd21b08 100644 --- a/XML/lib/backend/dune +++ b/XML/lib/backend/dune @@ -1,9 +1,19 @@ (library (name backend) (public_name XML.Backend) - (modules codegen emission machine target) - (libraries angstrom base stdio XML.Common XML.Middleend) + (modules codegen codegen_llvm emission machine target) + (libraries + angstrom + base + llvm + llvm.analysis + llvm.executionengine + llvm.passbuilder + llvm.all_backends + stdio + XML.Common + XML.Middleend) (preprocess - (pps ppx_deriving.show ppx_deriving.eq ppx_deriving_qcheck)) + (pps ppx_deriving.show ppx_deriving.eq ppx_deriving_qcheck ppx_inline_test)) (instrumentation (backend bisect_ppx))) diff --git a/XML/lib/backend/emission.ml b/XML/lib/backend/emission.ml index a6a48979..6931887b 100644 --- a/XML/lib/backend/emission.ml +++ b/XML/lib/backend/emission.ml @@ -2,7 +2,7 @@ (** SPDX-License-Identifier: LGPL-3.0-or-later *) -open Base.Format +open Format open Base open Machine open Target diff --git a/XML/lib/backend/machine.ml b/XML/lib/backend/machine.ml index 33597658..aac90943 100644 --- a/XML/lib/backend/machine.ml +++ b/XML/lib/backend/machine.ml @@ -11,7 +11,24 @@ type reg = | Zero [@@deriving eq] -type offset = reg * int [@@deriving eq] +let gen_reg = + QCheck.Gen.oneof_weighted + [ 1, QCheck.Gen.map (fun gen0 -> A gen0) QCheck.Gen.(0 -- 7) + ; 1, QCheck.Gen.map (fun gen0 -> T gen0) QCheck.Gen.(0 -- 6) + ; 1, QCheck.Gen.map (fun gen0 -> S gen0) QCheck.Gen.(0 -- 11) + ; 1, QCheck.Gen.pure RA + ; 1, QCheck.Gen.pure SP + ; 1, QCheck.Gen.pure Zero + ] +;; + +type offset = reg * (int[@gen QCheck.Gen.nat_small]) [@@deriving eq] + +let gen_offset = + QCheck.Gen.map + (fun (gen0, gen1) -> gen0, gen1) + (QCheck.Gen.pair gen_reg QCheck.Gen.nat_small) +;; let pp_reg ppf = let open Format in @@ -25,9 +42,11 @@ let pp_reg ppf = ;; let pp_offset ppf (reg, off) = Format.fprintf ppf "%d(%a)" off pp_reg reg +let gen_label = QCheck.Gen.return "label" +let gen_comm = QCheck.Gen.return "comment" type instr = - | Addi of reg * reg * int (* ADD immediate *) + | Addi of reg * reg * (int[@gen QCheck.Gen.nat_small]) (* ADD immediate *) | Add of reg * reg * reg (* ADD *) | Sub of reg * reg * reg (* SUB *) | Mul of reg * reg * reg (* MUL *) @@ -35,26 +54,118 @@ type instr = | Seqz of reg * reg (* SEQZ: set equal zero *) | Snez of reg * reg (* SNEZ: set not equal zero *) | Xor of reg * reg * reg (* XOR *) - | Xori of reg * reg * int (* XOR immediate *) - | Beq of reg * reg * string (* BEQ: branch if equal *) - | Blt of reg * reg * string (* BLT: branch if less than *) - | Ble of reg * reg * string (* BLE: branch if less or equal *) - | Lla of reg * string (* LLA: load address *) - | Li of reg * int (* LI: load immediate *) + | Xori of reg * reg * (int[@gen QCheck.Gen.nat_small]) (* XOR immediate *) + | Beq of reg * reg * (string[@gen gen_label]) (* BEQ: branch if equal *) + | Blt of reg * reg * (string[@gen gen_label]) (* BLT: branch if less than *) + | Ble of reg * reg * (string[@gen gen_label]) (* BLE: branch if less or equal *) + | Lla of reg * (string[@gen gen_label]) (* LLA: load address *) + | Li of reg * (int[@gen QCheck.Gen.nat_small]) (* LI: load immediate *) | Ld of reg * offset (* LD: load doubleword *) | Sd of reg * offset (* SD: store doubleword *) | Mv of reg * reg (* MV: move *) - | Comment of string (* Assembler comment *) - | Label of string (* Assembler label *) - | Call of string (* CALL *) - | J of string (* J: jump *) + | Comment of (string[@gen gen_comm]) (* Assembler comment *) + | Label of (string[@gen gen_label]) (* Assembler label *) + | Call of (string[@gen gen_label]) (* CALL *) + | J of (string[@gen gen_label]) (* J: jump *) | Ecall (* ECALL *) | Ret (* return *) - | La of reg * string (* Load Address of labeled function into the reg *) - | Slli of reg * reg * int (* << imm *) - | Srai of reg * reg * int (* >> imm *) + | La of reg * (string[@gen gen_label]) + (* Load Address of labeled function into the reg *) + | Slli of reg * reg * (int[@gen QCheck.Gen.nat_small]) (* << imm *) + | Srai of reg * reg * (int[@gen QCheck.Gen.nat_small]) (* >> imm *) [@@deriving eq] +let gen_instr = + QCheck.Gen.oneof_weighted + [ ( 1 + , QCheck.Gen.map + (fun (gen0, gen1, gen2) -> Addi (gen0, gen1, gen2)) + (QCheck.Gen.triple gen_reg gen_reg QCheck.Gen.nat_small) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1, gen2) -> Add (gen0, gen1, gen2)) + (QCheck.Gen.triple gen_reg gen_reg gen_reg) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1, gen2) -> Sub (gen0, gen1, gen2)) + (QCheck.Gen.triple gen_reg gen_reg gen_reg) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1, gen2) -> Mul (gen0, gen1, gen2)) + (QCheck.Gen.triple gen_reg gen_reg gen_reg) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1, gen2) -> Slt (gen0, gen1, gen2)) + (QCheck.Gen.triple gen_reg gen_reg gen_reg) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1) -> Seqz (gen0, gen1)) + (QCheck.Gen.pair gen_reg gen_reg) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1) -> Snez (gen0, gen1)) + (QCheck.Gen.pair gen_reg gen_reg) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1, gen2) -> Xor (gen0, gen1, gen2)) + (QCheck.Gen.triple gen_reg gen_reg gen_reg) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1, gen2) -> Xori (gen0, gen1, gen2)) + (QCheck.Gen.triple gen_reg gen_reg QCheck.Gen.nat_small) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1, gen2) -> Beq (gen0, gen1, gen2)) + (QCheck.Gen.triple gen_reg gen_reg gen_label) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1, gen2) -> Blt (gen0, gen1, gen2)) + (QCheck.Gen.triple gen_reg gen_reg gen_label) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1, gen2) -> Ble (gen0, gen1, gen2)) + (QCheck.Gen.triple gen_reg gen_reg gen_label) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1) -> Lla (gen0, gen1)) + (QCheck.Gen.pair gen_reg gen_label) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1) -> Li (gen0, gen1)) + (QCheck.Gen.pair gen_reg QCheck.Gen.nat_small) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1) -> Ld (gen0, gen1)) + (QCheck.Gen.pair gen_reg gen_offset) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1) -> Sd (gen0, gen1)) + (QCheck.Gen.pair gen_reg gen_offset) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1) -> Mv (gen0, gen1)) + (QCheck.Gen.pair gen_reg gen_reg) ) + ; 1, QCheck.Gen.map (fun gen0 -> Comment gen0) gen_comm + ; 1, QCheck.Gen.map (fun gen0 -> Label gen0) gen_label + ; 1, QCheck.Gen.map (fun gen0 -> Call gen0) gen_label + ; 1, QCheck.Gen.map (fun gen0 -> J gen0) gen_label + ; 1, QCheck.Gen.pure Ecall + ; 1, QCheck.Gen.pure Ret + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1) -> La (gen0, gen1)) + (QCheck.Gen.pair gen_reg gen_label) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1, gen2) -> Slli (gen0, gen1, gen2)) + (QCheck.Gen.triple gen_reg gen_reg QCheck.Gen.nat_small) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1, gen2) -> Srai (gen0, gen1, gen2)) + (QCheck.Gen.triple gen_reg gen_reg QCheck.Gen.nat_small) ) + ] +;; + let pp_instr ppf = let open Format in function diff --git a/XML/lib/backend/machine.mli b/XML/lib/backend/machine.mli index 401ef15d..3d3dc620 100644 --- a/XML/lib/backend/machine.mli +++ b/XML/lib/backend/machine.mli @@ -10,11 +10,14 @@ type reg = | SP | Zero +val gen_reg : reg QCheck.Gen.t val equal_reg : reg -> reg -> bool val pp_reg : Format.formatter -> reg -> unit type offset = reg * int +val gen_offset : offset QCheck.Gen.t + type instr = | Addi of reg * reg * int (* ADD immediate *) | Add of reg * reg * reg (* ADD *) @@ -43,6 +46,7 @@ type instr = | Slli of reg * reg * int (* << imm *) | Srai of reg * reg * int (* >> imm (arith) *) +val gen_instr : instr QCheck.Gen.t val pp_instr : Format.formatter -> instr -> unit val addi : (instr -> 'a) -> reg -> reg -> int -> 'a val add : (instr -> 'a) -> reg -> reg -> reg -> 'a diff --git a/XML/lib/common/ast.ml b/XML/lib/common/ast.ml index c6b2a830..9655a087 100644 --- a/XML/lib/common/ast.ml +++ b/XML/lib/common/ast.ml @@ -30,111 +30,189 @@ let is_not_keyword = function | _ -> true ;; -let rec gen_filtered_ident base_gen = - let open QCheck.Gen in - base_gen - >>= fun ident -> - if is_not_keyword ident then return ident else gen_filtered_ident base_gen -;; +let gen_id_first_char = QCheck.Gen.oneof_weighted [ 5, char_range 'a' 'z'; 1, return '_' ] +let gen_digit = char_range '0' '9' -let gen_ident = - let base_gen = - map2 - (fun start_sym rest_sym -> Base.Char.to_string start_sym ^ rest_sym) - (oneof [ char_range 'A' 'Z'; char_range 'a' 'z'; return '_' ]) - (small_string - ~gen: - (oneof - [ char_range 'A' 'Z'; char_range 'a' 'z'; char_range '0' '9'; return '_' ])) - in - gen_filtered_ident base_gen +let gen_id_char = + QCheck.Gen.oneof_weighted + [ 5, gen_id_first_char; 5, char_range 'A' 'Z'; 5, gen_digit; 1, return '\'' ] ;; -let gen_ident_uc = - let base_gen = - map2 - (fun start_sym rest_sym -> Base.Char.to_string start_sym ^ rest_sym) - (char_range 'A' 'Z') - (small_string - ~gen: - (oneof - [ char_range 'A' 'Z'; char_range 'a' 'z'; char_range '0' '9'; return '_' ])) - in - gen_filtered_ident base_gen -;; - -let gen_ident_lc include_us = - let start_sym = - if include_us then oneof [ char_range 'a' 'z'; return '_' ] else char_range 'a' 'z' +let gen_ident = + let gen_name = + let* fst = gen_id_first_char >|= Base.Char.to_string in + let range = if Base.String.( = ) fst "_" then 1 -- 4 else 0 -- 4 in + let* rest = string_size ~gen:gen_id_char range in + return (fst ^ rest) in - let base_gen = - map2 - (fun start_sym rest_sym -> Base.Char.to_string start_sym ^ rest_sym) - start_sym - (small_string - ~gen: - (oneof - [ char_range 'A' 'Z'; char_range 'a' 'z'; char_range '0' '9'; return '_' ])) + let rec loop gen = + gen >>= fun name -> if is_not_keyword name then return name else loop gen in - gen_filtered_ident base_gen + loop gen_name ;; module List1 = struct - type 'a t = 'a * ('a list[@gen list_size (int_bound 5) gen_a]) - [@@deriving eq, show { with_path = false }, qcheck] + type 'a t = 'a * 'a list [@@deriving eq, show { with_path = false }] + + let gen gen_a = + QCheck.Gen.map + (fun (gen0, gen1) -> gen0, gen1) + (QCheck.Gen.pair gen_a (list_size (int_bound 5) gen_a)) + ;; + + let arb gen_a = QCheck.make (gen gen_a) end module List2 = struct - type 'a t = 'a * 'a * ('a list[@gen list_size (int_bound 5) gen_a]) - [@@deriving eq, show { with_path = false }, qcheck] + type 'a t = 'a * 'a * 'a list [@@deriving eq, show { with_path = false }] + + let gen gen_a = + QCheck.Gen.map + (fun (gen0, gen1, gen2) -> gen0, gen1, gen2) + (QCheck.Gen.triple gen_a gen_a (QCheck.Gen.list gen_a)) + ;; + + let arb gen_a = QCheck.make (gen gen_a) end module Constant = struct type t = - | Const_integer of (int[@gen small_nat]) (** integer as [52] *) - | Const_char of (char[@gen gen_charc]) (** char as ['w'] *) - | Const_string of (string[@gen small_string ~gen:gen_charc]) - (** string as ["Kakadu"] *) - [@@deriving eq, show { with_path = false }, qcheck] + | Const_integer of int (** integer as [52] *) + | Const_char of char (** char as ['w'] *) + | Const_string of string (** string as ["Kakadu"] *) + [@@deriving eq, show { with_path = false }] + + let gen = + QCheck.Gen.oneof_weighted + [ 1, QCheck.Gen.map (fun gen0 -> Const_integer gen0) nat_small + ; 1, QCheck.Gen.map (fun gen0 -> Const_char gen0) gen_charc + ; 1, QCheck.Gen.map (fun gen0 -> Const_string gen0) (string_small_of gen_charc) + ] + ;; + + let arb = QCheck.make gen end module TypeExpr = struct + let gen_ref inner_gen = + let open QCheck.Gen in + map ref inner_gen + ;; + + type level = (int[@gen nat_small]) [@@deriving eq, show { with_path = false }, qcheck] + type t = - | Type_arrow of t * t - (** [Type_arrow(T1, T2)] represents: - [T1 -> T2] *) - | Type_var of (ident[@gen gen_ident]) - | Type_tuple of t List2.t (** [Type_tuple([T1, T2, ... Tn])] *) - | Type_construct of ident * t list - (** [Type_construct(lident, l)] represents: - - [tconstr] when [l=[]], - - [T tconstr] when [l=[T]], - - [(T1, ..., Tn) tconstr] when [l=[T1 ; ... ; Tn]]. *) - [@@deriving eq, show { with_path = false }, qcheck] + | Type_arrow of t * t (** Function type [t1 -> t2] *) + | Type_tuple of t List2.t (** Tuple type [t1 * t2 * ...] *) + | Type_var of tv ref (** Type variable ['a] *) + | Quant_type_var of ident (** Quantified type variable ['a. 'a] *) + | Type_construct of ident * t list (** *) + [@@deriving eq, show { with_path = false }] + + and tv = + | Unbound of (ident[@gen gen_ident]) * level (** Free type variable *) + | Link of (t[@gen gen_sized (n / 2)]) (** Unified type variable *) + [@@deriving eq, show { with_path = false }] + + let rec gen_sized n = + match n with + | 0 -> QCheck.Gen.map (fun gen0 -> Quant_type_var gen0) gen_ident + | _ -> + QCheck.Gen.oneof_weighted + [ 1, QCheck.Gen.map (fun gen0 -> Quant_type_var gen0) gen_ident + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1) -> Type_arrow (gen0, gen1)) + (QCheck.Gen.pair (gen_sized (n / 2)) (gen_sized (n / 2))) ) + ; 1, QCheck.Gen.map (fun gen0 -> Type_tuple gen0) (List2.gen (gen_sized (n / 2))) + ; 1, QCheck.Gen.map (fun gen0 -> Type_var gen0) (gen_ref (gen_tv_sized (n / 2))) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1) -> Type_construct (gen0, gen1)) + (QCheck.Gen.pair gen_ident (QCheck.Gen.list (gen_sized (n / 2)))) ) + ] + + and gen_tv_sized n = + match n with + | 0 -> + QCheck.Gen.map + (fun (gen0, gen1) -> Unbound (gen0, gen1)) + (QCheck.Gen.pair gen_ident gen_level) + | _ -> + QCheck.Gen.oneof_weighted + [ ( 1 + , QCheck.Gen.map + (fun (gen0, gen1) -> Unbound (gen0, gen1)) + (QCheck.Gen.pair gen_ident gen_level) ) + ; 1, QCheck.Gen.map (fun gen0 -> Link gen0) (gen_sized (n / 2)) + ] + ;; + + let gen = QCheck.Gen.sized gen_sized + let gen_tv = QCheck.Gen.sized gen_tv_sized + let arb_sized n = QCheck.make (gen_sized n) + let arb_tv_sized n = QCheck.make (gen_tv_sized n) + let arb = QCheck.make gen + let arb_tv = QCheck.make gen_tv end module Pattern = struct type t = - | Pat_constraint of t * (TypeExpr.t[@gen TypeExpr.gen_sized (n / 2)]) - (** Pattern [(P : T)] *) + | Pat_constraint of t * TypeExpr.t (** Pattern [(P : T)] *) | Pat_any (** The pattern [_]. *) - | Pat_var of (ident[@gen gen_ident_lc false]) (** A variable pattern such as [x] *) + | Pat_var of ident (** A variable pattern such as [x] *) | Pat_constant of Constant.t (** Patterns such as [52], ['w'], ["uwu"] *) | Pat_tuple of t List2.t (** Patterns [(P1, ..., Pn)]. *) - | Pat_construct of (ident[@gen gen_ident_uc]) * t option + | Pat_construct of ident * t option (** [Pat_construct(C, args)] represents: - [C] when [args] is [None], - [C P] when [args] is [Some (P)] - [C (P1, ..., Pn)] when [args] is [Some (Pat_tuple [P1; ...; Pn])] *) - [@@deriving eq, show { with_path = false }, qcheck] + [@@deriving eq, show { with_path = false }] + + let rec gen_sized n = + match n with + | 0 -> + QCheck.Gen.oneof_weighted + [ 1, QCheck.Gen.pure Pat_any + ; 1, QCheck.Gen.map (fun gen0 -> Pat_var gen0) gen_ident + ; 1, QCheck.Gen.map (fun gen0 -> Pat_constant gen0) Constant.gen + ] + | _ -> + QCheck.Gen.oneof_weighted + [ 1, QCheck.Gen.pure Pat_any + ; 1, QCheck.Gen.map (fun gen0 -> Pat_var gen0) gen_ident + ; 1, QCheck.Gen.map (fun gen0 -> Pat_constant gen0) Constant.gen + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1) -> Pat_constraint (gen0, gen1)) + (QCheck.Gen.pair (gen_sized (n / 2)) (TypeExpr.gen_sized (n / 2))) ) + ; 1, QCheck.Gen.map (fun gen0 -> Pat_tuple gen0) (List2.gen (gen_sized (n / 2))) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1) -> Pat_construct (gen0, gen1)) + (QCheck.Gen.pair gen_ident (QCheck.Gen.option (gen_sized (n / 2)))) ) + ] + ;; + + let gen = QCheck.Gen.sized gen_sized + let arb_sized n = QCheck.make (gen_sized n) + let arb = QCheck.make gen end module Expression = struct type rec_flag = | Nonrecursive | Recursive - [@@deriving eq, show { with_path = false }, qcheck] + [@@deriving eq, show { with_path = false }] + + let gen_rec_flag = + QCheck.Gen.oneof_weighted + [ 1, QCheck.Gen.pure Nonrecursive; 1, QCheck.Gen.pure Recursive ] + ;; + + let arb_rec_flag = QCheck.make gen_rec_flag type 'expr value_binding = { pat : Pattern.t @@ -160,35 +238,94 @@ module Expression = struct ;; type t = - | Exp_ident of (ident[@gen gen_ident_lc true]) (** Identifiers such as [x] *) + | Exp_ident of ident (** (ident[@gen gen_ident])ifiers such as [x] *) | Exp_constant of Constant.t (** Expressions constant such as [1], ['a'], ["true"]**) | Exp_tuple of t List2.t (** Expressions [(E1, E2, ..., En)] *) - | Exp_function of (t case[@gen gen_case gen_sized (n / 2)]) List1.t + | Exp_function of t case List1.t (** [Exp_function (P1, [P2; ...; Pn])] represents [function P1 | ... | Pn] *) - | Exp_fun of (Pattern.t[@gen Pattern.gen_sized (n / 2)]) List1.t * t + | Exp_fun of Pattern.t List1.t * t (**[Exp_fun (P1, [P2; ...; Pn], E)] represents: [fun P1 ... Pn -> E] *) | Exp_apply of t * t (** [Pexp_apply(E0, E1)] represents [E0 E1]*) - | Exp_match of t * (t case[@gen gen_case gen_sized (n / 2)]) List1.t - (** [match E0 with P1 -> E1 || Pn -> En] *) - | Exp_constraint of t * (TypeExpr.t[@gen TypeExpr.gen_sized (n / 2)]) (** [(E : T)] *) + | Exp_match of t * t case List1.t (** [match E0 with P1 -> E1 || Pn -> En] *) + | Exp_constraint of t * TypeExpr.t (** [(E : T)] *) | Exp_if of t * t * t option (** [if E1 then E2 else E3] *) - | Exp_let of - rec_flag * (t value_binding[@gen gen_value_binding gen_sized (n / 2)]) List1.t * t + | Exp_let of rec_flag * t value_binding List1.t * t (** [Exp_let(flag, [(P1,E1) ; ... ; (Pn,En)], E)] represents: - [let P1 = E1 and ... and Pn = EN in E] when [flag] is [Nonrecursive], - [let rec P1 = E1 and ... and Pn = EN in E] when [flag] is [Recursive]. *) - | Exp_construct of (ident[@gen gen_ident_uc]) * t option + | Exp_construct of ident * t option (** [Exp_construct(C, exp)] represents: - [C] when [exp] is [None], - [C E] when [exp] is [Some E], - [C (E1, ..., En)] when [exp] is [Some (Exp_tuple[E1;...;En])] *) - [@@deriving eq, show { with_path = false }, qcheck] + [@@deriving eq, show { with_path = false }] + + let rec gen_sized n = + match n with + | 0 -> + QCheck.Gen.oneof_weighted + [ 1, QCheck.Gen.map (fun gen0 -> Exp_ident gen0) gen_ident + ; 1, QCheck.Gen.map (fun gen0 -> Exp_constant gen0) Constant.gen + ] + | _ -> + QCheck.Gen.oneof_weighted + [ 1, QCheck.Gen.map (fun gen0 -> Exp_ident gen0) gen_ident + ; 1, QCheck.Gen.map (fun gen0 -> Exp_constant gen0) Constant.gen + ; 1, QCheck.Gen.map (fun gen0 -> Exp_tuple gen0) (List2.gen (gen_sized (n / 2))) + ; ( 1 + , QCheck.Gen.map + (fun gen0 -> Exp_function gen0) + (List1.gen (gen_case gen_sized (n / 2))) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1) -> Exp_fun (gen0, gen1)) + (QCheck.Gen.pair + (List1.gen (Pattern.gen_sized (n / 2))) + (gen_sized (n / 2))) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1) -> Exp_apply (gen0, gen1)) + (QCheck.Gen.pair (gen_sized (n / 2)) (gen_sized (n / 2))) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1) -> Exp_match (gen0, gen1)) + (QCheck.Gen.pair + (gen_sized (n / 2)) + (List1.gen (gen_case gen_sized (n / 2)))) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1) -> Exp_constraint (gen0, gen1)) + (QCheck.Gen.pair (gen_sized (n / 2)) (TypeExpr.gen_sized (n / 2))) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1, gen2) -> Exp_if (gen0, gen1, gen2)) + (QCheck.Gen.triple + (gen_sized (n / 2)) + (gen_sized (n / 2)) + (QCheck.Gen.option (gen_sized (n / 2)))) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1, gen2) -> Exp_let (gen0, gen1, gen2)) + (QCheck.Gen.triple + gen_rec_flag + (List1.gen (gen_value_binding gen_sized (n / 2))) + (gen_sized (n / 2))) ) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1) -> Exp_construct (gen0, gen1)) + (QCheck.Gen.pair gen_ident (QCheck.Gen.option (gen_sized (n / 2)))) ) + ] + ;; + + let gen = QCheck.Gen.sized gen_sized + let arb_sized n = QCheck.make (gen_sized n) + let arb = QCheck.make gen end module Structure = struct @@ -200,42 +337,24 @@ module Structure = struct when [rec] is [Nonrecursive], - [let rec P1 = E1 and ... and Pn = EN ] when [rec] is [Recursiv e ee]. *) - | Str_adt of ident list * ident * (ident * TypeExpr.t option) List1.t - (** [Str_type(C0, [(C1, [(T11; T12; ... ; T1n_1)]); (C2, [(T21;T22; ... ; T2n_2)]); ... ; - (Cm, [(Tm1;Tm2; ... ; Tmn_n)]) ])] represents: - - [type C0 = - | C1 of T11 * ... * T1n_1 - | ... - | Cm of Tm1 * ... * Tmn_n - ] - - n_i: [n_i >= 0] - Invariant: [m > 0] *) [@@deriving eq, show { with_path = false }] let gen_structure_item n = - frequency - [ 0, map (fun expr -> Str_eval expr) (Expression.gen_sized (n / 2)) - ; ( 0 + oneof_weighted + [ 1, map (fun expr -> Str_eval expr) (Expression.gen_sized (n / 2)) + ; ( 1 , let* rec_flag = oneof [ return Expression.Nonrecursive; return Expression.Recursive ] in let* bind1 = Expression.gen_value_binding Expression.gen_sized (n / 2) in let* bindl = - small_list (Expression.gen_value_binding Expression.gen_sized (n / 2)) + list_small (Expression.gen_value_binding Expression.gen_sized (n / 2)) in return (Str_value (rec_flag, (bind1, bindl))) ) - ; ( 1 - , let* tparam = small_list (gen_ident_lc true) in - let* idt = gen_ident_lc true in - let* cons1 = Gen.pair gen_ident_uc (Gen.option (TypeExpr.gen_sized (n / 20))) in - let* consl = - small_list (Gen.pair gen_ident_uc (Gen.option (TypeExpr.gen_sized (n / 20)))) - in - return (Str_adt (tparam, idt, (cons1, consl))) ) ] ;; + + let arb_structure_item = QCheck.make (gen_structure_item 10) end type program = Structure.structure_item list [@@deriving eq, show { with_path = false }] diff --git a/XML/lib/common/ast.mli b/XML/lib/common/ast.mli index da329dc5..6227a185 100644 --- a/XML/lib/common/ast.mli +++ b/XML/lib/common/ast.mli @@ -9,10 +9,7 @@ val pp_ident : Format.formatter -> ident -> unit val show_ident : ident -> string val gen_charc : char QCheck.Gen.t val is_not_keyword : string -> bool -val gen_filtered_ident : string QCheck.Gen.t -> string QCheck.Gen.t val gen_ident : string QCheck.Gen.t -val gen_ident_uc : string QCheck.Gen.t -val gen_ident_lc : bool -> string QCheck.Gen.t module List1 : sig type 'a t = 'a * 'a list @@ -48,12 +45,18 @@ module Constant : sig end module TypeExpr : sig + type level = int + type t = - | Type_arrow of t * t (** Represents a function type: [T1 -> T2]. *) - | Type_var of ident (** Represents a type variable: ['a]. *) - | Type_tuple of t List2.t (** Represents a tuple type: [(T1, T2, ..., Tn)]. *) - | Type_construct of ident * t list - (** Represents a type constructor with arguments: [C T1 ... Tn]. *) + | Type_arrow of t * t (** Function type [t1 -> t2] *) + | Type_tuple of t List2.t (** Tuple type [t1 * t2 * ...] *) + | Type_var of tv ref (** Type variable ['a] *) + | Quant_type_var of ident (** Quantified type variable ['a. 'a] *) + | Type_construct of ident * t list (** *) + + and tv = + | Unbound of ident * level (** Free type variable *) + | Link of t (** Unified type variable *) val equal : t -> t -> bool val pp : Format.formatter -> t -> unit @@ -173,9 +176,6 @@ module Structure : sig when [rec] is [rec_flag.Nonrecursive]. - [let rec P1 = E1 and ... and Pn = En] when [rec] is [rec_flag.Recursive]. *) - | Str_adt of ident list * ident * (ident * TypeExpr.t option) List1.t - (** A type declaration for an algebraic data type (ADT), - such as [type t1 = ... | ... | tn = ...]. *) val equal_structure_item : structure_item -> structure_item -> bool val pp_structure_item : Format.formatter -> structure_item -> unit diff --git a/XML/lib/common/parser.ml b/XML/lib/common/parser.ml index 55bcd7ef..90bb1761 100644 --- a/XML/lib/common/parser.ml +++ b/XML/lib/common/parser.ml @@ -151,7 +151,7 @@ let pmultiargsapp pty = let ptypevar = let* id = token "'" *> (pident_lc <|> pident_cap) in - return (TypeExpr.Type_var id) + return (TypeExpr.Type_var { contents = Unbound (id, 0) }) ;; let ptypetuple ptype = @@ -564,60 +564,20 @@ let pstrlet = return (Structure.Str_value (recflag, (bindingfs, bindingtl))) ;; -let pstradt = - let* _ = token "type" in - let* type_param = - option - [] - (pparenth (sep_by (token ",") (token "'" *> pident_lc)) - <|> many (token "'" *> pident_lc)) - in - let* type_name = pass_ws *> pident_lc in - let var = - let* name = option None (pass_ws *> pident_cap >>= fun n -> return (Some n)) in - match name with - | Some x -> - (* Constructor case: Can have "of" *) - let* ctype = - option - None - (token "of" - *> let* types = sep_by (token "*") ptype_adt in - match types with - | x :: y :: xs -> return (Some (TypeExpr.Type_tuple (x, y, xs))) - | [ x ] -> return (Some x) - | [] -> fail "Expected type after 'of'") - in - return (x, ctype) - | None -> - (* Lowercase type alias case: Must have a type expression *) - let* ctype = - let* types = sep_by (token "*") ptype_adt in - match types with - | x :: y :: xs -> return (Some (TypeExpr.Type_tuple (x, y, xs))) (* Tuple case *) - | [ x ] -> return (Some x) (* Single type *) - | [] -> fail "Expected type definition" - in - return ("", ctype) - in - let* _ = token "=" in - let* fvar = - option - None - (option None (token "|" *> return None) *> (var >>= fun v -> return (Some v))) - in - let* varl = many (token "|" *> var) in - match fvar with - | Some fvar -> return (Structure.Str_adt (type_param, type_name, (fvar, varl))) - | None -> fail "Expected at least one variant" -;; - -let pstr_item = pseval <|> pstrlet <|> pstradt +let pstr_item = pseval <|> pstrlet let pstructure = let psemicolon = many (token ";;") in sep_by psemicolon pstr_item <* psemicolon <* pass_ws ;; +let parse_exp_str str = + parse_string ~consume:All (pass_ws *> pexpr <* pass_ws) str |> Result.ok_or_failwith +;; + +let parse_pat_str str = + parse_string ~consume:All (pass_ws *> ppattern <* pass_ws) str |> Result.ok_or_failwith +;; + let parse str = parse_string ~consume:All pstructure str let parse_str str = parse str |> Result.ok_or_failwith diff --git a/XML/lib/common/parser.mli b/XML/lib/common/parser.mli index ceac05ff..3b77d055 100644 --- a/XML/lib/common/parser.mli +++ b/XML/lib/common/parser.mli @@ -6,6 +6,8 @@ open Ast val parse : string -> (program, string) result val parse_str : string -> program +val parse_exp_str : string -> Expression.t +val parse_pat_str : string -> Pattern.t val pass_ws : unit Angstrom.t val pass_ws1 : unit Angstrom.t val token : string -> string Angstrom.t diff --git a/XML/lib/common/pprinter.ml b/XML/lib/common/pprinter.ml index 5ac80715..aa76b2a7 100644 --- a/XML/lib/common/pprinter.ml +++ b/XML/lib/common/pprinter.ml @@ -38,38 +38,148 @@ let pprint_constant fmt = | Const_string s -> fprintf fmt "%S" s ;; -let rec pprint_type fmt = +let rearr_typvars typ = + let open Base in + let open TypeExpr in + let var_counter = ref 0 in + let rec rename t var_map = + match t with + | Type_arrow (t1, t2) -> + let t1', map1 = rename t1 var_map in + let t2', map2 = rename t2 map1 in + Type_arrow (t1', t2'), map2 + | Type_tuple (t1, t2, tl) -> + let t1', map1 = rename t1 var_map in + let t2', map2 = rename t2 map1 in + let ts = tl in + List.fold_left ts ~init:([], map2) ~f:(fun (acc_ts, acc_map) t_elem -> + let t_elem', new_map = rename t_elem acc_map in + t_elem' :: acc_ts, new_map) + |> fun (rev_ts, final_map) -> Type_tuple (t1', t2', List.rev rev_ts), final_map + | Type_var tv_ref -> + (match !tv_ref with + | Unbound _ -> Type_var tv_ref, var_map + | Link linked_t -> rename linked_t var_map) + | Quant_type_var id -> + (match Map.find var_map id with + | Some new_id -> Quant_type_var new_id, var_map + | None -> + let idx = !var_counter in + var_counter := idx + 1; + let new_id = + if idx < 26 + then String.make 1 (Char.of_int_exn (97 + idx)) + else ( + let prefix_count = (idx / 26) - 1 in + let suffix_idx = Int.rem idx 26 in + "'" + ^ String.make (prefix_count + 1) (Char.of_int_exn (97 + (idx / 26) - 1)) + ^ String.make 1 (Char.of_int_exn (97 + suffix_idx))) + in + let new_map = Map.set var_map ~key:id ~data:new_id in + Quant_type_var new_id, new_map) + | Type_construct (id, args) -> + List.fold_left args ~init:([], var_map) ~f:(fun (acc_args, acc_map) arg -> + let arg', new_map = rename arg acc_map in + arg' :: acc_args, new_map) + |> fun (rev_args, final_map) -> Type_construct (id, List.rev rev_args), final_map + in + fst (rename typ (Map.empty (module String))) +;; + +let rec pprint_type_tuple fmt = + let open Stdlib.Format in let open TypeExpr in function - | Type_arrow (tye1, tye2) -> fprintf fmt "(%a -> %a)" pprint_type tye1 pprint_type tye2 - | Type_var id -> fprintf fmt "'%s" id - | Type_tuple (tye1, tye2, tyel) -> + | [] -> () + | [ h ] -> + (match h with + | Type_arrow (_, _) -> fprintf fmt "(%a)" pprint_type h + | _ -> fprintf fmt "%a" pprint_type h) + | h :: tl -> + (match h with + | Type_arrow (_, _) -> fprintf fmt "(%a) * %a" pprint_type h pprint_type_tuple tl + | _ -> fprintf fmt "%a * %a" pprint_type h pprint_type_tuple tl) + +and pprint_type_list_with_parens fmt ty_list = + let open Stdlib.Format in + let rec print_types fmt = function + | [] -> () + | [ ty ] -> pprint_type_with_parens_if_tuple fmt ty + | ty :: rest -> + fprintf fmt "%a %a" pprint_type_with_parens_if_tuple ty print_types rest + in + print_types fmt ty_list + +and pprint_type fmt typ = + let open TypeExpr in + let rec is_arrow = function + | Type_arrow _ -> true + | Type_var { contents = Link t } -> is_arrow t + | _ -> false + in + let rec is_tuple = function + | Type_tuple _ -> true + | Type_var { contents = Link t } -> is_tuple t + | _ -> false + in + let open Stdlib.Format in + match typ with + | Type_arrow (t1, t2) when is_arrow t1 -> + fprintf fmt "(%a) -> %a" pprint_type t1 pprint_type t2 + | Type_arrow (t1, t2) -> fprintf fmt "%a -> %a" pprint_type t1 pprint_type t2 + | Type_tuple (t1, t2, tl) -> fprintf fmt - "(%s)" - (String.concat + "%s" + (Base.String.concat ~sep:" * " - (List.map (tye1 :: tye2 :: tyel) ~f:(fun t -> asprintf "%a" pprint_type t))) - | Type_construct (id, tyel) -> - let tyel_str = - String.concat - ~sep:", " - (List.map tyel ~f:(fun t -> - match t with - | Type_var tye -> asprintf "'%s" tye - | Type_tuple (t1, t2, rest) -> - let tuple_types = t1 :: t2 :: rest in - let tuple_str = String.concat ~sep:" * " (List.map tuple_types ~f:show) in - "(" ^ tuple_str ^ ")" - | _ -> show t)) - in - let tyel_strf = - match List.length tyel with - | 0 -> "" - | 1 -> tyel_str ^ " " - | _ -> "(" ^ tyel_str ^ ") " - in - fprintf fmt "%s%s" tyel_strf id + (List.map + ~f:(fun t -> + if is_tuple t || is_arrow t + then asprintf "(%a)" pprint_type t + else asprintf "%a" pprint_type t) + (t1 :: t2 :: tl))) + | Type_var { contents = Unbound (id, _) } -> fprintf fmt "'%s" id + | Type_var { contents = Link t } -> pprint_type fmt t + | Quant_type_var id -> fprintf fmt "'%s" id + | Type_construct (name, []) -> fprintf fmt "%s" name + | Type_construct (name, ty_list) -> + fprintf fmt "%a %s" pprint_type_list_with_parens ty_list name + +and pprint_type_with_parens_if_tuple fmt ty = + let open Stdlib.Format in + match ty with + | Type_tuple _ -> fprintf fmt "(%a)" pprint_type ty + | _ -> pprint_type fmt ty +;; + +let filter_env (env : (ident * TypeExpr.t) list) (names : ident list) = + List.fold_left + ~f:(fun acc name -> + match Stdlib.List.assoc_opt name env, Stdlib.List.assoc_opt name acc with + | Some ty, None -> (name, ty) :: acc + | _ -> acc) + ~init:[] + names +;; + +let pprint_env env names = + let open Stdlib.Format in + let new_env = filter_env env names in + List.iter + ~f:(fun (key, typ) -> + if + String.length key > 0 + && Stdlib.Char.code key.[0] >= 65 + && Stdlib.Char.code key.[0] <= 90 + then () + else if String.equal key "-" + then printf "%s : %a\n" key pprint_type typ + else ( + let typ = rearr_typvars typ in + printf "val %s : %a\n" key pprint_type typ)) + new_env ;; let rec pprint_pattern fmt = @@ -256,29 +366,6 @@ let pprint_structure_item fmt n = asprintf "%a" (fun fmt -> pprint_value_binding fmt n) vb)) in fprintf fmt "let %a%s;;\n\n" pprint_rec rec_flag bindings_str - | Str_adt (tparam, id, (constr1, constrl)) -> - let tparam_ident_str = - match List.length tparam with - | 0 -> "" - | 1 -> asprintf "'%s " (List.hd_exn tparam) - | _ -> - "('" - ^ String.concat ~sep:", '" (List.map tparam ~f:(fun param -> asprintf "%s" param)) - ^ ") " - in - let var_t_str = - match constr1 :: constrl with - | [] -> "" - | _ -> - " | " - ^ String.concat - ~sep:"\n | " - (List.map (constr1 :: constrl) ~f:(fun (id, typ) -> - match typ with - | Some t -> asprintf "%s of %a" id pprint_type t - | None -> asprintf "%s" id)) - in - fprintf fmt "type %s%s =\n%s\n;;\n\n" tparam_ident_str id var_t_str ;; let pprint_program fmt = List.iter ~f:(pprint_structure_item fmt 0) diff --git a/XML/lib/common/pprinter.mli b/XML/lib/common/pprinter.mli index c709f7d2..4bc58c7e 100644 --- a/XML/lib/common/pprinter.mli +++ b/XML/lib/common/pprinter.mli @@ -6,6 +6,7 @@ open Ast val pprint_constant : Format.formatter -> Constant.t -> unit val pprint_type : Format.formatter -> TypeExpr.t -> unit +val pprint_env : (ident * TypeExpr.t) list -> ident list -> unit val pprint_pattern : Format.formatter -> Pattern.t -> unit val pprint_rec : Format.formatter -> Expression.rec_flag -> unit val pprint_expression : Format.formatter -> int -> Expression.t -> unit diff --git a/XML/lib/middleend/anf.ml b/XML/lib/middleend/anf.ml index fc1379b3..e5a2f332 100644 --- a/XML/lib/middleend/anf.ml +++ b/XML/lib/middleend/anf.ml @@ -11,6 +11,7 @@ open Common.Ast type im_expr = | Imm_num of int | Imm_ident of ident +[@@deriving show { with_path = false }] (* Complex/Computable expression *) type comp_expr = @@ -24,16 +25,19 @@ type comp_expr = im_expr list (* Allocate a memory block and initialize it with values. *) | Comp_load of im_expr * int (* Load a value from memory: Comp_load(address, byte_offset). *) +[@@deriving show { with_path = false }] and anf_expr = | Anf_comp_expr of comp_expr | Anf_let of rec_flag * ident * comp_expr * anf_expr +[@@deriving show { with_path = false }] type astructure_item = | Anf_str_eval of anf_expr | Anf_str_value of rec_flag * ident * anf_expr +[@@deriving show { with_path = false }] -type aprogram = astructure_item list +type aprogram = astructure_item list [@@deriving show { with_path = false }] type anf_error = [ `Only_simple_var_params @@ -258,7 +262,6 @@ let norm_item (item : structure_item) (st : nstate) : (astructure_item * nstate) let* body_anf, st' = norm_body expr st in ok (Anf_str_value (rec_flag, name, body_anf), st') | _ -> err `Unsupported_toplevel_let) - | _ -> err `Unsupported_toplevel_item ;; let anf_program_res (program : structure_item list) : (aprogram, anf_error) result = diff --git a/XML/lib/middleend/anf.mli b/XML/lib/middleend/anf.mli index c545c016..9a1c2fc9 100644 --- a/XML/lib/middleend/anf.mli +++ b/XML/lib/middleend/anf.mli @@ -32,7 +32,7 @@ type astructure_item = | Anf_str_eval of anf_expr | Anf_str_value of rec_flag * ident * anf_expr -type aprogram = astructure_item list +type aprogram = astructure_item list [@@deriving show { with_path = false }] type anf_error = [ `Only_simple_var_params diff --git a/XML/lib/middleend/cc.ml b/XML/lib/middleend/cc.ml index 6e76fb55..3b0515d5 100644 --- a/XML/lib/middleend/cc.ml +++ b/XML/lib/middleend/cc.ml @@ -148,6 +148,7 @@ let rec closure_expr toplvl_set env expr = in let free_vars = free_vars_in fun_bound_vars body in let captured_vars = SSet.diff free_vars toplvl_set in + (* let captured_vars = SSet.filter (fun v -> not (List.mem v std_lib_names)) free_vars in *) let captured_vars_list = SSet.elements captured_vars in let new_pats_for_capture = List.map (fun v -> Pat_var v) captured_vars_list in let saturated_patterns = new_pats_for_capture @ patterns in @@ -209,6 +210,9 @@ and transform_bindings toplvl_set env rec_flag bindings = in let free_vars = free_vars_in bound_for_body body in let captured_vars = SSet.diff free_vars toplvl_set in + (* let captured_vars = + SSet.filter (fun v -> not (List.mem v std_lib_names)) free_vars + in *) let captured_vars_list = SSet.elements captured_vars in let new_pats_for_capture = List.map (fun v -> Pat_var v) captured_vars_list in let saturated_patterns = new_pats_for_capture @ patterns in @@ -278,7 +282,6 @@ let closure_structure_item_result toplvl_set = function (match new_bindings with | [] -> Error Empty_toplevel_let | hd :: tl -> Ok (Str_value (rec_flag, (hd, tl)), new_toplvl_set)) - | Str_adt _ as item -> Ok (item, toplvl_set) ;; let closure_structure_item toplvl_set item = diff --git a/XML/lib/middleend/dune b/XML/lib/middleend/dune index 35a9e47c..9bd1699a 100644 --- a/XML/lib/middleend/dune +++ b/XML/lib/middleend/dune @@ -1,7 +1,7 @@ (library (name middleend) (public_name XML.Middleend) - (modules Anf Pprinter Cc Ll) + (modules Anf Pprinter Cc Ll Infer) (libraries angstrom base stdio XML.Common) (preprocess (pps ppx_deriving.show ppx_deriving.eq ppx_deriving_qcheck)) diff --git a/XML/lib/middleend/infer.ml b/XML/lib/middleend/infer.ml new file mode 100644 index 00000000..781b277a --- /dev/null +++ b/XML/lib/middleend/infer.ml @@ -0,0 +1,523 @@ +(** Copyright 2026, Mikhail Gavrilenko, Danila Rudnev-Stepanyan, Daniel Vlasenko*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Common.Ast +open Common.Ast.Expression +open Common.Ast.Structure +open Common.Ast.Pattern +open Common.Ast.TypeExpr +open Common.Pprinter + +type error = + | Occurs_check + | Cannot_unify of TypeExpr.t * TypeExpr.t + | Cannot_unify_tuple_size + | Cannot_unify_constructors of string * string + | Cannot_unify_quantified + | Unbound_variable of string + | Operator_not_found of string + | Invalid_let_rec_rhs + | Invalid_let_rec_lhs + +let pprint_err ppf = function + | Occurs_check -> Format.fprintf ppf "Occurs check" + | Cannot_unify (t1, t2) -> + Format.fprintf ppf "Cannot unify types: %a and %a" pprint_type t1 pprint_type t2 + | Cannot_unify_tuple_size -> Format.fprintf ppf "Cannot unify tuples of different sizes" + | Cannot_unify_constructors (c1, c2) -> + Format.fprintf ppf "Cannot unify different constructors: %s and %s" c1 c2 + | Cannot_unify_quantified -> Format.fprintf ppf "Cannot unify quantified variable" + | Unbound_variable id -> Format.fprintf ppf "Unbound variable %s" id + | Operator_not_found op -> Format.fprintf ppf "Operator not found: %s" op + | Invalid_let_rec_rhs -> + Format.fprintf + ppf + "This kind of expression is not allowed as right-hand side of `let rec'" + | Invalid_let_rec_lhs -> + Format.fprintf ppf "Only variables are allowed as left-hand side of `let rec'" +;; + +type 'a t = ('a, error) result + +let return x = Ok x +let fail e = Error e +let ( let* ) = Result.bind +let current_level = ref 0 +let enter_level () = incr current_level +let leave_level () = decr current_level + +let rec occurs_check tv = function + | Type_var tv' when tv == tv' -> fail Occurs_check + | Type_var ({ contents = Unbound (name, l) } as tv') -> + let min_lvl = + match !tv with + | Unbound (_, l') -> min l l' + | _ -> l + in + tv' := Unbound (name, min_lvl); + return () + | Type_var { contents = Link t } -> occurs_check tv t + | Type_arrow (t1, t2) -> + let* () = occurs_check tv t1 in + let* () = occurs_check tv t2 in + return () + | Type_tuple (t1, t2, tl) -> + List.fold_left + (fun acc t -> + let* () = acc in + occurs_check tv t) + (return ()) + (t1 :: t2 :: tl) + | Type_construct (_, lst) -> + List.fold_left + (fun acc t -> + let* () = acc in + occurs_check tv t) + (return ()) + lst + | _ -> return () +;; + +let rec unify t1 t2 = + match t1, t2 with + | t1, t2 when t1 == t2 -> return () + | Type_var { contents = Link t1 }, t2 | t1, Type_var { contents = Link t2 } -> + unify t1 t2 + | Type_var ({ contents = Unbound _ } as tv), t' + | t', Type_var ({ contents = Unbound _ } as tv) -> + let* () = occurs_check tv t' in + tv := Link t'; + return () + | Type_arrow (l1, l2), Type_arrow (r1, r2) -> + let* () = unify l1 r1 in + unify l2 r2 + | Type_tuple (l1, l2, ltl), Type_tuple (r1, r2, rtl) -> + if List.length ltl <> List.length rtl + then fail Cannot_unify_tuple_size + else + List.fold_left2 + (fun acc l r -> + let* () = acc in + unify l r) + (return ()) + (l1 :: l2 :: ltl) + (r1 :: r2 :: rtl) + | Type_construct (lc, llst), Type_construct (rc, rlst) -> + if lc <> rc + then fail (Cannot_unify_constructors (lc, rc)) + else + List.fold_left2 + (fun acc l r -> + let* () = acc in + unify l r) + (return ()) + llst + rlst + | Quant_type_var _, _ | _, Quant_type_var _ -> fail Cannot_unify_quantified + | _ -> fail (Cannot_unify (t1, t2)) +;; + +let rec generalize = function + | Type_var { contents = Unbound (name, l) } when l >= !current_level -> + Quant_type_var name + | Type_var { contents = Link ty } -> generalize ty + | Type_arrow (ty1, ty2) -> Type_arrow (generalize ty1, generalize ty2) + | Type_tuple (t1, t2, tl) -> + Type_tuple (generalize t1, generalize t2, List.map generalize tl) + | Type_construct (c, lst) -> Type_construct (c, List.map generalize lst) + | ty -> ty +;; + +type env = (ident * TypeExpr.t) list + +let gensym_counter = ref 0 +let reset_gensym : unit -> unit = fun () -> gensym_counter := 0 + +let gensym : unit -> string = + fun () -> + let n = !gensym_counter in + let () = incr gensym_counter in + if n < 26 then String.make 1 (Char.chr (Char.code 'a' + n)) else "t" ^ string_of_int n +;; + +let newvar () = Type_var (ref (Unbound (gensym (), !current_level))) + +let inst = + let rec loop subst = function + | Quant_type_var name -> + (match List.assoc_opt name subst with + | Some typ -> typ, subst + | None -> + let tv = newvar () in + tv, (name, tv) :: subst) + | Type_var { contents = Link ty } -> loop subst ty + | Type_arrow (t1, t2) -> + let t1', subst = loop subst t1 in + let t2', subst = loop subst t2 in + Type_arrow (t1', t2'), subst + | Type_tuple (t1, t2, tl) -> + let t1', subst = loop subst t1 in + let t2', subst = loop subst t2 in + let tl'_rev, subst = + List.fold_left + (fun (acc, subst) t -> + let t', subst = loop subst t in + t' :: acc, subst) + ([], subst) + tl + in + let tl' = List.rev tl'_rev in + Type_tuple (t1', t2', tl'), subst + | Type_construct (constr, lst) -> + let lst'_rev, subst = + List.fold_left + (fun (acc, subst) t -> + let t', subst = loop subst t in + t' :: acc, subst) + ([], subst) + lst + in + let lst' = List.rev lst'_rev in + Type_construct (constr, lst'), subst + | ty -> ty, subst + in + fun ty -> fst (loop [] ty) +;; + +let rec infer_pat env = function + | Pat_any -> + let fresh = newvar () in + return (env, fresh) + | Pat_var id -> + let fresh = newvar () in + let new_env = (id, fresh) :: env in + return (new_env, fresh) + | Pat_constant const -> + (match const with + | Const_char _ -> return (env, Type_construct ("char", [])) + | Const_integer _ -> return (env, Type_construct ("int", [])) + | Const_string _ -> return (env, Type_construct ("string", []))) + | Pat_tuple (p1, p2, ptl) -> + let* new_env, ty1 = infer_pat env p1 in + let* new_env1, ty2 = infer_pat new_env p2 in + let* new_env2, tytl = + List.fold_left + (fun acc exp -> + let* eacc, tacc = acc in + let* curr_env, ty = infer_pat eacc exp in + return (curr_env, ty :: tacc)) + (return (new_env1, [])) + ptl + in + return (new_env2, Type_tuple (ty1, ty2, List.rev tytl)) + | Pat_construct (name, pat) -> + let ty = List.assoc name env in + let inst_ty = inst ty in + (match inst_ty, pat with + | Type_arrow (arg, body), Some p -> + let* new_env, new_ty = infer_pat env p in + let* () = unify arg new_ty in + return (new_env, body) + | _ -> return (env, inst_ty)) + | Pat_constraint (p, ty) -> + let* new_env, new_ty = infer_pat env p in + let* () = unify ty new_ty in + return (new_env, new_ty) +;; + +let add_rec_names env vb_list = + List.fold_left + (fun cenv { pat; _ } -> + let* cenv = cenv in + match pat with + | Pat_var id | Pat_constraint (Pat_var id, _) -> + let* ncenv, typ_p = infer_pat cenv pat in + return ((id, typ_p) :: ncenv) + | _ -> fail Invalid_let_rec_lhs) + (return env) + vb_list +;; + +let rec get_pat_names acc = function + | Pat_var id -> id :: acc + | Pat_tuple (pat1, pat2, rest) -> + Base.List.fold_left ~f:get_pat_names ~init:acc (pat1 :: pat2 :: rest) + | Pat_construct ("Some", Some pat) -> get_pat_names acc pat + | Pat_constraint (pat, _) -> get_pat_names acc pat + | _ -> acc +;; + +let rec infer_vb env { pat; expr } = + (* we don't need local names *) + let* _, typ_e = infer_exp env expr in + let* new_env, typ_p = infer_pat env pat in + let* () = unify typ_p typ_e in + let pat_names = get_pat_names [] pat in + let new_env1 = + List.fold_left + (fun env name -> + let typ = List.assoc name env in + let env = List.remove_assoc name env in + (name, generalize typ) :: env) + new_env + pat_names + in + return new_env1 + +and infer_vb_rec env { pat; expr } = + match pat with + | Pat_var id | Pat_constraint (Pat_var id, _) -> + let* new_env, typ_p = infer_pat env pat in + let new_env = (id, typ_p) :: new_env in + let* new_env1, typ_e = + match expr with + | Exp_ident eid when id = eid -> fail Invalid_let_rec_rhs + | _ -> infer_exp new_env expr + in + let* () = unify typ_p typ_e in + let pat_names = get_pat_names [] pat in + let new_env2 = + List.fold_left + (fun env name -> + let typ = List.assoc name env in + let env = List.remove_assoc name env in + (name, generalize typ) :: env) + new_env1 + pat_names + in + return new_env2 + | _ -> fail Invalid_let_rec_lhs + +and infer_exp env = function + | Exp_ident id -> + (match List.assoc_opt id env with + | Some ty -> return (env, inst ty) + | None -> fail (Unbound_variable id)) + | Exp_constant const -> + (match const with + | Const_char _ -> return (env, Type_construct ("char", [])) + | Const_integer _ -> return (env, Type_construct ("int", [])) + | Const_string _ -> return (env, Type_construct ("string", []))) + | Exp_fun ((pat, pats), exp) -> + let* new_env, typ_p = infer_pat env pat in + let* newest_env, typ_exp = + match pats with + | hd :: tl -> infer_exp new_env (Exp_fun ((hd, tl), exp)) + | [] -> infer_exp new_env exp + in + return (newest_env, Type_arrow (typ_p, typ_exp)) + | Exp_apply (Exp_ident op, Exp_tuple (exp1, exp2, [])) -> + (match op with + | "*" | "/" | "+" | "-" | "<" | ">" | "=" | "<>" | "<=" | ">=" | "&&" | "||" -> + let* new_env, typ1 = infer_exp env exp1 in + let* new_env1, typ2 = infer_exp new_env exp2 in + let* arg_typ, res_typ = + match List.assoc_opt op env with + | Some (Type_arrow (arg, Type_arrow (_, res))) -> return (inst arg, inst res) + | _ -> fail (Operator_not_found op) + in + let* () = unify typ1 arg_typ in + let* () = unify typ2 arg_typ in + return (new_env1, res_typ) + | _ -> + let* new_env, typ_op = infer_exp env (Exp_ident op) in + let* new_env1, typ_args = infer_exp new_env (Exp_tuple (exp1, exp2, [])) in + let typ_res = newvar () in + let* () = unify typ_op (Type_arrow (typ_args, typ_res)) in + return (new_env1, typ_res)) + | Exp_apply (Exp_ident "-", arg) -> + let* new_env1, typ_arg = infer_exp env arg in + let* () = unify typ_arg (Type_construct ("int", [])) in + return (new_env1, Type_construct ("int", [])) + | Exp_apply (f, arg) -> + let* new_env, typ_f = infer_exp env f in + let* new_env1, typ_arg = infer_exp new_env arg in + let typ_res = newvar () in + let* () = unify typ_f (Type_arrow (typ_arg, typ_res)) in + return (new_env1, typ_res) + | Exp_construct (name, Some exp) -> infer_exp env (Exp_apply (Exp_ident name, exp)) + | Exp_construct (name, None) -> infer_exp env (Exp_ident name) + | Exp_tuple (e1, e2, etl) -> + let* new_env, ty1 = infer_exp env e1 in + let* new_env1, ty2 = infer_exp new_env e2 in + let* new_env2, tytl = + List.fold_left + (fun acc exp -> + let* eacc, tacc = acc in + let* curr_env, ty = infer_exp eacc exp in + return (curr_env, ty :: tacc)) + (return (new_env1, [])) + etl + in + return (new_env2, Type_tuple (ty1, ty2, List.rev tytl)) + | Exp_if (cond, the, els) -> + let* new_env, ty1 = infer_exp env cond in + let* () = unify ty1 (Type_construct ("bool", [])) in + let* new_env1, ty2 = infer_exp new_env the in + (match els with + | None -> + let* () = unify ty2 (Type_construct ("unit", [])) in + return (new_env1, ty2) + | Some els -> + let* new_env, ty3 = infer_exp new_env1 els in + let* () = unify ty2 ty3 in + return (new_env, ty3)) + | Exp_let (Nonrecursive, (vb, vbs), exprb) -> + enter_level (); + let* new_env = + List.fold_left + (fun env bind -> + let* env = env in + infer_vb env bind) + (return env) + (vb :: vbs) + in + leave_level (); + infer_exp new_env exprb + | Exp_let (Recursive, (vb, vbs), exprb) -> + let new_env = add_rec_names env (vb :: vbs) in + enter_level (); + let* new_env1 = + List.fold_left + (fun env bind -> + let* env = env in + infer_vb_rec env bind) + new_env + (vb :: vbs) + in + leave_level (); + infer_exp new_env1 exprb + | Exp_match (expr, (case, rest)) -> + let* new_env, typ_main = infer_exp env expr in + let fresh = newvar () in + let* typ_res = + List.fold_left + (fun acc_typ curr_case -> + let* acc_typ = acc_typ in + let pat_names = get_pat_names [] curr_case.first in + let* pat_env, typ_pat = infer_pat new_env curr_case.first in + let* () = unify typ_pat typ_main in + let* pat_env = + List.fold_left + (fun env name -> + let* env = env in + let typ = List.assoc name env in + let env = List.remove_assoc name env in + return ((name, generalize typ) :: env)) + (return pat_env) + pat_names + in + let* _, typ_exp = infer_exp pat_env curr_case.second in + let* () = unify acc_typ typ_exp in + return acc_typ) + (return fresh) + (case :: rest) + in + return (new_env, typ_res) + | Exp_function (case, rest) -> + let fresh_p = newvar () in + let fresh_e = newvar () in + let* typ_res = + List.fold_left + (fun acc_typ curr_case -> + let* acc_typ = acc_typ in + let* env_pat, typ_pat = infer_pat env curr_case.first in + let* () = unify typ_pat fresh_p in + let* _, typ_exp = infer_exp env_pat curr_case.second in + let* () = unify acc_typ typ_exp in + return acc_typ) + (return fresh_e) + (case :: rest) + in + return (env, Type_arrow (fresh_p, typ_res)) + | Exp_constraint (e, ty) -> + let* new_env, new_ty = infer_exp env e in + let* () = unify ty new_ty in + return (new_env, new_ty) +;; + +let infer_structure_item env = function + | Str_eval exp -> + let* _, typ = infer_exp env exp in + return (("-", typ) :: env, []) + | Str_value (Nonrecursive, (vb, vbs)) -> + let new_names = + List.fold_left (fun names { pat; _ } -> get_pat_names names pat) [] (vb :: vbs) + in + let* new_env = + List.fold_left + (fun env bind -> + let* env = env in + infer_vb env bind) + (return env) + (vb :: vbs) + in + return (new_env, new_names) + | Str_value (Recursive, (vb, vbs)) -> + let new_names = + List.fold_left (fun names { pat; _ } -> get_pat_names names pat) [] (vb :: vbs) + in + let new_env = add_rec_names env (vb :: vbs) in + let* new_env1 = + List.fold_left + (fun env bind -> + let* env = env in + infer_vb_rec env bind) + new_env + (vb :: vbs) + in + return (new_env1, new_names) +;; + +let infer_program env prog = + let* new_env, new_names = + List.fold_left + (fun acc str_item -> + let* env, names = acc in + let* new_env, new_names = infer_structure_item env str_item in + return (new_env, new_names @ names)) + (return (env, [])) + prog + in + return (new_env, new_names) +;; + +let env_with_things = + let type_bool = Type_construct ("bool", []) in + let type_unit = Type_construct ("unit", []) in + let type_int = Type_construct ("int", []) in + let things_list = + [ "||", Type_arrow (type_bool, Type_arrow (type_bool, type_bool)) + ; "&&", Type_arrow (type_bool, Type_arrow (type_bool, type_bool)) + ; "print_int", Type_arrow (type_int, type_unit) + ; "print_gc_status", Type_arrow (type_unit, type_unit) + ; "collect", Type_arrow (type_unit, type_unit) + ; "alloc_block", Type_arrow (type_int, type_unit) + ; "+", Type_arrow (type_int, Type_arrow (type_int, type_int)) + ; "-", Type_arrow (type_int, Type_arrow (type_int, type_int)) + ; "*", Type_arrow (type_int, Type_arrow (type_int, type_int)) + ; "/", Type_arrow (type_int, Type_arrow (type_int, type_int)) + ; "=", Type_arrow (Quant_type_var "a", Type_arrow (Quant_type_var "a", type_bool)) + ; "<>", Type_arrow (Quant_type_var "a", Type_arrow (Quant_type_var "a", type_bool)) + ; "<", Type_arrow (Quant_type_var "a", Type_arrow (Quant_type_var "a", type_bool)) + ; "<=", Type_arrow (Quant_type_var "a", Type_arrow (Quant_type_var "a", type_bool)) + ; ">", Type_arrow (Quant_type_var "a", Type_arrow (Quant_type_var "a", type_bool)) + ; ">=", Type_arrow (Quant_type_var "a", Type_arrow (Quant_type_var "a", type_bool)) + ; "None", Type_construct ("option", [ Quant_type_var "a" ]) + ; ( "Some" + , Type_arrow (Quant_type_var "a", Type_construct ("option", [ Quant_type_var "a" ])) + ) + ; "true", type_bool + ; "false", type_bool + ; "()", type_unit + ; "[]", Type_construct ("list", [ Quant_type_var "a" ]) + ; ( "::" + , Type_arrow + ( Type_tuple + (Quant_type_var "a", Type_construct ("list", [ Quant_type_var "a" ]), []) + , Type_construct ("list", [ Quant_type_var "a" ]) ) ) + ] + in + things_list +;; diff --git a/XML/lib/middleend/infer.mli b/XML/lib/middleend/infer.mli new file mode 100644 index 00000000..ff6e0cfb --- /dev/null +++ b/XML/lib/middleend/infer.mli @@ -0,0 +1,45 @@ +(** Copyright 2026, Mikhail Gavrilenko, Danila Rudnev-Stepanyan, Daniel Vlasenko*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Common.Ast + +type error + +val pprint_err : Format.formatter -> error -> unit + +(** resets counter for type variables *) +val reset_gensym : unit -> unit + +(** [infer_exp env exp] infers type of the expression [exp] in the environment [env] and returns + updated environment and type of [exp] *) +val infer_exp + : (ident * TypeExpr.t) list + -> Expression.t + -> ((ident * TypeExpr.t) list * TypeExpr.t, error) result + +(** [infer_pat env pat] infers type of the pattern [pat] in the environment [env] and returns + updated environment and type of [pat] *) +val infer_pat + : (ident * TypeExpr.t) list + -> Pattern.t + -> ((ident * TypeExpr.t) list * TypeExpr.t, error) result + +(** [infer_structure_item env item] infers type of the item [item] in the environment [env] and returns + updated environment and new names *) +val infer_structure_item + : (ident * TypeExpr.t) list + -> Structure.structure_item + -> ((ident * TypeExpr.t) list * ident list, error) result + +(** [infer_program env prog] infers all types in program [prog] with initial environment [env] and returns + updated environment and names of all new global identificators + + for basic environment, use [env_with_things] *) +val infer_program + : (ident * TypeExpr.t) list + -> Structure.structure_item list + -> ((ident * TypeExpr.t) list * ident list, error) result + +(** [env_with_things] is the basic environment that contains built-in functions and constructors *) +val env_with_things : (ident * TypeExpr.t) list diff --git a/XML/lib/middleend/ll.ml b/XML/lib/middleend/ll.ml index b069a7c3..602bd189 100644 --- a/XML/lib/middleend/ll.ml +++ b/XML/lib/middleend/ll.ml @@ -43,18 +43,13 @@ let occurs_im x = function ;; let rec escapes_comp x = function - | Comp_imm i -> occurs_im x i - | Comp_binop (_op, a, b) -> occurs_im x a || occurs_im x b - | Comp_app (f, args) -> - List.exists (occurs_im x) args - || - (match f with - | Imm_ident y when String.equal y x -> false - | _ -> occurs_im x f) - | Comp_branch (c, t, e) -> occurs_im x c || escapes_anf x t || escapes_anf x e - | Comp_func (_ps, body) -> SSet.mem x (fv_anf body) - | Comp_tuple is | Comp_alloc is -> List.exists (occurs_im x) is - | Comp_load (addr, _off) -> occurs_im x addr + | Comp_func (ps, body) -> (not (List.mem x ps)) && SSet.mem x (fv_anf body) + | Comp_branch (_c, t, e) -> escapes_anf x t || escapes_anf x e + | Comp_app (_, _) + | Comp_binop (_, _, _) + | Comp_tuple _ | Comp_alloc _ + | Comp_load (_, _) + | Comp_imm _ -> false and escapes_anf x = function | Anf_comp_expr ce -> escapes_comp x ce @@ -104,7 +99,15 @@ let rec lift_anf (env : ctx) (n : supply) (e : anf_expr) (Nonrecursive, lifted_name, Anf_comp_expr (Comp_func (fvs @ ps, fbody'))) in let (body', defs_e2), n3 = lift_anf env_body n2 body in - (body', defs_body @ (def_item :: defs_e2)), n3) + (* (body', defs_body @ (def_item :: defs_e2)), n3 *) + let new_body = + if SSet.mem x (fv_anf body') + then + (* create a closure with old name *) + Anf_let (Nonrecursive, x, Comp_imm (Imm_ident lifted_name), body') + else body' + in + (new_body, defs_body @ (def_item :: defs_e2)), n3) | Comp_imm (Imm_ident y) -> (match SMap.find_opt y env with | Some (lf, fvs) -> diff --git a/XML/many_tests/codegen.t b/XML/many_tests/codegen.t index 105b5591..d6636ac5 100644 --- a/XML/many_tests/codegen.t +++ b/XML/many_tests/codegen.t @@ -1352,7 +1352,7 @@ $ qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64 ./prog.exe 30 - $ ../bin/XML.exe -o tuple_linked_list.s < let rec sum_list lst = > if lst = 0 then 0 else > let (head, tail) = lst in @@ -1409,7 +1409,7 @@ $ qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64 ./prog.exe 42 - $ ../bin/XML.exe -o tuple_gc_stress.s < let rec make_list n acc = > if n = 0 then acc else > make_list (n - 1) (n, acc) diff --git a/XML/many_tests/codegen_llvm.t b/XML/many_tests/codegen_llvm.t new file mode 100644 index 00000000..d869594b --- /dev/null +++ b/XML/many_tests/codegen_llvm.t @@ -0,0 +1,353 @@ + $ clang-18 --target=riscv64-linux-gnu --sysroot=/usr/riscv64-unknown-linux-gnu -c ./../bin/runtime.c -o runtime.o + +====================== Factorial ====================== + + $ dune exec ./../bin/XML_llvm.exe -- -o factorial.ll < let rec fac n = if n = 0 then 1 else n * fac (n - 1) + > + > let main = print_int (fac 4) + + $ cat factorial.ll + ; ModuleID = 'main' + source_filename = "main" + target triple = "riscv64-unknown-linux-gnu" + + declare void @print_int(i64) + + declare i64 @alloc_block(i64) + + declare i64 @alloc_closure(i64, i64) + + declare i64 @apply1(i64, i64) + + declare void @print_gc_status() + + declare void @collect() + + declare i64 @create_tuple(i64) + + declare i64 @create_tuple_init(i64, i64) + + declare i64 @field(i64, i64) + + declare void @rt_init(i64) + + define i64 @fac(i64 %n) { + entry: + %t_4 = alloca i64, align 8 + %t_3 = alloca i64, align 8 + %t_2 = alloca i64, align 8 + %t_1 = alloca i64, align 8 + %t_0 = alloca i64, align 8 + %n1 = alloca i64, align 8 + store i64 %n, ptr %n1, align 8 + %n2 = load i64, ptr %n1, align 8 + %eqtmp = icmp eq i64 %n2, 1 + %eqtmp_as_i64 = zext i1 %eqtmp to i64 + store i64 %eqtmp_as_i64, ptr %t_0, align 8 + %t_03 = load i64, ptr %t_0, align 8 + %cond = icmp ne i64 %t_03, 0 + br i1 %cond, label %then, label %else + + then: ; preds = %entry + br label %ifcont + + else: ; preds = %entry + %n4 = load i64, ptr %n1, align 8 + %subtmp1 = sub i64 %n4, 3 + %subtmp2 = add i64 %subtmp1, 1 + store i64 %subtmp2, ptr %t_1, align 8 + %t_15 = load i64, ptr %t_1, align 8 + %calltmp = call i64 @fac(i64 %t_15) + store i64 %calltmp, ptr %t_2, align 8 + %n6 = load i64, ptr %n1, align 8 + %t_27 = load i64, ptr %t_2, align 8 + %multmp1 = lshr i64 %n6, 1 + %multmp2 = sub i64 %t_27, 1 + %multmp3 = mul i64 %multmp1, %multmp2 + %multmp4 = add i64 %multmp3, 1 + store i64 %multmp4, ptr %t_3, align 8 + %t_38 = load i64, ptr %t_3, align 8 + br label %ifcont + + ifcont: ; preds = %else, %then + %iftmp = phi i64 [ 3, %then ], [ %t_38, %else ] + store i64 %iftmp, ptr %t_4, align 8 + %t_49 = load i64, ptr %t_4, align 8 + ret i64 %t_49 + } + + define i64 @main() { + entry: + %main = alloca i64, align 8 + %t_7 = alloca i64, align 8 + %t_6 = alloca i64, align 8 + call void @rt_init(i64 5120) + %calltmp = call i64 @fac(i64 9) + store i64 %calltmp, ptr %t_6, align 8 + %t_61 = load i64, ptr %t_6, align 8 + call void @print_int(i64 %t_61) + store i64 0, ptr %t_7, align 8 + %t_72 = load i64, ptr %t_7, align 8 + store i64 %t_72, ptr %main, align 8 + call void @collect() + ret i64 0 + } + + $ llc-18 factorial.ll -o factorial.s + $ clang-18 --target=riscv64-linux-gnu factorial.s runtime.o -o factorial.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./factorial.exe + 24 + +====================== Fibonacci ====================== + $ dune exec -- ../bin/XML_llvm.exe -o fibonacci.ll < let rec fib n = if n <= 1 then n else fib (n - 1) + fib (n - 2) + > + > let main = print_int (fib 6) + + + $ llc-18 fibonacci.ll -o fibonacci.s + $ clang-18 --target=riscv64-linux-gnu -static fibonacci.s runtime.o -o fibonacci.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./fibonacci.exe + 8 + +====================== Ififif ====================== + $ ../bin/XML_llvm.exe -o ififif.ll < let large x = if 0<>x then print_int 0 else print_int 1 + > let main = + > let x = if (if (if 0 = 1 + > then 0 = 1 else (let t42 = print_int 42 in 1 = 1)) + > then 0 else 1) = 1 + > then 0 else 1 in + > large x + + $ llc-18 ififif.ll -o ififif.s + $ clang-18 --target=riscv64-linux-gnu -static ififif.s runtime.o -o ififif.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./ififif.exe + 42 + 0 + +====================== Simple Closure ====================== + $ dune exec -- ../bin/XML_llvm.exe -o closure.ll < let simplesum x y = x + y + > + > let partialapp_sum = simplesum 5 + > + > let main = print_int (partialapp_sum 5) + + $ llc-18 closure.ll -o closure.s + $ clang-18 --target=riscv64-linux-gnu -static closure.s runtime.o -o closure.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./closure.exe + 10 + + +====================== CPS Factorial ====================== + $ ../bin/XML_llvm.exe -fromfile manytests/typed/010faccps_ll.ml -o 010faccps_ll.ll + + $ llc-18 010faccps_ll.ll -o 010faccps_ll.s + $ clang-18 --target=riscv64-linux-gnu -static 010faccps_ll.s runtime.o -o 010faccps_ll.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./010faccps_ll.exe + 24 + + +====================== CPS Fibbo ====================== + $ ../bin/XML_llvm.exe -fromfile manytests/typed/010fibcps_ll.ml -o 010fibcps_ll.ll + + $ llc-18 010fibcps_ll.ll -o 010fibcps_ll.s + $ clang-18 --target=riscv64-linux-gnu -static 010fibcps_ll.s runtime.o -o 010fibcps_ll.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./010fibcps_ll.exe + 8 + + +====================== Other ====================== + + $ ../bin/XML_llvm.exe -fromfile ./manytests/typed/004manyargs.ml -o 004manyargs.ll + + $ llc-18 004manyargs.ll -o 004manyargs.s + $ clang-18 --target=riscv64-linux-gnu -static 004manyargs.s runtime.o -o 004manyargs.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./004manyargs.exe + 1111111111 + 1 + 10 + 100 + + + + $ ../bin/XML_llvm.exe -o tuple_return.ll < let make_pair x y = (x, y) + > + > let main = + > let p = make_pair 10 20 in + > let (a, b) = p in + > print_int (a + b) + + $ llc-18 tuple_return.ll -o tuple_return.s + $ clang-18 --target=riscv64-linux-gnu -static tuple_return.s runtime.o -o tuple_return.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./tuple_return.exe + 30 + + + + $ ../bin/XML_llvm.exe -o tuple_swap.ll < let swap p = + > let (a, b) = p in + > (b, a) + > + > let main = + > let p1 = (1, 2) in + > let p2 = swap p1 in + > let (x, y) = p2 in + > print_int x + + $ llc-18 tuple_swap.ll -o tuple_swap.s + $ clang-18 --target=riscv64-linux-gnu -static tuple_swap.s runtime.o -o tuple_swap.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./tuple_swap.exe + 2 + + + $ ../bin/XML_llvm.exe -o tuple_order.ll < let f n = + > n + > + > let main = + > let t = (f 10, f 20) in + > let (a, b) = t in + > print_int (a + b) + + $ llc-18 tuple_order.ll -o tuple_order.s + $ clang-18 --target=riscv64-linux-gnu -static tuple_order.s runtime.o -o tuple_order.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./tuple_order.exe + 30 + + + $ dune exec -- ../bin/XML_llvm.exe -o tuple_linked_list.ll -notypes < let rec sum_list lst = + > if lst = 0 then 0 else + > let (head, tail) = lst in + > head + sum_list tail + > + > let main = + > let lst = (10, (20, (30, 0))) in + > print_int (sum_list lst) + + + $ llc-18 tuple_linked_list.ll -o tuple_linked_list.s + $ clang-18 --target=riscv64-linux-gnu -static tuple_linked_list.s runtime.o -o tuple_linked_list.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./tuple_linked_list.exe + 60 + + + + $ ../bin/XML_llvm.exe -o tuple_large.ll < let main = + > let t = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) in + > let (a, b, c, d, e, f, g, h, i, j) = t in + > print_int j + + $ llc-18 tuple_large.ll -o tuple_large.s + $ clang-18 --target=riscv64-linux-gnu -static tuple_large.s runtime.o -o tuple_large.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./tuple_large.exe + 10 + + + $ ../bin/XML_llvm.exe -o tuple_basic.ll < let main = + > let t = (10, 20) in + > let (a, b) = t in + > print_int (a + b) + + $ llc-18 tuple_basic.ll -o tuple_basic.s + $ clang-18 --target=riscv64-linux-gnu -static tuple_basic.s runtime.o -o tuple_basic.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./tuple_basic.exe + 30 + + + $ ../bin/XML_llvm.exe -o tuple_nested.ll < let main = + > let complex = (100, (20, 3)) in + > let (a, (b, c)) = complex in + > print_int (a + b + c) + + + $ llc-18 tuple_nested.ll -o tuple_nested.s + $ clang-18 --target=riscv64-linux-gnu -static tuple_nested.s runtime.o -o tuple_nested.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./tuple_nested.exe + 123 + + + $ ../bin/XML_llvm.exe -o tuple_arg.ll < let sum_pair p = + > let (x, y) = p in + > x + y + > + > let main = + > let p = (40, 2) in + > print_int (sum_pair p) + + + $ llc-18 tuple_arg.ll -o tuple_arg.s + $ clang-18 --target=riscv64-linux-gnu -static tuple_arg.s runtime.o -o tuple_arg.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./tuple_arg.exe + 42 + + $ dune exec -- ../bin/XML_llvm.exe -o tuple_gc_stress.ll -notypes < let rec make_list n acc = + > if n = 0 then acc else + > make_list (n - 1) (n, acc) + > + > let main = + > let _ = print_gc_status in + > let result = make_list 10000 0 in + > let (head, tail) = result in + > let _ = print_gc_status in + > print_int head + + $ llc-18 tuple_gc_stress.ll -o tuple_gc_stress.s + $ clang-18 --target=riscv64-linux-gnu -static tuple_gc_stress.s runtime.o -o tuple_gc_stress.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./tuple_gc_stress.exe + === GC Status === + Current allocated: 0 + Free space: 524288 + Heap size: 524288 + Current bank: 0 + Total allocated: 0 + GC collections: 0 + GC allocations: 0 + ================= + === GC Status === + Current allocated: 231552 + Free space: 292736 + Heap size: 524288 + Current bank: 0 + Total allocated: 1280096 + GC collections: 2 + GC allocations: 30002 + ================= + 1 + + $ ../bin/XML_llvm.exe -o user_main.ll -notypes < let main x = x + > + > let a = print_int (main 5) + + $ llc-18 user_main.ll -o temp.s + $ clang-18 --target=riscv64-linux-gnu -static temp.s runtime.o -o temp.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./temp.exe + 5 + + + $ ../bin/XML_llvm.exe -o use_reserved_name.ll -notypes < let some_fun x y = x + y + > let closure_tmp x y = x + y + > + > let a = print_int (some_fun 5 10) + > let b = print_int (closure_tmp 5 10) + > let closure_tmp = 5 + > let c = print_int closure_tmp + + $ llc-18 use_reserved_name.ll -o temp.s + $ clang-18 --target=riscv64-linux-gnu -static temp.s runtime.o -o temp.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./temp.exe + 15 + 15 + 5 diff --git a/XML/many_tests/dune b/XML/many_tests/dune index ad0b05a6..5263fa5e 100644 --- a/XML/many_tests/dune +++ b/XML/many_tests/dune @@ -15,9 +15,10 @@ (inline_tests)) (cram - (applies_to codegen anf cc ll gc) + (applies_to codegen codegen_llvm anf cc ll gc gc_llvm llvm_tweaks infer) (deps ../bin/XML.exe + ../bin/XML_llvm.exe ../bin/runtime.c manytests/do_not_type/001.ml manytests/do_not_type/002if.ml diff --git a/XML/many_tests/gc.t b/XML/many_tests/gc.t index 301204c9..f4234721 100644 --- a/XML/many_tests/gc.t +++ b/XML/many_tests/gc.t @@ -96,7 +96,6 @@ $ qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64 ./prog.exe 7 - $ dune exec ./../bin/XML.exe -- -o gc_oom_block.s < let main = > let _ = alloc_block 10000000 in @@ -106,11 +105,11 @@ $ riscv64-linux-gnu-gcc -c ../bin/runtime.c -o runtime.o $ riscv64-linux-gnu-gcc temp.o runtime.o -o prog.exe $ qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64 ./prog.exe || true - GC: out of memory + GC: out of memory: asked for 160000024 bytes) Aborted (core dumped) - $ ../bin/XML.exe -o tuple_gc_stress.s < let rec make_list n acc = > if n = 0 then acc else > make_list (n - 1) (n, acc) diff --git a/XML/many_tests/gc_llvm.t b/XML/many_tests/gc_llvm.t new file mode 100644 index 00000000..979780e9 --- /dev/null +++ b/XML/many_tests/gc_llvm.t @@ -0,0 +1,153 @@ + $ clang-18 --target=riscv64-linux-gnu --sysroot=/usr/riscv64-unknown-linux-gnu -c ./../bin/runtime.c -o runtime.o + + $ dune exec ./../bin/XML_llvm.exe -- -o gc_smoke.ll < let main = + > let _ = print_gc_status in + > let _ = collect in + > print_gc_status + + $ llc-18 gc_smoke.ll -o gc_smoke.s + $ clang-18 --target=riscv64-linux-gnu --sysroot=/usr/riscv64-unknown-linux-gnu -c ./../bin/runtime.c -o runtime.o + $ clang-18 --target=riscv64-linux-gnu -static gc_smoke.s runtime.o -o gc_smoke.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./gc_smoke.exe + === GC Status === + Current allocated: 0 + Free space: 524288 + Heap size: 524288 + Current bank: 0 + Total allocated: 0 + GC collections: 0 + GC allocations: 0 + ================= + === GC Status === + Current allocated: 0 + Free space: 524288 + Heap size: 524288 + Current bank: 1 + Total allocated: 0 + GC collections: 1 + GC allocations: 0 + ================= + + + + $ dune exec ./../bin/XML_llvm.exe -- -o lots_of_garbage.ll < let rec make_garbage n = + > if n = 0 then 0 else + > let _ = alloc_block 1 in + > make_garbage (n - 1) + > let main = + > let _ = print_gc_status in + > let f = (fun x -> x) in + > let _ = make_garbage 2000 in + > let _ = print_gc_status in + > let _ = collect in + > let _ = print_gc_status in + > print_int (f 8) + + $ llc-18 lots_of_garbage.ll -o lots_of_garbage.s + $ clang-18 --target=riscv64-linux-gnu -static lots_of_garbage.s runtime.o -o lots_of_garbage.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./lots_of_garbage.exe + === GC Status === + Current allocated: 0 + Free space: 524288 + Heap size: 524288 + Current bank: 0 + Total allocated: 0 + GC collections: 0 + GC allocations: 0 + ================= + === GC Status === + Current allocated: 80000 + Free space: 444288 + Heap size: 524288 + Current bank: 0 + Total allocated: 80000 + GC collections: 0 + GC allocations: 2000 + ================= + === GC Status === + Current allocated: 0 + Free space: 524288 + Heap size: 524288 + Current bank: 1 + Total allocated: 80000 + GC collections: 1 + GC allocations: 2000 + ================= + 8 + + + + $ dune exec ./../bin/XML_llvm.exe -- -o keep_block_across_gc.ll < let rec spam n = + > if n = 0 then 0 else + > let _ = (fun z -> z) in + > spam (n - 1) + > + > let make n = + > let b = alloc_block n in + > (fun x -> let _u = b in x) + > + > let main = + > let f = make 10 in + > let _ = spam 4000 in + > let _ = collect in + > print_int (f 7) + + $ llc-18 keep_block_across_gc.ll -o keep_block_across_gc.s + $ clang-18 --target=riscv64-linux-gnu -static keep_block_across_gc.s runtime.o -o keep_block_across_gc.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./keep_block_across_gc.exe + 7 + + + + $ dune exec ./../bin/XML_llvm.exe -- -o gc_oom_block.ll < let main = + > let _ = alloc_block 10000000 in + > print_int 0 + + $ llc-18 gc_oom_block.ll -o gc_oom_block.s + $ clang-18 --target=riscv64-linux-gnu -static gc_oom_block.s runtime.o -o gc_oom_block.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./gc_oom_block.exe + GC: out of memory: asked for 160000024 bytes) + Aborted (core dumped) + [134] + + + + + $ dune exec -- ../bin/XML_llvm.exe -o temp.ll -notypes < let rec make_list n acc = + > if n = 0 then acc else + > make_list (n - 1) (n, acc) + > + > let main = + > let _ = print_gc_status in + > let result = make_list 10000 0 in + > let (head, tail) = result in + > let _ = print_gc_status in + > print_int head + + $ llc-18 temp.ll -o temp.s + $ clang-18 --target=riscv64-linux-gnu -static temp.s runtime.o -o temp.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./temp.exe + === GC Status === + Current allocated: 0 + Free space: 524288 + Heap size: 524288 + Current bank: 0 + Total allocated: 0 + GC collections: 0 + GC allocations: 0 + ================= + === GC Status === + Current allocated: 231552 + Free space: 292736 + Heap size: 524288 + Current bank: 0 + Total allocated: 1280096 + GC collections: 2 + GC allocations: 30002 + ================= + 1 diff --git a/XML/many_tests/infer.t b/XML/many_tests/infer.t new file mode 100644 index 00000000..c97c32d8 --- /dev/null +++ b/XML/many_tests/infer.t @@ -0,0 +1,99 @@ + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/001fac.ml -typedtree + val fac : int -> int + val main : int + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/002fac.ml -typedtree + val fac_cps : int -> (int -> 'a) -> 'a + val main : int + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/003fib.ml -typedtree + val fib_acc : int -> int -> int -> int + val fib : int -> int + val main : int + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/004manyargs.ml -typedtree + val wrap : 'a -> 'a + val test3 : int -> int -> int -> int + val test10 : int -> int -> int -> int -> int -> int -> int -> int -> int -> int -> int + val main : int + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/005fix.ml -typedtree + val fix : (('a -> 'b) -> 'a -> 'b) -> 'a -> 'b + val fac : (int -> int) -> int -> int + val main : int + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/006partial.ml -typedtree + val foo : int -> int + val main : int + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/006partial2.ml -typedtree + val foo : int -> int -> int -> int + val main : int + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/006partial3.ml -typedtree + val foo : int -> int -> int -> unit + val main : int + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/007order.ml -typedtree + val _start : unit -> unit -> int -> unit -> int -> int -> unit -> int -> int -> int + val main : unit + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/008ascription.ml -typedtree + val addi : ('a -> bool -> int) -> ('a -> bool) -> 'a -> int + val main : int + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/009let_poly.ml -typedtree + val temp : int * bool + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/010fac_anf.ml -typedtree + val fac : int -> int + val main : int + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/010faccps_ll.ml -typedtree + val id : 'a -> 'a + val fresh_1 : int -> (int -> 'a) -> int -> 'a + val fac_cps : int -> (int -> 'a) -> 'a + val main : int + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/010fibcps_ll.ml -typedtree + val id : 'a -> 'a + val fresh_2 : int -> (int -> 'a) -> int -> 'a + val fresh_1 : int -> (int -> 'a) -> (int -> (int -> 'a) -> 'b) -> int -> 'b + val fib : int -> (int -> 'a) -> 'a + val main : int + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/011mapcps.ml -typedtree + val map : ('a -> 'b) -> 'a list -> ('b list -> 'c) -> 'c + val iter : ('a -> 'b) -> 'a list -> unit + val main : unit + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/012fibcps.ml -typedtree + val fib : int -> (int -> 'a) -> 'a + val main : unit + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/013foldfoldr.ml -typedtree + val id : 'a -> 'a + val fold_right : ('a -> 'b -> 'b) -> 'b -> 'a list -> 'b + val foldl : ('a -> 'b -> 'a) -> 'a -> 'b list -> 'a + val main : unit + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/015tuples.ml -typedtree + val fix : (('a -> 'b) -> 'a -> 'b) -> 'a -> 'b + val map : ('a -> 'b) -> 'a * 'a -> 'b * 'b + val fixpoly : 'a -> ('b -> 'c) * ('b -> 'c) + val feven : 'a * (int -> int) -> int -> int + val fodd : (int -> int) * 'a -> int -> int + val tie : ('a -> 'b) * ('a -> 'b) + val meven : int -> int + val modd : int -> int + val main : int + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/016lists.ml -typedtree + val length : 'a list -> int + val length_tail : 'a list -> int + val map : ('a -> 'b) -> 'a list -> 'b list + val append : 'a list -> 'a list -> 'a list + val concat : 'a list list -> 'a list + val iter : ('a -> unit) -> 'a list -> unit + val cartesian : 'a list -> 'b list -> ('a * 'b) list + val main : int diff --git a/XML/many_tests/llvm_tweaks.t b/XML/many_tests/llvm_tweaks.t new file mode 100644 index 00000000..32875a50 --- /dev/null +++ b/XML/many_tests/llvm_tweaks.t @@ -0,0 +1,708 @@ + $ clang-18 --target=riscv64-linux-gnu --sysroot=/usr/riscv64-unknown-linux-gnu -c ./../bin/runtime.c -o runtime.o + +====================== O0 ====================== + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/010faccps_ll.ml -o temp.ll -O O0 + + $ cat temp.ll + ; ModuleID = 'main' + source_filename = "main" + target triple = "riscv64-unknown-linux-gnu" + + declare void @print_int(i64) + + declare i64 @alloc_block(i64) + + declare i64 @alloc_closure(i64, i64) + + declare i64 @apply1(i64, i64) + + declare void @print_gc_status() + + declare void @collect() + + declare i64 @create_tuple(i64) + + declare i64 @create_tuple_init(i64, i64) + + declare i64 @field(i64, i64) + + declare void @rt_init(i64) + + define i64 @id(i64 %x) { + entry: + %x1 = alloca i64, align 8 + store i64 %x, ptr %x1, align 8 + %x2 = load i64, ptr %x1, align 8 + ret i64 %x2 + } + + define i64 @fresh_1(i64 %n, i64 %k, i64 %p) { + entry: + %t_2 = alloca i64, align 8 + %t_1 = alloca i64, align 8 + %p3 = alloca i64, align 8 + %k2 = alloca i64, align 8 + %n1 = alloca i64, align 8 + store i64 %n, ptr %n1, align 8 + store i64 %k, ptr %k2, align 8 + store i64 %p, ptr %p3, align 8 + %p4 = load i64, ptr %p3, align 8 + %n5 = load i64, ptr %n1, align 8 + %multmp1 = lshr i64 %p4, 1 + %multmp2 = sub i64 %n5, 1 + %multmp3 = mul i64 %multmp1, %multmp2 + %multmp4 = add i64 %multmp3, 1 + store i64 %multmp4, ptr %t_1, align 8 + %k_val = load i64, ptr %k2, align 8 + %t_16 = load i64, ptr %t_1, align 8 + %apptmp = call i64 @apply1(i64 %k_val, i64 %t_16) + store i64 %apptmp, ptr %t_2, align 8 + %t_27 = load i64, ptr %t_2, align 8 + ret i64 %t_27 + } + + define i64 @fac_cps(i64 %n, i64 %k) { + entry: + %t_11 = alloca i64, align 8 + %t_10 = alloca i64, align 8 + %t_9 = alloca i64, align 8 + %t_8 = alloca i64, align 8 + %t_7 = alloca i64, align 8 + %t_6 = alloca i64, align 8 + %t_5 = alloca i64, align 8 + %t_4 = alloca i64, align 8 + %k2 = alloca i64, align 8 + %n1 = alloca i64, align 8 + store i64 %n, ptr %n1, align 8 + store i64 %k, ptr %k2, align 8 + %n3 = load i64, ptr %n1, align 8 + %eqtmp = icmp eq i64 %n3, 3 + %eqtmp_as_i64 = zext i1 %eqtmp to i64 + store i64 %eqtmp_as_i64, ptr %t_4, align 8 + %t_44 = load i64, ptr %t_4, align 8 + %cond = icmp ne i64 %t_44, 0 + br i1 %cond, label %then, label %else + + then: ; preds = %entry + %k_val = load i64, ptr %k2, align 8 + %apptmp = call i64 @apply1(i64 %k_val, i64 3) + store i64 %apptmp, ptr %t_5, align 8 + %t_55 = load i64, ptr %t_5, align 8 + br label %ifcont + + else: ; preds = %entry + %n6 = load i64, ptr %n1, align 8 + %subtmp1 = sub i64 %n6, 3 + %subtmp2 = add i64 %subtmp1, 1 + store i64 %subtmp2, ptr %t_6, align 8 + %t_67 = load i64, ptr %t_6, align 8 + %closure_tmp = call i64 @alloc_closure(i64 ptrtoint (ptr @fac_cps to i64), i64 2) + %apptmp8 = call i64 @apply1(i64 %closure_tmp, i64 %t_67) + store i64 %apptmp8, ptr %t_7, align 8 + %n9 = load i64, ptr %n1, align 8 + %closure_tmp10 = call i64 @alloc_closure(i64 ptrtoint (ptr @fresh_1 to i64), i64 3) + %apptmp11 = call i64 @apply1(i64 %closure_tmp10, i64 %n9) + store i64 %apptmp11, ptr %t_8, align 8 + %t_8_val = load i64, ptr %t_8, align 8 + %k12 = load i64, ptr %k2, align 8 + %apptmp13 = call i64 @apply1(i64 %t_8_val, i64 %k12) + store i64 %apptmp13, ptr %t_9, align 8 + %t_7_val = load i64, ptr %t_7, align 8 + %t_914 = load i64, ptr %t_9, align 8 + %apptmp15 = call i64 @apply1(i64 %t_7_val, i64 %t_914) + store i64 %apptmp15, ptr %t_10, align 8 + %t_1016 = load i64, ptr %t_10, align 8 + br label %ifcont + + ifcont: ; preds = %else, %then + %iftmp = phi i64 [ %t_55, %then ], [ %t_1016, %else ] + store i64 %iftmp, ptr %t_11, align 8 + %t_1117 = load i64, ptr %t_11, align 8 + ret i64 %t_1117 + } + + define i64 @main() { + entry: + %main = alloca i64, align 8 + %t_15 = alloca i64, align 8 + %t_14 = alloca i64, align 8 + %t_13 = alloca i64, align 8 + call void @rt_init(i64 5120) + %closure_tmp = call i64 @alloc_closure(i64 ptrtoint (ptr @fac_cps to i64), i64 2) + %apptmp = call i64 @apply1(i64 %closure_tmp, i64 9) + store i64 %apptmp, ptr %t_13, align 8 + %t_13_val = load i64, ptr %t_13, align 8 + %closure_tmp1 = call i64 @alloc_closure(i64 ptrtoint (ptr @id to i64), i64 1) + %apptmp2 = call i64 @apply1(i64 %t_13_val, i64 %closure_tmp1) + store i64 %apptmp2, ptr %t_14, align 8 + %t_143 = load i64, ptr %t_14, align 8 + call void @print_int(i64 %t_143) + store i64 0, ptr %t_15, align 8 + store i64 1, ptr %main, align 8 + call void @collect() + ret i64 0 + } + + $ llc-18 temp.ll -o temp.s + $ clang-18 --target=riscv64-linux-gnu -static temp.s runtime.o -o temp.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./temp.exe + 24 + +====================== O1 ====================== + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/010faccps_ll.ml -o temp.ll -O O1 + + $ cat temp.ll + ; ModuleID = 'main' + source_filename = "main" + target triple = "riscv64-unknown-linux-gnu" + + declare void @print_int(i64) local_unnamed_addr + + declare i64 @alloc_closure(i64, i64) local_unnamed_addr + + declare i64 @apply1(i64, i64) local_unnamed_addr + + declare void @collect() local_unnamed_addr + + declare void @rt_init(i64) local_unnamed_addr + + ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) + define i64 @id(i64 returned %x) #0 { + entry: + ret i64 %x + } + + define i64 @fresh_1(i64 %n, i64 %k, i64 %p) { + entry: + %multmp1 = lshr i64 %p, 1 + %multmp2 = add i64 %n, -1 + %multmp3 = mul i64 %multmp1, %multmp2 + %multmp4 = add i64 %multmp3, 1 + %apptmp = tail call i64 @apply1(i64 %k, i64 %multmp4) + ret i64 %apptmp + } + + define i64 @fac_cps(i64 %n, i64 %k) { + entry: + %eqtmp = icmp eq i64 %n, 3 + br i1 %eqtmp, label %then, label %else + + then: ; preds = %entry + %apptmp = tail call i64 @apply1(i64 %k, i64 3) + br label %ifcont + + else: ; preds = %entry + %subtmp2 = add i64 %n, -2 + %closure_tmp = tail call i64 @alloc_closure(i64 ptrtoint (ptr @fac_cps to i64), i64 2) + %apptmp8 = tail call i64 @apply1(i64 %closure_tmp, i64 %subtmp2) + %closure_tmp10 = tail call i64 @alloc_closure(i64 ptrtoint (ptr @fresh_1 to i64), i64 3) + %apptmp11 = tail call i64 @apply1(i64 %closure_tmp10, i64 %n) + %apptmp13 = tail call i64 @apply1(i64 %apptmp11, i64 %k) + %apptmp15 = tail call i64 @apply1(i64 %apptmp8, i64 %apptmp13) + br label %ifcont + + ifcont: ; preds = %else, %then + %iftmp = phi i64 [ %apptmp, %then ], [ %apptmp15, %else ] + ret i64 %iftmp + } + + define noundef i64 @main() local_unnamed_addr { + entry: + tail call void @rt_init(i64 5120) + %closure_tmp = tail call i64 @alloc_closure(i64 ptrtoint (ptr @fac_cps to i64), i64 2) + %apptmp = tail call i64 @apply1(i64 %closure_tmp, i64 9) + %closure_tmp1 = tail call i64 @alloc_closure(i64 ptrtoint (ptr @id to i64), i64 1) + %apptmp2 = tail call i64 @apply1(i64 %apptmp, i64 %closure_tmp1) + tail call void @print_int(i64 %apptmp2) + tail call void @collect() + ret i64 0 + } + + attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } + + $ llc-18 temp.ll -o temp.s + $ clang-18 --target=riscv64-linux-gnu -static temp.s runtime.o -o temp.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./temp.exe + 24 + +====================== O2 ====================== + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/010faccps_ll.ml -o temp.ll -O O2 + + $ cat temp.ll + ; ModuleID = 'main' + source_filename = "main" + target triple = "riscv64-unknown-linux-gnu" + + declare void @print_int(i64) local_unnamed_addr + + declare i64 @alloc_closure(i64, i64) local_unnamed_addr + + declare i64 @apply1(i64, i64) local_unnamed_addr + + declare void @collect() local_unnamed_addr + + declare void @rt_init(i64) local_unnamed_addr + + ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) + define i64 @id(i64 returned %x) #0 { + entry: + ret i64 %x + } + + define i64 @fresh_1(i64 %n, i64 %k, i64 %p) { + entry: + %multmp1 = lshr i64 %p, 1 + %multmp2 = add i64 %n, -1 + %multmp3 = mul i64 %multmp1, %multmp2 + %multmp4 = add i64 %multmp3, 1 + %apptmp = tail call i64 @apply1(i64 %k, i64 %multmp4) + ret i64 %apptmp + } + + define i64 @fac_cps(i64 %n, i64 %k) { + entry: + %eqtmp = icmp eq i64 %n, 3 + br i1 %eqtmp, label %then, label %else + + then: ; preds = %entry + %apptmp = tail call i64 @apply1(i64 %k, i64 3) + br label %ifcont + + else: ; preds = %entry + %subtmp2 = add i64 %n, -2 + %closure_tmp = tail call i64 @alloc_closure(i64 ptrtoint (ptr @fac_cps to i64), i64 2) + %apptmp8 = tail call i64 @apply1(i64 %closure_tmp, i64 %subtmp2) + %closure_tmp10 = tail call i64 @alloc_closure(i64 ptrtoint (ptr @fresh_1 to i64), i64 3) + %apptmp11 = tail call i64 @apply1(i64 %closure_tmp10, i64 %n) + %apptmp13 = tail call i64 @apply1(i64 %apptmp11, i64 %k) + %apptmp15 = tail call i64 @apply1(i64 %apptmp8, i64 %apptmp13) + br label %ifcont + + ifcont: ; preds = %else, %then + %iftmp = phi i64 [ %apptmp, %then ], [ %apptmp15, %else ] + ret i64 %iftmp + } + + define noundef i64 @main() local_unnamed_addr { + entry: + tail call void @rt_init(i64 5120) + %closure_tmp = tail call i64 @alloc_closure(i64 ptrtoint (ptr @fac_cps to i64), i64 2) + %apptmp = tail call i64 @apply1(i64 %closure_tmp, i64 9) + %closure_tmp1 = tail call i64 @alloc_closure(i64 ptrtoint (ptr @id to i64), i64 1) + %apptmp2 = tail call i64 @apply1(i64 %apptmp, i64 %closure_tmp1) + tail call void @print_int(i64 %apptmp2) + tail call void @collect() + ret i64 0 + } + + attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } + + $ llc-18 temp.ll -o temp.s + $ clang-18 --target=riscv64-linux-gnu -static temp.s runtime.o -o temp.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./temp.exe + 24 + +====================== O3 ====================== + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/010faccps_ll.ml -o temp.ll -O O3 + + $ cat temp.ll + ; ModuleID = 'main' + source_filename = "main" + target triple = "riscv64-unknown-linux-gnu" + + declare void @print_int(i64) local_unnamed_addr + + declare i64 @alloc_closure(i64, i64) local_unnamed_addr + + declare i64 @apply1(i64, i64) local_unnamed_addr + + declare void @collect() local_unnamed_addr + + declare void @rt_init(i64) local_unnamed_addr + + ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) + define i64 @id(i64 returned %x) #0 { + entry: + ret i64 %x + } + + define i64 @fresh_1(i64 %n, i64 %k, i64 %p) { + entry: + %multmp1 = lshr i64 %p, 1 + %multmp2 = add i64 %n, -1 + %multmp3 = mul i64 %multmp1, %multmp2 + %multmp4 = add i64 %multmp3, 1 + %apptmp = tail call i64 @apply1(i64 %k, i64 %multmp4) + ret i64 %apptmp + } + + define i64 @fac_cps(i64 %n, i64 %k) { + entry: + %eqtmp = icmp eq i64 %n, 3 + br i1 %eqtmp, label %then, label %else + + then: ; preds = %entry + %apptmp = tail call i64 @apply1(i64 %k, i64 3) + br label %ifcont + + else: ; preds = %entry + %subtmp2 = add i64 %n, -2 + %closure_tmp = tail call i64 @alloc_closure(i64 ptrtoint (ptr @fac_cps to i64), i64 2) + %apptmp8 = tail call i64 @apply1(i64 %closure_tmp, i64 %subtmp2) + %closure_tmp10 = tail call i64 @alloc_closure(i64 ptrtoint (ptr @fresh_1 to i64), i64 3) + %apptmp11 = tail call i64 @apply1(i64 %closure_tmp10, i64 %n) + %apptmp13 = tail call i64 @apply1(i64 %apptmp11, i64 %k) + %apptmp15 = tail call i64 @apply1(i64 %apptmp8, i64 %apptmp13) + br label %ifcont + + ifcont: ; preds = %else, %then + %iftmp = phi i64 [ %apptmp, %then ], [ %apptmp15, %else ] + ret i64 %iftmp + } + + define noundef i64 @main() local_unnamed_addr { + entry: + tail call void @rt_init(i64 5120) + %closure_tmp = tail call i64 @alloc_closure(i64 ptrtoint (ptr @fac_cps to i64), i64 2) + %apptmp = tail call i64 @apply1(i64 %closure_tmp, i64 9) + %closure_tmp1 = tail call i64 @alloc_closure(i64 ptrtoint (ptr @id to i64), i64 1) + %apptmp2 = tail call i64 @apply1(i64 %apptmp, i64 %closure_tmp1) + tail call void @print_int(i64 %apptmp2) + tail call void @collect() + ret i64 0 + } + + attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } + + $ llc-18 temp.ll -o temp.s + $ clang-18 --target=riscv64-linux-gnu -static temp.s runtime.o -o temp.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu/ -cpu rv64 ./temp.exe + 24 + +====================== RISC-V ====================== + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/010faccps_ll.ml -o temp.ll -t "riscv64-unknown-linux-gnu" + $ llc-18 temp.ll -o temp.s + $ cat temp.s + .text + .attribute 4, 16 + .attribute 5, "rv64i2p1" + .file "main" + .globl id # -- Begin function id + .p2align 2 + .type id,@function + id: # @id + .cfi_startproc + # %bb.0: # %entry + addi sp, sp, -16 + .cfi_def_cfa_offset 16 + sd a0, 8(sp) + addi sp, sp, 16 + ret + .Lfunc_end0: + .size id, .Lfunc_end0-id + .cfi_endproc + # -- End function + .globl fresh_1 # -- Begin function fresh_1 + .p2align 2 + .type fresh_1,@function + fresh_1: # @fresh_1 + .cfi_startproc + # %bb.0: # %entry + addi sp, sp, -64 + .cfi_def_cfa_offset 64 + sd ra, 56(sp) # 8-byte Folded Spill + sd s0, 48(sp) # 8-byte Folded Spill + .cfi_offset ra, -8 + .cfi_offset s0, -16 + mv s0, a1 + sd a0, 8(sp) + sd a1, 16(sp) + sd a2, 24(sp) + srli a2, a2, 1 + addi a1, a0, -1 + mv a0, a2 + call __muldi3 + addi a1, a0, 1 + sd a1, 32(sp) + mv a0, s0 + call apply1 + sd a0, 40(sp) + ld ra, 56(sp) # 8-byte Folded Reload + ld s0, 48(sp) # 8-byte Folded Reload + addi sp, sp, 64 + ret + .Lfunc_end1: + .size fresh_1, .Lfunc_end1-fresh_1 + .cfi_endproc + # -- End function + .globl fac_cps # -- Begin function fac_cps + .p2align 2 + .type fac_cps,@function + fac_cps: # @fac_cps + .cfi_startproc + # %bb.0: # %entry + addi sp, sp, -96 + .cfi_def_cfa_offset 96 + sd ra, 88(sp) # 8-byte Folded Spill + sd s0, 80(sp) # 8-byte Folded Spill + .cfi_offset ra, -8 + .cfi_offset s0, -16 + sd a1, 8(sp) + addi a1, a0, -3 + seqz a1, a1 + sd a1, 16(sp) + li a1, 3 + sd a0, 0(sp) + bne a0, a1, .LBB2_2 + # %bb.1: # %then + ld a0, 8(sp) + li a1, 3 + call apply1 + sd a0, 24(sp) + j .LBB2_3 + .LBB2_2: # %else + ld s0, 0(sp) + addi s0, s0, -2 + sd s0, 32(sp) + lui a0, %hi(fac_cps) + addi a0, a0, %lo(fac_cps) + li a1, 2 + call alloc_closure + mv a1, s0 + call apply1 + ld s0, 0(sp) + sd a0, 40(sp) + lui a0, %hi(fresh_1) + addi a0, a0, %lo(fresh_1) + li a1, 3 + call alloc_closure + mv a1, s0 + call apply1 + ld a1, 8(sp) + sd a0, 48(sp) + call apply1 + ld a1, 40(sp) + mv a2, a0 + sd a0, 56(sp) + mv a0, a1 + mv a1, a2 + call apply1 + sd a0, 64(sp) + .LBB2_3: # %ifcont + sd a0, 72(sp) + ld ra, 88(sp) # 8-byte Folded Reload + ld s0, 80(sp) # 8-byte Folded Reload + addi sp, sp, 96 + ret + .Lfunc_end2: + .size fac_cps, .Lfunc_end2-fac_cps + .cfi_endproc + # -- End function + .globl main # -- Begin function main + .p2align 2 + .type main,@function + main: # @main + .cfi_startproc + # %bb.0: # %entry + addi sp, sp, -64 + .cfi_def_cfa_offset 64 + sd ra, 56(sp) # 8-byte Folded Spill + sd s0, 48(sp) # 8-byte Folded Spill + sd s1, 40(sp) # 8-byte Folded Spill + .cfi_offset ra, -8 + .cfi_offset s0, -16 + .cfi_offset s1, -24 + li a0, 5 + slli a0, a0, 10 + call rt_init + lui a0, %hi(fac_cps) + addi a0, a0, %lo(fac_cps) + li a1, 2 + call alloc_closure + li a1, 9 + call apply1 + mv s0, a0 + sd a0, 8(sp) + lui a0, %hi(id) + addi a0, a0, %lo(id) + li a1, 1 + li s1, 1 + call alloc_closure + mv a1, a0 + mv a0, s0 + call apply1 + sd a0, 16(sp) + call print_int + sd zero, 24(sp) + sd s1, 32(sp) + call collect + li a0, 0 + ld ra, 56(sp) # 8-byte Folded Reload + ld s0, 48(sp) # 8-byte Folded Reload + ld s1, 40(sp) # 8-byte Folded Reload + addi sp, sp, 64 + ret + .Lfunc_end3: + .size main, .Lfunc_end3-main + .cfi_endproc + # -- End function + .section ".note.GNU-stack","",@progbits + +====================== x86-64 ====================== + + $ dune exec -- ../bin/XML_llvm.exe -fromfile manytests/typed/010faccps_ll.ml -o temp.ll -t "x86_64-pc-linux-gnu" + $ llc-18 temp.ll -o temp.s + $ cat temp.s + .text + .file "main" + .globl id # -- Begin function id + .p2align 4, 0x90 + .type id,@function + id: # @id + .cfi_startproc + # %bb.0: # %entry + movq %rdi, %rax + movq %rdi, -8(%rsp) + retq + .Lfunc_end0: + .size id, .Lfunc_end0-id + .cfi_endproc + # -- End function + .globl fresh_1 # -- Begin function fresh_1 + .p2align 4, 0x90 + .type fresh_1,@function + fresh_1: # @fresh_1 + .cfi_startproc + # %bb.0: # %entry + subq $40, %rsp + .cfi_def_cfa_offset 48 + movq %rsi, %rax + movq %rdi, (%rsp) + movq %rsi, 8(%rsp) + movq %rdx, 16(%rsp) + shrq %rdx + leaq -1(%rdi), %rsi + imulq %rdx, %rsi + incq %rsi + movq %rsi, 24(%rsp) + movq %rax, %rdi + callq apply1@PLT + movq %rax, 32(%rsp) + addq $40, %rsp + .cfi_def_cfa_offset 8 + retq + .Lfunc_end1: + .size fresh_1, .Lfunc_end1-fresh_1 + .cfi_endproc + # -- End function + .globl fac_cps # -- Begin function fac_cps + .p2align 4, 0x90 + .type fac_cps,@function + fac_cps: # @fac_cps + .cfi_startproc + # %bb.0: # %entry + pushq %rbx + .cfi_def_cfa_offset 16 + subq $80, %rsp + .cfi_def_cfa_offset 96 + .cfi_offset %rbx, -16 + movq %rdi, (%rsp) + movq %rsi, 8(%rsp) + xorl %eax, %eax + cmpq $3, %rdi + sete %al + movq %rax, 24(%rsp) + jne .LBB2_2 + # %bb.1: # %then + movq 8(%rsp), %rdi + movl $3, %esi + callq apply1@PLT + movq %rax, 32(%rsp) + jmp .LBB2_3 + .LBB2_2: # %else + movq (%rsp), %rbx + addq $-2, %rbx + movq %rbx, 40(%rsp) + movq fac_cps@GOTPCREL(%rip), %rdi + movl $2, %esi + callq alloc_closure@PLT + movq %rax, %rdi + movq %rbx, %rsi + callq apply1@PLT + movq %rax, 16(%rsp) + movq (%rsp), %rbx + movq fresh_1@GOTPCREL(%rip), %rdi + movl $3, %esi + callq alloc_closure@PLT + movq %rax, %rdi + movq %rbx, %rsi + callq apply1@PLT + movq %rax, 48(%rsp) + movq 8(%rsp), %rsi + movq %rax, %rdi + callq apply1@PLT + movq %rax, 56(%rsp) + movq 16(%rsp), %rdi + movq %rax, %rsi + callq apply1@PLT + movq %rax, 64(%rsp) + .LBB2_3: # %ifcont + movq %rax, 72(%rsp) + addq $80, %rsp + .cfi_def_cfa_offset 16 + popq %rbx + .cfi_def_cfa_offset 8 + retq + .Lfunc_end2: + .size fac_cps, .Lfunc_end2-fac_cps + .cfi_endproc + # -- End function + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function + main: # @main + .cfi_startproc + # %bb.0: # %entry + pushq %rbx + .cfi_def_cfa_offset 16 + subq $32, %rsp + .cfi_def_cfa_offset 48 + .cfi_offset %rbx, -16 + movl $5120, %edi # imm = 0x1400 + callq rt_init@PLT + movq fac_cps@GOTPCREL(%rip), %rdi + movl $2, %esi + callq alloc_closure@PLT + movl $9, %esi + movq %rax, %rdi + callq apply1@PLT + movq %rax, %rbx + movq %rax, (%rsp) + movq id@GOTPCREL(%rip), %rdi + movl $1, %esi + callq alloc_closure@PLT + movq %rbx, %rdi + movq %rax, %rsi + callq apply1@PLT + movq %rax, 8(%rsp) + movq %rax, %rdi + callq print_int@PLT + movq $0, 16(%rsp) + movq $1, 24(%rsp) + callq collect@PLT + xorl %eax, %eax + addq $32, %rsp + .cfi_def_cfa_offset 16 + popq %rbx + .cfi_def_cfa_offset 8 + retq + .Lfunc_end3: + .size main, .Lfunc_end3-main + .cfi_endproc + # -- End function + .section ".note.GNU-stack","",@progbits diff --git a/XML/many_tests/qcheck/ast_qcheck.t b/XML/many_tests/qcheck/ast_qcheck.t new file mode 100644 index 00000000..b18a94e4 --- /dev/null +++ b/XML/many_tests/qcheck/ast_qcheck.t @@ -0,0 +1,34 @@ + $ ./run_ast_qcheck.exe -seed 160355461 -gen 1 + random seed: 160355461 + ================================================================================ + success (ran 1 tests) + + $ ./run_ast_qcheck.exe -seed 142495613 -gen 1 + random seed: 142495613 + ================================================================================ + success (ran 1 tests) + + $ ./run_ast_qcheck.exe -seed 512010308 -gen 1 + random seed: 512010308 + ================================================================================ + success (ran 1 tests) + + $ ./run_ast_qcheck.exe -seed 352654714 -gen 1 + random seed: 352654714 + ================================================================================ + success (ran 1 tests) + + $ ./run_ast_qcheck.exe -seed 18353380 -gen 1 + random seed: 18353380 + ================================================================================ + success (ran 1 tests) + + $ ./run_ast_qcheck.exe -seed 340151730 -gen 1 + random seed: 340151730 + ================================================================================ + success (ran 1 tests) + + $ ./run_ast_qcheck.exe -seed 372987422 -gen 1 + random seed: 372987422 + ================================================================================ + success (ran 1 tests) diff --git a/XML/many_tests/qcheck/dune b/XML/many_tests/qcheck/dune new file mode 100644 index 00000000..aa7083aa --- /dev/null +++ b/XML/many_tests/qcheck/dune @@ -0,0 +1,27 @@ +(executable + (name run_ast_qcheck) + (libraries + stdio + XML.Common + XML.Backend + qcheck-core + qcheck-core.runner + ppx_deriving_qcheck)) + +(cram + (applies_to ast_qcheck) + (deps ./run_ast_qcheck.exe)) + +(executable + (name run_machine_qcheck) + (libraries + stdio + XML.Common + XML.Backend + qcheck-core + qcheck-core.runner + ppx_deriving_qcheck)) + +(cram + (applies_to machine_qcheck) + (deps ./run_machine_qcheck.exe)) diff --git a/XML/many_tests/qcheck/machine_qcheck.t b/XML/many_tests/qcheck/machine_qcheck.t new file mode 100644 index 00000000..3606faec --- /dev/null +++ b/XML/many_tests/qcheck/machine_qcheck.t @@ -0,0 +1,522 @@ + $ ./run_machine_qcheck.exe -seed 410641225 -gen 40 + random seed: 410641225 + srai a7, s4, 0 + srai s10, a4, 2 + la zero, label + label: + xori a6, t4, 28 + ble a3, a2, label + slt s7, s6, s10 + li s1, 7 + xor t4, sp, a4 + snez t1, zero + j label + ret + sub zero, sp, t2 + snez ra, zero + call label + ble t4, t6, label + call label + mv a4, a1 + mv ra, t5 + seqz ra, ra + slt t1, s8, zero + li a4, 7 + mul ra, s1, t3 + sd t1, 5(sp) + lla zero, label + mv t0, t4 + mv zero, s9 + li a4, 66 + mv zero, a6 + # comment + slt ra, s6, a3 + ld s11, 3(s4) + li ra, 4 + lla ra, label + ld sp, 2(t0) + li zero, 7 + j label + mv a2, t4 + beq ra, zero, label + j label + mv zero, sp + # comment + sd ra, 47(ra) + la ra, label + li s1, 9 + call label + slt sp, ra, sp + la t5, label + ecall + call label + mv s2, sp + xori sp, ra, 0 + ld a7, 4(s10) + label: + j label + sd t4, 9(s10) + # comment + mv s2, s7 + sub sp, zero, ra + xor t1, a2, a2 + xor s11, s9, sp + beq zero, zero, label + ecall + snez ra, zero + j label + j label + label: + label: + lla sp, label + sd s2, 7(a5) + mul a3, s8, ra + ble t4, s10, label + beq t4, sp, label + sub t2, t1, zero + slt s3, zero, sp + sd zero, 29(t2) + xori sp, s7, 3 + ld a3, 12(s11) + lla sp, label + beq sp, t0, label + ret + snez a7, s0 + slt sp, ra, ra + j label + seqz zero, ra + ld t6, 4(zero) + li a2, 7 + ble s4, a6, label + ecall + seqz ra, sp + sd ra, 4(s8) + call label + sd sp, 4(zero) + blt sp, s6, label + sd a5, 3(a5) + ecall + sub t5, s10, zero + srai ra, ra, 4 + ble zero, s4, label + add t6, t3, s6 + sd sp, 8(t0) + sd t5, 25(zero) + ble zero, s6, label + srai t2, a4, 0 + srai a7, a1, 5 + call label + xor t4, a3, a3 + # comment + lla zero, label + snez sp, t3 + ble ra, t0, label + lla t4, label + srai s9, ra, 6 + ld zero, 1(ra) + ld sp, 8(t5) + seqz t0, t2 + sub a3, ra, ra + lla t2, label + mul zero, s9, a3 + slt sp, ra, a0 + blt a5, zero, label + mv sp, zero + # comment + label: + mv sp, zero + li a2, 0 + ecall + ld sp, 89(s10) + lla t4, label + la sp, label + addi sp, sp, 3 + add zero, sp, t1 + li t4, 48 + snez a2, zero + srai s4, zero, 2 + # comment + snez a5, zero + ret + label: + ret + blt sp, a6, label + ble t4, zero, label + ret + slli zero, zero, 5 + la ra, label + label: + seqz sp, a7 + ble zero, sp, label + slt s10, s0, sp + blt ra, sp, label + ret + srai t5, t6, 0 + sd ra, 7(a4) + beq zero, t6, label + slli sp, t1, 3 + seqz ra, s4 + la sp, label + ble t6, a2, label + beq t3, sp, label + mv s8, a4 + ecall + li ra, 90 + slli sp, a3, 9 + mul sp, t5, sp + xori sp, t6, 0 + la ra, label + blt t0, zero, label + sd s4, 3(s2) + ld zero, 1(sp) + sub a7, s10, zero + lla sp, label + label: + xor t3, a4, a4 + seqz s1, s11 + call label + # comment + seqz sp, a1 + sub a2, t0, zero + ecall + # comment + li a2, 9 + j label + li s5, 4 + # comment + lla zero, label + addi t4, sp, 7 + seqz zero, s7 + mv a3, a7 + mul t2, t3, s10 + beq a4, ra, label + sd t2, 41(s1) + la zero, label + mul ra, zero, t5 + li a6, 4 + j label + beq t1, t3, label + mv zero, a0 + snez t2, sp + li t0, 3 + label: + j label + lla t0, label + label: + sub sp, a3, ra + # comment + mul ra, t2, a7 + xor ra, t5, sp + ecall + lla s7, label + ret + sub zero, s1, sp + sub sp, zero, sp + ecall + blt a6, zero, label + sub s0, ra, s1 + xor sp, sp, zero + mul a3, a0, a7 + mul a5, zero, s7 + sub ra, sp, a7 + # comment + sd t0, 2(a7) + srai s7, a7, 1 + snez a7, sp + ret + call label + sd sp, 5(s7) + ld a5, 7(s4) + xor s9, sp, zero + lla zero, label + blt zero, s10, label + mv sp, zero + call label + snez zero, ra + ret + addi a3, t1, 62 + j label + j label + mv ra, ra + call label + ecall + xor t5, t0, zero + li ra, 88 + sub t5, sp, s6 + li s1, 71 + call label + xori s10, a1, 1 + slt t0, ra, sp + sub a2, ra, ra + mv t2, sp + lla zero, label + sub zero, s7, a0 + blt t6, s0, label + seqz a0, sp + mv s6, sp + sub ra, t2, a3 + beq ra, t2, label + li zero, 2 + blt sp, s9, label + ret + slli zero, t3, 5 + blt sp, sp, label + blt a3, t4, label + addi a1, zero, 2 + xori t3, s3, 8 + addi sp, t6, 4 + call label + # comment + add zero, ra, zero + xor zero, zero, a5 + slli zero, ra, 0 + add a6, sp, a4 + sd sp, 96(a6) + slt t5, zero, s8 + mv a0, sp + ecall + slli a6, sp, 55 + mul s3, sp, zero + xor sp, t4, a4 + add zero, zero, t2 + xor t3, sp, a5 + snez t4, sp + lla zero, label + j label + xori ra, zero, 0 + addi sp, ra, 8 + slt sp, sp, t1 + label: + beq ra, t5, label + sub s2, sp, ra + mul ra, t5, sp + srai a6, zero, 86 + mv a5, ra + blt zero, t6, label + # comment + beq zero, a4, label + ecall + beq a2, zero, label + snez a2, t0 + ecall + call label + mv zero, sp + blt t2, s11, label + ld a0, 4(zero) + beq zero, s2, label + ble t6, sp, label + li sp, 3 + snez ra, a3 + label: + xor ra, a6, zero + ecall + slt a3, t5, zero + call label + # comment + seqz ra, s4 + ld sp, 7(sp) + blt sp, t3, label + call label + ret + add ra, s3, zero + mul t3, s4, t1 + label: + add t2, s1, zero + ecall + blt ra, s10, label + ld sp, 74(sp) + sub sp, sp, ra + li ra, 0 + # comment + # comment + snez sp, sp + j label + xori t4, s4, 4 + sub a0, s6, sp + sd s5, 0(ra) + slli t0, zero, 70 + la ra, label + add zero, zero, t3 + sd zero, 0(s11) + lla t1, label + xori t0, ra, 5 + beq ra, ra, label + xori zero, sp, 7 + j label + beq ra, t4, label + slt s7, sp, zero + add zero, zero, a5 + mv zero, s5 + mv sp, sp + j label + mv ra, t0 + snez sp, sp + addi a6, t3, 98 + li ra, 3 + blt a2, sp, label + mv ra, sp + ld zero, 2(ra) + snez a2, sp + xor a5, s9, zero + li t1, 9 + j label + ble zero, sp, label + mv sp, zero + lla s3, label + label: + ble a5, ra, label + j label + mul s7, a0, t0 + ld ra, 34(zero) + j label + call label + la sp, label + j label + call label + add a3, a7, t6 + xori zero, s10, 5 + snez sp, sp + xori sp, sp, 8 + slli s4, t2, 8 + add sp, zero, a1 + srai sp, t4, 5 + beq a2, ra, label + # comment + xor ra, t6, t0 + ecall + # comment + slt ra, sp, s2 + seqz s8, ra + ret + sub zero, a1, ra + xori s7, ra, 14 + blt a7, ra, label + ecall + mul ra, zero, ra + slt a0, t6, sp + xor zero, sp, ra + blt ra, zero, label + ble sp, sp, label + snez zero, ra + mv s9, a3 + j label + label: + xori s1, ra, 7 + add s4, a6, t4 + sub zero, zero, s9 + # comment + ld a3, 8(t2) + sub zero, ra, t1 + ret + add t6, ra, ra + slli a2, t1, 4 + # comment + mul a3, s0, ra + mv t5, sp + call label + # comment + lla a6, label + # comment + beq ra, t4, label + beq zero, zero, label + j label + xori s3, a3, 4 + sd zero, 1(zero) + mv ra, zero + beq ra, s6, label + lla ra, label + snez zero, zero + ble ra, sp, label + slt sp, a1, t0 + slt sp, zero, ra + label: + mul ra, s10, t5 + mv ra, zero + addi s6, sp, 1 + lla s1, label + slli s2, t4, 1 + mul sp, sp, zero + ecall + slli sp, a2, 1 + ble sp, a5, label + lla a2, label + ret + ble s9, t3, label + mul zero, ra, t3 + j label + j label + mv ra, ra + slli s4, a7, 1 + li a6, 3 + xori zero, sp, 8 + # comment + la sp, label + li s5, 9 + ld sp, 3(zero) + srai t4, sp, 44 + seqz s0, a6 + slt s9, sp, zero + sub sp, sp, t4 + ecall + seqz s1, sp + blt zero, sp, label + seqz ra, a6 + call label + seqz a5, sp + beq ra, zero, label + blt ra, sp, label + la sp, label + # comment + addi sp, zero, 7 + j label + blt s9, t4, label + xori a7, s5, 8 + xori s8, zero, 6 + mv t0, t1 + beq a5, t2, label + xor s10, s4, t3 + ret + mul zero, t4, a2 + add zero, zero, zero + mv sp, zero + # comment + ble sp, s0, label + la ra, label + label: + srai ra, a2, 7 + ret + xor t3, a4, a7 + xori sp, zero, 6 + la a1, label + add sp, t2, a2 + slt s2, a7, ra + add sp, t1, s10 + slli sp, a2, 4 + beq zero, s8, label + add zero, sp, s11 + mv zero, ra + label: + ble t6, ra, label + ble s8, sp, label + label: + j label + ld ra, 90(s7) + ld a5, 4(a1) + lla ra, label + la a7, label + ret + li s10, 7 + lla zero, label + addi t1, zero, 3 + ret + call label + xor t0, sp, s3 + add s2, ra, zero + srai sp, t2, 7 + li a0, 4 + slli a2, a0, 0 + lla zero, label + blt t2, ra, label + sub a1, sp, a7 + ================================================================================ + success (ran 1 tests) diff --git a/XML/many_tests/qcheck/run_ast_qcheck.ml b/XML/many_tests/qcheck/run_ast_qcheck.ml new file mode 100644 index 00000000..07957d12 --- /dev/null +++ b/XML/many_tests/qcheck/run_ast_qcheck.ml @@ -0,0 +1,59 @@ +(** Copyright 2026, Mikhail Gavrilenko, Danila Rudnev-Stepanyan, Daniel Vlasenko *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Common.Pprinter +open Common.Ast +open Common.Parser +open Format + +let parse_prog str = Angstrom.parse_string ~consume:Angstrom.Consume.All pstructure str + +let print_prog_with_ast prog = + let parsed = (parse_prog (asprintf "%a" pprint_program prog)) in + asprintf + "AST:\n\n%s\n\nPprinted:\n\n%s\n\nParsed:\n\n%s\n\nParsed Pprinted:\n\n%s\n\n" + (show_program prog) + (asprintf "%a" pprint_program prog) + (show_program (((fun r -> + if Result.is_ok r + then Result.get_ok r + else (printf "Result is not ok, printing initial prog\n"; + prog)) + parsed ))) + (asprintf "%a" pprint_program ((fun r -> + if Result.is_ok r + then Result.get_ok r + else (printf "Result is not ok, printing initial prog\n"; + prog)) + parsed)) +;; + +let arb_program = + QCheck.make + ~print:print_prog_with_ast + (QCheck.Gen.sized Program.gen_program) + +let run n = + QCheck_base_runner.run_tests + [ QCheck.( + Test.make arb_program ~count:n (fun pr -> + let res = parse_str (asprintf "%a\n" pprint_program pr) in + pr = res)) + ] +;; + +let run_tests n = + let _ = run n in + () +;; + +let () = + Arg.parse + [ "-seed", Arg.Int QCheck_base_runner.set_seed, " Set seed" + ; "-stop", Arg.Unit (fun _ -> exit 0), " Exit" + ; "-gen", Arg.Int run_tests, " Exit" + ] + (fun _ -> ()) + "help" +;; \ No newline at end of file diff --git a/XML/many_tests/qcheck/run_ast_qcheck.mli b/XML/many_tests/qcheck/run_ast_qcheck.mli new file mode 100644 index 00000000..536d103e --- /dev/null +++ b/XML/many_tests/qcheck/run_ast_qcheck.mli @@ -0,0 +1,3 @@ +(** Copyright 2026, Mikhail Gavrilenko, Danila Rudnev-Stepanyan, Daniel Vlasenko *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) diff --git a/XML/many_tests/qcheck/run_machine_qcheck.ml b/XML/many_tests/qcheck/run_machine_qcheck.ml new file mode 100644 index 00000000..25bf1f92 --- /dev/null +++ b/XML/many_tests/qcheck/run_machine_qcheck.ml @@ -0,0 +1,36 @@ +(** Copyright 2026, Mikhail Gavrilenko, Danila Rudnev-Stepanyan, Daniel Vlasenko *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Format +open Backend.Machine +open QCheck.Gen + +let arb_machine_instrs = + QCheck.make + (QCheck.Gen.list_size (5--20) gen_instr) + +let run n = + QCheck_base_runner.run_tests + [ QCheck.( + Test.make arb_machine_instrs ~count:n (fun is -> + List.iter (fun i -> + Format.fprintf std_formatter "%a\n" pp_instr i + ) is; + true)) + ] + +let run_tests n = + let _ = run n in + () +;; + +let () = + Arg.parse + [ "-seed", Arg.Int QCheck_base_runner.set_seed, " Set seed" + ; "-stop", Arg.Unit (fun _ -> exit 0), " Exit" + ; "-gen", Arg.Int run_tests, " Exit" + ] + (fun _ -> ()) + "help" +;; \ No newline at end of file diff --git a/XML/many_tests/qcheck/run_machine_qcheck.mli b/XML/many_tests/qcheck/run_machine_qcheck.mli new file mode 100644 index 00000000..536d103e --- /dev/null +++ b/XML/many_tests/qcheck/run_machine_qcheck.mli @@ -0,0 +1,3 @@ +(** Copyright 2026, Mikhail Gavrilenko, Danila Rudnev-Stepanyan, Daniel Vlasenko *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) diff --git a/XML/many_tests/unit/anf.ml b/XML/many_tests/unit/anf.ml new file mode 100644 index 00000000..e31a0cb2 --- /dev/null +++ b/XML/many_tests/unit/anf.ml @@ -0,0 +1,167 @@ +(** Copyright 2026, Mikhail Gavrilenko, Danila Rudnev-Stepanyan, Daniel Vlasenko *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Middleend.Anf +open Middleend.Pprinter +open Common.Parser + +let to_anf_prog str = + let prog = parse_str str in + let aprog = anf_program_res prog in + match aprog with + | Ok aprog -> + print_anf_program Format.std_formatter aprog + | Error e -> Format.fprintf Format.std_formatter "%s" (pp_anf_error e) + + let pp_error e = + Format.fprintf Format.std_formatter "%s" (pp_anf_error e) + +(************************** Expressions **************************) + + +let%expect_test "char" = + to_anf_prog {| 'a' |}; + [%expect{| a;; |}] + + +let%expect_test "int" = + to_anf_prog {| 1 |}; + [%expect{| 1;; |}] + + +let%expect_test "str" = + to_anf_prog {| "Kakadu" |}; + [%expect{| Kakadu;; |}] + + +let%expect_test "tuple 2" = + to_anf_prog {| (1, 2) |}; + [%expect{| let t_0 = alloc(1, 2) in t_0;; |}] + + +let%expect_test "tuple 3" = + to_anf_prog {| (1, 2, 3) |}; + [%expect{| let t_0 = alloc(1, 2, 3) in t_0;; |}] + + +let%expect_test "tuples in tuple" = + to_anf_prog {| ((1, 2), (3, 4)) |}; + [%expect{| + let t_0 = alloc(1, 2) + in let t_1 = alloc(3, 4) in let t_2 = alloc(t_0, t_1) in t_2;; |}] + + +let%expect_test "if a then b" = + to_anf_prog {| if a then b |}; + [%expect{| unsupported expression in ANF normaliser |}] + + +let%expect_test "if a then b else c" = + to_anf_prog {| if a then b else c |}; + [%expect{| let t_0 = if a then b else c in t_0;; |}] + + +let%expect_test "apply f x" = + to_anf_prog {| f x |}; + [%expect{| let t_0 = f x in t_0;; |}] + + + let%expect_test "apply f (let a = x in a)" = + to_anf_prog {| f (let a = x in a) |}; + [%expect{| let t_0 = x in let a = t_0 in let t_1 = f a in t_1;; |}] + + +let%expect_test "apply f x y" = + to_anf_prog {| f x y |}; + [%expect{| let t_0 = f x in let t_1 = t_0 y in t_1;; |}] + + +let%expect_test "apply 1 + 2" = + to_anf_prog {| 1 + 2 |}; + [%expect{| 3;; |}] + + +let%expect_test "fun x -> x" = + to_anf_prog {| fun x -> x |}; + [%expect{| let t_0 = fun x -> x in t_0;; |}] + + +let%expect_test "apply (fun x -> x) 10" = + to_anf_prog {| (fun x -> x) 10 |}; + [%expect{| let t_0 = fun x -> x in let t_1 = t_0 10 in t_1;; |}] + + +let%expect_test "let a = 5 in a + 10" = + to_anf_prog {| let a = 5 in a + 10 |}; + [%expect{| let t_0 = 5 in let a = t_0 in let t_1 = (a + 10) in t_1;; |}] + + +let%expect_test "factorial" = + to_anf_prog {| let rec fac n = if n <= 0 then 1 else n * (fac (n-1)) in fac |}; + [%expect{| + let rec t_5 = fun n -> let t_0 = (n <= 0) + in let t_4 = if t_0 then 1 else let t_1 = (n - 1) + in let t_2 = fac t_1 + in let t_3 = (n * t_2) + in t_3 + in t_4 in let fac = t_5 in fac;; |}] + +(************************** Structure item **************************) + +let%expect_test "factorial" = + to_anf_prog {| let rec fac n = if n <= 0 then 1 else n * (fac (n-1)) |}; + [%expect{| + let rec fac = let t_5 = fun n -> let t_0 = (n <= 0) + in let t_4 = if t_0 then 1 else let t_1 = (n - 1) + in + let t_2 = fac t_1 + in + let t_3 = (n * t_2) + in t_3 + in t_4 in t_5;; |}] + +(************************** Error **************************) + +let%expect_test "Only_simple_var_params" = +pp_error `Only_simple_var_params; +[%expect {| Only simple variable patterns are allowed in function parameters |}] + +let%expect_test "Func_no_params" = +pp_error `Func_no_params; +[%expect {| Function with no parameters found |}] + + +let%expect_test "Let_and_not_supported" = +pp_error `Let_and_not_supported; +[%expect {| let ... and ... is not supported in ANF yet |}] + + +let%expect_test "Unsupported_let_pattern" = +pp_error (`Unsupported_let_pattern "pattern"); +[%expect {| Unsupported pattern in let-binding: pattern |}] + + +let%expect_test "Unsupported_let_pattern" = +pp_error (`Unsupported_let_pattern "wrong_pattern"); +[%expect {| Unsupported pattern in let-binding: wrong_pattern |}] + + +let%expect_test "Unsupported_expr_in_normaliser" = +pp_error (`Unsupported_expr_in_normaliser); +[%expect {| unsupported expression in ANF normaliser |}] + +let%expect_test "Mutual_rec_not_supported" = +pp_error `Mutual_rec_not_supported; +[%expect {| Mutually recursive let ... and ... bindings are not supported yet. |}] + + +let%expect_test "Unsupported_toplevel_let" = +pp_error `Unsupported_toplevel_let; +[%expect {| Unsupported pattern in a top-level let-binding. Only simple variables are allowed. |}] + + +let%expect_test "Unsupported_toplevel_item" = +pp_error `Unsupported_toplevel_item; +[%expect {| Unsupported top-level structure item. |}] + diff --git a/XML/many_tests/unit/anf.mli b/XML/many_tests/unit/anf.mli new file mode 100644 index 00000000..536d103e --- /dev/null +++ b/XML/many_tests/unit/anf.mli @@ -0,0 +1,3 @@ +(** Copyright 2026, Mikhail Gavrilenko, Danila Rudnev-Stepanyan, Daniel Vlasenko *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) diff --git a/XML/many_tests/unit/cc.ml b/XML/many_tests/unit/cc.ml new file mode 100644 index 00000000..41bb93bb --- /dev/null +++ b/XML/many_tests/unit/cc.ml @@ -0,0 +1,122 @@ +(** Copyright 2026, Mikhail Gavrilenko, Danila Rudnev-Stepanyan, Daniel Vlasenko *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Middleend.Cc +open Common.Parser +open Common.Pprinter + +let to_cc str = + let prog = parse_str str in + let ccprog = cc_program prog in + (* let aprog = anf_program ccprog in + let llprog = lambda_lift_program aprog in *) + pprint_program Format.std_formatter ccprog + + +let%expect_test "020closures.pdf" = +to_cc {| + let fac n = + let rec fack n k = + if n <= 1 then k 1 + else fack (n-1) (fun m -> k (m * n)) + in + fack n (fun x -> x) +|}; +[%expect {| + let fac = (fun n -> (let rec fack = (fun n k -> (if n <= 1 + then (k 1) + else (fack n - 1) (((fun k n m -> (k m * n)) k) n))) in (fack n) (fun x -> x)));; |}] + + +let%expect_test "LN_CC_1" = +to_cc {| + let f x = + let g y = x + in + g +|}; +[%expect {| + let f = (fun x -> (let g = ((fun x y -> x) x) in (g x)));; |}] + + +let%expect_test "LN_CC_2" = +to_cc {| + fun x -> x + a + b +|}; +[%expect {| + ((fun a b x -> x + a + b) a) b ;; |}] + + +let%expect_test "LN_CC_3" = +to_cc {| + let mkPair = fun x -> + let add = fun y -> x + y in + let mul = fun z -> x * z in + (add, mul) +|}; +[%expect {| + let mkPair = (fun x -> (let add = ((fun x y -> x + y) x) in (let mul = ((fun x z -> x * z) x) in ((add x), (mul x)))));; |}] + + +let%expect_test "if" = +to_cc {| + let f = + let ret1 = 1 in + let ret2 = 2 in + let greater_10 x = if x > 10 then ret1 else ret2 in + greater_10 +|}; +[%expect {| + let f = (let ret1 = 1 in (let ret2 = 2 in (let greater_10 = ((fun ret1 ret2 x -> (if x > 10 + then ret1 + else ret2)) ret1) ret2 in (greater_10 ret1) ret2)));; |}] + + +let%expect_test "tuple" = +to_cc {| + let tuples = + let a, b = 10, 20 in + let to_tuple x = x, a, b in + to_tuple +|}; +[%expect {| + let tuples = (let (a, b) = (10, 20) in (let to_tuple = ((fun a b x -> (x, a, b)) a) b in (to_tuple a) b));; |}] + + +let%expect_test "func" = +to_cc {| + let f x y = + ((fun x -> fun x -> y) x) x +|}; +[%expect {| + let f = (fun x y -> (((fun y x -> ((fun y x -> y) y)) y) x) x);; |}] + + +let%expect_test "load" = +to_cc {| + let f x = + let g y = + let (a, b) = x, y in + a in g +|}; +[%expect{| let f = (fun x -> (let g = ((fun x y -> (let (a, b) = (x, y) in a)) x) in (g x)));; |}] + + +let%expect_test "match" = +to_cc {| + let f x = + let y = 10 in + let g z = + (match x with + | Some _ -> y + | _ -> z) + in + g +|}; +[%expect{| + let f = (fun x -> (let y = 10 in (let g = ((fun x y z -> (match x with + | Some _ -> y + | _ -> z)) x) y in (g x) y)));; |}] + + \ No newline at end of file diff --git a/XML/many_tests/unit/cc.mli b/XML/many_tests/unit/cc.mli new file mode 100644 index 00000000..536d103e --- /dev/null +++ b/XML/many_tests/unit/cc.mli @@ -0,0 +1,3 @@ +(** Copyright 2026, Mikhail Gavrilenko, Danila Rudnev-Stepanyan, Daniel Vlasenko *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) diff --git a/XML/many_tests/unit/codegen_llvm.ml b/XML/many_tests/unit/codegen_llvm.ml new file mode 100644 index 00000000..41c4a98d --- /dev/null +++ b/XML/many_tests/unit/codegen_llvm.ml @@ -0,0 +1,605 @@ +(** Copyright 2025-2026, Mikhail Gavrilenko,Danila Rudnev-Stepanyan, Daniel Vlasenko *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Backend.Codegen_llvm +open Middleend.Anf +open Middleend.Ll +open Common.Parser + +let codegen_prog_str str = + let prog = parse_str str in + let anf = anf_program prog in + let ll = lambda_lift_program anf in + print_string (gen_program_ir ll "riscv64-unknown-linux-gnu" None) + +let%expect_test "num" = + codegen_prog_str {| 5 |}; + [%expect {| + ; ModuleID = 'main' + source_filename = "main" + target triple = "riscv64-unknown-linux-gnu" + + declare void @print_int(i64) + + declare i64 @alloc_block(i64) + + declare i64 @alloc_closure(i64, i64) + + declare i64 @apply1(i64, i64) + + declare void @print_gc_status() + + declare void @collect() + + declare i64 @create_tuple(i64) + + declare i64 @create_tuple_init(i64, i64) + + declare i64 @field(i64, i64) + + declare void @rt_init(i64) + + define i64 @main() { + entry: + call void @rt_init(i64 5120) + call void @collect() + ret i64 0 + } |}] + +let%expect_test "comp binop" = + codegen_prog_str {| 1 + 2 |}; + [%expect {| + ; ModuleID = 'main' + source_filename = "main" + target triple = "riscv64-unknown-linux-gnu" + + declare void @print_int(i64) + + declare i64 @alloc_block(i64) + + declare i64 @alloc_closure(i64, i64) + + declare i64 @apply1(i64, i64) + + declare void @print_gc_status() + + declare void @collect() + + declare i64 @create_tuple(i64) + + declare i64 @create_tuple_init(i64, i64) + + declare i64 @field(i64, i64) + + declare void @rt_init(i64) + + define i64 @main() { + entry: + call void @rt_init(i64 5120) + call void @collect() + ret i64 0 + } |}] + +let%expect_test "print_int 5" = + codegen_prog_str {| print_int 5 |}; + [%expect {| + ; ModuleID = 'main' + source_filename = "main" + target triple = "riscv64-unknown-linux-gnu" + + declare void @print_int(i64) + + declare i64 @alloc_block(i64) + + declare i64 @alloc_closure(i64, i64) + + declare i64 @apply1(i64, i64) + + declare void @print_gc_status() + + declare void @collect() + + declare i64 @create_tuple(i64) + + declare i64 @create_tuple_init(i64, i64) + + declare i64 @field(i64, i64) + + declare void @rt_init(i64) + + define i64 @main() { + entry: + %t_0 = alloca i64, align 8 + call void @rt_init(i64 5120) + call void @print_int(i64 11) + store i64 0, ptr %t_0, align 8 + %t_01 = load i64, ptr %t_0, align 8 + call void @collect() + ret i64 0 + } |}] + +let%expect_test "if 1 then 1 else 2 " = + codegen_prog_str {| if 1 then 1 else 2 |}; + [%expect{| + ; ModuleID = 'main' + source_filename = "main" + target triple = "riscv64-unknown-linux-gnu" + + declare void @print_int(i64) + + declare i64 @alloc_block(i64) + + declare i64 @alloc_closure(i64, i64) + + declare i64 @apply1(i64, i64) + + declare void @print_gc_status() + + declare void @collect() + + declare i64 @create_tuple(i64) + + declare i64 @create_tuple_init(i64, i64) + + declare i64 @field(i64, i64) + + declare void @rt_init(i64) + + define i64 @main() { + entry: + %t_0 = alloca i64, align 8 + call void @rt_init(i64 5120) + br i1 true, label %then, label %else + + then: ; preds = %entry + br label %ifcont + + else: ; preds = %entry + br label %ifcont + + ifcont: ; preds = %else, %then + %iftmp = phi i64 [ 3, %then ], [ 5, %else ] + store i64 %iftmp, ptr %t_0, align 8 + %t_01 = load i64, ptr %t_0, align 8 + call void @collect() + ret i64 0 + } |}] + +let%expect_test "tuple" = + codegen_prog_str {| (1, 2, 3, 4) |}; + [%expect {| + ; ModuleID = 'main' + source_filename = "main" + target triple = "riscv64-unknown-linux-gnu" + + declare void @print_int(i64) + + declare i64 @alloc_block(i64) + + declare i64 @alloc_closure(i64, i64) + + declare i64 @apply1(i64, i64) + + declare void @print_gc_status() + + declare void @collect() + + declare i64 @create_tuple(i64) + + declare i64 @create_tuple_init(i64, i64) + + declare i64 @field(i64, i64) + + declare void @rt_init(i64) + + define i64 @main() { + entry: + %t_0 = alloca i64, align 8 + call void @rt_init(i64 5120) + %tuple_vals_alloca = alloca i64, i64 4, align 8 + %ptr_to_elem = getelementptr i64, ptr %tuple_vals_alloca, i64 0 + store i64 3, ptr %ptr_to_elem, align 8 + %ptr_to_elem1 = getelementptr i64, ptr %tuple_vals_alloca, i64 1 + store i64 5, ptr %ptr_to_elem1, align 8 + %ptr_to_elem2 = getelementptr i64, ptr %tuple_vals_alloca, i64 2 + store i64 7, ptr %ptr_to_elem2, align 8 + %ptr_to_elem3 = getelementptr i64, ptr %tuple_vals_alloca, i64 3 + store i64 9, ptr %ptr_to_elem3, align 8 + %alloca_as_i64 = ptrtoint ptr %tuple_vals_alloca to i64 + %tuple_tmp = call i64 @create_tuple_init(i64 4, i64 %alloca_as_i64) + store i64 %tuple_tmp, ptr %t_0, align 8 + %t_04 = load i64, ptr %t_0, align 8 + call void @collect() + ret i64 0 + } |}] + +let%expect_test "load" = + codegen_prog_str {| let a, b = (1, 2) in a, b |}; + [%expect {| + ; ModuleID = 'main' + source_filename = "main" + target triple = "riscv64-unknown-linux-gnu" + + declare void @print_int(i64) + + declare i64 @alloc_block(i64) + + declare i64 @alloc_closure(i64, i64) + + declare i64 @apply1(i64, i64) + + declare void @print_gc_status() + + declare void @collect() + + declare i64 @create_tuple(i64) + + declare i64 @create_tuple_init(i64, i64) + + declare i64 @field(i64, i64) + + declare void @rt_init(i64) + + define i64 @main() { + entry: + %t_1 = alloca i64, align 8 + %b = alloca i64, align 8 + %t_2 = alloca i64, align 8 + %a = alloca i64, align 8 + %t_3 = alloca i64, align 8 + %t_0 = alloca i64, align 8 + call void @rt_init(i64 5120) + %tuple_vals_alloca = alloca i64, i64 2, align 8 + %ptr_to_elem = getelementptr i64, ptr %tuple_vals_alloca, i64 0 + store i64 3, ptr %ptr_to_elem, align 8 + %ptr_to_elem1 = getelementptr i64, ptr %tuple_vals_alloca, i64 1 + store i64 5, ptr %ptr_to_elem1, align 8 + %alloca_as_i64 = ptrtoint ptr %tuple_vals_alloca to i64 + %tuple_tmp = call i64 @create_tuple_init(i64 2, i64 %alloca_as_i64) + store i64 %tuple_tmp, ptr %t_0, align 8 + %t_02 = load i64, ptr %t_0, align 8 + %load_tmp = call i64 @field(i64 %t_02, i64 0) + store i64 %load_tmp, ptr %t_3, align 8 + %t_33 = load i64, ptr %t_3, align 8 + store i64 %t_33, ptr %a, align 8 + %t_04 = load i64, ptr %t_0, align 8 + %load_tmp5 = call i64 @field(i64 %t_04, i64 1) + store i64 %load_tmp5, ptr %t_2, align 8 + %t_26 = load i64, ptr %t_2, align 8 + store i64 %t_26, ptr %b, align 8 + %a7 = load i64, ptr %a, align 8 + %b8 = load i64, ptr %b, align 8 + %tuple_vals_alloca9 = alloca i64, i64 2, align 8 + %ptr_to_elem10 = getelementptr i64, ptr %tuple_vals_alloca9, i64 0 + store i64 %a7, ptr %ptr_to_elem10, align 8 + %ptr_to_elem11 = getelementptr i64, ptr %tuple_vals_alloca9, i64 1 + store i64 %b8, ptr %ptr_to_elem11, align 8 + %alloca_as_i6412 = ptrtoint ptr %tuple_vals_alloca9 to i64 + %tuple_tmp13 = call i64 @create_tuple_init(i64 2, i64 %alloca_as_i6412) + store i64 %tuple_tmp13, ptr %t_1, align 8 + %t_114 = load i64, ptr %t_1, align 8 + call void @collect() + ret i64 0 + } |}] + +let%expect_test "new function " = + codegen_prog_str {| let a = fun x -> x |}; + [%expect {| + ; ModuleID = 'main' + source_filename = "main" + target triple = "riscv64-unknown-linux-gnu" + + declare void @print_int(i64) + + declare i64 @alloc_block(i64) + + declare i64 @alloc_closure(i64, i64) + + declare i64 @apply1(i64, i64) + + declare void @print_gc_status() + + declare void @collect() + + declare i64 @create_tuple(i64) + + declare i64 @create_tuple_init(i64, i64) + + declare i64 @field(i64, i64) + + declare void @rt_init(i64) + + define i64 @a(i64 %x) { + entry: + %x1 = alloca i64, align 8 + store i64 %x, ptr %x1, align 8 + %x2 = load i64, ptr %x1, align 8 + ret i64 %x2 + } + + define i64 @main() { + entry: + call void @rt_init(i64 5120) + call void @collect() + ret i64 0 + } |}] + +let%expect_test "call function " = + codegen_prog_str {| + let a = fun x -> x;; + + let main = a 5;; + |}; + [%expect {| + ; ModuleID = 'main' + source_filename = "main" + target triple = "riscv64-unknown-linux-gnu" + + declare void @print_int(i64) + + declare i64 @alloc_block(i64) + + declare i64 @alloc_closure(i64, i64) + + declare i64 @apply1(i64, i64) + + declare void @print_gc_status() + + declare void @collect() + + declare i64 @create_tuple(i64) + + declare i64 @create_tuple_init(i64, i64) + + declare i64 @field(i64, i64) + + declare void @rt_init(i64) + + define i64 @a(i64 %x) { + entry: + %x1 = alloca i64, align 8 + store i64 %x, ptr %x1, align 8 + %x2 = load i64, ptr %x1, align 8 + ret i64 %x2 + } + + define i64 @main() { + entry: + %main = alloca i64, align 8 + %t_1 = alloca i64, align 8 + call void @rt_init(i64 5120) + %calltmp = call i64 @a(i64 11) + store i64 %calltmp, ptr %t_1, align 8 + %t_11 = load i64, ptr %t_1, align 8 + store i64 %t_11, ptr %main, align 8 + call void @collect() + ret i64 0 + } |}] + + +let%expect_test "partial " = + codegen_prog_str {| + let a = fun x -> fun y -> x;; + + let main = a 5;; + |}; + [%expect {| + ; ModuleID = 'main' + source_filename = "main" + target triple = "riscv64-unknown-linux-gnu" + + declare void @print_int(i64) + + declare i64 @alloc_block(i64) + + declare i64 @alloc_closure(i64, i64) + + declare i64 @apply1(i64, i64) + + declare void @print_gc_status() + + declare void @collect() + + declare i64 @create_tuple(i64) + + declare i64 @create_tuple_init(i64, i64) + + declare i64 @field(i64, i64) + + declare void @rt_init(i64) + + define i64 @a(i64 %x, i64 %y) { + entry: + %y2 = alloca i64, align 8 + %x1 = alloca i64, align 8 + store i64 %x, ptr %x1, align 8 + store i64 %y, ptr %y2, align 8 + %x3 = load i64, ptr %x1, align 8 + ret i64 %x3 + } + + define i64 @main() { + entry: + %main = alloca i64, align 8 + %t_1 = alloca i64, align 8 + call void @rt_init(i64 5120) + %closure_tmp = call i64 @alloc_closure(i64 ptrtoint (ptr @a to i64), i64 2) + %apptmp = call i64 @apply1(i64 %closure_tmp, i64 11) + store i64 %apptmp, ptr %t_1, align 8 + %t_11 = load i64, ptr %t_1, align 8 + store i64 %t_11, ptr %main, align 8 + call void @collect() + ret i64 0 + } |}] + + +let%expect_test "user main" = + codegen_prog_str {| + let main x = x + + let a = print_int (main 5) + |}; + [%expect{| + ; ModuleID = 'main' + source_filename = "main" + target triple = "riscv64-unknown-linux-gnu" + + declare void @print_int(i64) + + declare i64 @alloc_block(i64) + + declare i64 @alloc_closure(i64, i64) + + declare i64 @apply1(i64, i64) + + declare void @print_gc_status() + + declare void @collect() + + declare i64 @create_tuple(i64) + + declare i64 @create_tuple_init(i64, i64) + + declare i64 @field(i64, i64) + + declare void @rt_init(i64) + + define i64 @__user_main(i64 %x) { + entry: + %x1 = alloca i64, align 8 + store i64 %x, ptr %x1, align 8 + %x2 = load i64, ptr %x1, align 8 + ret i64 %x2 + } + + define i64 @main() { + entry: + %a = alloca i64, align 8 + %t_2 = alloca i64, align 8 + %t_1 = alloca i64, align 8 + call void @rt_init(i64 5120) + %calltmp = call i64 @__user_main(i64 11) + store i64 %calltmp, ptr %t_1, align 8 + %t_11 = load i64, ptr %t_1, align 8 + call void @print_int(i64 %t_11) + store i64 0, ptr %t_2, align 8 + %t_22 = load i64, ptr %t_2, align 8 + store i64 %t_22, ptr %a, align 8 + call void @collect() + ret i64 0 + } |}] + + +let%expect_test "use reserved name" = + codegen_prog_str {| + let some_fun x y = x + y + let closure_tmp x y = x + y + + let a = print_int (some_fun 5 10) + let b = print_int (closure_tmp 5 10) + let closure_tmp = 5 + let c = print_int closure_tmp + |}; + [%expect{| + ; ModuleID = 'main' + source_filename = "main" + target triple = "riscv64-unknown-linux-gnu" + + declare void @print_int(i64) + + declare i64 @alloc_block(i64) + + declare i64 @alloc_closure(i64, i64) + + declare i64 @apply1(i64, i64) + + declare void @print_gc_status() + + declare void @collect() + + declare i64 @create_tuple(i64) + + declare i64 @create_tuple_init(i64, i64) + + declare i64 @field(i64, i64) + + declare void @rt_init(i64) + + define i64 @some_fun(i64 %x, i64 %y) { + entry: + %t_0 = alloca i64, align 8 + %y2 = alloca i64, align 8 + %x1 = alloca i64, align 8 + store i64 %x, ptr %x1, align 8 + store i64 %y, ptr %y2, align 8 + %x3 = load i64, ptr %x1, align 8 + %y4 = load i64, ptr %y2, align 8 + %addtmp1 = add i64 %x3, %y4 + %addtmp2 = sub i64 %addtmp1, 1 + store i64 %addtmp2, ptr %t_0, align 8 + %t_05 = load i64, ptr %t_0, align 8 + ret i64 %t_05 + } + + define i64 @closure_tmp(i64 %x, i64 %y) { + entry: + %t_2 = alloca i64, align 8 + %y2 = alloca i64, align 8 + %x1 = alloca i64, align 8 + store i64 %x, ptr %x1, align 8 + store i64 %y, ptr %y2, align 8 + %x3 = load i64, ptr %x1, align 8 + %y4 = load i64, ptr %y2, align 8 + %addtmp1 = add i64 %x3, %y4 + %addtmp2 = sub i64 %addtmp1, 1 + store i64 %addtmp2, ptr %t_2, align 8 + %t_25 = load i64, ptr %t_2, align 8 + ret i64 %t_25 + } + + define i64 @main() { + entry: + %c = alloca i64, align 8 + %t_10 = alloca i64, align 8 + %closure_tmp9 = alloca i64, align 8 + %b = alloca i64, align 8 + %t_9 = alloca i64, align 8 + %t_8 = alloca i64, align 8 + %t_7 = alloca i64, align 8 + %a = alloca i64, align 8 + %t_6 = alloca i64, align 8 + %t_5 = alloca i64, align 8 + %t_4 = alloca i64, align 8 + call void @rt_init(i64 5120) + %closure_tmp = call i64 @alloc_closure(i64 ptrtoint (ptr @some_fun to i64), i64 2) + %apptmp = call i64 @apply1(i64 %closure_tmp, i64 11) + store i64 %apptmp, ptr %t_4, align 8 + %t_4_val = load i64, ptr %t_4, align 8 + %apptmp1 = call i64 @apply1(i64 %t_4_val, i64 21) + store i64 %apptmp1, ptr %t_5, align 8 + %t_52 = load i64, ptr %t_5, align 8 + call void @print_int(i64 %t_52) + store i64 0, ptr %t_6, align 8 + %t_63 = load i64, ptr %t_6, align 8 + store i64 %t_63, ptr %a, align 8 + %closure_tmp4 = call i64 @alloc_closure(i64 ptrtoint (ptr @closure_tmp to i64), i64 2) + %apptmp5 = call i64 @apply1(i64 %closure_tmp4, i64 11) + store i64 %apptmp5, ptr %t_7, align 8 + %t_7_val = load i64, ptr %t_7, align 8 + %apptmp6 = call i64 @apply1(i64 %t_7_val, i64 21) + store i64 %apptmp6, ptr %t_8, align 8 + %t_87 = load i64, ptr %t_8, align 8 + call void @print_int(i64 %t_87) + store i64 0, ptr %t_9, align 8 + %t_98 = load i64, ptr %t_9, align 8 + store i64 %t_98, ptr %b, align 8 + store i64 11, ptr %closure_tmp9, align 8 + %closure_tmp10 = load i64, ptr %closure_tmp9, align 8 + call void @print_int(i64 %closure_tmp10) + store i64 0, ptr %t_10, align 8 + %t_1011 = load i64, ptr %t_10, align 8 + store i64 %t_1011, ptr %c, align 8 + call void @collect() + ret i64 0 + } |}] diff --git a/XML/many_tests/unit/codegen_llvm.mli b/XML/many_tests/unit/codegen_llvm.mli new file mode 100644 index 00000000..26ede79c --- /dev/null +++ b/XML/many_tests/unit/codegen_llvm.mli @@ -0,0 +1,3 @@ +(** Copyright 2025-2026, Mikhail Gavrilenko,Danila Rudnev-Stepanyan, Daniel Vlasenko *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) diff --git a/XML/many_tests/unit/dune b/XML/many_tests/unit/dune new file mode 100644 index 00000000..0702230e --- /dev/null +++ b/XML/many_tests/unit/dune @@ -0,0 +1,9 @@ +(library + (name XML_unittests) + (public_name XML.Many_tests.Unittests) + (libraries base stdio XML.Common XML.Middleend XML.Backend) + (preprocess + (pps ppx_expect ppx_inline_test ppx_expect)) + (instrumentation + (backend bisect_ppx)) + (inline_tests)) diff --git a/XML/many_tests/unit/infer.ml b/XML/many_tests/unit/infer.ml new file mode 100644 index 00000000..7bdd676d --- /dev/null +++ b/XML/many_tests/unit/infer.ml @@ -0,0 +1,455 @@ +(** Copyright 2025-2026, Mikhail Gavrilenko,Danila Rudnev-Stepanyan, Daniel Vlasenko *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Middleend.Infer +open Common.Parser +open Common.Pprinter +open Common.Ast.TypeExpr + +let infer_exp_str ?(rst = true) ?(env = []) str = + let exp = parse_exp_str str in + if rst then reset_gensym (); + match infer_exp env exp with + | Ok (_, ty) -> + pprint_type Format.std_formatter ty + | Error err -> pprint_err Format.std_formatter err + +let infer_pat_str ?(rst = true) ?(env = []) str = + let pat = parse_pat_str str in + if rst then reset_gensym (); + match infer_pat env pat with + | Ok (_, ty) -> + pprint_type Format.std_formatter ty + | Error err -> pprint_err Format.std_formatter err + +let infer_prog_str ?(env = env_with_things) str = + let prog = parse_str str in + reset_gensym (); + match infer_program env prog with + | Ok (new_env, names) -> + pprint_env new_env names + | Error err -> pprint_err Format.std_formatter err + +let show_etyp env exp = + match infer_exp env exp with + | Ok (_, ty) -> + Stdio.print_endline (Common.Ast.TypeExpr.show ty) + | Error err -> pprint_err Format.std_formatter err + +let type_bool = Type_construct ("bool", []) +let type_unit = Type_construct ("unit", []) +let type_char = Type_construct ("char", []) +let type_int = Type_construct ("int", []) +let type_string = Type_construct ("string", []) + +let env = env_with_things + +(************************** Expressions **************************) + +let%expect_test "char" = + infer_exp_str {| 'a' |}; + [%expect{| char |}] + + +let%expect_test "int" = + infer_exp_str {| 1 |}; + [%expect{| int |}] + + +let%expect_test "str" = + infer_exp_str {| "Kakadu" |}; + [%expect{| string |}] + + +let%expect_test "id in env" = + infer_exp_str {| m |} ~env:[("m", Type_var {contents = Unbound ("a", 0)})]; + [%expect{| 'a |}] + + +let%expect_test "id not in env" = + infer_exp_str {| m |}; + [%expect{| Unbound variable m |}];; + + +let%expect_test "tuple 2" = + infer_exp_str {| (1, 2) |}; + [%expect{| int * int |}] + + +let%expect_test "tuple 3" = + infer_exp_str {| (1, 2, 3) |}; + [%expect{| int * int * int |}] + + + let%expect_test "tuple 4" = + infer_exp_str {| (1, 2, 3, 4) |}; + [%expect{| int * int * int * int |}] + + +let%expect_test "tuples in tuple" = + infer_exp_str {| ((1, 2), (3, 4)) |}; + [%expect{| (int * int) * (int * int) |}] + + + let%expect_test "construct none" = + infer_exp_str {| None |} ~env; + [%expect {| 'a option |}] + + +let%expect_test "construct some" = + infer_exp_str {| Some 1 |} ~env; + [%expect {| int option |}] + + +let%expect_test "if (string) " = + infer_exp_str {| if "trololo" then 1 |}; + [%expect {| Cannot unify different constructors: string and bool |}] + + +let%expect_test "if (bool) then (not unit)" = + let env = ["cond", type_bool] in + infer_exp_str {| if cond then 1 |} ~env; + [%expect{| Cannot unify different constructors: int and unit |}] + + +let%expect_test "if (bool) then (unit)" = + let env = ["cond", type_bool; "bodyvar", type_unit] in + infer_exp_str {| if cond then bodyvar |} ~env; + [%expect{| unit |}] + + +let%expect_test "if (bool) then 'a else 'a" = + let env = ["cond", type_bool; "x", Type_var {contents = Unbound ("a", 0)}; "y", Type_var {contents = Unbound ("a", 0)}] in + infer_exp_str {| if cond then x else y |} ~env; + [%expect{| 'a |}] + + +let%expect_test "if (bool) then 'a else 'b" = + let env = ["cond", type_bool; "x", Type_var {contents = Unbound ("a", 0)}; "y", Type_var {contents = Unbound ("b", 0)}] in + infer_exp_str {| if cond then x else y |} ~env; + [%expect{| 'b |}] + + +let%expect_test "apply int -> int to int" = + let env = ["f", Type_arrow (type_int, type_int); "x", type_int] in + infer_exp_str {| f x |} ~env; + [%expect{| int |}] + + +let%expect_test "apply int -> int to string" = + let env = ["f", Type_arrow (type_int, type_int); "x", type_string] in + infer_exp_str {| f x |} ~env; + [%expect{| Cannot unify different constructors: int and string |}] + + +let%expect_test "apply 'a -> 'a to 'b" = + let env = ["f", Type_arrow (Type_var {contents = Unbound ("s", 0)}, Type_var {contents = Unbound ("s", 0)}); "x", Type_var {contents = Unbound ("t", 0)}] in + infer_exp_str {| f x |} ~env ~rst: false; + [%expect{| 'b |}] + + + (* not sure if this is right *) +let%expect_test "apply 'a to 'a (different vars)" = + let env = ["f", Type_var {contents = Unbound ("t", 0)}; "x", Type_var {contents = Unbound ("t", 0)}] in + infer_exp_str {| f x |} ~env ~rst: false; + [%expect {| 'c |}] + + +let%expect_test "apply 'a to 'a (same var)" = + let env = ["x", Type_var {contents = Unbound ("t", 0)}] in + infer_exp_str {| x x |} ~env ~rst: false; + [%expect{| Occurs check |}] + + +let%expect_test "apply 'a to 'b" = + let env = ["f", Type_var {contents = Unbound ("s", 0)}; "x", Type_var {contents = Unbound ("t", 0)}] in + infer_exp_str {| f x |} ~env ~rst: false; + [%expect{| 'e |}] + + +(************************** Patterns **************************) + +let%expect_test "id in env" = + let env = ["m", (Type_var {contents = Unbound ("c", 0)})] in + infer_pat_str {| m |} ~env; + [%expect {| 'a |}];; + + +let%expect_test "id not in env" = + infer_pat_str {| m |}; + [%expect {| 'a |}];; + + +let%expect_test "any" = + infer_pat_str {| _ |}; + [%expect {| 'a |}];; + + +let%expect_test "char" = + infer_pat_str {| 'a' |}; + [%expect {| char |}];; + + +let%expect_test "int" = + infer_pat_str {| 1 |}; + [%expect {| int |}];; + + +let%expect_test "str" = + infer_pat_str {| "kakadu" |}; + [%expect {| string |}];; + + +let%expect_test "tuple 2" = + infer_pat_str {| (1, 2) |}; + [%expect {| int * int |}];; + + +let%expect_test "tuple 3" = + infer_pat_str {| (1, 2, 3) |}; + [%expect {| int * int * int |}];; + + +let%expect_test "tuple 4" = + infer_pat_str {| (1, 2, 3, 4) |}; + [%expect {| int * int * int * int |}];; + + +let%expect_test "tuples in tuple" = + infer_pat_str {| ((1, 2), (3, 4)) |}; + [%expect {| (int * int) * (int * int) |}];; + + +let%expect_test "construct none" = + infer_pat_str {| None |} ~env; + [%expect {| 'a option |}] + + + let%expect_test "construct some" = + infer_pat_str {| Some 1 |} ~env; + [%expect{| int option |}] + + +(************************** Funs **************************) + +let%expect_test "fun 'a -> 'a (new var)" = + infer_exp_str {| fun x -> x |}; + [%expect {| 'a -> 'a |}] + + +let%expect_test "fun 'a -> 'a (shadow)" = + let env = ["x", Type_var {contents = Unbound ("s", 0)}] in + infer_exp_str {| fun x -> x |} ~env; + [%expect {| 'a -> 'a |}] + + +let%expect_test "fun 'a -> 'b (not in env)" = + infer_exp_str {| fun x -> y |}; + [%expect{| Unbound variable y |}] + + +let%expect_test "fun 'a -> 'b (in env)" = + let env = ["y", Type_var {contents = Unbound ("s", 0)}] in + infer_exp_str {| fun x -> y |} ~env; + [%expect{| 'a -> 's |}] + + +let%expect_test {| fun x -> fun y -> x y |} = + infer_exp_str {| fun x -> fun y -> x y |}; + [%expect{| ('b -> 'c) -> 'b -> 'c |}] + + +let%expect_test {| fun x y -> x y |} = + infer_exp_str {| fun x y -> x y |}; + [%expect{| ('b -> 'c) -> 'b -> 'c |}] + + +let%expect_test {| (fun f a b -> f a, f b) (fun x -> x) 1 "mystr" |} = + infer_exp_str {| (fun f a b -> f a, f b) (fun x -> x) 1 "mystr" |}; + [%expect{| Cannot unify different constructors: int and string |}] + + +(************************** Match, function **************************) + +let%expect_test "correct match" = + let env = [ "a", Type_construct("option", [Type_var {contents = Unbound ("a", 0)}])] @ env in + infer_exp_str {| match a with | Some x -> 1 | None -> 2 |} ~env; + [%expect {| + int |}] + + +let%expect_test "use match pattern in body" = + let env = [ "a", Type_construct("option", [Type_var {contents = Unbound ("a", 0)}])] @ env in + infer_exp_str {| match a with | Some x -> x | None -> 2 |} ~env; + [%expect {| + int |}] + + +let%expect_test "match different constructors" = + let env = [ "a", Type_construct("option", [Type_var {contents = Unbound ("a", 0)}])] @ env in + infer_exp_str {| match a with | Some x -> 1 | [] -> 2 |} ~env; + [%expect{| Cannot unify different constructors: list and option |}] + + +let%expect_test "match option with list constructors" = + let env = [ "a", Type_construct("option", [Type_var {contents = Unbound ("a", 0)}])] @ env in + infer_exp_str {| match a with | x :: tl -> 1 | [] -> 2 |} ~env; + [%expect{| Cannot unify different constructors: list and option |}] + + +let%expect_test "match different types of expr 1" = + let env = [ "a", Type_construct("option", [Type_var {contents = Unbound ("a", 0)}])] @ env in + infer_exp_str {| match a with | Some x -> 'a' | None -> 1234 |} ~env; + [%expect{| Cannot unify different constructors: char and int |}] + + +let%expect_test "match different types of expr 2" = + let env = [ "b", Type_construct("list", [Type_var {contents = Unbound ("a", 0)}])] @ env in + infer_exp_str {| match b with | x :: y :: tl -> 'a' | x :: tl -> 'b' | _ -> 1234 |} ~env; + [%expect{| Cannot unify different constructors: char and int |}] + + +let%expect_test "correct function" = + infer_exp_str {| function | Some x -> 1 | None -> 2 |} ~env; + [%expect {| + 'd option -> int |}] + + +let%expect_test "use function pattern in body" = + infer_exp_str {| function | Some x -> x | None -> 2 |} ~env; + [%expect {| + int option -> int |}] + + +let%expect_test "function different constructors" = + infer_exp_str {| function | Some x -> 1 | [] -> 2 |} ~env; + [%expect{| Cannot unify different constructors: list and option |}] + + +let%expect_test "function different types of expr 1" = + infer_exp_str {| function | Some x -> 'a' | None -> 1234 |} ~env; + [%expect{| Cannot unify different constructors: char and int |}] + + +let%expect_test "function different types of expr 2" = + infer_exp_str {| function | x :: y :: tl -> 'a' | x :: tl -> 'b' | _ -> 1234 |} ~env; + [%expect{| Cannot unify different constructors: char and int |}] + + +(************************** Let in **************************) + +let%expect_test {| let 1 = 1 in 2 |} = + infer_exp_str {| let 1 = 1 in 2 |}; + [%expect{| int |}] + + +let%expect_test {| let a = 1 in 2 |} = + infer_exp_str {| let a = 1 in 2 |}; + [%expect{| int |}] + + +let%expect_test {| let a = 1 in a |} = +infer_exp_str {| let a = 1 in a |}; + [%expect{| int |}] + + +let%expect_test {| let a = 1 in "str" |} = + infer_exp_str {| let a = 1 in "str" |}; + [%expect{| string |}] + + +let%expect_test "let poly" = + infer_exp_str {| let a = fun x -> x in a |}; + [%expect {| 'c -> 'c |}] + + +let%expect_test "let poly 2" = + infer_exp_str {| let a = fun x -> x in (a 1, a "str") |}; + [%expect{| int * string |}] + + +let%expect_test "poly in env" = + let env = ["=", Type_arrow (Quant_type_var "a", Type_arrow(Quant_type_var "a", type_bool))] in + infer_exp_str {| let a = 1 in 1 = 1, "str" = "str" |} ~env; + [%expect{| bool * bool |}] + + +let%expect_test {| let a, b = 1, 2 in a |} = + infer_exp_str {| let a, b = 1, 2 in a |} ; + [%expect{| int |}] + + +let%expect_test {| let a, b, c = 1, 2 in a |} = + infer_exp_str {| let a, b, c = 1, 2 in a |} ; + [%expect{| Cannot unify tuples of different sizes |}] + + +let%expect_test {| let a, b = 1, 2, 3 in a |} = + infer_exp_str {| let a, b = 1, 2, 3 in a |}; + [%expect{| Cannot unify tuples of different sizes |}] + + +let%expect_test "let and" = + infer_exp_str {| let a = 1 and b = "punk" in b |}; + [%expect {| string |}] + + +let%expect_test "factorial" = + infer_exp_str {| let rec fac n = if n = 1 then 1 else n * fac (n-1) in fac 4 |} ~env; + [%expect {| int |}] + + +let%expect_test "fibonacci" = + infer_exp_str {| let rec fib n = if n <= 1 then 1 else (fib (n-1)) + (fib (n-2)) in fib 4 |} ~env; + [%expect {| int |}] + + +let%expect_test "mutual recursion" = + let env = env @ ["not", Type_arrow (type_bool, type_bool); "true", type_bool] in + infer_exp_str ~env {| + let rec is_odd n = + if n = 0 then true else is_even (n-1) + and is_even n = not (is_odd n) + in is_odd 5 + |}; + [%expect {| bool |}] + + +let%expect_test "shadow with itself" = + infer_prog_str + {| + let test3 a b c = + let a = print_int a in 0 + |}; +[%expect{| val test3 : int -> 'a -> 'b -> int |}];; + + +let%expect_test "shadow" = + infer_prog_str {| + let x = 5 + + let x = "string" + |}; +[%expect {| + val x : string |}] + + +let%expect_test "shadow with itself 2" = + infer_prog_str {| + let x = 5 + + let x = x + |}; +[%expect {| + val x : int |}] + + +let%expect_test "weird let rec" = + infer_prog_str {| let rec x = x |}; +[%expect{| This kind of expression is not allowed as right-hand side of `let rec' |}] + + +let%expect_test "too polymorphic1" = + infer_prog_str {| let map f p = let (a,b) = p in (f a, f b) |}; +[%expect {| val map : ('a -> 'b) -> 'a * 'a -> 'b * 'b |}] diff --git a/XML/many_tests/unit/infer.mli b/XML/many_tests/unit/infer.mli new file mode 100644 index 00000000..26ede79c --- /dev/null +++ b/XML/many_tests/unit/infer.mli @@ -0,0 +1,3 @@ +(** Copyright 2025-2026, Mikhail Gavrilenko,Danila Rudnev-Stepanyan, Daniel Vlasenko *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) diff --git a/XML/many_tests/unit/ll.ml b/XML/many_tests/unit/ll.ml new file mode 100644 index 00000000..1858c8b7 --- /dev/null +++ b/XML/many_tests/unit/ll.ml @@ -0,0 +1,160 @@ +(** Copyright 2026, Mikhail Gavrilenko, Danila Rudnev-Stepanyan, Daniel Vlasenko *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + + +open Middleend.Cc +open Middleend.Anf +open Middleend.Pprinter +open Middleend.Ll +open Common.Parser + +let to_ll str = + let prog = parse_str str in + let ccprog = cc_program prog in + let aprog = anf_program ccprog in + let llprog = lambda_lift_program aprog in + print_anf_program Format.std_formatter llprog + + +let%expect_test "020closures.pdf" = +to_ll {| + let fac n = + let rec fack n k = + if n <= 1 then k 1 + else fack (n-1) (fun m -> k (m * n)) + in + fack n (fun x -> x) +|}; +[%expect {| + let fac = fun n -> let t_12 = t_11__ll$1 fack n + in let t_13 = t_13__ll$3 in let t_14 = t_12 t_13 in t_14;; + let t_6__ll$2 = fun k n m -> let t_4 = (m * n) in let t_5 = k t_4 in t_5;; + let t_11__ll$1 = fun fack n + k -> let t_0 = (n <= 1) + in let t_10 = if t_0 then let t_1 = k 1 in t_1 else + let t_2 = (n - 1) + in let t_3 = fack t_2 + in let t_7 = t_6__ll$2 k + in let t_8 = t_7 n + in let t_9 = t_3 t_8 + in t_9 + in t_10;; + let t_13__ll$3 = fun x -> x;; |}] + + +let%expect_test "LN_CC_1" = +to_ll {| + let f x = + let g y = x + in + g +|}; +[%expect {| + let f = fun x -> let t_1 = t_0__ll$1 x in let g = t_1 in let t_2 = g x in t_2;; + + let t_0__ll$1 = fun x y -> x;; |}] + + +let%expect_test "LN_CC_2" = +to_ll {| + fun x -> x + a + b +|}; +[%expect {| + let t_3 = t_2__ll$1 a in let t_4 = t_3 b in t_4;; + let t_2__ll$1 = fun a b x -> let t_0 = (x + a) in let t_1 = (t_0 + b) in t_1;; |}] + + +let%expect_test "LN_CC_3" = +to_ll {| + let mkPair = fun x -> + let add = fun y -> x + y in + let mul = fun z -> x * z in + (add, mul) +|}; +[%expect {| + let mkPair = fun x -> let t_2 = t_1__ll$1 x + in let add = t_2 + in let t_5 = t_4__ll$2 x + in let mul = t_5 + in let t_6 = add x + in let t_7 = mul x + in let t_8 = alloc(t_6, + t_7) in t_8;; + let t_1__ll$1 = fun x y -> let t_0 = (x + y) in t_0;; + let t_4__ll$2 = fun x z -> let t_3 = (x * z) in t_3;; |}] + + +let%expect_test "if" = +to_ll {| + let f = + let ret1 = 1 in + let ret2 = 2 in + let greater_10 x = if x > 10 then ret1 else ret2 in + greater_10 +|}; +[%expect {| + let f = let t_0 = 1 + in let ret1 = t_0 + in let t_1 = 2 + in let ret2 = t_1 + in let t_5 = t_4__ll$1 ret1 + in let t_6 = t_5 ret2 + in let greater_10 = t_6 + in let t_7 = greater_10 ret1 + in let t_8 = t_7 ret2 in t_8;; + let t_4__ll$1 = fun ret1 ret2 + x -> let t_2 = (x > 10) + in let t_3 = if t_2 then ret1 else ret2 in t_3;; |}] + + +let%expect_test "tuple" = +to_ll {| + let tuples = + let a, b = 10, 20 in + let to_tuple x = x, a, b in + to_tuple +|}; +[%expect {| + let tuples = let t_0 = alloc(10, 20) + in let t_8 = t_0[0] + in let a = t_8 + in let t_7 = t_0[8] + in let b = t_7 + in let t_3 = t_2__ll$1 a + in let t_4 = t_3 b + in let to_tuple = t_4 + in let t_5 = to_tuple a + in let t_6 = t_5 b + in t_6;; + let t_2__ll$1 = fun a b x -> let t_1 = alloc(x, a, b) in t_1;; |}] + + +let%expect_test "func" = +to_ll {| + let f x y = + ((fun x -> fun x -> y) x) x +|}; +[%expect {| + let f = fun x + y -> let t_3 = t_2__ll$1 y + in let t_4 = t_3 x in let t_5 = t_4 x in t_5;; + let t_0__ll$2 = fun y x -> y;; + let t_2__ll$1 = fun y x -> let t_1 = t_0__ll$2 y in t_1;; |}] + + +let%expect_test "load" = +to_ll {| + let f x = + let g y = + let (a, b) = x, y in + a in g +|}; +[%expect{| + let f = fun x -> let t_4 = t_3__ll$1 x in let g = t_4 in let t_5 = g x in t_5;; + + let t_3__ll$1 = fun x + y -> let t_0 = alloc(x, y) + in let t_2 = t_0[0] + in let a = t_2 + in let t_1 = t_0[8] in let b = t_1 in a;; |}] diff --git a/XML/many_tests/unit/ll.mli b/XML/many_tests/unit/ll.mli new file mode 100644 index 00000000..536d103e --- /dev/null +++ b/XML/many_tests/unit/ll.mli @@ -0,0 +1,3 @@ +(** Copyright 2026, Mikhail Gavrilenko, Danila Rudnev-Stepanyan, Daniel Vlasenko *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) diff --git a/XML/many_tests/unit/machine.ml b/XML/many_tests/unit/machine.ml new file mode 100644 index 00000000..988f9e83 --- /dev/null +++ b/XML/many_tests/unit/machine.ml @@ -0,0 +1,272 @@ +(** Copyright 2026, Mikhail Gavrilenko, Danila Rudnev-Stepanyan, Daniel Vlasenko *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Backend.Machine + +let test x = + if x then Format.fprintf Format.std_formatter "true" else Format.fprintf Format.std_formatter "false" + +let%expect_test "addi" = + test (Addi (SP, SP, 8) = Addi (SP, SP, 8)); + [%expect {| true |}] + +let%expect_test "addi false" = + test (Addi (SP, SP, 8) = Addi (SP, SP, 16)); + [%expect {| false |}] + +let%expect_test "add" = + test (Add (SP, SP, A 0) = Add (SP, SP, A 0)); + [%expect {| true |}] + +let%expect_test "add false" = + test (Add (SP, SP, A 0) = Add (SP, A 0, SP)); + [%expect {| false |}] + +let%expect_test "sub" = + test (Sub (SP, SP, A 0) = Sub (SP, SP, A 0)); + [%expect {| true |}] + +let%expect_test "sub false" = + test (Sub (SP, SP, A 0) = Sub (SP, A 0, SP)); + [%expect {| false |}] + +let%expect_test "mul" = + test (Mul (SP, SP, A 0) = Mul (SP, SP, A 0)); + [%expect {| true |}] + +let%expect_test "mul false" = + test (Mul (SP, SP, A 0) = Mul (SP, A 0, SP)); + [%expect {| false |}] + +let%expect_test "slt" = + test (Slt (SP, SP, A 0) = Slt (SP, SP, A 0)); + [%expect {| true |}] + +let%expect_test "slt false" = + test (Slt (SP, SP, A 0) = Slt (SP, A 0, SP)); + [%expect {| false |}] + +let%expect_test "seqz" = + test (Seqz (SP, A 0) = Seqz (SP, A 0)); + [%expect {| true |}] + +let%expect_test "seqz false" = + test (Seqz (SP, A 0) = Seqz (A 0, SP)); + [%expect {| false |}] + +let%expect_test "snez" = + test (Snez (SP, A 0) = Snez (SP, A 0)); + [%expect {| true |}] + +let%expect_test "snez false" = + test (Snez (SP, A 0) = Snez (A 0, SP)); + [%expect {| false |}] + +let%expect_test "xor" = + test (Xor (SP, SP, A 0) = Xor (SP, SP, A 0)); + [%expect {| true |}] + +let%expect_test "xor false" = + test (Xor (SP, SP, A 0) = Xor (SP, A 0, SP)); + [%expect {| false |}] + +let%expect_test "xori" = + test (Xori (SP, SP, 8) = Xori (SP, SP, 8)); + [%expect {| true |}] + +let%expect_test "xori false" = + test (Xori (SP, SP, 8) = Xori (SP, SP, 16)); + [%expect {| false |}] + +let%expect_test "beq" = + test (Beq (SP, A 0, "label1") = Beq (SP, A 0, "label1")); + [%expect {| true |}] + +let%expect_test "beq false reg" = + test (Beq (SP, A 0, "label1") = Beq (A 0, SP, "label1")); + [%expect {| false |}] + +let%expect_test "beq false label" = + test (Beq (SP, A 0, "label1") = Beq (SP, A 0, "label2")); + [%expect {| false |}] + +let%expect_test "blt" = + test (Blt (SP, A 0, "label1") = Blt (SP, A 0, "label1")); + [%expect {| true |}] + +let%expect_test "blt false reg" = + test (Blt (SP, A 0, "label1") = Blt (A 0, SP, "label1")); + [%expect {| false |}] + +let%expect_test "blt false label" = + test (Blt (SP, A 0, "label1") = Blt (SP, A 0, "label2")); + [%expect {| false |}] + +let%expect_test "ble" = + test (Ble (SP, A 0, "label1") = Ble (SP, A 0, "label1")); + [%expect {| true |}] + +let%expect_test "ble false reg" = + test (Ble (SP, A 0, "label1") = Ble (A 0, SP, "label1")); + [%expect {| false |}] + +let%expect_test "ble false label" = + test (Ble (SP, A 0, "label1") = Ble (SP, A 0, "label2")); + [%expect {| false |}] + +let%expect_test "lla" = + test (Lla (SP, "label1") = Lla (SP, "label1")); + [%expect {| true |}] + +let%expect_test "lla false reg" = + test (Lla (SP, "label1") = Lla (A 0, "label1")); + [%expect {| false |}] + +let%expect_test "lla false label" = + test (Lla (SP, "label1") = Lla (SP, "label2")); + [%expect {| false |}] + +let%expect_test "li" = + test (Li (SP, 42) = Li (SP, 42)); + [%expect {| true |}] + +let%expect_test "li false reg" = + test (Li (SP, 42) = Li (A 0, 42)); + [%expect {| false |}] + +let%expect_test "li false imm" = + test (Li (SP, 42) = Li (SP, 100)); + [%expect {| false |}] + +let%expect_test "ld" = + test (Ld (SP, (SP, 8)) = Ld (SP, (SP, 8))); + [%expect {| true |}] + +let%expect_test "ld false reg" = + test (Ld (SP, (SP, 8)) = Ld (A 0, (SP, 8))); + [%expect {| false |}] + +let%expect_test "ld false reg" = + test (Ld (SP, (SP, 8)) = Ld (SP, (A 0, 8))); + [%expect {| false |}] + +let%expect_test "ld false imm" = + test (Ld (SP, (SP, 8)) = Ld (SP, (SP, 16))); + [%expect {| false |}] + +let%expect_test "sd" = + test (Sd (SP, (SP, 8)) = Sd (SP, (SP, 8))); + [%expect {| true |}] + +let%expect_test "sd false reg" = + test (Sd (SP, (SP, 8)) = Sd (A 0, (SP, 8))); + [%expect {| false |}] + +let%expect_test "sd false reg" = + test (Sd (SP, (SP, 8)) = Sd (SP, (A 0, 8))); + [%expect {| false |}] + +let%expect_test "sd false imm" = + test (Sd (SP, (SP, 8)) = Sd (SP, (SP, 16))); + [%expect {| false |}] + +let%expect_test "mv" = + test (Mv (SP, A 0) = Mv (SP, A 0)); + [%expect {| true |}] + +let%expect_test "mv false" = + test (Mv (SP, A 0) = Mv (A 0, SP)); + [%expect {| false |}] + +let%expect_test "comment" = + test (Comment "test comment" = Comment "test comment"); + [%expect {| true |}] + +let%expect_test "comment false" = + test (Comment "test comment" = Comment "different comment"); + [%expect {| false |}] + +let%expect_test "label" = + test (Label "label1" = Label "label1"); + [%expect {| true |}] + +let%expect_test "label false" = + test (Label "label1" = Label "label2"); + [%expect {| false |}] + +let%expect_test "call" = + test (Call "func1" = Call "func1"); + [%expect {| true |}] + +let%expect_test "call false" = + test (Call "func1" = Call "func2"); + [%expect {| false |}] + +let%expect_test "j" = + test (J "label1" = J "label1"); + [%expect {| true |}] + +let%expect_test "j false" = + test (J "label1" = J "label2"); + [%expect {| false |}] + +let%expect_test "ecall" = + test (Ecall = Ecall); + [%expect {| true |}] + +let%expect_test "ecall false" = + test (Ecall = Ret); + [%expect {| false |}] + +let%expect_test "ret" = + test (Ret = Ret); + [%expect {| true |}] + +let%expect_test "ret false" = + test (Ret = Ecall); + [%expect {| false |}] + +let%expect_test "la" = + test (La (SP, "func1") = La (SP, "func1")); + [%expect {| true |}] + +let%expect_test "la false reg" = + test (La (SP, "func1") = La (A 0, "func1")); + [%expect {| false |}] + +let%expect_test "la false label" = + test (La (SP, "func1") = La (SP, "func2")); + [%expect {| false |}] + +let%expect_test "slli" = + test (Slli (SP, A 0, 4) = Slli (SP, A 0, 4)); + [%expect {| true |}] + +let%expect_test "slli false reg dest" = + test (Slli (SP, A 0, 4) = Slli (A 0, A 0, 4)); + [%expect {| false |}] + +let%expect_test "slli false reg src" = + test (Slli (SP, A 0, 4) = Slli (SP, SP, 4)); + [%expect {| false |}] + +let%expect_test "slli false imm" = + test (Slli (SP, A 0, 4) = Slli (SP, A 0, 8)); + [%expect {| false |}] + +let%expect_test "srai" = + test (Srai (SP, A 0, 4) = Srai (SP, A 0, 4)); + [%expect {| true |}] + +let%expect_test "srai false reg dest" = + test (Srai (SP, A 0, 4) = Srai (A 0, A 0, 4)); + [%expect {| false |}] + +let%expect_test "srai false reg src" = + test (Srai (SP, A 0, 4) = Srai (SP, SP, 4)); + [%expect {| false |}] + +let%expect_test "srai false imm" = + test (Srai (SP, A 0, 4) = Srai (SP, A 0, 8)); + [%expect {| false |}] \ No newline at end of file diff --git a/XML/many_tests/unit/machine.mli b/XML/many_tests/unit/machine.mli new file mode 100644 index 00000000..536d103e --- /dev/null +++ b/XML/many_tests/unit/machine.mli @@ -0,0 +1,3 @@ +(** Copyright 2026, Mikhail Gavrilenko, Danila Rudnev-Stepanyan, Daniel Vlasenko *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) diff --git a/XML/out.ll b/XML/out.ll deleted file mode 100644 index d187d623..00000000 --- a/XML/out.ll +++ /dev/null @@ -1,12 +0,0 @@ -; ModuleID = 'main' -source_filename = "main" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" -target triple = "x86_64-pc-linux-gnu" - -declare void @print_int(i64) - -define i64 @main() { -entry: - call void @print_int(i64 70) - ret i64 0 -} diff --git a/XML/test_qc/test_qc.ml b/XML/test_qc/test_qc.ml index fa125b7f..718195a1 100644 --- a/XML/test_qc/test_qc.ml +++ b/XML/test_qc/test_qc.ml @@ -12,7 +12,27 @@ module AST = struct type t = | Const of (int[@gen QCheck.Gen.return 1]) | Add of t * t - [@@deriving qcheck, show { with_path = false }] + [@@deriving show { with_path = false }] + + let rec gen_sized n = + match n with + | 0 -> QCheck.Gen.map (fun gen0 -> Const gen0) (QCheck.Gen.return 1) + | _ -> + QCheck.Gen.oneof_weighted + [ 1, QCheck.Gen.map (fun gen0 -> Const gen0) (QCheck.Gen.return 1) + ; ( 1 + , QCheck.Gen.map + (fun (gen0, gen1) -> Add (gen0, gen1)) + (QCheck.Gen.pair (gen_sized (n / 2)) (gen_sized (n / 2))) ) + ] + ;; + + let _ = gen_sized + let gen = QCheck.Gen.sized gen_sized + let _ = gen + let arb_sized n = QCheck.make (gen_sized n) + let _ = arb_sized + let arb = QCheck.make gen end module PP = struct