diff --git a/EML/.gitignore b/EML/.gitignore new file mode 100644 index 00000000..0e5f1e4b --- /dev/null +++ b/EML/.gitignore @@ -0,0 +1,3 @@ +/_build +/_coverage + diff --git a/EML/.ocamlformat b/EML/.ocamlformat new file mode 100644 index 00000000..25919d0e --- /dev/null +++ b/EML/.ocamlformat @@ -0,0 +1,3 @@ +version=0.27.0 +profile=janestreet + diff --git a/EML/EML.opam b/EML/EML.opam new file mode 100644 index 00000000..2f851515 --- /dev/null +++ b/EML/EML.opam @@ -0,0 +1,36 @@ +opam-version: "2.0" +synopsis: "A short LLVM demo" +maintainer: ["Victoria Ostrovskaya & Danil Usoltsev"] +authors: ["Victoria Ostrovskaya & Danil Usoltsev"] +license: "LGPL-3.0-or-later WITH OCaml-LGPL-linking-exception" +homepage: "https://github.com/Kakadu/comp24" +bug-reports: "https://github.com/Kakadu/comp24/issues" +depends: [ + "ocaml" + "dune" {>= "3.8" & = "3.19.1"} + "angstrom" {= "0.16.0"} + "qcheck" + "bisect_ppx" + "ppx_expect" + "llvm" {= "18-shared"} + "qcheck" {with-tests} + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/Kakadu/comp24.git" +depexts: [ + [ "llvm-18-dev" "clang" "gcc-riscv64-linux-gnu" "qemu-user" ] {os-distribution = "ubuntu"} +] diff --git a/EML/EML.opam.template b/EML/EML.opam.template new file mode 100644 index 00000000..f4e537bf --- /dev/null +++ b/EML/EML.opam.template @@ -0,0 +1,7 @@ +depexts: [ + [ "llvm-18-dev" "clang-18" "gcc-riscv64-linux-gnu" "g++-riscv64-linux-gnu" "qemu-user"] {os-distribution = "ubuntu"} +] +pin-depends: [ + ["ppx_deriving_qcheck.0.6" "git+https://github.com/c-cube/qcheck.git#42429bf06ba12373cad02b1404f50d0ad6238af5"] +] +# Don't edit '*.opam' file manually. Use 'dune b @install' diff --git a/EML/Makefile b/EML/Makefile new file mode 100644 index 00000000..c0615678 --- /dev/null +++ b/EML/Makefile @@ -0,0 +1,20 @@ +.PHONY: all test +all: + dune build + +test: + dune test + +TEST_COV_D = /tmp/cov +COVERAGE_OPTS = --coverage-path $(TEST_COV_D) --expect bin/ + +.PHONY: test_coverage coverage +test_coverage: coverage +coverage: + $(RM) -r $(TEST_COV_D) + mkdir -p $(TEST_COV_D) + BISECT_FILE=$(TEST_COV_D)/language dune runtest --no-print-directory \ + --instrument-with bisect_ppx --force + bisect-ppx-report html $(COVERAGE_OPTS) + bisect-ppx-report summary $(COVERAGE_OPTS) + @echo "Use 'xdg-open _coverage/index.html' to see coverage report" diff --git a/EML/bin/EML.ml b/EML/bin/EML.ml new file mode 100644 index 00000000..06c4b14b --- /dev/null +++ b/EML/bin/EML.ml @@ -0,0 +1,159 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Stdio +open EML_lib +open Middleend + +type backend = + | Ricsv + | Llvm + +type opts = + { input_file : string option + ; output_file : string option + ; enable_gc : bool + ; backend : backend + ; infer_only : bool + } + +let default_opts = + { input_file = None + ; output_file = None + ; enable_gc = false + ; backend = Ricsv + ; infer_only = false + } +;; + +type env = Inferencer.TypeEnv.t + +let report_parse_error oc s = + Out_channel.output_string oc (Format.asprintf "Parsing error: %s\n" s) +;; + +let report_infer_error oc e = + Out_channel.output_string + oc + (Format.asprintf "Inferencer error: %a\n" Inferencer.pp_error e) +;; + +let with_frontend text oc f_success : (env, unit) Result.t = + match Frontend.Runner.run text with + | Error (Frontend.Runner.Parse s) -> + report_parse_error oc s; + Error () + | Ok ast -> f_success ast +;; + +let with_middleend ast env f : (env, unit) Result.t = + match Middleend.Runner.run ast env with + | Error e_mid -> + Format.eprintf "Middleend error: %a\n%!" Middleend.Runner.pp_error e_mid; + Error () + | Ok (anf_ast, env') -> f anf_ast env' +;; + +let run_compile text env oc ~backend ~enable_gc : (env, unit) Result.t = + with_frontend text oc (fun ast -> + with_middleend ast env (fun anf_ast env' -> + let ppf = Format.formatter_of_out_channel oc in + let res = + match backend with + | Ricsv -> Backend.Ricsv.Runner.gen_program ~enable_gc ppf anf_ast + | Llvm -> Backend.Llvm_ir.Runner.gen_program ~enable_gc ppf anf_ast + in + match res with + | Ok () -> Ok env' + | Error msg -> + Format.eprintf "Codegen error: %s\n%!" msg; + Error ())) +;; + +let run_infer_only text env oc : (env, unit) Result.t = + match Frontend.Parser.parse text with + | Error s -> + report_parse_error oc s; + Error () + | Ok ast -> + (match Inferencer.ResultMonad.run (Inferencer.infer_structure env ast) with + | Error e -> + report_infer_error oc e; + Error () + | Ok (_subst, env') -> + let filtered_env = + Base.Map.filter_keys env' ~f:(fun key -> not (Base.Map.mem env key)) + in + Base.Map.iteri filtered_env ~f:(fun ~key ~data -> + match data with + | Inferencer.Scheme.Scheme (_, ty) -> + Out_channel.output_string + oc + (Format.asprintf "val %s: %a\n" key Frontend.Ast.pp_ty ty)); + Ok env') +;; + +let compiler opts : (unit, unit) Result.t = + let run text env oc = + if opts.infer_only + then run_infer_only text env oc + else run_compile text env oc ~backend:opts.backend ~enable_gc:opts.enable_gc + in + let env0 = + if opts.enable_gc + then Middleend.Inferencer.TypeEnv.env_with_gc + else Middleend.Inferencer.TypeEnv.initial_env + in + let with_output f = + match opts.output_file with + | Some path -> Out_channel.with_file path ~f + | None -> f Out_channel.stdout + in + let input = + match opts.input_file with + | Some path -> In_channel.read_all path |> String.trim + | None -> In_channel.input_all stdin |> String.trim + in + match with_output (fun oc -> run input env0 oc) with + | Ok _env -> Ok () + | Error () -> Error () +;; + +let parse_args () : (opts, unit) Result.t = + let parse_backend = function + | "llvm" -> Ok Llvm + | "ricsv" -> Ok Ricsv + | _ -> Error () + in + let rec loop current_opts = function + | [] -> Ok current_opts + | "-gc" :: rest -> loop { current_opts with enable_gc = true } rest + | "-infer" :: rest -> loop { current_opts with infer_only = true } rest + | "-fromfile" :: path :: rest -> + loop { current_opts with input_file = Some path } rest + | "-o" :: path :: rest -> loop { current_opts with output_file = Some path } rest + | "-backend" :: backend_name :: rest -> + (match parse_backend backend_name with + | Ok backend -> loop { current_opts with backend } rest + | Error () -> Error ()) + | argument :: _ when String.length argument > 0 && Char.equal argument.[0] '-' -> + Error () + | _positional_argument :: _ -> Error () + in + let argv = Array.to_list Sys.argv in + match argv with + | [] -> Ok default_opts + | _program_name :: arguments -> loop default_opts arguments +;; + +let () = + match parse_args () with + | Error () -> + Format.eprintf "Positional arguments are not supported\n"; + exit 1 + | Ok opts -> + (match compiler opts with + | Error () -> exit 1 + | Ok () -> ()) +;; diff --git a/EML/bin/dune b/EML/bin/dune new file mode 100644 index 00000000..c2e348c7 --- /dev/null +++ b/EML/bin/dune @@ -0,0 +1,4 @@ +(executable + (name EML) + (modes byte exe) + (libraries stdio base EML.lib)) diff --git a/EML/dune-project b/EML/dune-project new file mode 100644 index 00000000..8d153d2c --- /dev/null +++ b/EML/dune-project @@ -0,0 +1,18 @@ +(lang dune 3.8) + +(name EML) + +(generate_opam_files false) + +(source + (github Kakadu/comp24)) + +(authors "Victoria Ostrovskaya & Danil Usoltsev") + +(maintainers "Victoria Ostrovskaya & Danil Usoltsev") + +(license "LGPL-3.0-or-later WITH OCaml-LGPL-linking-exception") + + + +; See the complete stanza docs at https://dune.readthedocs.io/en/stable/dune-files.html#dune-project diff --git a/EML/lib/backend/llvm_ir/analysis.ml b/EML/lib/backend/llvm_ir/analysis.ml new file mode 100644 index 00000000..450b23ac --- /dev/null +++ b/EML/lib/backend/llvm_ir/analysis.ml @@ -0,0 +1,112 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Frontend.Ast +open Middleend.Anf +open Runtime.Primitives + +type function_layout = + { func_name : string + ; asm_name : string + ; params : immediate list + ; body : anf_expr + ; is_rec : bool + } + +type analysis_result = + { functions : function_layout list + ; resolve : int -> string -> (string * int) option + } + +let rec params_of_anf = function + | AnfExpr (ComplexLambda (pats, body)) -> + let imms = + List.filter_map + (function + | PatVariable id -> Some (ImmediateVar id) + | _ -> None) + pats + in + let rest, inner = params_of_anf body in + imms @ rest, inner + | other -> [], other +;; + +let analyze (program : anf_program) = + let raw = + List.filter_map + (function + | AnfValue (rec_flag, (func_name, arity, body), _) -> + let params, body = params_of_anf body in + Some (func_name, arity, params, body, rec_flag = Rec) + | AnfEval _ -> None) + program + in + let mangle_reserved name = + if is_reserved name + then "eml_" ^ name + else if String.equal name "_start" + then "eml_start" + else name + in + let functions, _ = + List.fold_left + (fun (reversed_functions, counts) (func_name, _arity, params, body, is_rec) -> + let base_asm_name = mangle_reserved func_name in + let duplicate_index = + Base.Map.find counts func_name |> Option.value ~default:0 + in + let updated_counts = + Base.Map.set counts ~key:func_name ~data:(duplicate_index + 1) + in + let asm_name = + if duplicate_index = 0 + then base_asm_name + else base_asm_name ^ "_" ^ Int.to_string duplicate_index + in + ( { func_name; asm_name; params; body; is_rec } :: reversed_functions + , updated_counts )) + ([], Base.Map.empty (module Base.String)) + raw + in + let functions = List.rev functions in + let has_main = + List.exists (fun func_layout -> String.equal func_layout.func_name "main") functions + in + let functions = + if has_main + then functions + else ( + let synthetic_main = + { func_name = "main" + ; asm_name = "main" + ; params = [] + ; body = AnfExpr (ComplexImmediate (ImmediateConst (ConstInt 0))) + ; is_rec = false + } + in + functions @ [ synthetic_main ]) + in + let resolver func_index var_name = + let rec find i = + if i < 0 + then None + else ( + match Base.List.nth functions i with + | None -> None + | Some func_layout when String.equal func_layout.func_name var_name -> + Some (func_layout.asm_name, List.length func_layout.params) + | Some _ -> find (i - 1)) + in + let start_index = + match Base.List.nth functions func_index with + | Some func_layout + when func_layout.is_rec && String.equal func_layout.func_name var_name -> + func_index + | _ -> func_index - 1 + in + find start_index + in + { functions; resolve = resolver } +;; diff --git a/EML/lib/backend/llvm_ir/analysis.mli b/EML/lib/backend/llvm_ir/analysis.mli new file mode 100644 index 00000000..92ca3459 --- /dev/null +++ b/EML/lib/backend/llvm_ir/analysis.mli @@ -0,0 +1,20 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Middleend.Anf + +type function_layout = + { func_name : string + ; asm_name : string + ; params : immediate list + ; body : anf_expr + ; is_rec : bool + } + +type analysis_result = + { functions : function_layout list + ; resolve : int -> string -> (string * int) option + } + +val analyze : anf_program -> analysis_result diff --git a/EML/lib/backend/llvm_ir/architecture.ml b/EML/lib/backend/llvm_ir/architecture.ml new file mode 100644 index 00000000..63224627 --- /dev/null +++ b/EML/lib/backend/llvm_ir/architecture.ml @@ -0,0 +1,117 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Llvm + +module Llvm_backend = struct + type instr = + | Add of llvalue * llvalue * string + | Sub of llvalue * llvalue * string + | Mul of llvalue * llvalue * string + | Sdiv of llvalue * llvalue * string + | Neg of llvalue * string + | Icmp of Icmp.t * llvalue * llvalue * string + | And of llvalue * llvalue * string + | Or of llvalue * llvalue * string + | Not of llvalue * string + | Load of lltype * llvalue * string + | Store of llvalue * llvalue + | Alloca of lltype * string + | Call of lltype * llvalue * llvalue array * string + | Ret of llvalue option + | Br of llbasicblock + | CondBr of llvalue * llbasicblock * llbasicblock + | Phi of (llvalue * llbasicblock) list * string + | Bitcast of llvalue * lltype * string + | PtrToInt of llvalue * lltype * string + | IntToPtr of llvalue * lltype * string + + let emit builder = function + | Add (left, right, name) -> Some (build_add left right name builder) + | Sub (left, right, name) -> Some (build_sub left right name builder) + | Mul (left, right, name) -> Some (build_mul left right name builder) + | Sdiv (left, right, name) -> Some (build_sdiv left right name builder) + | Neg (operand, name) -> Some (build_neg operand name builder) + | Icmp (cond, left, right, name) -> Some (build_icmp cond left right name builder) + | And (left, right, name) -> Some (build_and left right name builder) + | Or (left, right, name) -> Some (build_or left right name builder) + | Not (operand, name) -> Some (build_not operand name builder) + | Load (load_ty, ptr_value, name) -> Some (build_load load_ty ptr_value name builder) + | Store (value, ptr_value) -> + let (_ : Llvm.llvalue) = build_store value ptr_value builder in + None + | Alloca (alloca_ty, name) -> Some (build_alloca alloca_ty name builder) + | Call (ft, callee, args, name) -> Some (build_call ft callee args name builder) + | Ret None -> + let (_ : Llvm.llvalue) = build_ret_void builder in + None + | Ret (Some ret_value) -> + let (_ : Llvm.llvalue) = build_ret ret_value builder in + None + | Br block -> + let (_ : Llvm.llvalue) = build_br block builder in + None + | CondBr (cond, then_bb, else_bb) -> + let (_ : Llvm.llvalue) = build_cond_br cond then_bb else_bb builder in + None + | Phi (incoming, name) -> Some (build_phi incoming name builder) + | Bitcast (operand, dest_ty, name) -> + Some (build_bitcast operand dest_ty name builder) + | PtrToInt (operand, dest_ty, name) -> + Some (build_ptrtoint operand dest_ty name builder) + | IntToPtr (operand, dest_ty, name) -> + Some (build_inttoptr operand dest_ty name builder) + ;; + + let add builder left right name = emit builder (Add (left, right, name)) + let sub builder left right name = emit builder (Sub (left, right, name)) + let mul builder left right name = emit builder (Mul (left, right, name)) + let sdiv builder left right name = emit builder (Sdiv (left, right, name)) + let neg builder operand name = emit builder (Neg (operand, name)) + let icmp builder cond left right name = emit builder (Icmp (cond, left, right, name)) + let and_ builder left right name = emit builder (And (left, right, name)) + let or_ builder left right name = emit builder (Or (left, right, name)) + let not builder operand name = emit builder (Not (operand, name)) + let load builder load_ty ptr_value name = emit builder (Load (load_ty, ptr_value, name)) + let alloca builder alloca_ty name = emit builder (Alloca (alloca_ty, name)) + let call builder ft callee args name = emit builder (Call (ft, callee, args, name)) + let phi builder incoming name = emit builder (Phi (incoming, name)) + + let bitcast builder operand dest_ty name = + emit builder (Bitcast (operand, dest_ty, name)) + ;; + + let ptrtoint builder operand dest_ty name = + emit builder (PtrToInt (operand, dest_ty, name)) + ;; + + let inttoptr builder operand dest_ty name = + emit builder (IntToPtr (operand, dest_ty, name)) + ;; + + let store builder value ptr_value = + let (_ : Llvm.llvalue option) = emit builder (Store (value, ptr_value)) in + () + ;; + + let ret_void builder = + let (_ : Llvm.llvalue option) = emit builder (Ret None) in + () + ;; + + let ret builder ret_value = + let (_ : Llvm.llvalue option) = emit builder (Ret (Some ret_value)) in + () + ;; + + let br builder block = + let (_ : Llvm.llvalue option) = emit builder (Br block) in + () + ;; + + let cond_br builder cond then_bb else_bb = + let (_ : Llvm.llvalue option) = emit builder (CondBr (cond, then_bb, else_bb)) in + () + ;; +end diff --git a/EML/lib/backend/llvm_ir/architecture.mli b/EML/lib/backend/llvm_ir/architecture.mli new file mode 100644 index 00000000..8c61568a --- /dev/null +++ b/EML/lib/backend/llvm_ir/architecture.mli @@ -0,0 +1,52 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Llvm + +module Llvm_backend : sig + type instr = + | Add of llvalue * llvalue * string + | Sub of llvalue * llvalue * string + | Mul of llvalue * llvalue * string + | Sdiv of llvalue * llvalue * string + | Neg of llvalue * string + | Icmp of Icmp.t * llvalue * llvalue * string + | And of llvalue * llvalue * string + | Or of llvalue * llvalue * string + | Not of llvalue * string + | Load of lltype * llvalue * string + | Store of llvalue * llvalue + | Alloca of lltype * string + | Call of lltype * llvalue * llvalue array * string + | Ret of llvalue option + | Br of llbasicblock + | CondBr of llvalue * llbasicblock * llbasicblock + | Phi of (llvalue * llbasicblock) list * string + | Bitcast of llvalue * lltype * string + | PtrToInt of llvalue * lltype * string + | IntToPtr of llvalue * lltype * string + + val emit : llbuilder -> instr -> llvalue option + val add : llbuilder -> llvalue -> llvalue -> string -> llvalue option + val sub : llbuilder -> llvalue -> llvalue -> string -> llvalue option + val mul : llbuilder -> llvalue -> llvalue -> string -> llvalue option + val sdiv : llbuilder -> llvalue -> llvalue -> string -> llvalue option + val neg : llbuilder -> llvalue -> string -> llvalue option + val icmp : llbuilder -> Icmp.t -> llvalue -> llvalue -> string -> llvalue option + val and_ : llbuilder -> llvalue -> llvalue -> string -> llvalue option + val or_ : llbuilder -> llvalue -> llvalue -> string -> llvalue option + val not : llbuilder -> llvalue -> string -> llvalue option + val load : llbuilder -> lltype -> llvalue -> string -> llvalue option + val alloca : llbuilder -> lltype -> string -> llvalue option + val call : llbuilder -> lltype -> llvalue -> llvalue array -> string -> llvalue option + val phi : llbuilder -> (llvalue * llbasicblock) list -> string -> llvalue option + val bitcast : llbuilder -> llvalue -> lltype -> string -> llvalue option + val ptrtoint : llbuilder -> llvalue -> lltype -> string -> llvalue option + val inttoptr : llbuilder -> llvalue -> lltype -> string -> llvalue option + val store : llbuilder -> llvalue -> llvalue -> unit + val ret_void : llbuilder -> unit + val ret : llbuilder -> llvalue -> unit + val br : llbuilder -> llbasicblock -> unit + val cond_br : llbuilder -> llvalue -> llbasicblock -> llbasicblock -> unit +end diff --git a/EML/lib/backend/llvm_ir/generator.ml b/EML/lib/backend/llvm_ir/generator.ml new file mode 100644 index 00000000..b274ac57 --- /dev/null +++ b/EML/lib/backend/llvm_ir/generator.ml @@ -0,0 +1,770 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Llvm +open Runtime.Primitives +open Architecture +open Llvm_backend +open Analysis +open Frontend.Ast +open Middleend.Anf +open Generator_state + +let tag_int n = 1 + (n lsl 1) +let tag_bool b = if b then 4 else 2 +let tag_char c = tag_int (Char.code c) +let context = global_context () +let builder = builder context +let int_t = i64_type context +let i32_t = i32_type context +let void_t = void_type context +let ptr_t = pointer_type context + +let lltype_of_arg = function + | Ptr -> ptr_t + | Int -> int_t + | I32 -> i32_t +;; + +let lltype_of_ret = function + | RPtr -> ptr_t + | RInt -> int_t + | RVoid -> void_t +;; + +let predefined_funcs = + List.map + (fun { name; ret; args } -> + let ret_t = lltype_of_ret ret in + let arg_t = Array.of_list (List.map lltype_of_arg args) in + name, function_type ret_t arg_t) + predefined_runtime_funcs +;; + +let predefined_init current_module = + List.fold_left + (fun (value_env, type_env) (function_name, function_type) -> + let function_value = declare_function function_name function_type current_module in + ( Base.Map.set value_env ~key:function_name ~data:function_value + , Base.Map.set type_env ~key:function_name ~data:function_type )) + (Base.Map.empty (module Base.String), Base.Map.empty (module Base.String)) + predefined_funcs +;; + +let emit_void builder instr : (unit, string) Result.t = + match emit builder instr with + | _ -> Ok () +;; + +let emit_void_st builder instr = + match emit_void builder instr with + | Ok () -> return () + | Error e -> fail e +;; + +let with_optional_value = function + | Some value -> return value + | None -> fail "Llvm_backend: expected value" +;; + +let lookup_func name = + let* value_opt = find_value_opt name in + match value_opt with + | Some func -> return func + | None -> + let* state = get in + (match lookup_function name state.current_module with + | Some func -> return func + | None -> fail ("Couldn't find value for key: " ^ name)) +;; + +let lookup_type name = + let* type_opt = find_type_opt name in + match type_opt with + | Some ty -> return ty + | None -> fail ("Couldn't find type for key: " ^ name) +;; + +let lookup_func_type name = + let* func_value = lookup_func name in + let* func_type = lookup_type name in + return (func_value, func_type) +;; + +let gen_simple_type name args = + let* func_value, func_type = lookup_func_type name in + let* res = + with_optional_value (call builder func_type func_value args ("boxed_" ^ name)) + in + return res +;; + +let imm_unit = + let* v = + with_optional_value (inttoptr builder (const_int int_t (tag_int 0)) ptr_t "unit") + in + return v +;; + +let imm_tagged_int i = + let* v = + with_optional_value + (inttoptr builder (const_int int_t (tag_int i)) ptr_t "tagged_int") + in + return v +;; + +let imm_tagged_bool b = + let* v = + with_optional_value + (inttoptr builder (const_int int_t (tag_bool b)) ptr_t "tagged_bool") + in + return v +;; + +let imm_tagged_char c = + let* v = + with_optional_value + (inttoptr builder (const_int int_t (tag_char c)) ptr_t "tagged_char") + in + return v +;; + +let untag_int_val tagged_val = + let* raw = with_optional_value (ptrtoint builder tagged_val int_t "raw_int") in + let* minus1 = with_optional_value (sub builder raw (const_int int_t 1) "minus1") in + let* result = + with_optional_value (sdiv builder minus1 (const_int int_t 2) "untagged") + in + return result +;; + +let tag_int_result i = + let* twice = with_optional_value (mul builder i (const_int int_t 2) "twice") in + let* tagged = with_optional_value (add builder twice (const_int int_t 1) "tagged") in + let* v = with_optional_value (inttoptr builder tagged ptr_t "result_int") in + return v +;; + +let untag_bool_val tagged_val = + let* raw = with_optional_value (ptrtoint builder tagged_val int_t "raw_bool") in + let* result = + with_optional_value (icmp builder Icmp.Eq raw (const_int int_t 4) "is_true") + in + return result +;; + +let tag_bool_result cond_value = + let tagged_i64 = + build_select cond_value (const_int int_t 4) (const_int int_t 2) "tagged_bool" builder + in + let* v = with_optional_value (inttoptr builder tagged_i64 ptr_t "result_bool") in + return v +;; + +let rec gen_imm = function + | ImmediateConst (ConstInt i) -> imm_tagged_int i + | ImmediateConst (ConstBool b) -> imm_tagged_bool b + | ImmediateConst (ConstChar c) -> imm_tagged_char c + | ImmediateConst (ConstString _s) -> imm_unit + | ImmediateVar id -> + let* value = + let* gc_allocas = get_gc_allocas in + match gc_allocas with + | Some allocas -> + (match Generator_state.map_find_opt allocas id with + | Some alloca -> + let* v = with_optional_value (load builder ptr_t alloca id) in + return v + | None -> + let* value_opt = find_value_opt id in + (match value_opt with + | Some v -> return v + | None -> + let* resolved_value = resolved_find_value_opt id in + (match resolved_value with + | None -> fail ("Unbound variable: " ^ id) + | Some v -> return v))) + | None -> + let* value_opt = find_value_opt id in + (match value_opt with + | Some v -> return v + | None -> + let* resolved_value = resolved_find_value_opt id in + (match resolved_value with + | None -> fail ("Unbound variable: " ^ id) + | Some v -> return v)) + in + (match classify_value value with + | ValueKind.Function when Array.length (params value) = 0 -> + let* type_opt = resolved_find_type_opt id in + let* func_type = + match type_opt with + | Some ty -> return ty + | None -> fail ("Missing type for: " ^ id) + in + let* v = with_optional_value (call builder func_type value [||] "call_0") in + return v + | _ -> + let* arity_opt = get_resolved_arity id in + maybe_closure value arity_opt) + +and get_resolved_arity name = + let* state = get in + return + (match state.resolve with + | Some resolver -> + (match resolver state.current_func_index name with + | Some (_, arity) -> Some arity + | None -> None) + | None -> None) + +and maybe_closure value arity_opt = + match classify_value value with + | ValueKind.Function -> + let arity = Option.value arity_opt ~default:(Array.length (params value)) in + let* function_ptr = + with_optional_value (bitcast builder value ptr_t "func_ptr_cast") + in + gen_simple_type "alloc_closure" [| function_ptr; const_int int_t arity |] + | _ -> return value +;; + +let gen_binop_native op left_v right_v = + match op with + | Plus -> + let* l = untag_int_val left_v in + let* r = untag_int_val right_v in + let* v = with_optional_value (add builder l r "add") in + tag_int_result v + | Minus -> + let* l = untag_int_val left_v in + let* r = untag_int_val right_v in + let* v = with_optional_value (sub builder l r "sub") in + tag_int_result v + | Multiply -> + let* l = untag_int_val left_v in + let* r = untag_int_val right_v in + let* v = with_optional_value (mul builder l r "mul") in + tag_int_result v + | Division -> + let* l = untag_int_val left_v in + let* r = untag_int_val right_v in + let* v = with_optional_value (sdiv builder l r "sdiv") in + tag_int_result v + | GreatestEqual -> + let* l = untag_int_val left_v in + let* r = untag_int_val right_v in + let* v = with_optional_value (icmp builder Icmp.Sge l r "icmp_sge") in + tag_bool_result v + | LowestEqual -> + let* l = untag_int_val left_v in + let* r = untag_int_val right_v in + let* v = with_optional_value (icmp builder Icmp.Sle l r "icmp_sle") in + tag_bool_result v + | GreaterThan -> + let* l = untag_int_val left_v in + let* r = untag_int_val right_v in + let* v = with_optional_value (icmp builder Icmp.Sgt l r "icmp_sgt") in + tag_bool_result v + | LowerThan -> + let* l = untag_int_val left_v in + let* r = untag_int_val right_v in + let* v = with_optional_value (icmp builder Icmp.Slt l r "icmp_slt") in + tag_bool_result v + | Equal -> + let* l = untag_int_val left_v in + let* r = untag_int_val right_v in + let* v = with_optional_value (icmp builder Icmp.Eq l r "icmp_eq") in + tag_bool_result v + | NotEqual -> + let* l = untag_int_val left_v in + let* r = untag_int_val right_v in + let* v = with_optional_value (icmp builder Icmp.Ne l r "icmp_ne") in + tag_bool_result v + | And -> + let* l = untag_bool_val left_v in + let* r = untag_bool_val right_v in + let* v = with_optional_value (and_ builder l r "and") in + tag_bool_result v + | Or -> + let* l = untag_bool_val left_v in + let* r = untag_bool_val right_v in + let* v = with_optional_value (or_ builder l r "or") in + tag_bool_result v +;; + +let gen_unop_native op tagged_val = + match op with + | Negative -> + let* int_val = untag_int_val tagged_val in + let* result = with_optional_value (neg builder int_val "neg") in + tag_int_result result + | Not -> + let* bool_val = untag_bool_val tagged_val in + let* result = with_optional_value (not builder bool_val "not") in + tag_bool_result result +;; + +let rec gen_cexpr = function + | ComplexImmediate imm -> gen_imm imm + | ComplexUnit -> imm_unit + | ComplexBinOper (op, left, right) -> + let* left_v = gen_imm left in + let* right_v = gen_imm right in + gen_binop_native op left_v right_v + | ComplexUnarOper (op, imm) -> + let* v = gen_imm imm in + gen_unop_native op v + | ComplexTuple (e1, e2, rest) -> + let* args = + List.fold_left + (fun acc imm -> + let* vs = acc in + let* v = gen_imm imm in + return (vs @ [ v ])) + (return []) + (e1 :: e2 :: rest) + in + let len = List.length args in + let arr_len = if len = 0 then 1 else len in + let arr_ty = Llvm.array_type ptr_t arr_len in + let* alloca_arr = + with_optional_value (Some (Llvm.build_alloca arr_ty "tuple_args" builder)) + in + let* () = + Base.List.foldi args ~init:(return ()) ~f:(fun i acc v -> + let* () = acc in + let* elem_ptr = + with_optional_value + (Some + (Llvm.build_gep + arr_ty + alloca_arr + [| Llvm.const_int i32_t 0; Llvm.const_int i32_t i |] + "tuple_elem" + builder)) + in + let () = Llvm_backend.store builder v elem_ptr in + return ()) + in + let* args_ptr = + with_optional_value + (Some + (Llvm.build_gep + arr_ty + alloca_arr + [| Llvm.const_int i32_t 0; Llvm.const_int i32_t 0 |] + "tuple_args_ptr" + builder)) + in + let* create_tuple_func, create_tuple_type = lookup_func_type "create_tuple" in + with_optional_value + (call + builder + create_tuple_type + create_tuple_func + [| const_int int_t len; args_ptr |] + "boxed_create_tuple") + | ComplexField (tuple_imm, idx) -> + let* tuple_val = gen_imm tuple_imm in + let* field_func, field_type = lookup_func_type "field" in + let* v = + with_optional_value + (call + builder + field_type + field_func + [| tuple_val; const_int int_t (tag_int idx) |] + "field") + in + return v + | ComplexApp (ImmediateVar fname, first, rest) -> + let args_list = first :: rest in + if fname = "print_int" && List.length args_list = 1 + then + let* arg = gen_imm first in + let* tagged_i64 = + with_optional_value (ptrtoint builder arg int_t "print_int_arg") + in + let* print_int_func, print_int_type = lookup_func_type "print_int" in + let* () = + emit_void_st builder (Call (print_int_type, print_int_func, [| tagged_i64 |], "")) + in + imm_unit + else + let* callee_value, callee_from_alloca = + let resolve_callee () = + let* resolved_value = resolved_find_value_opt fname in + match resolved_value with + | Some v -> return (v, false) + | None -> + let* value_opt = find_value_opt fname in + (match value_opt with + | Some v -> return (v, false) + | None -> fail ("Unbound function: " ^ fname)) + in + let* gc_allocas = get_gc_allocas in + match gc_allocas with + | Some allocas -> + (match Generator_state.map_find_opt allocas fname with + | Some alloca -> + let* v = with_optional_value (load builder ptr_t alloca fname) in + return (v, true) + | None -> resolve_callee ()) + | None -> resolve_callee () + in + let* args = + List.fold_left + (fun acc imm -> + let* vs = acc in + let* v = gen_imm imm in + return (vs @ [ v ])) + (return []) + args_list + in + let args_values = Array.of_list args in + let num_args = Array.length args_values in + let is_direct_func = + match classify_value callee_value with + | ValueKind.Function -> Array.length (params callee_value) = num_args + | _ -> false + in + let is_zero_arg_with_unit = + match classify_value callee_value with + | ValueKind.Function -> Array.length (params callee_value) = 0 && num_args = 1 + | _ -> false + in + let use_direct = + callee_from_alloca = false + && (is_direct_func || is_zero_arg_with_unit) + && num_args <= 6 + in + if use_direct + then + let* type_opt = resolved_find_type_opt fname in + let* func_type = + match type_opt with + | Some ty -> return ty + | None -> fail ("Missing type for: " ^ fname) + in + let args_for_call = if is_zero_arg_with_unit then [||] else args_values in + with_optional_value + (call builder func_type callee_value args_for_call ("direct_" ^ fname)) + else + let* arity_opt = get_resolved_arity fname in + let* closure_value = maybe_closure callee_value arity_opt in + let* eml_applyN_func, eml_applyN_type = lookup_func_type "eml_applyN" in + if num_args = 0 + then ( + let arr_ty = Llvm.array_type ptr_t 1 in + let* alloca_arr = + with_optional_value (Some (Llvm.build_alloca arr_ty "apply_args" builder)) + in + let* args_ptr = + with_optional_value + (Some + (Llvm.build_gep + arr_ty + alloca_arr + [| Llvm.const_int i32_t 0; Llvm.const_int i32_t 0 |] + "apply_args_ptr" + builder)) + in + with_optional_value + (call + builder + eml_applyN_type + eml_applyN_func + [| closure_value; const_int int_t 0; args_ptr |] + "boxed_eml_applyN")) + else + let* _then_name, _else_name, merge_name = fresh_blocks in + let current_func = block_parent (insertion_block builder) in + let merge_block = append_block context merge_name current_func in + let blocks = + Array.init num_args (fun i -> + append_block context ("apply_step_" ^ Int.to_string i) current_func) + in + let apply_one_step closure arg name = + let one_ty = Llvm.array_type ptr_t 1 in + let a = Llvm.build_alloca one_ty "apply_one" builder in + let p = + Llvm.build_gep + one_ty + a + [| Llvm.const_int i32_t 0; Llvm.const_int i32_t 0 |] + "one_elem" + builder + in + Llvm_backend.store builder arg p; + Llvm_backend.call + builder + eml_applyN_type + eml_applyN_func + [| closure; const_int int_t 1; p |] + name + in + let result_vals = Array.make num_args (Llvm.const_null ptr_t) in + let* () = emit_void_st builder (Br blocks.(0)) in + let rec loop step_index = + if step_index >= num_args + then return () + else ( + let () = position_at_end blocks.(step_index) builder in + let* cur = + if step_index = 0 + then return closure_value + else + with_optional_value + (Llvm_backend.phi + builder + [ result_vals.(step_index - 1), blocks.(step_index - 1) ] + ("cur_" ^ Int.to_string step_index)) + in + let* step_result = + with_optional_value + (apply_one_step + cur + args_values.(step_index) + ("apply_step_" ^ Int.to_string step_index)) + in + let () = result_vals.(step_index) <- step_result in + let* () = + emit_void_st + builder + (if step_index < num_args - 1 + then Br blocks.(step_index + 1) + else Br merge_block) + in + loop (step_index + 1)) + in + let* () = loop 0 in + let () = position_at_end merge_block builder in + let* final_val = + with_optional_value + (Llvm_backend.phi + builder + [ result_vals.(num_args - 1), blocks.(num_args - 1) ] + "apply_result") + in + return final_val + | ComplexApp (_, _, _) -> + fail "LLVM codegen: ComplexApp with non-variable function not supported" + | ComplexBranch (cond_imm, then_e, else_e) -> + let* cond_val = gen_imm cond_imm in + let* bool_val = untag_bool_val cond_val in + let current_func = block_parent (insertion_block builder) in + let* then_name, else_name, merge_name = fresh_blocks in + let then_block = append_block context then_name current_func in + let else_block = append_block context else_name current_func in + let merge_block = append_block context merge_name current_func in + let* () = emit_void_st builder (CondBr (bool_val, then_block, else_block)) in + position_at_end then_block builder; + let* then_val = gen_anf then_e in + let* () = emit_void_st builder (Br merge_block) in + let then_bb = insertion_block builder in + position_at_end else_block builder; + let* else_val = gen_anf else_e in + let* () = emit_void_st builder (Br merge_block) in + let else_bb = insertion_block builder in + position_at_end merge_block builder; + let* v = + with_optional_value + (phi builder [ then_val, then_bb; else_val, else_bb ] "ite_result") + in + return v + | ComplexList _ | ComplexOption _ | ComplexLambda _ -> + fail "LLVM codegen: List/Option/Lambda not yet implemented" + +and gen_anf = function + | AnfExpr cexp -> gen_cexpr cexp + | AnfLet (_, name, cexp, cont) -> + let* value = gen_cexpr cexp in + let* () = + let* gc_allocas = get_gc_allocas in + match gc_allocas with + | Some allocas -> + let* alloca = + match Generator_state.map_find_opt allocas name with + | Some a -> return a + | None -> + let* entry_opt = get_gc_entry_block in + let* entry_block = + match entry_opt with + | Some blk -> return blk + | None -> fail "gen_anf: gc_entry_block not set" + in + let current_block = insertion_block builder in + let* alloca_ptr = + if current_block = entry_block + then with_optional_value (Llvm_backend.alloca builder ptr_t name) + else ( + let () = + match Llvm.instr_begin entry_block with + | Llvm.Before first -> position_before first builder + | Llvm.At_end _ -> position_at_end entry_block builder + in + let* alloca_in_entry = + with_optional_value (Llvm_backend.alloca builder ptr_t name) + in + let () = position_at_end current_block builder in + return alloca_in_entry) + in + let* () = + set_gc_allocas (Some (Base.Map.set allocas ~key:name ~data:alloca_ptr)) + in + return alloca_ptr + in + let* () = set_gc_allocas (Some (Base.Map.set allocas ~key:name ~data:alloca)) in + emit_void_st builder (Store (value, alloca)) + | None -> set_value name value + in + gen_anf cont +;; + +let declare_function (func_layout : function_layout) state = + let arg_types = Array.make (List.length func_layout.params) ptr_t in + let func_type = function_type ptr_t arg_types in + let func = declare_function func_layout.asm_name func_type state.current_module in + { state with + value_env = Base.Map.set state.value_env ~key:func_layout.asm_name ~data:func + ; type_env = Base.Map.set state.type_env ~key:func_layout.asm_name ~data:func_type + } +;; + +let emit_gc_prologue = + let* init_gc_func, init_gc_type = lookup_func_type "init_gc" in + let* set_ptr_func, set_ptr_type = lookup_func_type "set_ptr_stack" in + let* frameaddr_func, frameaddr_type = lookup_func_type "llvm.frameaddress.p0" in + let* () = emit_void_st builder (Call (init_gc_type, init_gc_func, [||], "")) in + let* frame_ptr = + with_optional_value + (call builder frameaddr_type frameaddr_func [| const_int i32_t 0 |] "frame") + in + emit_void_st builder (Call (set_ptr_type, set_ptr_func, [| frame_ptr |], "")) +;; + +let gen_function + (func_layout : function_layout) + ~enable_gc + ~is_entry + ~func_index + initial_state + = + let comp = + let* state = get in + let* () = put { state with current_func_index = func_index } in + let* func, _ = lookup_func_type func_layout.asm_name in + let entry_block = append_block context "entry" func in + position_at_end entry_block builder; + let* () = if enable_gc && is_entry then emit_gc_prologue else return () in + let* () = + if enable_gc + then set_gc_allocas (Some (Base.Map.empty (module Base.String))) + else return () + in + let* () = if enable_gc then set_gc_entry_block (Some entry_block) else return () in + let* () = set_value func_layout.asm_name func in + let* state_before_params = get in + let func_params = params func in + let* () = + Base.List.foldi func_layout.params ~init:(return ()) ~f:(fun param_index acc arg -> + let* () = acc in + match arg with + | ImmediateVar name -> + let* param_value = + if param_index >= 0 && param_index < Array.length func_params + then return (Array.get func_params param_index) + else fail "gen_function: param index out of bounds" + in + set_value_name name param_value; + if enable_gc + then ( + let* gc_allocas = get_gc_allocas in + let* allocas_map = + match gc_allocas with + | Some map -> return map + | None -> fail "gen_function: enable_gc but gc_allocas not set" + in + let* alloca_ptr = with_optional_value (alloca builder ptr_t name) in + store builder param_value alloca_ptr; + set_gc_allocas (Some (Base.Map.set allocas_map ~key:name ~data:alloca_ptr))) + else set_value name param_value + | ImmediateConst _ -> return ()) + in + let* body_value = gen_anf func_layout.body in + let* () = emit_void_st builder (Ret (Some body_value)) in + let* () = if enable_gc then set_gc_allocas None else return () in + let* () = if enable_gc then set_gc_entry_block None else return () in + let* state = get in + let value_env = + let without_params = + List.fold_left + (fun env -> function + | ImmediateVar name -> Base.Map.remove env name + | _ -> env) + state.value_env + func_layout.params + in + List.fold_left + (fun env -> function + | ImmediateVar name -> + (match Generator_state.map_find_opt state_before_params.value_env name with + | Some v -> Base.Map.set env ~key:name ~data:v + | None -> env) + | _ -> env) + without_params + func_layout.params + in + put + { state with + value_env = Base.Map.set value_env ~key:func_layout.asm_name ~data:func + } + in + run comp initial_state +;; + +let gen_program ~output_file ~enable_gc (program : anf_program) = + let llvm_module = create_module context "EML" in + let value_env, type_env = predefined_init llvm_module in + let { functions; resolve; _ } = analyze program in + let initial_state : Generator_state.state = + { value_env + ; type_env + ; current_module = llvm_module + ; gc_allocas = None + ; gc_entry_block = None + ; naming_state = Default_naming.init + ; resolve = Some resolve + ; current_func_index = 0 + } + in + (* [functions] is never empty: synthetic main is added when missing. *) + let entry_name = + match List.find_opt (fun func -> func.func_name = "main") functions with + | Some _ -> "main" + | None -> (List.rev functions |> List.hd).func_name + in + let state_after_declares = + List.fold_left (fun state func -> declare_function func state) initial_state functions + in + match + Base.List.foldi + functions + ~init:(Ok state_after_declares) + ~f:(fun idx acc func_layout -> + match acc with + | Error _ -> acc + | Ok state -> + let is_entry = func_layout.func_name = entry_name in + (match gen_function func_layout ~enable_gc ~is_entry ~func_index:idx state with + | Ok ((), state') -> Ok state' + | Error err -> Error err)) + with + | Error err -> Error err + | Ok _ -> + print_module output_file llvm_module; + Ok () +;; diff --git a/EML/lib/backend/llvm_ir/generator.mli b/EML/lib/backend/llvm_ir/generator.mli new file mode 100644 index 00000000..f19341f6 --- /dev/null +++ b/EML/lib/backend/llvm_ir/generator.mli @@ -0,0 +1,9 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val gen_program + : output_file:string + -> enable_gc:bool + -> Middleend.Anf.anf_program + -> (unit, string) Result.t diff --git a/EML/lib/backend/llvm_ir/generator_state.ml b/EML/lib/backend/llvm_ir/generator_state.ml new file mode 100644 index 00000000..170342a9 --- /dev/null +++ b/EML/lib/backend/llvm_ir/generator_state.ml @@ -0,0 +1,119 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Llvm + +module type NAMING = sig + type t + + val init : t + val fresh_blocks : t -> (string * string * string) * t +end + +module Default_naming : NAMING = struct + type t = int + + let init = 0 + + let fresh_blocks n = + let then_name = "then_" ^ Int.to_string n in + let else_name = "else_" ^ Int.to_string n in + let merge_name = "merge_" ^ Int.to_string n in + (then_name, else_name, merge_name), n + 1 + ;; +end + +module Make (N : NAMING) = struct + type state = + { value_env : (string, llvalue, Base.String.comparator_witness) Base.Map.t + ; type_env : (string, lltype, Base.String.comparator_witness) Base.Map.t + ; current_module : llmodule + ; gc_allocas : (string, llvalue, Base.String.comparator_witness) Base.Map.t option + ; gc_entry_block : llbasicblock option + ; naming_state : N.t + ; resolve : (int -> string -> (string * int) option) option + ; current_func_index : int + } + + type 'a t = state -> ('a * state, string) Result.t + + let return x state = Ok (x, state) + + let bind m f state = + match m state with + | Ok (x, state') -> f x state' + | Error err -> Error err + ;; + + let ( let* ) = bind + let get state = Ok (state, state) + let put state _ = Ok ((), state) + + let modify f state = + match get state with + | Ok (current_state, _) -> put (f current_state) state + | Error err -> Error err + ;; + + let fail err = fun _ -> Error err + let map_find_opt (map : (string, 'a, _) Base.Map.t) key = Base.Map.find map key + let find_value_opt name state = Ok (Base.Map.find state.value_env name, state) + let find_type_opt name state = Ok (Base.Map.find state.type_env name, state) + + let resolve_key state name = + match state.resolve with + | None -> name + | Some resolver -> + (match resolver state.current_func_index name with + | Some (asm_name, _) -> asm_name + | None -> name) + ;; + + let resolved_find_value_opt name state = + let resolved_key = resolve_key state name in + Ok (Base.Map.find state.value_env resolved_key, state) + ;; + + let resolved_find_type_opt name state = + let resolved_key = resolve_key state name in + Ok (Base.Map.find state.type_env resolved_key, state) + ;; + + let set_value name value = + modify (fun state -> + { state with value_env = Base.Map.set state.value_env ~key:name ~data:value }) + ;; + + let set_type name lltype = + modify (fun state -> + { state with type_env = Base.Map.set state.type_env ~key:name ~data:lltype }) + ;; + + let remove_value name = + modify (fun state -> { state with value_env = Base.Map.remove state.value_env name }) + ;; + + let get_gc_allocas state = Ok (state.gc_allocas, state) + + let set_gc_allocas allocas_map = + modify (fun state -> { state with gc_allocas = allocas_map }) + ;; + + let get_gc_entry_block state = Ok (state.gc_entry_block, state) + + let set_gc_entry_block block = + modify (fun state -> { state with gc_entry_block = block }) + ;; + + let fresh_blocks = + let* state = get in + let triple, next = N.fresh_blocks state.naming_state in + let* () = put { state with naming_state = next } in + return triple + ;; + + let run m = m +end + +include Make (Default_naming) diff --git a/EML/lib/backend/llvm_ir/generator_state.mli b/EML/lib/backend/llvm_ir/generator_state.mli new file mode 100644 index 00000000..25e7f403 --- /dev/null +++ b/EML/lib/backend/llvm_ir/generator_state.mli @@ -0,0 +1,59 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Llvm + +module type NAMING = sig + type t + + val init : t + val fresh_blocks : t -> (string * string * string) * t +end + +module Default_naming : NAMING + +module Make (N : NAMING) : sig + type state = + { value_env : (string, llvalue, Base.String.comparator_witness) Base.Map.t + ; type_env : (string, lltype, Base.String.comparator_witness) Base.Map.t + ; current_module : llmodule + ; gc_allocas : (string, llvalue, Base.String.comparator_witness) Base.Map.t option + ; gc_entry_block : llbasicblock option + ; naming_state : N.t + ; resolve : (int -> string -> (string * int) option) option + ; current_func_index : int + } + + type 'a t = state -> ('a * state, string) Result.t + + val return : 'a -> 'a t + val bind : 'a t -> ('a -> 'b t) -> 'b t + val ( let* ) : 'a t -> ('a -> 'b t) -> 'b t + val get : state t + val put : state -> unit t + val modify : (state -> state) -> unit t + val fail : string -> 'a t + val map_find_opt : (string, 'a, 'cmp) Base.Map.t -> string -> 'a option + val find_value_opt : string -> llvalue option t + val find_type_opt : string -> lltype option t + val resolved_find_value_opt : string -> llvalue option t + val resolved_find_type_opt : string -> lltype option t + val set_value : string -> llvalue -> unit t + val set_type : string -> lltype -> unit t + val remove_value : string -> unit t + + val get_gc_allocas + : (string, llvalue, Base.String.comparator_witness) Base.Map.t option t + + val set_gc_allocas + : (string, llvalue, Base.String.comparator_witness) Base.Map.t option + -> unit t + + val get_gc_entry_block : llbasicblock option t + val set_gc_entry_block : llbasicblock option -> unit t + val fresh_blocks : (string * string * string) t + val run : 'a t -> state -> ('a * state, string) Result.t +end + +include module type of Make (Default_naming) diff --git a/EML/lib/backend/llvm_ir/runner.ml b/EML/lib/backend/llvm_ir/runner.ml new file mode 100644 index 00000000..a8d0d532 --- /dev/null +++ b/EML/lib/backend/llvm_ir/runner.ml @@ -0,0 +1,21 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Middleend.Anf + +let gen_program ~enable_gc ppf (program : anf_program) : (unit, string) Result.t = + let temp_ll_path = Filename.temp_file "eml_llvm" ".ll" in + let remove_temp_file_if_exists () = + if Sys.file_exists temp_ll_path then Sys.remove temp_ll_path + in + match Generator.gen_program ~output_file:temp_ll_path ~enable_gc program with + | Error err -> + remove_temp_file_if_exists (); + Error err + | Ok () -> + let content = In_channel.with_open_text temp_ll_path In_channel.input_all in + remove_temp_file_if_exists (); + Format.fprintf ppf "%s" content; + Ok () +;; diff --git a/EML/lib/backend/llvm_ir/runner.mli b/EML/lib/backend/llvm_ir/runner.mli new file mode 100644 index 00000000..2f4ff9fa --- /dev/null +++ b/EML/lib/backend/llvm_ir/runner.mli @@ -0,0 +1,9 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val gen_program + : enable_gc:bool + -> Format.formatter + -> Middleend.Anf.anf_program + -> (unit, string) Result.t diff --git a/EML/lib/backend/ricsv/analysis.ml b/EML/lib/backend/ricsv/analysis.ml new file mode 100644 index 00000000..cb11905d --- /dev/null +++ b/EML/lib/backend/ricsv/analysis.ml @@ -0,0 +1,259 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Frontend.Ast +open Middleend.Anf +open Runtime.Primitives + +let word_size = 8 + +type function_layout = + { func_name : string + ; asm_name : string + ; params : immediate list + ; body : anf_expr + ; is_rec : bool + ; slots_count : int + ; max_stack_args : int + ; max_create_tuple_array_bytes : int + } + +type analysis_result = + { arity_map : (string, int, Base.String.comparator_witness) Base.Map.t + ; functions : function_layout list + ; resolve : int -> string -> (string * int) option + } + +let sum_by f xs = List.fold_left (fun acc x -> acc + f x) 0 xs +let max_by f xs = List.fold_left (fun acc x -> max acc (f x)) 0 xs + +let rec slots_in_imm = function + | ImmediateVar _ | ImmediateConst _ -> 0 + +and slots_in_cexpr = function + | ComplexImmediate imm -> slots_in_imm imm + | ComplexUnit -> 0 + | ComplexBinOper (_, left, right) -> slots_in_imm left + slots_in_imm right + | ComplexUnarOper (_, imm) -> slots_in_imm imm + | ComplexTuple (first, second, rest) -> + let elts = first :: second :: rest in + List.length elts + sum_by slots_in_imm elts + | ComplexField (imm, _) -> slots_in_imm imm + | ComplexList imm_list -> + let n = List.length imm_list in + n + sum_by slots_in_imm imm_list + | ComplexApp (first, second, rest) -> + (* +1 for curried-call intermediate; +1 per arg for spill_dangerous_args. + +8 for spill_caller_saved_vars_to_frame at start of every invocation (can spill a0-a7). + +N when nargs >= 2: margin so partial stays above argv (confirmed: overwrite → eml_applyN gets c=0x3). *) + let args = first :: second :: rest in + let argument_count = List.length args in + let additional_margin = if argument_count >= 2 then 12 else 0 in + 1 + 8 + argument_count + additional_margin + sum_by slots_in_imm args + | ComplexOption None -> 0 + | ComplexOption (Some imm) -> slots_in_imm imm + | ComplexLambda (_, body) -> slots_in_anf body + | ComplexBranch (cond, then_e, else_e) -> + 1 + slots_in_imm cond + slots_in_anf then_e + slots_in_anf else_e + +and slots_in_anf = function + | AnfExpr cexp -> slots_in_cexpr cexp + | AnfLet (_, _, cexp, cont) -> 1 + slots_in_cexpr cexp + slots_in_anf cont +;; + +let rec max_stack_args_cexpr = function + | ComplexImmediate _ | ComplexUnit -> 0 + | ComplexBinOper (_, left, right) -> + max (max_stack_args_imm left) (max_stack_args_imm right) + | ComplexUnarOper (_, imm) -> max_stack_args_imm imm + | ComplexTuple (first, second, rest) -> + max_by max_stack_args_imm (first :: second :: rest) + | ComplexField (imm, _) -> max_stack_args_imm imm + | ComplexList imm_list -> max_by max_stack_args_imm imm_list + | ComplexApp (_first, second, rest) -> + let argument_count = 1 + List.length rest in + let required_stack_words = argument_count in + let max_nested_argument_pressure = max_by max_stack_args_imm (second :: rest) in + max required_stack_words max_nested_argument_pressure + | ComplexOption None -> 0 + | ComplexOption (Some imm) -> max_stack_args_imm imm + | ComplexLambda (_, body) -> max_stack_args_anf body + | ComplexBranch (cond, then_e, else_e) -> + max + (max_stack_args_imm cond) + (max (max_stack_args_anf then_e) (max_stack_args_anf else_e)) + +and max_stack_args_imm = function + | ImmediateVar _ | ImmediateConst _ -> 0 + +and max_stack_args_anf = function + | AnfExpr cexp -> max_stack_args_cexpr cexp + | AnfLet (_, _, cexp, cont) -> max (max_stack_args_cexpr cexp) (max_stack_args_anf cont) +;; + +let rec max_create_tuple_array_cexpr = function + | ComplexImmediate _ | ComplexUnit -> 0 + | ComplexBinOper (_, left, right) -> + max (max_create_tuple_array_imm left) (max_create_tuple_array_imm right) + | ComplexUnarOper (_, imm) -> max_create_tuple_array_imm imm + | ComplexTuple (first, second, rest) -> + let elts = first :: second :: rest in + let here = List.length elts * word_size in + max here (max_by max_create_tuple_array_imm elts) + | ComplexField (imm, _) -> max_create_tuple_array_imm imm + | ComplexList imm_list -> + let bytes_per_cons_cell = 2 * word_size in + let bytes_from_elements = sum_by max_create_tuple_array_imm imm_list in + (bytes_per_cons_cell * List.length imm_list) + bytes_from_elements + | ComplexApp (_f, second, rest) -> max_by max_create_tuple_array_imm (second :: rest) + | ComplexOption None -> 0 + | ComplexOption (Some imm) -> max_create_tuple_array_imm imm + | ComplexLambda (_, body) -> max_create_tuple_array_anf body + | ComplexBranch (cond, then_e, else_e) -> + max + (max_create_tuple_array_imm cond) + (max (max_create_tuple_array_anf then_e) (max_create_tuple_array_anf else_e)) + +and max_create_tuple_array_imm = function + | ImmediateVar _ | ImmediateConst _ -> 0 + +and max_create_tuple_array_anf = function + | AnfExpr cexp -> max_create_tuple_array_cexpr cexp + | AnfLet (_, _, cexp, cont) -> + max (max_create_tuple_array_cexpr cexp) (max_create_tuple_array_anf cont) +;; + +let rec params_of_anf = function + | AnfExpr (ComplexLambda (pats, body)) -> + let imms = + List.filter_map + (function + | PatVariable id -> Some (ImmediateVar id) + | _ -> None) + pats + in + let remaining_parameters, inner_body = params_of_anf body in + imms @ remaining_parameters, inner_body + | other -> [], other +;; + +let arity_map_of_program (program : anf_program) = + let add_function_arity map (function_identifier, arity, _) = + Base.Map.set map ~key:function_identifier ~data:arity + in + List.fold_left + (fun map -> function + | AnfValue (_, (function_identifier, arity, _), and_binds) -> + let map = Base.Map.set map ~key:function_identifier ~data:arity in + List.fold_left add_function_arity map and_binds + | _ -> map) + (Base.Map.empty (module Base.String)) + program +;; + +let analyze (program : anf_program) = + let arity_map = arity_map_of_program program in + let analyzed_functions_raw = + List.filter_map + (function + | AnfValue (rec_flag, (func_name, arity, body), _) -> + let params, body = params_of_anf body in + Some + ( func_name + , arity + , params + , body + , rec_flag = Rec + , slots_in_anf body + , max_stack_args_anf body + , max_create_tuple_array_anf body ) + | AnfEval _ -> None) + program + in + let mangle_reserved name = + if is_reserved name + then "eml_" ^ name + else if String.equal name "_start" + then "eml_start" + else name + in + let functions, _ = + List.fold_left + (fun (reversed_functions, generated_name_counts) + ( func_name + , _arity + , params + , body + , is_rec + , slots_count + , max_stack_args + , max_create_tuple_array_bytes ) -> + let base_asm_name = mangle_reserved func_name in + let duplicate_index = + Base.Map.find generated_name_counts func_name |> Option.value ~default:0 + in + let updated_generated_name_counts = + Base.Map.set generated_name_counts ~key:func_name ~data:(duplicate_index + 1) + in + let asm_name = + if duplicate_index = 0 + then base_asm_name + else base_asm_name ^ "_" ^ Int.to_string duplicate_index + in + ( { func_name + ; asm_name + ; params + ; body + ; is_rec + ; slots_count + ; max_stack_args + ; max_create_tuple_array_bytes + } + :: reversed_functions + , updated_generated_name_counts )) + ([], Base.Map.empty (module Base.String)) + analyzed_functions_raw + in + let functions = List.rev functions in + let has_main = List.exists (fun fn -> String.equal fn.func_name "main") functions in + let functions = + if has_main + then functions + else ( + let synthetic_main = + { func_name = "main" + ; asm_name = "main" + ; params = [] + ; body = AnfExpr (ComplexImmediate (ImmediateConst (ConstInt 0))) + ; is_rec = false + ; slots_count = 0 + ; max_stack_args = 0 + ; max_create_tuple_array_bytes = 0 + } + in + functions @ [ synthetic_main ]) + in + let arity_map = + if has_main then arity_map else Base.Map.set arity_map ~key:"main" ~data:0 + in + let resolver current_function_index variable_name = + let rec find_visible_function = function + | i when i < 0 -> None + | i -> + (match Base.List.nth functions i with + | None -> None + | Some fn when String.equal fn.func_name variable_name -> + Some (fn.asm_name, List.length fn.params) + | Some _ -> find_visible_function (i - 1)) + in + let start_index = + match Base.List.nth functions current_function_index with + | Some fn when fn.is_rec && String.equal fn.func_name variable_name -> + current_function_index + | _ -> current_function_index - 1 + in + find_visible_function start_index + in + { arity_map; functions; resolve = resolver } +;; diff --git a/EML/lib/backend/ricsv/analysis.mli b/EML/lib/backend/ricsv/analysis.mli new file mode 100644 index 00000000..f941d6eb --- /dev/null +++ b/EML/lib/backend/ricsv/analysis.mli @@ -0,0 +1,24 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Middleend.Anf + +type function_layout = + { func_name : string + ; asm_name : string + ; params : immediate list + ; body : anf_expr + ; is_rec : bool + ; slots_count : int + ; max_stack_args : int + ; max_create_tuple_array_bytes : int + } + +type analysis_result = + { arity_map : (string, int, Base.String.comparator_witness) Base.Map.t + ; functions : function_layout list + ; resolve : int -> string -> (string * int) option + } + +val analyze : anf_program -> analysis_result diff --git a/EML/lib/backend/ricsv/architecture.ml b/EML/lib/backend/ricsv/architecture.ml new file mode 100644 index 00000000..e4638586 --- /dev/null +++ b/EML/lib/backend/ricsv/architecture.ml @@ -0,0 +1,189 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Format + +module Riscv_backend = struct + type reg = + | Zero + | RA + | SP + | A of int + | T of int + | S of int + [@@deriving eq] + + type offset = reg * int + + type instr = + | Addi of reg * reg * int (* сложение с константой: rd = rs + imm *) + | Ld of reg * offset (* загрузка 8 байт из памяти: rd = mem[base + offset] *) + | Sd of reg * offset (* сохранение 8 байт в память: mem[base + offset] = rd *) + | Mv of reg * reg (* копирование регистра: rd = rs *) + | Li of reg * int (* загрузить константу: rd = imm *) + | Add of reg * reg * reg (* сложение: rd = rs1 + rs2 *) + | Sub of reg * reg * reg (* вычитание: rd = rs1 - rs2 *) + | Call of string (* вызов функции по имени *) + | Ret (* возврат из функции *) + | Beq of + reg * reg * string (* переход если равно: если rs1 == rs2, переход на метку *) + | J of string (* безусловный переход на метку *) + | Label of string (* метка: именованная точка в коде, цель для Beq/J *) + | La of reg * string (* загрузить адрес: rd = адрес метки *) + (* Сравнения и логика *) + | Slt of reg * reg * reg (* записать в rd 1 если rs1 < rs2, иначе 0 *) + | Seqz of reg * reg (* записать в rd 1 если rs == 0, иначе 0 *) + | Snez of reg * reg (* записать в rd 1 если rs != 0, иначе 0 *) + | Xori of reg * reg * int (* xor регистра с константой: rd = rs ^ imm *) + | Xor of reg * reg * reg (* xor двух регистров: rd = rs1 ^ rs2 *) + | Mul of reg * reg * reg (* умножение: rd = rs1 * rs2 *) + | Div of reg * reg * reg (* целочисленное деление: rd = rs1 / rs2 *) + | Srli of reg * reg * int (* логический сдвиг вправо на константу: rd = rs >>> imm *) + + let pp_reg ppf = function + | Zero -> fprintf ppf "zero" + | RA -> fprintf ppf "ra" + | SP -> fprintf ppf "sp" + | A n -> fprintf ppf "a%d" n + | T n -> fprintf ppf "t%d" n + | S 0 -> fprintf ppf "fp" + | S n -> fprintf ppf "s%d" n + ;; + + let pp_offset ppf offset = fprintf ppf "%d(%a)" (snd offset) pp_reg (fst offset) + + let pp_instr ppf = function + | Addi (rd, rs, imm) -> fprintf ppf "addi %a, %a, %d" pp_reg rd pp_reg rs imm + | Add (rd, rs1, rs2) -> fprintf ppf "add %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Sub (rd, rs1, rs2) -> fprintf ppf "sub %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Mul (rd, rs1, rs2) -> fprintf ppf "mul %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Div (rd, rs1, rs2) -> fprintf ppf "div %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Srli (rd, rs1, imm) -> fprintf ppf "srli %a, %a, %d" pp_reg rd pp_reg rs1 imm + | Xori (rd, rs1, imm) -> fprintf ppf "xori %a, %a, %d" pp_reg rd pp_reg rs1 imm + | Xor (rd, rs1, rs2) -> fprintf ppf "xor %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Slt (rd, rs1, rs2) -> fprintf ppf "slt %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Seqz (rd, rs) -> fprintf ppf "seqz %a, %a" pp_reg rd pp_reg rs + | Snez (rd, rs) -> fprintf ppf "snez %a, %a" pp_reg rd pp_reg rs + | Li (rd, imm) -> fprintf ppf "li %a, %d" pp_reg rd imm + | La (rd, s) -> fprintf ppf "la %a, %s" pp_reg rd s + | Mv (rd, rs) -> fprintf ppf "mv %a, %a" pp_reg rd pp_reg rs + | Ld (rd, ofs) -> fprintf ppf "ld %a, %a" pp_reg rd pp_offset ofs + | Sd (rs, ofs) -> fprintf ppf "sd %a, %a" pp_reg rs pp_offset ofs + | Beq (rs1, rs2, s) -> fprintf ppf "beq %a, %a, %s" pp_reg rs1 pp_reg rs2 s + | J s -> fprintf ppf "j %s" s + | Label s -> fprintf ppf "%s:" s + | Call s -> fprintf ppf "call %s" s + | Ret -> fprintf ppf "ret" + ;; + + let tag_int n = (2 * n) + 1 + let fp = S 0 + let sp = SP + let ra = RA + let zero = Zero + let a0 = A 0 + let a1 = A 1 + let a2 = A 2 + let a3 = A 3 + let a4 = A 4 + let a5 = A 5 + let a6 = A 6 + let a7 = A 7 + let t0 = T 0 + let t1 = T 1 + let result_reg = a0 + let addi rd rs imm = [ Addi (rd, rs, imm) ] + let ld rd ofs = [ Ld (rd, ofs) ] + let sd rs ofs = [ Sd (rs, ofs) ] + let mv rd rs = [ Mv (rd, rs) ] + let li rd imm = [ Li (rd, imm) ] + let add rd rs1 rs2 = [ Add (rd, rs1, rs2) ] + let sub rd rs1 rs2 = [ Sub (rd, rs1, rs2) ] + let call s = [ Call s ] + let ret () = [ Ret ] + let beq rs1 rs2 lbl = [ Beq (rs1, rs2, lbl) ] + let j lbl = [ J lbl ] + let label s = [ Label s ] + let la rd s = [ La (rd, s) ] + let slt rd rs1 rs2 = [ Slt (rd, rs1, rs2) ] + let seqz rd rs = [ Seqz (rd, rs) ] + let snez rd rs = [ Snez (rd, rs) ] + let xori rd rs imm = [ Xori (rd, rs, imm) ] + let xor rd rs1 rs2 = [ Xor (rd, rs1, rs2) ] + let mul rd rs1 rs2 = [ Mul (rd, rs1, rs2) ] + let div rd rs1 rs2 = [ Div (rd, rs1, rs2) ] + let srli rd rs imm = [ Srli (rd, rs, imm) ] + let add_tag_items dst delta = [ Addi (dst, dst, delta) ] + let arg_regs = [ a0; a1; a2; a3; a4; a5; a6; a7 ] + let candidate_regs_for_spill = arg_regs + let arg_regs_count = 8 + let word_size = 8 + + (* RISC-V ABI: stack must be 16-byte aligned at call boundaries *) + let stack_align = 16 + let frame_header_size = 2 * word_size + let saved_fp_offset = 0 + let saved_ra_offset = word_size + + (* addi/sd/ld immediate is 12-bit signed: -2048 .. 2047 *) + let max_addi_imm = 2048 + + let rec sub_sp bytes_to_subtract = + if bytes_to_subtract <= 0 + then [] + else if bytes_to_subtract <= max_addi_imm + then addi sp sp (-bytes_to_subtract) + else addi sp sp (-max_addi_imm) @ sub_sp (bytes_to_subtract - max_addi_imm) + ;; + + let addi_or_li_add destination_register source_register immediate_value = + if immediate_value >= -max_addi_imm && immediate_value <= max_addi_imm - 1 + then addi destination_register source_register immediate_value + else li t0 immediate_value @ add destination_register source_register t0 + ;; + + (* Store at sp+offset; use direct sd when offset in 12-bit range *) + let sd_at_sp_offset reg offset = + if offset >= -2048 && offset <= 2047 + then sd reg (sp, offset) + else addi_or_li_add t0 sp offset @ sd reg (t0, 0) + ;; + + type location = + | Loc_reg of reg + | Loc_mem of offset + + let prologue ~enable_gc ~name ~stack_size = + let ra_offset = stack_size - saved_ra_offset in + let fp_offset = stack_size - frame_header_size in + let fp_imm = stack_size - frame_header_size in + let base = + label name + @ sub_sp stack_size + @ sd_at_sp_offset ra ra_offset + @ sd_at_sp_offset fp fp_offset + @ addi_or_li_add fp sp fp_imm + in + if enable_gc && String.equal name "main" + then base @ call "init_gc" @ mv a0 fp @ call "set_ptr_stack" + else base + ;; + + let epilogue ~enable_gc ~is_main = + let base = + (if enable_gc && is_main then call "destroy_gc" else []) + @ addi sp fp frame_header_size + @ ld ra (fp, saved_ra_offset) + @ ld fp (fp, saved_fp_offset) + in + if is_main then base @ li a0 0 @ ret () else base @ ret () + ;; + + let format_item ppf i = + (match i with + | Label _ -> fprintf ppf "%a" pp_instr i + | _ -> fprintf ppf " %a" pp_instr i); + fprintf ppf "\n" + ;; +end diff --git a/EML/lib/backend/ricsv/architecture.mli b/EML/lib/backend/ricsv/architecture.mli new file mode 100644 index 00000000..b3eef301 --- /dev/null +++ b/EML/lib/backend/ricsv/architecture.mli @@ -0,0 +1,101 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +module Riscv_backend : sig + type reg = + | Zero + | RA + | SP + | A of int + | T of int + | S of int + [@@deriving eq] + + type offset = reg * int + + type instr = + | Addi of reg * reg * int + | Ld of reg * offset + | Sd of reg * offset + | Mv of reg * reg + | Li of reg * int + | Add of reg * reg * reg + | Sub of reg * reg * reg + | Call of string + | Ret + | Beq of reg * reg * string + | J of string + | Label of string + | La of reg * string + | Slt of reg * reg * reg + | Seqz of reg * reg + | Snez of reg * reg + | Xori of reg * reg * int + | Xor of reg * reg * reg + | Mul of reg * reg * reg + | Div of reg * reg * reg + | Srli of reg * reg * int + + val pp_reg : Format.formatter -> reg -> unit + val pp_offset : Format.formatter -> offset -> unit + val pp_instr : Format.formatter -> instr -> unit + val tag_int : int -> int + val fp : reg + val sp : reg + val ra : reg + val zero : reg + val a0 : reg + val a1 : reg + val a2 : reg + val a3 : reg + val a4 : reg + val a5 : reg + val a6 : reg + val a7 : reg + val t0 : reg + val t1 : reg + val result_reg : reg + val addi : reg -> reg -> int -> instr list + val ld : reg -> offset -> instr list + val sd : reg -> offset -> instr list + val mv : reg -> reg -> instr list + val li : reg -> int -> instr list + val add : reg -> reg -> reg -> instr list + val sub : reg -> reg -> reg -> instr list + val call : string -> instr list + val ret : unit -> instr list + val beq : reg -> reg -> string -> instr list + val j : string -> instr list + val label : string -> instr list + val la : reg -> string -> instr list + val slt : reg -> reg -> reg -> instr list + val seqz : reg -> reg -> instr list + val snez : reg -> reg -> instr list + val xori : reg -> reg -> int -> instr list + val xor : reg -> reg -> reg -> instr list + val mul : reg -> reg -> reg -> instr list + val div : reg -> reg -> reg -> instr list + val srli : reg -> reg -> int -> instr list + val add_tag_items : reg -> int -> instr list + val arg_regs : reg list + val candidate_regs_for_spill : reg list + val arg_regs_count : int + val word_size : int + val stack_align : int + val frame_header_size : int + val saved_fp_offset : int + val saved_ra_offset : int + val max_addi_imm : int + val sub_sp : int -> instr list + val addi_or_li_add : reg -> reg -> int -> instr list + val sd_at_sp_offset : reg -> int -> instr list + + type location = + | Loc_reg of reg + | Loc_mem of offset + + val prologue : enable_gc:bool -> name:string -> stack_size:int -> instr list + val epilogue : enable_gc:bool -> is_main:bool -> instr list + val format_item : Format.formatter -> instr -> unit +end diff --git a/EML/lib/backend/ricsv/auxillary.ml b/EML/lib/backend/ricsv/auxillary.ml new file mode 100644 index 00000000..7de3de17 --- /dev/null +++ b/EML/lib/backend/ricsv/auxillary.ml @@ -0,0 +1,109 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Middleend.Anf +open Architecture +open Riscv_backend +open Generator_state + +let is_caller_saved = function + | A _ | T _ -> true + | Zero | RA | SP | S _ -> false +;; + +let to_tagged_bool dst = add dst dst dst @ add_tag_items dst 1 + +let compare_ordering dst left_reg right_reg ~invert = + let base = slt dst left_reg right_reg in + (if invert then base @ xori dst dst 1 else base) @ to_tagged_bool dst +;; + +let compare_eq_ne dst left_reg right_reg ~is_eq = + let base = xor dst left_reg right_reg in + (if is_eq then base @ seqz dst dst else base @ snez dst dst) @ to_tagged_bool dst +;; + +let bin_op dst op left_reg right_reg : (instr list, string) result = + match op with + | "+" -> Ok (add dst left_reg right_reg @ add_tag_items dst (-1)) + | "-" -> Ok (sub dst left_reg right_reg @ add_tag_items dst 1) + | "*" -> + Ok + (srli left_reg left_reg 1 + @ addi right_reg right_reg (-1) + @ mul dst left_reg right_reg + @ add_tag_items dst 1) + | "/" -> + Ok + (srli left_reg left_reg 1 + @ srli right_reg right_reg 1 + @ div dst left_reg right_reg + @ add dst dst dst + @ add_tag_items dst 1) + | "<" -> Ok (compare_ordering dst left_reg right_reg ~invert:false) + | ">" -> Ok (compare_ordering dst right_reg left_reg ~invert:false) + | "<=" -> Ok (compare_ordering dst right_reg left_reg ~invert:true) + | ">=" -> Ok (compare_ordering dst left_reg right_reg ~invert:true) + | "=" -> Ok (compare_eq_ne dst left_reg right_reg ~is_eq:true) + | "<>" -> Ok (compare_eq_ne dst left_reg right_reg ~is_eq:false) + | _ -> Error ("unsupported binary operator: " ^ op) +;; + +let bin_oper_to_string = Utils.Pretty_printer.string_of_bin_op + +let vars_in_caller_saved_regs environment = + Base.Map.to_alist environment + |> List.filter_map (fun (variable_name, variable_location) -> + match variable_location with + | Loc_reg register when is_caller_saved register -> Some (variable_name, register) + | _ -> None) +;; + +let indices_of_args_to_spill generation_state immediate_arguments = + let argument_overwrites_result_register = function + | ImmediateConst _ -> false + | ImmediateVar function_name -> Base.Map.mem generation_state.arity_map function_name + in + Base.List.foldi + immediate_arguments + ~init:[] + ~f:(fun argument_index spilled_indices immediate_argument -> + if argument_overwrites_result_register immediate_argument + then argument_index :: spilled_indices + else spilled_indices) + |> List.rev +;; + +type call_style = + | Nullary of string + | CurryChain of + { function_name : string + ; arity : int + ; initial_arguments : immediate list + ; remaining_arguments : immediate list + } + | Direct of + { function_name : string + ; arguments : immediate list + } + | ViaApplyNargs of + { function_name : string + ; argument_count : int + ; arguments : immediate list + } + +let classify_call ~argument_count ~callee_arity_opt ~function_name ~arguments : call_style + = + match callee_arity_opt with + | Some 0 when argument_count = 1 -> Nullary function_name + | Some arity when argument_count > arity -> + CurryChain + { function_name + ; arity + ; initial_arguments = Base.List.take arguments arity + ; remaining_arguments = Base.List.drop arguments arity + } + | Some arity when argument_count = arity -> Direct { function_name; arguments } + | _ -> ViaApplyNargs { function_name; argument_count; arguments } +;; diff --git a/EML/lib/backend/ricsv/auxillary.mli b/EML/lib/backend/ricsv/auxillary.mli new file mode 100644 index 00000000..3b85b3a5 --- /dev/null +++ b/EML/lib/backend/ricsv/auxillary.mli @@ -0,0 +1,42 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Middleend.Anf +open Architecture +open Riscv_backend + +val is_caller_saved : reg -> bool +val bin_op : reg -> string -> reg -> reg -> (instr list, string) result +val bin_oper_to_string : Frontend.Ast.bin_oper -> string + +val vars_in_caller_saved_regs + : (string, location, Base.String.comparator_witness) Base.Map.t + -> (string * reg) list + +val indices_of_args_to_spill : Generator_state.state -> immediate list -> int list + +type call_style = + | Nullary of string + | CurryChain of + { function_name : string + ; arity : int + ; initial_arguments : immediate list + ; remaining_arguments : immediate list + } + | Direct of + { function_name : string + ; arguments : immediate list + } + | ViaApplyNargs of + { function_name : string + ; argument_count : int + ; arguments : immediate list + } + +val classify_call + : argument_count:int + -> callee_arity_opt:int option + -> function_name:string + -> arguments:immediate list + -> call_style diff --git a/EML/lib/backend/ricsv/generator.ml b/EML/lib/backend/ricsv/generator.ml new file mode 100644 index 00000000..c4f78645 --- /dev/null +++ b/EML/lib/backend/ricsv/generator.ml @@ -0,0 +1,526 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Format +open Frontend.Ast +open Middleend.Anf +open Architecture +open Analysis +open Riscv_backend +open Generator_state +open Auxillary + +let alloc_frame_slot = + let* () = + modify (fun state -> { state with frame_offset = state.frame_offset + word_size }) + in + let* state = get in + return (fp, -state.frame_offset) +;; + +let store_reg_into_frame source_register = + let* slot = alloc_frame_slot in + let* () = append (sd source_register slot) in + return (Loc_mem slot) +;; + +let load_into_reg destination_register source_location = + let instructions = + match source_location with + | Loc_reg source_register when equal_reg source_register destination_register -> [] + | Loc_reg source_register -> mv destination_register source_register + | Loc_mem source_offset -> ld destination_register source_offset + in + let* () = append instructions in + return () +;; + +let spill_params_to_frame params_reg = + Base.List.foldi params_reg ~init:(return ()) ~f:(fun index acc param -> + let* () = acc in + match param with + | ImmediateVar name -> + let argument_register = List.nth arg_regs index in + let* slot = store_reg_into_frame argument_register in + modify_env (fun env -> Base.Map.set env ~key:name ~data:slot) + | _ -> return ()) +;; + +let spill_caller_saved_vars_to_frame = + let* env = get_env in + let caller_saved_variables = vars_in_caller_saved_regs env in + let frame_bytes = List.length caller_saved_variables * word_size in + let* () = if frame_bytes > 0 then append (addi sp sp (-frame_bytes)) else return () in + let rec spill environment = function + | [] -> return environment + | (variable_name, register) :: remaining_variables -> + let* new_location = store_reg_into_frame register in + spill + (Base.Map.set environment ~key:variable_name ~data:new_location) + remaining_variables + in + let* updated_environment = spill env caller_saved_variables in + set_env updated_environment +;; + +let evacuate_reg destination_register = + let is_register_used environment register = + Base.Map.exists environment ~f:(function + | Loc_reg mapped_register -> equal_reg register mapped_register + | Loc_mem _ -> false) + in + let rewrite_location_in_environment environment from_register to_location = + Base.Map.map environment ~f:(function + | Loc_reg mapped_register when equal_reg mapped_register from_register -> + to_location + | location -> location) + in + let* env = get_env in + if not (is_register_used env destination_register) + then return () + else ( + match + List.find_opt + (fun candidate_register -> not (is_register_used env candidate_register)) + candidate_regs_for_spill + with + | Some free_register -> + let* () = append (mv free_register destination_register) in + let updated_environment = + rewrite_location_in_environment env destination_register (Loc_reg free_register) + in + set_env updated_environment + | None -> + let* spilled_location = store_reg_into_frame destination_register in + let updated_environment = + rewrite_location_in_environment env destination_register spilled_location + in + set_env updated_environment) +;; + +let resolve_call_symbol name = + let* state = get in + match state.symbol_resolve state.current_func_index name with + | Some (asm_name, _) -> return asm_name + | None -> return name +;; + +let resolve_symbol_and_arity state name = + match state.symbol_resolve state.current_func_index name with + | Some (asm_name, arity_val) -> asm_name, arity_val + | None -> + (match Base.Map.find state.arity_map name with + | Some arity_val -> name, arity_val + | None -> name, -1) +;; + +let gen_imm dst = function + | ImmediateConst (ConstInt n) -> append (li dst (tag_int n)) + | ImmediateConst (ConstBool b) -> append (li dst (if b then tag_int 1 else tag_int 0)) + | ImmediateConst (ConstChar c) -> append (li dst (tag_int (Char.code c))) + | ImmediateConst (ConstString _) -> fail "String constants not yet supported in codegen" + | ImmediateVar name -> + let* env = get_env in + (match Base.Map.find env name with + | Some loc -> load_into_reg dst loc + | None -> + let* state = get in + let sym, arity = resolve_symbol_and_arity state name in + if arity < 0 + then fail ("unbound variable: " ^ name) + else ( + match arity with + | 0 -> append (call sym) + | nargs -> + let* () = append (la result_reg sym) in + let* () = append (li (List.nth arg_regs 1) nargs) in + append (call "alloc_closure"))) +;; + +let copy_result_to dst = + if equal_reg dst result_reg then return () else append (mv dst result_reg) +;; + +let spill_dangerous_args state arguments = + let dangerous_argument_indices = indices_of_args_to_spill state arguments in + let spill_slots = List.length dangerous_argument_indices * word_size in + let* () = if spill_slots > 0 then append (addi sp sp (-spill_slots)) else return () in + Base.List.foldi + arguments + ~init:(return (Base.Map.empty (module Base.Int))) + ~f:(fun argument_index acc argument -> + let* spilled_locations_by_index = acc in + if List.mem argument_index dangerous_argument_indices + then + let* () = gen_imm result_reg argument in + let* spilled_location = store_reg_into_frame result_reg in + return + (Base.Map.add_exn + spilled_locations_by_index + ~key:argument_index + ~data:spilled_location) + else return spilled_locations_by_index) +;; + +let load_immediates_into_registers + spilled_locations + argument_registers + immediate_arguments + = + let immediate_count_to_load = + min (List.length immediate_arguments) (List.length argument_registers) + in + Base.List.foldi + (Base.List.take immediate_arguments immediate_count_to_load) + ~init:(return ()) + ~f:(fun argument_index acc immediate_argument -> + let* () = acc in + let destination_register = List.nth argument_registers argument_index in + match Base.Map.find spilled_locations argument_index with + | Some spilled_location -> load_into_reg destination_register spilled_location + | None -> gen_imm destination_register immediate_argument) +;; + +let emit_arguments_to_stack spilled_arguments arguments = + Base.List.foldi arguments ~init:(return ()) ~f:(fun argument_index acc argument -> + let* () = acc in + let stack_offset = argument_index * word_size in + let* () = + match Base.Map.find spilled_arguments argument_index with + | Some spilled_location -> load_into_reg t0 spilled_location + | None -> gen_imm t0 argument + in + append (sd t0 (sp, stack_offset))) +;; + +let push_stack_args stack_args = + let stack_argument_count = List.length stack_args in + if stack_argument_count = 0 + then return 0 + else ( + let stack_bytes = stack_argument_count * word_size in + let* () = append (addi sp sp (-stack_bytes)) in + let no_spilled_arguments = Base.Map.empty (module Base.Int) in + let* () = emit_arguments_to_stack no_spilled_arguments stack_args in + return stack_bytes) +;; + +let gen_call_with_regs + destination_register + argument_registers + call_arguments + spilled_arguments + function_symbol + = + let* () = + load_immediates_into_registers spilled_arguments argument_registers call_arguments + in + let stack_arguments = Base.List.drop call_arguments (List.length argument_registers) in + let* reserved_stack_bytes = push_stack_args stack_arguments in + let* () = append (call function_symbol) in + let* () = copy_result_to destination_register in + if reserved_stack_bytes > 0 then append (addi sp sp reserved_stack_bytes) else return () +;; + +(* let foo = ... in + foo () *) +let gen_nullary destination_register function_name = + let* resolved_symbol = resolve_call_symbol function_name in + let* () = append (call resolved_symbol) in + copy_result_to destination_register +;; + +let gen_direct_call destination_register function_name call_arguments spilled_arguments = + let* resolved_symbol = resolve_call_symbol function_name in + gen_call_with_regs + destination_register + arg_regs + call_arguments + spilled_arguments + resolved_symbol +;; + +let gen_via_apply_nargs + destination_register + function_name + argument_count + call_arguments + spilled_arguments + = + let argv_bytes = argument_count * word_size in + let* () = gen_imm a0 (ImmediateVar function_name) in + let* () = append (li a1 argument_count) in + let* () = append (addi sp sp (-argv_bytes)) in + let* () = emit_arguments_to_stack spilled_arguments call_arguments in + let* () = append (mv a2 sp) in + let* () = append (call "eml_applyN") in + let* () = copy_result_to destination_register in + append (addi sp sp argv_bytes) +;; + +let rec gen_invocation destination_register function_name call_arguments = + let* () = spill_caller_saved_vars_to_frame in + let* state = get in + let* spilled_arguments = spill_dangerous_args state call_arguments in + let argument_count = List.length call_arguments in + let callee_arity_opt = Base.Map.find state.arity_map function_name in + let style = + classify_call + ~argument_count + ~callee_arity_opt + ~function_name + ~arguments:call_arguments + in + match style with + | Nullary resolved_function_name -> + gen_nullary destination_register resolved_function_name + | CurryChain { function_name; arity; initial_arguments; remaining_arguments } -> + gen_curried_call + destination_register + function_name + arity + initial_arguments + remaining_arguments + | Direct { function_name; arguments } -> + gen_direct_call destination_register function_name arguments spilled_arguments + | ViaApplyNargs { function_name; argument_count; arguments } -> + gen_via_apply_nargs + destination_register + function_name + argument_count + arguments + spilled_arguments + +and gen_curried_call + destination_register + function_name + _arity + initial_arguments + remaining_arguments + = + let* part_name = fresh_partial in + let* () = + gen_cexpr + destination_register + (ComplexApp + (ImmediateVar function_name, List.hd initial_arguments, List.tl initial_arguments)) + in + let* partial_function_location = store_reg_into_frame destination_register in + let* () = + modify_env (fun environment -> + Base.Map.set environment ~key:part_name ~data:partial_function_location) + in + let rec apply_remaining_arguments = function + | [] -> return () + | [ argument ] -> + gen_cexpr destination_register (ComplexApp (ImmediateVar part_name, argument, [])) + | argument :: remaining_arguments_tail -> + let* () = + gen_cexpr destination_register (ComplexApp (ImmediateVar part_name, argument, [])) + in + let* updated_partial_location = store_reg_into_frame destination_register in + let* () = + modify_env (fun environment -> + Base.Map.set environment ~key:part_name ~data:updated_partial_location) + in + apply_remaining_arguments remaining_arguments_tail + in + apply_remaining_arguments remaining_arguments + +and gen_unit dst = append (li dst (tag_int 0)) + +and gen_neg dst op = + let* () = gen_imm t0 op in + let* () = append (li dst (tag_int 0)) in + append (sub dst dst t0) + +and gen_not dst op = + let* () = gen_imm t0 op in + append (xori dst t0 (tag_int 1)) + +and gen_binop dst binary_operator left_operand right_operand = + let* () = gen_imm t0 left_operand in + let* () = gen_imm t1 right_operand in + let* () = evacuate_reg dst in + match bin_op dst (bin_oper_to_string binary_operator) t0 t1 with + | Ok instrs -> append instrs + | Error msg -> fail msg + +and gen_branch dst cond then_e else_e = + let* () = gen_imm t0 cond in + let* else_lbl, end_lbl = fresh_branch in + let* () = append (li t1 (tag_int 0)) in + let* () = append (beq t0 t1 else_lbl) in + let* state_before_then = get in + let frame_offset_before_then = state_before_then.frame_offset in + let* () = gen_anf dst then_e in + let* () = append (j end_lbl) in + let* state_after_then = get in + let* () = + put + { state_before_then with + frame_offset = frame_offset_before_then + ; instr_buffer = state_after_then.instr_buffer + } + in + let* () = append (label else_lbl) in + let* () = gen_anf dst else_e in + append (label end_lbl) + +and spill_tuple_var_if_in_reg = function + | ImmediateVar name -> + let* env = get_env in + (match Base.Map.find env name with + | Some (Loc_reg _) -> + let* loc = store_reg_into_frame result_reg in + modify_env (fun env -> Base.Map.set env ~key:name ~data:loc) + | _ -> return ()) + | _ -> return () + +and gen_field dst tuple_imm idx = + let* () = gen_imm result_reg tuple_imm in + let* () = spill_tuple_var_if_in_reg tuple_imm in + let* () = append (li (List.nth arg_regs 1) (tag_int idx)) in + let* () = append (call "field") in + copy_result_to dst + +and gen_list dst = function + | [] -> append (li dst (tag_int 0)) + | hd :: tl -> + let* () = gen_list dst tl in + let* tail_loc = store_reg_into_frame dst in + let* () = gen_imm t0 hd in + let* () = spill_caller_saved_vars_to_frame in + let* () = append (li result_reg 2) in + let* () = load_into_reg (List.nth arg_regs 1) (Loc_reg t0) in + let* () = load_into_reg (List.nth arg_regs 2) tail_loc in + let* () = append (call "create_tuple") in + copy_result_to dst + +and gen_tuple dst e1 e2 rest = + let elts = e1 :: e2 :: rest in + let argc = List.length elts in + let* () = spill_caller_saved_vars_to_frame in + let* state = get in + let* spilled = spill_dangerous_args state elts in + let array_bytes = argc * word_size in + let* () = append (addi sp sp (-array_bytes)) in + let* () = emit_arguments_to_stack spilled elts in + let* () = append (li result_reg argc) in + let* () = append (addi (List.nth arg_regs 1) sp 0) in + let* () = append (call "create_tuple") in + let* () = append (addi sp sp array_bytes) in + copy_result_to dst + +and gen_app dst fname first rest = gen_invocation dst fname (first :: rest) + +and gen_cexpr dst = function + | ComplexUnit -> gen_unit dst + | ComplexImmediate imm -> gen_imm dst imm + | ComplexUnarOper (Negative, op) -> gen_neg dst op + | ComplexUnarOper (Not, op) -> gen_not dst op + | ComplexBinOper (op, left, right) -> gen_binop dst op left right + | ComplexBranch (cond, then_e, else_e) -> gen_branch dst cond then_e else_e + | ComplexField (tuple_imm, idx) -> gen_field dst tuple_imm idx + | ComplexTuple (e1, e2, rest) -> gen_tuple dst e1 e2 rest + | ComplexApp (ImmediateVar name, first, rest) -> gen_app dst name first rest + | ComplexApp (_, _, _) -> fail "ComplexApp: function must be a variable" + | ComplexLambda _ | ComplexOption _ -> fail "gen_cexpr: Lambda/Option not implemented" + | ComplexList imm_list -> gen_list dst imm_list + +and gen_anf dst = function + | AnfExpr cexp -> gen_cexpr dst cexp + | AnfLet (_, name, rhs, cont) -> + let* () = evacuate_reg result_reg in + let* () = gen_cexpr result_reg rhs in + let* loc = store_reg_into_frame result_reg in + let* () = modify_env (fun env -> Base.Map.set env ~key:name ~data:loc) in + gen_anf dst cont +;; + +let bind_param_to_reg env i = function + | ImmediateVar name -> + let register = List.nth arg_regs i in + return (Base.Map.set env ~key:name ~data:(Loc_reg register)) + | _ -> fail "unsupported pattern" +;; + +let bind_param_to_stack env i = function + | ImmediateVar name -> + let stack_offset = (i + 2) * word_size in + return (Base.Map.set env ~key:name ~data:(Loc_mem (fp, stack_offset))) + | _ -> fail "unsupported pattern" +;; + +let flush_instr_buffer ppf = + let* state = get in + let instruction_buffer = state.instr_buffer in + let* () = put { state with instr_buffer = [] } in + let () = List.iter (fun item -> format_item ppf item) (List.rev instruction_buffer) in + return () +;; + +let gen_func ~enable_gc asm_name params body frame_sz ppf = + fprintf ppf "\n .globl %s\n .type %s, @function\n" asm_name asm_name; + let args = List.length params in + let params_reg, params_stack = + ( Base.List.take params (min args arg_regs_count) + , Base.List.drop params (min args arg_regs_count) ) + in + let env0 = Base.Map.empty (module Base.String) in + let* env = + Base.List.foldi params_reg ~init:(return env0) ~f:(fun i acc p -> + let* e = acc in + bind_param_to_reg e i p) + in + let* env = + Base.List.foldi params_stack ~init:(return env) ~f:(fun i acc p -> + let* e = acc in + bind_param_to_stack e i p) + in + let* () = set_env env in + let* () = append (prologue ~enable_gc ~name:asm_name ~stack_size:frame_sz) in + let* state = get in + let* () = put { state with frame_offset = 0 } in + let* () = spill_params_to_frame params_reg in + let* () = gen_anf result_reg body in + let* () = append (epilogue ~enable_gc ~is_main:(String.equal asm_name "main")) in + let* () = flush_instr_buffer ppf in + return () +;; + +let gen_program ~enable_gc ppf (analysis : analysis_result) = + fprintf ppf ".section .text"; + let base = Runtime.Primitives.runtime_primitive_arities in + let arity_map = + List.fold_left + (fun map (name, arity) -> Base.Map.set map ~key:name ~data:arity) + analysis.arity_map + base + in + let init = + { frame_offset = 0 + ; naming_state = Default_naming.init + ; arity_map + ; env = Base.Map.empty (module Base.String) + ; instr_buffer = [] + ; current_func_index = 0 + ; symbol_resolve = analysis.resolve + } + in + let comp = + Base.List.foldi analysis.functions ~init:(return ()) ~f:(fun function_index acc fn -> + let frame_sz = (2 + fn.slots_count) * word_size in + let* () = acc in + let* () = + modify (fun state -> { state with current_func_index = function_index }) + in + gen_func ~enable_gc fn.asm_name fn.params fn.body frame_sz ppf) + in + match run comp init with + | Ok ((), _) -> + pp_print_flush ppf (); + Ok () + | Error msg -> Error msg +;; diff --git a/EML/lib/backend/ricsv/generator.mli b/EML/lib/backend/ricsv/generator.mli new file mode 100644 index 00000000..5d1bfb22 --- /dev/null +++ b/EML/lib/backend/ricsv/generator.mli @@ -0,0 +1,9 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val gen_program + : enable_gc:bool + -> Format.formatter + -> Analysis.analysis_result + -> (unit, string) Result.t diff --git a/EML/lib/backend/ricsv/generator_state.ml b/EML/lib/backend/ricsv/generator_state.ml new file mode 100644 index 00000000..9197def7 --- /dev/null +++ b/EML/lib/backend/ricsv/generator_state.ml @@ -0,0 +1,95 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Architecture +open Riscv_backend + +module type NAMING = sig + type t + + val init : t + val fresh_partial : t -> string * t + val fresh_branch : t -> (string * string) * t +end + +module Default_naming : NAMING = struct + type t = int + + let init = 0 + let fresh_partial n = "part_" ^ string_of_int n, n + 1 + let fresh_branch n = ("else_" ^ string_of_int n, "end_" ^ string_of_int n), n + 1 +end + +module Make (N : NAMING) = struct + type env = (string, location, Base.String.comparator_witness) Base.Map.t + + type state = + { frame_offset : int + ; naming_state : N.t + ; arity_map : (string, int, Base.String.comparator_witness) Base.Map.t + ; env : env + ; instr_buffer : instr list + ; current_func_index : int + ; symbol_resolve : int -> string -> (string * int) option + } + + type 'a t = state -> ('a * state, string) Result.t + + let return x st = Ok (x, st) + let fail e = fun _ -> Error e + + let bind m f = + fun state -> + match m state with + | Ok (x, st') -> f x st' + | Error e -> Error e + ;; + + let ( let* ) = bind + let get st = Ok (st, st) + let put st = fun _ -> Ok ((), st) + + let modify f = + let* st = get in + put (f st) + ;; + + let modify_env f = modify (fun st -> { st with env = f st.env }) + + let get_env = + let* st = get in + return st.env + ;; + + let set_env env = modify (fun st -> { st with env }) + + let fresh_partial = + let* st = get in + let name, next = N.fresh_partial st.naming_state in + let* () = put { st with naming_state = next } in + return name + ;; + + let fresh_branch = + let* st = get in + let pair, next = N.fresh_branch st.naming_state in + let* () = put { st with naming_state = next } in + return pair + ;; + + let run m = m + + let append (items : instr list) = + let modify_instr_buffer f = + modify (fun st -> { st with instr_buffer = f st.instr_buffer }) + in + match items with + | [] -> return () + | _ -> + modify_instr_buffer (fun buffer -> + List.fold_left (fun acc instruction -> instruction :: acc) buffer items) + ;; +end + +include Make (Default_naming) diff --git a/EML/lib/backend/ricsv/generator_state.mli b/EML/lib/backend/ricsv/generator_state.mli new file mode 100644 index 00000000..93d5d122 --- /dev/null +++ b/EML/lib/backend/ricsv/generator_state.mli @@ -0,0 +1,49 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Architecture +open Riscv_backend + +module type NAMING = sig + type t + + val init : t + val fresh_partial : t -> string * t + val fresh_branch : t -> (string * string) * t +end + +module Default_naming : NAMING + +module Make (N : NAMING) : sig + type env = (string, location, Base.String.comparator_witness) Base.Map.t + + type state = + { frame_offset : int + ; naming_state : N.t + ; arity_map : (string, int, Base.String.comparator_witness) Base.Map.t + ; env : env + ; instr_buffer : instr list + ; current_func_index : int + ; symbol_resolve : int -> string -> (string * int) option + } + + type 'a t = state -> ('a * state, string) Result.t + + val return : 'a -> 'a t + val fail : string -> 'a t + val bind : 'a t -> ('a -> 'b t) -> 'b t + val ( let* ) : 'a t -> ('a -> 'b t) -> 'b t + val get : state t + val put : state -> unit t + val modify : (state -> state) -> unit t + val modify_env : (env -> env) -> unit t + val get_env : env t + val set_env : env -> unit t + val fresh_partial : string t + val fresh_branch : (string * string) t + val run : 'a t -> state -> ('a * state, string) Result.t + val append : instr list -> unit t +end + +include module type of Make (Default_naming) diff --git a/EML/lib/backend/ricsv/runner.ml b/EML/lib/backend/ricsv/runner.ml new file mode 100644 index 00000000..9efd4272 --- /dev/null +++ b/EML/lib/backend/ricsv/runner.ml @@ -0,0 +1,10 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Middleend.Anf +open Analysis + +let gen_program ?(enable_gc = false) ppf (program : anf_program) = + program |> analyze |> Generator.gen_program ~enable_gc ppf +;; diff --git a/EML/lib/backend/ricsv/runner.mli b/EML/lib/backend/ricsv/runner.mli new file mode 100644 index 00000000..e5df8f59 --- /dev/null +++ b/EML/lib/backend/ricsv/runner.mli @@ -0,0 +1,9 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val gen_program + : ?enable_gc:bool + -> Format.formatter + -> Middleend.Anf.anf_program + -> (unit, string) result diff --git a/EML/lib/dune b/EML/lib/dune new file mode 100644 index 00000000..bc13309d --- /dev/null +++ b/EML/lib/dune @@ -0,0 +1,11 @@ +(include_subdirs qualified) + +(library + (name EML_lib) + (public_name EML.lib) + (modules :standard) + (libraries base angstrom llvm) + (preprocess + (pps ppx_deriving.show ppx_deriving.eq)) + (instrumentation + (backend bisect_ppx))) diff --git a/EML/lib/frontend/ast.ml b/EML/lib/frontend/ast.ml new file mode 100644 index 00000000..18b2e913 --- /dev/null +++ b/EML/lib/frontend/ast.ml @@ -0,0 +1,114 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Format + +type ident = string [@@deriving show { with_path = false }] + +type is_rec = + | NonRec + | Rec +[@@deriving show { with_path = false }] + +type bin_oper = + | Plus (* [+] *) + | Minus (* [-] *) + | Multiply (* [*] *) + | Division (* [/] *) + | And (* [&&] *) + | Or (* [||] *) + | GreatestEqual (* [>=] *) + | LowestEqual (* [<=] *) + | GreaterThan (* [>] *) + | LowerThan (* [<] *) + | Equal (* [=] *) + | NotEqual (* [<>] *) +[@@deriving show { with_path = false }] + +type unar_oper = + | Negative (* [-x] *) + | Not (* [not x]*) +[@@deriving show { with_path = false }] + +type const = + | ConstInt of int (* Integer constant: Example - [21] *) + | ConstBool of bool (* Boolean constant: Example - [true] or [false] *) + | ConstString of string (* String constant: Example - "I like OCaml!" *) + | ConstChar of char (* Character constant: Example - ['a'] *) +[@@deriving show { with_path = false }] + +type binder = int [@@deriving show { with_path = false }] + +type ty = + | TyVar of ident + | TyPrim of string + | TyArrow of ty * ty + | TyList of ty + | TyTuple of ty list + | TyOption of ty +[@@deriving show { with_path = false }] + +type pattern = + | PatVariable of ident (* [x] *) + | PatConst of const (* [21] or [true] or [false] *) + | PatTuple of pattern * pattern * pattern list (* (x1; x2 ... xn) *) + | PatAny + | PatType of pattern * ty + | PatUnit + | PatList of pattern list + | PatOption of pattern option + | PatConstruct of ident * pattern option +[@@deriving show { with_path = false }] + +type expr = + | ExpIdent of ident (* ExpIdent "x" *) + | ExpConst of const (* ExpConst (ConstInt 666) *) + | ExpBranch of expr * expr * expr option + | ExpBinOper of bin_oper * expr * expr (* ExpBinOper(Plus, 1, 2) *) + | ExpUnarOper of unar_oper * expr (* ExpUnarOper(not, x)*) + | ExpTuple of expr * expr * expr list (* ExpTuple[x1; x2 .. xn] *) + | ExpList of expr list (* ExpList[x1; x2 .. xn] *) + | ExpLambda of pattern * pattern list * expr + | ExpTypeAnnotation of expr * ty + | ExpLet of is_rec * bind * bind list * expr + | ExpApply of expr * expr + | ExpOption of expr option + | ExpFunction of bind * bind list + | ExpMatch of expr * bind * bind list + | ExpConstruct of ident * expr option +[@@deriving show { with_path = false }] + +and bind = pattern * expr [@@deriving show { with_path = false }] + +type structure = + | SEval of expr + | SValue of is_rec * bind * bind list +[@@deriving show { with_path = false }] + +type program = structure list [@@deriving show { with_path = false }] + +let bin_op_list = [ "*"; "/"; "+"; "-"; "^"; ">="; "<="; "<>"; "="; ">"; "<"; "&&"; "||" ] + +let rec pp_ty fmt = function + | TyPrim x -> fprintf fmt "%s" x + | TyVar x -> fprintf fmt "%s" x + | TyArrow (l, r) -> + (match l, r with + | TyArrow _, _ -> fprintf fmt "(%a) -> %a" pp_ty l pp_ty r + | _, _ -> fprintf fmt "%a -> %a" pp_ty l pp_ty r) + | TyTuple elems -> + fprintf + fmt + "(%a)" + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt " * ") pp_ty) + elems + | TyList ty -> + (match ty with + | TyArrow _ | TyTuple _ -> fprintf fmt "(%a) list" pp_ty ty + | _ -> fprintf fmt "%a list" pp_ty ty) + | TyOption ty -> + (match ty with + | TyArrow _ | TyTuple _ -> fprintf fmt "(%a) option" pp_ty ty + | _ -> fprintf fmt "%a option" pp_ty ty) +;; diff --git a/EML/lib/frontend/parser.ml b/EML/lib/frontend/parser.ml new file mode 100644 index 00000000..3f871894 --- /dev/null +++ b/EML/lib/frontend/parser.ml @@ -0,0 +1,540 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Ast +open Base +open Angstrom + +let is_keyword = function + | "let" + | "match" + | "in" + | "if" + | "then" + | "else" + | "fun" + | "rec" + | "true" + | "false" + | "Some" + | "and" + | "function" + | "None" + | "with" -> true + | _ -> false +;; + +let is_lowercase = function + | 'a' .. 'z' -> true + | _ -> false +;; + +let is_uppercase = function + | 'A' .. 'Z' -> true + | _ -> false +;; + +let is_digit = function + | '0' .. '9' -> true + | _ -> false +;; + +let white_space = take_while Char.is_whitespace +let token s = white_space *> string s +let token1 s = white_space *> s +let parse_parens p = token "(" *> p <* token ")" + +let is_separator = function + | ')' + | '(' + | '<' + | '>' + | '@' + | ',' + | ';' + | ':' + | '\\' + | '"' + | '/' + | '[' + | ']' + | '?' + | '=' + | '{' + | '}' + | ' ' + | '\r' + | '\t' + | '\n' + | '*' + | '-' -> true + | _ -> false +;; + +let token2 str = + token str *> peek_char + >>= function + | Some c when is_separator c -> return str <* white_space + | None -> return str <* white_space + | _ -> fail (Printf.sprintf "There is no separator after %S." str) +;; + +let parse_const_int = + let sign = choice [ token "" ] in + let num = take_while1 Char.is_digit in + lift2 (fun s n -> ConstInt (Int.of_string (s ^ n))) sign num +;; + +let parse_const_char = + string "\'" *> any_char <* string "\'" >>| fun char_value -> ConstChar char_value +;; + +let parse_const_bool = + choice + [ token "true" *> return (ConstBool true); token "false" *> return (ConstBool false) ] +;; + +let parse_const_string = + token "\"" *> take_till (Char.equal '\"') <* token "\"" >>| fun s -> ConstString s +;; + +let parse_const = + white_space + *> choice [ parse_const_int; parse_const_char; parse_const_string; parse_const_bool ] +;; + +let parse_unar_oper = choice [ token "-" *> return Negative; token "not" *> return Not ] + +let parse_ident = + let parse_first_char = + satisfy (fun ch -> is_lowercase ch || is_uppercase ch || Char.equal ch '_') + >>| Char.escaped + in + let parse_other_chars = + take_while (fun ch -> + is_lowercase ch || is_uppercase ch || is_digit ch || Char.equal ch '_') + in + let parse_regular_ident = + token1 @@ lift2 ( ^ ) parse_first_char parse_other_chars + >>= fun s -> if is_keyword s then fail "It is not identifier" else return s + in + let parse_op_ident = + white_space + *> char '(' + *> white_space + *> choice (List.map Ast.bin_op_list ~f:(fun opr -> token opr *> return opr)) + <* white_space + <* char ')' + in + parse_regular_ident <|> parse_op_ident +;; + +let parse_base_type = + choice + [ token "int" *> return (TyPrim "int") + ; token "bool" *> return (TyPrim "bool") + ; token "string" *> return (TyPrim "string") + ; token "unit" *> return (TyPrim "unit") + ; token "char" *> return (TyPrim "char") + ] +;; + +let rec parse_type_list t = + let* base = t in + white_space + *> token "list" + *> (parse_type_list (return (TyList base)) <|> return (TyList base)) +;; + +let parse_tuple_type parse_type = + let* fst_type = parse_type in + let* snd_type = token "*" *> parse_type in + let* type_list = many (token "*" *> parse_type) in + return (TyTuple (fst_type :: snd_type :: type_list)) +;; + +let rec parse_arrow_type parse_type = + let* type1 = parse_type in + let* type2 = token "->" *> (parse_arrow_type parse_type <|> parse_type) in + return (TyArrow (type1, type2)) +;; + +let parse_type = + let base_type = parse_base_type in + let list_type = parse_type_list base_type <|> base_type in + let tuple_type = parse_tuple_type list_type <|> list_type in + parse_arrow_type tuple_type <|> tuple_type +;; + +let parse_pattern_with_type parse_pattern = + let* pat = white_space *> token "(" *> parse_pattern in + let* constr = + white_space *> token ":" *> white_space *> parse_type <* white_space <* token ")" + in + return (PatType (pat, constr)) +;; + +let parse_pattern_var = parse_ident >>| fun id -> PatVariable id +let parse_pattern_const = parse_const >>| fun c -> PatConst c +let parse_pattern_any = token2 "_" *> return PatAny + +let parse_pattern_tuple parse_pattern = + let parse_unparenthesized = + lift3 + (fun p1 p2 rest -> PatTuple (p1, p2, rest)) + parse_pattern + (token "," *> parse_pattern) + (many (token "," *> parse_pattern)) + <* white_space + in + parse_parens parse_unparenthesized <|> parse_unparenthesized +;; + +let parse_keyword = choice [ token "true"; token "false"; token "None"; token "()" ] + +let parse_option parse = + let* tag = token2 "Some" in + let* opt = parse >>| Option.some in + return (tag, opt) +;; + +let parse_construct parse construct func = + token "[" *> sep_by (token ";") parse + <* token "]" + >>| List.fold_right ~init:(construct ("[]", None)) ~f:func +;; + +let parse_list parse construct tuple = + let rec go acc = + token "::" *> parse + >>= (fun elem -> + go elem >>| fun rest -> construct ("::", Some (tuple (acc, rest, [])))) + <|> return acc + in + parse >>= go +;; + +let parse_pattern_construct parse_elem parse_pat = + choice + [ (parse_option (parse_elem <|> parse_parens parse_pat) + >>| fun (t, p) -> PatConstruct (t, p)) + ; parse_construct + parse_elem + (fun (t, p) -> PatConstruct (t, p)) + (fun p acc -> PatConstruct ("::", Some (PatTuple (p, acc, [])))) + ; parse_list + parse_elem + (fun (t, p) -> PatConstruct (t, p)) + (fun (a, b, c) -> PatTuple (a, b, c)) + ] +;; + +let parse_base_pat = + choice + [ parse_pattern_any + ; parse_pattern_var + ; parse_pattern_const + ; (parse_keyword >>| fun tag -> PatConstruct (tag, None)) + ] +;; + +let parse_pattern = + white_space + *> fix (fun pat -> + let atom = + choice + [ parse_base_pat + ; parse_pattern_construct parse_base_pat pat + ; parse_pattern_with_type pat + ; parse_parens pat + ] + in + let tuple = parse_pattern_construct atom pat <|> atom in + let lst = parse_pattern_construct tuple pat <|> tuple in + parse_pattern_tuple lst <|> lst) +;; + +let parse_left_associative expr oper = + let rec go acc = lift2 (fun f x -> f acc x) oper expr >>= go <|> return acc in + expr >>= go +;; + +let parse_expr_bin_oper parse_bin_op tkn = + token tkn *> return (fun e1 e2 -> ExpBinOper (parse_bin_op, e1, e2)) +;; + +let multiply = parse_expr_bin_oper Multiply "*" +let division = parse_expr_bin_oper Division "/" +let plus = parse_expr_bin_oper Plus "+" +let minus = parse_expr_bin_oper Minus "-" + +let compare = + choice + [ parse_expr_bin_oper Equal "=" + ; parse_expr_bin_oper NotEqual "<>" + ; parse_expr_bin_oper LowestEqual "<=" + ; parse_expr_bin_oper LowerThan "<" + ; parse_expr_bin_oper GreatestEqual ">=" + ; parse_expr_bin_oper GreaterThan ">" + ] +;; + +let and_op = parse_expr_bin_oper And "&&" +let or_op = parse_expr_bin_oper Or "||" +let parse_expr_ident = parse_ident >>| fun x -> ExpIdent x +let parse_expr_const = parse_const >>| fun c -> ExpConst c + +let parse_expr_with_type parse_expr = + let parse_annotated_type = token ":" *> parse_type in + lift2 (fun expr t -> ExpTypeAnnotation (expr, t)) parse_expr parse_annotated_type +;; + +let parse_expr_branch parse_expr = + lift3 + (fun cond t f -> ExpBranch (cond, t, f)) + (token "if" *> parse_expr) + (token "then" *> parse_expr) + (option None (token "else" *> parse_expr >>| Option.some)) +;; + +let parse_expr_unar_oper parse_expr = + parse_unar_oper >>= fun op -> parse_expr >>= fun expr -> return (ExpUnarOper (op, expr)) +;; + +let parse_expr_list parse_expr = + parse_list + parse_expr + (fun (tag, exp_opt) -> ExpConstruct (tag, exp_opt)) + (fun (fst_exp, snd_exp, exp_list) -> ExpTuple (fst_exp, snd_exp, exp_list)) +;; + +let parse_expr_apply e = + parse_left_associative e (return (fun e1 e2 -> ExpApply (e1, e2))) +;; + +let parse_expr_lambda parse_expr = + token2 "fun" *> sep_by1 white_space parse_pattern + <* token "->" + >>= fun params -> + parse_expr + >>| fun body -> + match params with + | pat :: pats -> ExpLambda (pat, pats, body) + | [] -> body +;; + +let parse_case parse_expr = + white_space + *> option () (token "|" *> return ()) + *> lift2 (fun pat exp -> pat, exp) parse_pattern (token "->" *> parse_expr) +;; + +let parse_expr_function parse_expr = + token2 "function" + *> + let* case_list = sep_by1 (token "|") (parse_case parse_expr) in + return (ExpFunction (List.hd_exn case_list, List.drop case_list 1)) +;; + +let parse_expr_match parse_expr = + let* exp = token2 "match" *> parse_expr <* token2 "with" in + let* case_list = sep_by1 (token "|") (parse_case parse_expr) in + return (ExpMatch (exp, List.hd_exn case_list, List.drop case_list 1)) +;; + +let parse_expr_tuple parse_expr = + let commas = token "," in + let tuple = + lift3 + (fun e1 e2 rest -> ExpTuple (e1, e2, rest)) + (parse_expr <* commas) + parse_expr + (many (commas *> parse_expr)) + <* white_space + in + parse_parens tuple <|> tuple +;; + +let parse_body parse_expr = + many1 parse_pattern + >>= fun patterns -> + token "=" *> parse_expr + >>| fun body -> + match patterns with + | pat :: pats -> ExpLambda (pat, pats, body) + | [] -> body +;; + +let parse_expr_sequence parse_expr = + parse_left_associative + parse_expr + (token ";" *> return (fun exp1 exp2 -> ExpLet (NonRec, (PatUnit, exp1), [], exp2))) +;; + +let parse_expr_construct parse_expr = + let cons_one exp acc = ExpConstruct ("::", Some (ExpTuple (exp, acc, []))) in + let rec unfold_sequence = function + | ExpLet (NonRec, (PatUnit, e1), [], e2) -> + let rest, last = unfold_sequence e2 in + e1 :: rest, last + | e -> [], e + in + let rec fold_elem (from_parens, exp) acc = + if from_parens + then cons_one exp acc + else ( + match exp with + | ExpLet (NonRec, (PatUnit, e1), [], e2) -> + let rest, last = unfold_sequence e2 in + let acc' = fold_elem (false, last) acc in + let acc'' = + List.fold_right rest ~init:acc' ~f:(fun e a -> fold_elem (false, e) a) + in + fold_elem (false, e1) acc'' + | _ -> cons_one exp acc) + in + let elem_parser = + parse_parens (parse_expr_sequence parse_expr) + >>| (fun exp -> true, exp) + <|> (parse_expr >>| fun exp -> false, exp) + in + parse_construct elem_parser (fun (t, e) -> ExpConstruct (t, e)) fold_elem +;; + +let parse_annotated_rhs parse_expr opr = + token ":" *> parse_type + >>= fun t -> token opr *> parse_expr >>| fun expr -> ExpTypeAnnotation (expr, t) +;; + +let parse_fun_binding parse_expr = + let* id = parse_pattern_var in + let* params = many1 parse_pattern in + let pat = List.hd_exn params + and pats = List.drop params 1 in + let mk_body body = ExpLambda (pat, pats, body) in + choice + [ (parse_annotated_rhs parse_expr "=" + >>= function + | ExpTypeAnnotation (expr, t) -> return (PatType (id, t), mk_body expr) + | _ -> fail "expected type annotation") + ; (token "=" *> parse_expr >>| fun expr -> id, mk_body expr) + ] +;; + +let parse_simple_binding parse_expr = + let* pat = parse_pattern in + choice + [ (parse_annotated_rhs parse_expr "=" + >>= function + | ExpTypeAnnotation (expr, t) -> return (PatType (pat, t), expr) + | _ -> fail "expected type annotation") + ; (token "=" *> parse_expr >>| fun expr -> pat, expr) + ] +;; + +let parse_value_binding_list parse_expr = + let parse_binding = parse_fun_binding parse_expr <|> parse_simple_binding parse_expr in + sep_by1 (token2 "and") (white_space *> parse_binding) +;; + +let parse_base_expr = + choice + [ parse_expr_ident + ; parse_expr_const + ; (parse_keyword >>| fun tag -> ExpConstruct (tag, None)) + ] +;; + +let parse_expr_construct_keyword_some parse_expr = + parse_option (parse_base_expr <|> parse_parens parse_expr) + >>| fun (tag, exp_opt) -> ExpConstruct (tag, exp_opt) +;; + +let parse_expr_let parse_expr = + token "let" + *> lift4 + (fun rec_flag value_bindings and_bindings body -> + ExpLet (rec_flag, value_bindings, and_bindings, body)) + (token "rec" *> (take_while1 Char.is_whitespace *> return Rec) <|> return NonRec) + (lift2 + (fun pat expr -> pat, expr) + parse_pattern + (token "=" *> parse_expr <|> parse_body parse_expr)) + (many + (token "and" + *> lift2 + (fun pat expr -> pat, expr) + parse_pattern + (token "=" *> parse_expr <|> parse_body parse_expr))) + (token "in" *> parse_expr) +;; + +let parse_top_expr parse_expr = + choice + [ parse_expr_let parse_expr + ; parse_expr_function parse_expr + ; parse_expr_lambda parse_expr + ; parse_expr_match parse_expr + ; parse_expr_branch parse_expr + ] +;; + +let parse_exp_apply e = + let app = parse_expr_apply e in + let app = parse_expr_unar_oper app <|> app in + let ops1 = parse_left_associative app (multiply <|> division) in + let ops2 = parse_left_associative ops1 (plus <|> minus) in + let cmp = parse_left_associative ops2 compare in + parse_left_associative cmp (and_op <|> or_op) +;; + +let parse_expr = + white_space + *> fix (fun expr -> + let term = + choice + [ parse_base_expr + ; parse_expr_construct_keyword_some expr + ; parse_parens (parse_expr_with_type expr) + ; parse_expr_construct expr + ; parse_top_expr expr + ; parse_parens expr + ] + in + let func = parse_exp_apply term <|> term in + let lst = parse_expr_list func <|> func in + let tuple = parse_expr_tuple lst <|> lst in + let seq = parse_expr_sequence tuple <|> tuple in + let lambda = parse_expr_lambda expr <|> seq in + choice + [ parse_expr_let expr + ; parse_expr_function expr + ; parse_expr_lambda expr + ; parse_expr_match expr + ; parse_expr_branch expr + ; lambda + ]) +;; + +let parse_structure = + let parse_eval = parse_expr >>| fun e -> SEval e in + let parse_value = + token "let" + *> lift2 + (fun r id_list -> SValue (r, List.hd_exn id_list, List.drop id_list 1)) + (token "rec" *> (take_while1 Char.is_whitespace *> return Rec) <|> return NonRec) + (parse_value_binding_list parse_expr) + in + let parse_structure_item = choice [ parse_eval; parse_value ] in + parse_structure_item <* option () (token ";;" >>| ignore) +;; + +let parse_program = + let definitions_or_exprs = + white_space *> many parse_structure <* option () (token ";;" >>| ignore) + in + definitions_or_exprs <* white_space +;; + +let parse input = parse_string ~consume:All parse_program input diff --git a/EML/lib/frontend/parser.mli b/EML/lib/frontend/parser.mli new file mode 100644 index 00000000..abf45da6 --- /dev/null +++ b/EML/lib/frontend/parser.mli @@ -0,0 +1,5 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val parse : string -> (Ast.program, string) Result.t diff --git a/EML/lib/frontend/runner.ml b/EML/lib/frontend/runner.ml new file mode 100644 index 00000000..65a5094b --- /dev/null +++ b/EML/lib/frontend/runner.ml @@ -0,0 +1,20 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Ast +open Format + +type error = Parse of string + +let pp_error ppf = function + | Parse s -> fprintf ppf "Parse error: %s" s +;; + +let parse (text : string) : (program, string) Result.t = Parser.parse text + +let run (text : string) : (program, error) Result.t = + match Parser.parse text with + | Error s -> Error (Parse s) + | Ok ast -> Ok ast +;; diff --git a/EML/lib/frontend/runner.mli b/EML/lib/frontend/runner.mli new file mode 100644 index 00000000..c965527e --- /dev/null +++ b/EML/lib/frontend/runner.mli @@ -0,0 +1,11 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Ast + +type error = Parse of string + +val pp_error : Format.formatter -> error -> unit +val parse : string -> (program, string) Result.t +val run : string -> (program, error) Result.t diff --git a/EML/lib/middleend/anf.ml b/EML/lib/middleend/anf.ml new file mode 100644 index 00000000..54a5eca3 --- /dev/null +++ b/EML/lib/middleend/anf.ml @@ -0,0 +1,312 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Frontend.Ast +open Base +open Utils.Monads.ANFMonad +open Utils.Monads.ANFMonad.Syntax + +type immediate = + | ImmediateConst of const + | ImmediateVar of ident +[@@deriving show { with_path = false }] + +type complex_expr = + | ComplexImmediate of immediate + | ComplexUnit + | ComplexBinOper of bin_oper * immediate * immediate + | ComplexUnarOper of unar_oper * immediate + | ComplexTuple of immediate * immediate * immediate list + | ComplexField of immediate * int + | ComplexList of immediate list + | ComplexOption of immediate option + | ComplexApp of immediate * immediate * immediate list + | ComplexLambda of pattern list * anf_expr + | ComplexBranch of immediate * anf_expr * anf_expr +[@@deriving show { with_path = false }] + +and anf_expr = + | AnfLet of is_rec * ident * complex_expr * anf_expr + | AnfExpr of complex_expr +[@@deriving show { with_path = false }] + +type arity = int + +let pp_arity ppf (n : arity) = Stdlib.Format.pp_print_int ppf n + +type anf_bind = ident * anf_expr [@@deriving show { with_path = false }] +type anf_fun_bind = ident * arity * anf_expr [@@deriving show { with_path = false }] + +type anf_structure = + | AnfEval of anf_expr + | AnfValue of is_rec * anf_fun_bind * anf_fun_bind list +[@@deriving show { with_path = false }] + +type anf_program = anf_structure list [@@deriving show { with_path = false }] + +let rec anf_expr_arity = function + | AnfExpr (ComplexLambda (pat_list, body)) -> List.length pat_list + anf_expr_arity body + | AnfLet (_, _, _, body) -> anf_expr_arity body + | _ -> 0 +;; + +let optimize_anf_let (is_rec, name1, expr, body) = + match is_rec, body with + | NonRec, AnfExpr (ComplexImmediate (ImmediateVar name2)) when String.equal name1 name2 + -> AnfExpr expr + | _, AnfLet (is_rec', orig_name, ComplexImmediate (ImmediateVar name2), body') + when String.equal name1 name2 -> AnfLet (is_rec', orig_name, expr, body') + | _ -> AnfLet (is_rec, name1, expr, body) +;; + +let bind_complex_expr complex_expr k = + let* var = fresh in + let* body_expr = k (ImmediateVar var) in + return (optimize_anf_let (NonRec, var, complex_expr, body_expr)) +;; + +let get_var = function + | PatVariable id -> return id + | _ -> fresh +;; + +let tuple_indices pats = List.mapi pats ~f:(fun i p -> i, p) + +let match_list_cases cases = + let is_nil = function + | PatConstruct ("[]", None) | PatList [] -> true + | _ -> false + in + let is_cons = function + | PatConstruct ("::", Some (PatTuple (_, _, []))) -> true + | _ -> false + in + let get_cons_pats = function + | PatConstruct ("::", Some (PatTuple (head_pat, tail_pat, []))) -> + Some (head_pat, tail_pat) + | _ -> None + in + match cases with + | [ (pat1, expr1); (pat2, expr2) ] when is_nil pat1 && is_cons pat2 -> + (match get_cons_pats pat2 with + | Some (head_pat, tail_pat) -> Some (expr1, head_pat, tail_pat, expr2) + | None -> None) + | [ (pat1, expr1); (pat2, expr2) ] when is_cons pat1 && is_nil pat2 -> + (match get_cons_pats pat1 with + | Some (head_pat, tail_pat) -> Some (expr2, head_pat, tail_pat, expr1) + | None -> None) + | _ -> None +;; + +let build_tuple_lets tuple_var indices_pats body = + let rec aux tuple_var indices_pats body = + match indices_pats with + | [] -> return body + | (i, pat) :: rest -> + let* bind_id = get_var pat in + let* body_with_rest = aux tuple_var rest body in + let* inner = + match pat with + | PatTuple (p1, p2, rest_pats) -> + aux bind_id (tuple_indices (p1 :: p2 :: rest_pats)) body_with_rest + | _ -> return body_with_rest + in + return (AnfLet (NonRec, bind_id, ComplexField (ImmediateVar tuple_var, i), inner)) + in + aux tuple_var indices_pats body +;; + +let build_tuple_top_level_bindings tuple_var indices_pats = + let rec aux tuple_var = function + | [] -> return [] + | (i, pat) :: rest -> + let* bind_id = get_var pat in + let cur = bind_id, AnfExpr (ComplexField (ImmediateVar tuple_var, i)) in + let* inner = + match pat with + | PatTuple (p1, p2, rest_pats) -> + aux bind_id (tuple_indices (p1 :: p2 :: rest_pats)) + | _ -> return [] + in + let* rest_bindings = aux tuple_var rest in + return ((cur :: inner) @ rest_bindings) + in + aux tuple_var indices_pats +;; + +let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = + match expr with + | ExpConst c -> k (ImmediateConst c) + | ExpIdent x -> k (ImmediateVar x) + | ExpUnarOper (op, expr) -> + anf expr (fun imm -> bind_complex_expr (ComplexUnarOper (op, imm)) k) + | ExpBinOper (op, exp1, exp2) -> + anf exp1 (fun imm1 -> + anf exp2 (fun imm2 -> bind_complex_expr (ComplexBinOper (op, imm1, imm2)) k)) + | ExpBranch (cond, then_exp, else_exp_opt) -> + anf cond (fun imm_cond -> + let* then_aexp = anf_to_immediate_expr then_exp in + let* else_aexp = + match else_exp_opt with + | None -> return (AnfExpr ComplexUnit) + | Some else_exp -> anf_to_immediate_expr else_exp + in + bind_complex_expr (ComplexBranch (imm_cond, then_aexp, else_aexp)) k) + | ExpLet (flag, (pat, expr), _, body) -> + (match pat with + | PatAny | PatUnit | PatConstruct ("()", None) -> anf expr (fun _ -> anf body k) + | PatTuple (p1, p2, rest) -> + let pats = p1 :: p2 :: rest in + anf expr (fun tuple_imm -> + let* tuple_var = fresh in + let* body_anf_expr = anf body k in + let* with_lets = build_tuple_lets tuple_var (tuple_indices pats) body_anf_expr in + return (AnfLet (flag, tuple_var, ComplexImmediate tuple_imm, with_lets))) + | PatVariable _ | PatConst _ -> + anf expr (fun imm -> + let* body_anf_expr = anf body k in + let* var = get_var pat in + return (AnfLet (flag, var, ComplexImmediate imm, body_anf_expr))) + | _ -> fail "Complex patterns in let not supported") + | ExpApply (exp1, exp2) -> + let func, args_list = + let rec collect_args acc = function + | ExpApply (f, arg) -> collect_args (arg :: acc) f + | f -> f, acc + in + collect_args [] (ExpApply (exp1, exp2)) + in + anf func (fun immediate_func -> + anf_list args_list (function + | arg1 :: arg_tl -> + bind_complex_expr (ComplexApp (immediate_func, arg1, arg_tl)) k + | [] -> fail "application with no arguments")) + | ExpTuple (exp1, exp2, exp_list) -> + let all_exprs = exp1 :: exp2 :: exp_list in + anf_list all_exprs (function + | imm1 :: imm2 :: rest -> bind_complex_expr (ComplexTuple (imm1, imm2, rest)) k + | _ -> fail "Invalid tuple") + | ExpLambda (pat, pat_list, body) -> + let params = pat :: pat_list in + let* body_anf_expr = anf_to_immediate_expr body in + let rec wrap_params current_body = function + | [] -> return current_body + | ((PatVariable _ | PatConst _) as param) :: remaining_params -> + let* body_with_rest = wrap_params current_body remaining_params in + return (AnfExpr (ComplexLambda ([ param ], body_with_rest))) + | (PatAny | PatUnit | PatConstruct ("()", None)) :: remaining_params -> + let* body_with_rest = wrap_params current_body remaining_params in + let* ignored = fresh in + return (AnfExpr (ComplexLambda ([ PatVariable ignored ], body_with_rest))) + | PatType (inner_pat, _) :: remaining_params -> + wrap_params current_body (inner_pat :: remaining_params) + | PatTuple (p1, p2, rest_pats) :: remaining_params -> + let* body_with_rest = wrap_params current_body remaining_params in + let* var = fresh in + let* body_with_tuple_destructured = + build_tuple_lets var (tuple_indices (p1 :: p2 :: rest_pats)) body_with_rest + in + return + (AnfExpr (ComplexLambda ([ PatVariable var ], body_with_tuple_destructured))) + | _ -> fail "Only variable, constant and tuple patterns in lambda" + in + let* lambda_anf = wrap_params body_anf_expr params in + (match lambda_anf with + | AnfExpr (ComplexLambda (pats, body)) -> + bind_complex_expr (ComplexLambda (pats, body)) k + | _ -> fail "ExpLambda: wrap_params must return ComplexLambda") + | ExpConstruct ("()", None) -> bind_complex_expr ComplexUnit k + | ExpTypeAnnotation (e, _) -> anf e k + | ExpList exprs -> + anf_list exprs (fun imm_list -> bind_complex_expr (ComplexList imm_list) k) + | ExpOption None -> bind_complex_expr ComplexUnit k + | ExpOption (Some e) -> anf e k + | ExpMatch (scrut, first_case, rest_cases) -> + (match match_list_cases (first_case :: rest_cases) with + | Some (nil_expr, head_pat, tail_pat, cons_expr) -> + anf scrut (fun scrut_imm -> + let* scrut_var = fresh in + let* cond_var = fresh in + let* nil_aexp = anf_to_immediate_expr nil_expr in + let* cons_aexp_base = anf_to_immediate_expr cons_expr in + let* cons_aexp = + build_tuple_lets + scrut_var + (tuple_indices [ head_pat; tail_pat ]) + cons_aexp_base + in + let* branch_result = + bind_complex_expr + (ComplexBranch (ImmediateVar cond_var, nil_aexp, cons_aexp)) + k + in + return + (AnfLet + ( NonRec + , scrut_var + , ComplexImmediate scrut_imm + , AnfLet + ( NonRec + , cond_var + , ComplexBinOper + (Equal, ImmediateVar scrut_var, ImmediateConst (ConstInt 0)) + , branch_result ) ))) + | None -> fail "Only list match with [] and h::tl is supported") + | ExpFunction _ -> fail "Match/function cases not implemented" + | ExpConstruct ("[]", None) -> k (ImmediateConst (ConstInt 0)) + | ExpConstruct ("::", Some (ExpTuple (head_e, tail_e, []))) -> + anf head_e (fun head_imm -> + anf tail_e (fun tail_imm -> + bind_complex_expr (ComplexTuple (head_imm, tail_imm, [])) k)) + | ExpConstruct _ -> fail "Constructors not implemented" + +and anf_to_immediate_expr expr = + anf expr (fun imm -> return (AnfExpr (ComplexImmediate imm))) + +and anf_list (exprs : expr list) (k : immediate list -> anf_expr t) : anf_expr t = + match exprs with + | [] -> k [] + | hd :: tl -> + anf hd (fun immediate_hd -> + anf_list tl (fun immediate_tl -> k (immediate_hd :: immediate_tl))) +;; + +let to_fun_bind (id, e) = id, anf_expr_arity e, e + +let anf_structure_item (item : structure) : anf_structure list t = + match item with + | SEval expr -> + let* result = anf_to_immediate_expr expr in + return [ AnfEval result ] + | SValue (rec_flag, (pat, expr), binds) -> + let bindings = (pat, expr) :: binds in + List.fold_left bindings ~init:(return []) ~f:(fun acc (pat, expr) -> + let* acc_list = acc in + let* anf_expr_body = anf_to_immediate_expr expr in + match pat with + | PatTuple (p1, p2, rest) -> + let* tuple_var = fresh in + let* component_bindings = + build_tuple_top_level_bindings tuple_var (tuple_indices (p1 :: p2 :: rest)) + in + let one_value (id, e) = AnfValue (NonRec, to_fun_bind (id, e), []) in + let new_items = + AnfValue (rec_flag, to_fun_bind (tuple_var, anf_expr_body), []) + :: List.map component_bindings ~f:one_value + in + return (acc_list @ new_items) + | _ -> + let* var = get_var pat in + return (acc_list @ [ AnfValue (rec_flag, to_fun_bind (var, anf_expr_body), []) ])) +;; + +let anf_program (ast : program) : (anf_program, string) Result.t = + let program' = + List.fold_left ast ~init:(return []) ~f:(fun acc item -> + let* acc_list = acc in + let* item_anf = anf_structure_item item in + return (acc_list @ item_anf)) + in + run program' +;; diff --git a/EML/lib/middleend/anf.mli b/EML/lib/middleend/anf.mli new file mode 100644 index 00000000..737acee6 --- /dev/null +++ b/EML/lib/middleend/anf.mli @@ -0,0 +1,42 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Frontend.Ast + +type immediate = + | ImmediateConst of const + | ImmediateVar of ident +[@@deriving show { with_path = false }] + +type complex_expr = + | ComplexImmediate of immediate + | ComplexUnit + | ComplexBinOper of bin_oper * immediate * immediate + | ComplexUnarOper of unar_oper * immediate + | ComplexTuple of immediate * immediate * immediate list + | ComplexField of immediate * int + | ComplexList of immediate list + | ComplexOption of immediate option + | ComplexApp of immediate * immediate * immediate list + | ComplexLambda of pattern list * anf_expr + | ComplexBranch of immediate * anf_expr * anf_expr +[@@deriving show { with_path = false }] + +and anf_expr = + | AnfLet of is_rec * ident * complex_expr * anf_expr + | AnfExpr of complex_expr +[@@deriving show { with_path = false }] + +type arity = int +type anf_bind = ident * anf_expr [@@deriving show { with_path = false }] +type anf_fun_bind = ident * arity * anf_expr [@@deriving show { with_path = false }] + +type anf_structure = + | AnfEval of anf_expr + | AnfValue of is_rec * anf_fun_bind * anf_fun_bind list +[@@deriving show { with_path = false }] + +type anf_program = anf_structure list [@@deriving show { with_path = false }] + +val anf_program : program -> (anf_program, string) Result.t diff --git a/EML/lib/middleend/anf_pp.ml b/EML/lib/middleend/anf_pp.ml new file mode 100644 index 00000000..9d4d4b7f --- /dev/null +++ b/EML/lib/middleend/anf_pp.ml @@ -0,0 +1,142 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Stdlib.Format +open Frontend +open Ast +open Anf +open Utils.Pretty_printer + +let pp_ty = Frontend.Ast.pp_ty + +let rec pp_immediate fmt = function + | ImmediateConst c -> + (match c with + | ConstInt n -> fprintf fmt "%d" n + | ConstBool b -> fprintf fmt "%b" b + | ConstString s -> fprintf fmt "%S" s + | ConstChar ch -> fprintf fmt "'%s'" (Char.escaped ch)) + | ImmediateVar x -> fprintf fmt "%s" x + +and pp_complex_expr fmt = function + | ComplexImmediate imm -> pp_immediate fmt imm + | ComplexUnit -> fprintf fmt "()" + | ComplexField (imm, i) -> fprintf fmt "%a.%d" pp_immediate imm i + | ComplexBinOper (op, e1, e2) -> + let op_str = string_of_bin_op op in + fprintf fmt "(%a %s %a)" pp_immediate e1 op_str pp_immediate e2 + | ComplexUnarOper (op, e) -> + let op_str = string_of_unary_op op in + fprintf fmt "(%s %a)" op_str pp_immediate e + | ComplexTuple (e1, e2, rest) -> + let all_exprs = e1 :: e2 :: rest in + fprintf + fmt + "(%a)" + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "; ") pp_immediate) + all_exprs + | ComplexList exprs -> + fprintf + fmt + "[%a]" + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "; ") pp_immediate) + exprs + | ComplexOption None -> fprintf fmt "None" + | ComplexOption (Some e) -> fprintf fmt "Some %a" pp_immediate e + | ComplexApp (f, arg, args) -> + let all_args = arg :: args in + fprintf + fmt + "%a %a" + pp_immediate + f + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt " ") pp_immediate) + all_args + | ComplexLambda (patterns, body) -> + let rec pp_pattern fmt = function + | PatVariable x -> fprintf fmt "%s" x + | PatConst c -> + (match c with + | ConstInt n -> fprintf fmt "%d" n + | ConstBool b -> fprintf fmt "%b" b + | ConstString s -> fprintf fmt "%S" s + | ConstChar ch -> fprintf fmt "'%s'" (Char.escaped ch)) + | PatTuple (p1, p2, rest) -> + let all_pats = p1 :: p2 :: rest in + fprintf + fmt + "(%a)" + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "; ") pp_pattern) + all_pats + | PatAny -> fprintf fmt "_" + | PatType (p, t) -> fprintf fmt "%a : %a" pp_pattern p pp_ty t + | PatUnit -> fprintf fmt "()" + | PatList pats -> + fprintf + fmt + "[%a]" + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "; ") pp_pattern) + pats + | PatOption None -> fprintf fmt "None" + | PatOption (Some p) -> fprintf fmt "Some %a" pp_pattern p + | PatConstruct (name, opt) -> + (match opt with + | None -> fprintf fmt "%s" name + | Some p -> fprintf fmt "%s %a" name pp_pattern p) + in + fprintf + fmt + "fun %a -> %a" + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt " ") pp_pattern) + patterns + pp_anf_expr + body + | ComplexBranch (cond, then_expr, else_expr) -> + fprintf + fmt + "if %a then %a else %a" + pp_immediate + cond + pp_anf_expr + then_expr + pp_anf_expr + else_expr + +and pp_anf_expr fmt = function + | AnfLet (rf, name, v, body) -> + let rec_flag = + match rf with + | Rec -> "rec " + | NonRec -> "" + in + fprintf fmt "let %s%s = %a in@ %a" rec_flag name pp_complex_expr v pp_anf_expr body + | AnfExpr e -> pp_complex_expr fmt e + +and pp_anf_fun_bind fmt (name, _arity, expr) = fprintf fmt "%s = %a" name pp_anf_expr expr + +and pp_anf_structure fmt = function + | AnfEval expr -> fprintf fmt "%a" pp_anf_expr expr + | AnfValue (rf, bind, binds) -> + let rec_flag = + match rf with + | Rec -> "rec " + | NonRec -> "" + in + let all_binds = bind :: binds in + fprintf + fmt + "let %s%a" + rec_flag + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "@ and ") pp_anf_fun_bind) + all_binds + +and pp_anf_program fmt program = + fprintf + fmt + "%a" + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "@\n\n") pp_anf_structure) + program +;; + +let anf_to_string anf_program = Stdlib.Format.asprintf "%a" pp_anf_program anf_program diff --git a/EML/lib/middleend/anf_pp.mli b/EML/lib/middleend/anf_pp.mli new file mode 100644 index 00000000..c45191a6 --- /dev/null +++ b/EML/lib/middleend/anf_pp.mli @@ -0,0 +1,6 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val pp_anf_program : Format.formatter -> Anf.anf_program -> unit +val anf_to_string : Anf.anf_program -> string diff --git a/EML/lib/middleend/cc.ml b/EML/lib/middleend/cc.ml new file mode 100644 index 00000000..0d0489f3 --- /dev/null +++ b/EML/lib/middleend/cc.ml @@ -0,0 +1,352 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Format +open Frontend.Ast +open Runtime.Primitives +module VarSet = Set.Make (String) +module EnvMap = Map.Make (String) + +let union_map_list f list = + List.fold_left (fun acc x -> VarSet.union acc (f x)) VarSet.empty list +;; + +let var_set_of_list lst = List.fold_left (fun s x -> VarSet.add x s) VarSet.empty lst + +let vars_in_pattern p = + let rec walk = function + | PatAny -> VarSet.empty + | PatVariable x -> VarSet.singleton x + | PatConst _ -> VarSet.empty + | PatTuple (p1, p2, rest) -> union_map_list walk (p1 :: p2 :: rest) + | PatConstruct (_, None) -> VarSet.empty + | PatConstruct (_, Some q) -> walk q + | PatType (q, _) -> walk q + | PatUnit | PatList _ | PatOption _ -> VarSet.empty + in + walk p +;; + +let rec collect_free_vars = function + | ExpIdent v -> VarSet.singleton v + | ExpConst _ -> VarSet.empty + | ExpLet (flag, (pat, exp), binds, body) -> + let all_binds = (pat, exp) :: binds in + let bound_vars = union_map_list (fun (p, _) -> vars_in_pattern p) all_binds in + let free_vars_in_rhs = + match flag with + | Rec -> + union_map_list + (fun (_, e) -> VarSet.diff (collect_free_vars e) bound_vars) + all_binds + | NonRec -> union_map_list (fun (_, e) -> collect_free_vars e) all_binds + in + VarSet.union free_vars_in_rhs (VarSet.diff (collect_free_vars body) bound_vars) + | ExpLambda (pat, pats, exp) -> + let bound_vars = union_map_list vars_in_pattern (pat :: pats) in + VarSet.diff (collect_free_vars exp) bound_vars + | ExpApply (e1, e2) -> VarSet.union (collect_free_vars e1) (collect_free_vars e2) + | ExpFunction ((pat, exp), cases) -> + let one (p, e) = VarSet.diff (collect_free_vars e) (vars_in_pattern p) in + union_map_list one ((pat, exp) :: cases) + | ExpMatch (e, (pat, branch), cases) -> + let one (p, e) = VarSet.diff (collect_free_vars e) (vars_in_pattern p) in + let in_branches = union_map_list one ((pat, branch) :: cases) in + VarSet.union (collect_free_vars e) in_branches + | ExpBranch (cond, then_e, else_opt) -> + union_map_list + collect_free_vars + (cond + :: then_e + :: + (match else_opt with + | None -> [] + | Some e -> [ e ])) + | ExpTuple (e1, e2, rest) -> union_map_list collect_free_vars (e1 :: e2 :: rest) + | ExpConstruct (_, None) -> VarSet.empty + | ExpConstruct (_, Some e) -> collect_free_vars e + | ExpTypeAnnotation (e, _) -> collect_free_vars e + | ExpBinOper (_, e1, e2) -> VarSet.union (collect_free_vars e1) (collect_free_vars e2) + | ExpUnarOper (_, e) -> collect_free_vars e + | ExpList es -> union_map_list collect_free_vars es + | ExpOption e_opt -> + (match e_opt with + | None -> VarSet.empty + | Some e -> collect_free_vars e) +;; + +type context = + { globals : VarSet.t + ; env : VarSet.t EnvMap.t + } + +let with_globals ctx g = { ctx with globals = g } +let with_env ctx e = { ctx with env = e } + +type error = LambdaWithoutParameters + +let pp_error ppf = function + | LambdaWithoutParameters -> fprintf ppf "closure_conversion: lambda without parameters" +;; + +type 'a t = context -> ('a, error) Result.t + +let return (value : 'a) : 'a t = fun _ -> Ok value +let fail (error : error) : 'a t = fun _ -> Error error + +let bind (computation : 'a t) (next : 'a -> 'b t) : 'b t = + fun ctx -> + match computation ctx with + | Ok a -> next a ctx + | Error e -> Error e +;; + +let ask = fun ctx -> Ok ctx +let local f m = fun ctx -> m (f ctx) +let run ctx m = m ctx +let ( let* ) = bind + +let of_result = function + | Ok x -> return x + | Error e -> fail e +;; + +let extend_capture_env env pat captured_set = + let rec add_captures_for_pat acc = function + | PatAny | PatConst _ | PatConstruct (_, None) -> acc + | PatVariable name -> EnvMap.add name captured_set acc + | PatTuple (p1, p2, rest) -> + let acc = add_captures_for_pat acc p1 in + let acc = add_captures_for_pat acc p2 in + List.fold_left add_captures_for_pat acc rest + | PatConstruct (_, Some p) | PatType (p, _) -> add_captures_for_pat acc p + | PatUnit | PatList _ | PatOption _ -> acc + in + add_captures_for_pat env pat +;; + +let rec build_closure ~apply param_list body_ast captured_ids = + let* body_ast' = convert_expr body_ast in + let make_lam first rest = ExpLambda (first, rest, body_ast') in + match param_list with + | [] -> fail LambdaWithoutParameters + | first :: rest_params -> + if VarSet.is_empty captured_ids + then return (make_lam first rest_params) + else ( + let captured_list = VarSet.elements captured_ids in + let all_params = + List.map (fun id -> PatVariable id) captured_list @ (first :: rest_params) + in + let lam = make_lam (List.hd all_params) (List.tl all_params) in + return + (if apply + then List.fold_left (fun t id -> ExpApply (t, ExpIdent id)) lam captured_list + else lam)) + +and convert_expr = function + | ExpIdent id -> + let* current_ctx = ask in + return + (try + let env_fvs = EnvMap.find id current_ctx.env in + List.fold_left + (fun t fv -> ExpApply (t, ExpIdent fv)) + (ExpIdent id) + (VarSet.elements env_fvs) + with + | Not_found -> ExpIdent id) + | ExpConst c -> return (ExpConst c) + | ExpLet (flag, (pat, exp), more, body) -> + let* (pat', exp'), rest_binds, body_ctx = convert_let_bindings flag (pat, exp) more in + let* body' = local (fun _ -> body_ctx) (convert_expr body) in + return (ExpLet (flag, (pat', exp'), rest_binds, body')) + | ExpLambda (pat, pats, body) as lam -> + let* current_ctx = ask in + let param_list = pat :: pats in + let captured = VarSet.diff (collect_free_vars lam) current_ctx.globals in + build_closure ~apply:true param_list body captured + | ExpApply (f, arg) -> + let* f' = convert_expr f in + let* arg' = convert_expr arg in + return (ExpApply (f', arg')) + | ExpFunction ((pat, exp), cases) -> + let* first_exp = convert_expr exp in + let* rest_cases = + List.fold_right + (fun (p, e) acc -> + let* e' = convert_expr e in + let* rest = acc in + return ((p, e') :: rest)) + cases + (return []) + in + return (ExpFunction ((pat, first_exp), rest_cases)) + | ExpMatch (e, (pat, branch), cases) -> + let* scrutinee' = convert_expr e in + let* branch' = convert_expr branch in + let* rest_cases = + List.fold_right + (fun (p, e) acc -> + let* e' = convert_expr e in + let* rest = acc in + return ((p, e') :: rest)) + cases + (return []) + in + return (ExpMatch (scrutinee', (pat, branch'), rest_cases)) + | ExpBranch (cond, then_e, else_opt) -> + let* cond' = convert_expr cond in + let* then_e' = convert_expr then_e in + let* else_e' = + match else_opt with + | None -> return None + | Some e -> + let* e' = convert_expr e in + return (Some e') + in + return (ExpBranch (cond', then_e', else_e')) + | ExpTuple (e1, e2, rest) -> + let* e1' = convert_expr e1 in + let* e2' = convert_expr e2 in + let* rest' = + List.fold_right + (fun e acc -> + let* e' = convert_expr e in + let* rest_acc = acc in + return (e' :: rest_acc)) + rest + (return []) + in + return (ExpTuple (e1', e2', rest')) + | ExpConstruct (_, None) as e -> return e + | ExpConstruct (tag, Some e) -> + let* e' = convert_expr e in + return (ExpConstruct (tag, Some e')) + | ExpTypeAnnotation (e, typ) -> + let* e' = convert_expr e in + return (ExpTypeAnnotation (e', typ)) + | ExpBinOper (op, e1, e2) -> + let* e1' = convert_expr e1 in + let* e2' = convert_expr e2 in + return (ExpBinOper (op, e1', e2')) + | ExpUnarOper (op, e) -> + let* e' = convert_expr e in + return (ExpUnarOper (op, e')) + | ExpList es -> + let* es' = + List.fold_right + (fun e acc -> + let* e' = convert_expr e in + let* acc' = acc in + return (e' :: acc')) + es + (return []) + in + return (ExpList es') + | ExpOption e_opt -> + (match e_opt with + | None -> return (ExpOption None) + | Some e -> + let* e' = convert_expr e in + return (ExpOption (Some e'))) + +and convert_let_bindings rec_flag (pat, exp) rest_binds = + let* current_ctx = ask in + let bind_group = (pat, exp) :: rest_binds in + let bound_ids = union_map_list (fun (p, _) -> vars_in_pattern p) bind_group in + match rec_flag with + | Rec -> + let globals' = VarSet.union current_ctx.globals bound_ids in + let group_captured = + union_map_list (fun (_, e) -> VarSet.diff (collect_free_vars e) globals') bind_group + in + let env' = + List.fold_left + (fun acc (p, _) -> extend_capture_env acc p group_captured) + current_ctx.env + bind_group + in + let rec_group_ctx = with_env (with_globals current_ctx globals') env' in + let rec loop acc = function + | [] -> return (List.rev acc) + | (p, e) :: rest -> + let fvs = VarSet.diff group_captured (vars_in_pattern p) in + let res = + match e with + | ExpLambda (lam_pat, lam_pats, body) -> + run rec_group_ctx (build_closure ~apply:false (lam_pat :: lam_pats) body fvs) + | _ -> run rec_group_ctx (convert_expr e) + in + let* e' = of_result res in + loop ((p, e') :: acc) rest + in + let* transformed_binds = loop [] bind_group in + return (List.hd transformed_binds, List.tl transformed_binds, rec_group_ctx) + | NonRec -> + let rec loop env_acc rev_binds = function + | [] -> + let transformed_binds = List.rev rev_binds in + return + ( List.hd transformed_binds + , List.tl transformed_binds + , with_env current_ctx env_acc ) + | (p, e) :: rest -> + let captured = VarSet.diff (collect_free_vars e) current_ctx.globals in + let ctx_with_env = { current_ctx with env = env_acc } in + let res = + match e with + | ExpLambda (lam_pat, lam_pats, body) -> + run + ctx_with_env + (build_closure ~apply:false (lam_pat :: lam_pats) body captured) + | _ -> run ctx_with_env (convert_expr e) + in + let* e' = of_result res in + let env_next = + match e with + | ExpLambda _ -> extend_capture_env env_acc p captured + | _ -> env_acc + in + loop env_next ((p, e') :: rev_binds) rest + in + loop current_ctx.env [] bind_group +;; + +let convert_item = function + | SEval expr -> + let* e' = convert_expr expr in + let* current_ctx = ask in + return (current_ctx.globals, SEval e') + | SValue (rec_flag, (pat, expr), and_binds) -> + let* (pat', expr'), rest_binds, after_binds_ctx = + convert_let_bindings rec_flag (pat, expr) and_binds + in + let bound_ids = + union_map_list (fun (p, _) -> vars_in_pattern p) ((pat, expr) :: and_binds) + in + return + ( VarSet.union after_binds_ctx.globals bound_ids + , SValue (rec_flag, (pat', expr'), rest_binds) ) +;; + +let builtin_globals = + var_set_of_list (List.map (fun f -> f.name) predefined_runtime_funcs) +;; + +let initial_context = { globals = builtin_globals; env = EnvMap.empty } + +let closure_conversion_result (program : Frontend.Ast.program) + : (structure list, error) Result.t + = + let rec convert_items rev_acc item_ctx = function + | [] -> Ok (List.rev rev_acc) + | item :: tail -> + (match run item_ctx (convert_item item) with + | Ok (globals', item') -> + convert_items (item' :: rev_acc) { globals = globals'; env = EnvMap.empty } tail + | Error e -> Error e) + in + convert_items [] initial_context program +;; diff --git a/EML/lib/middleend/cc.mli b/EML/lib/middleend/cc.mli new file mode 100644 index 00000000..0d1fec45 --- /dev/null +++ b/EML/lib/middleend/cc.mli @@ -0,0 +1,11 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +type error = LambdaWithoutParameters + +val pp_error : Format.formatter -> error -> unit + +val closure_conversion_result + : Frontend.Ast.program + -> (Frontend.Ast.program, error) Result.t diff --git a/EML/lib/middleend/inferencer.ml b/EML/lib/middleend/inferencer.ml new file mode 100644 index 00000000..ebe7b073 --- /dev/null +++ b/EML/lib/middleend/inferencer.ml @@ -0,0 +1,854 @@ +(** Copyright 2024-2025, Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +(* Template: https://gitlab.com/Kakadu/fp2020course-materials/-/tree/master/code/miniml?ref_type=heads*) + +(* Inference state is purely immutable: no Hashtbl, no [ref] or [mutable]. We use + [Map] (tree-like) for [var_levels] and thread state through the monad. *) + +open Base +open Frontend.Ast +open Stdlib.Format + +type error = + | OccursCheck of string * ty + | NoVariable of string + | UnificationFailed of ty * ty + | SeveralBounds of string + | LHS of string + | RHS of string + | UnexpectedFunction of ty + +let pp_error fmt = function + | OccursCheck (id, ty) -> + fprintf fmt "Occurs check failed. Type variable '%s' occurs inside %a." id pp_ty ty + | NoVariable name -> fprintf fmt "Unbound variable '%s'." name + | UnificationFailed (ty1, ty2) -> + fprintf fmt "Failed to unify types: %a and %a." pp_ty ty1 pp_ty ty2 + | SeveralBounds name -> fprintf fmt "Multiple bounds for variable '%s'." name + | LHS msg -> fprintf fmt "Left-hand side error: %s." msg + | RHS msg -> fprintf fmt "Right-hand side error: %s." msg + | UnexpectedFunction ty1 -> fprintf fmt "UnexpectedFunction error: %a" pp_ty ty1 +;; + +module VarSet = struct + include Stdlib.Set.Make (String) +end + +module ResultMonad : sig + type 'a t + + val return : 'a -> 'a t + val fail : error -> 'a t + + include Monad.Infix with type 'a t := 'a t + + module Syntax : sig + val ( let* ) : 'a t -> ('a -> 'b t) -> 'b t + end + + val fresh : int t + val current_level : int t + val enter_level : unit t + val leave_level : unit t + val set_var_level : string -> int -> unit t + val get_var_level : string -> int option t + val run : 'a t -> ('a, error) Result.t + + module RMap : sig + val fold : ('a, 'b, 'c) Map.t -> init:'d t -> f:('a -> 'b -> 'd -> 'd t) -> 'd t + end +end = struct + type state = + { counter : int + ; current_level : int + ; var_levels : (string, int, String.comparator_witness) Map.t + } + + type 'a t = state -> state * ('a, error) Result.t + + let ( >>= ) m f state = + let last, r = m state in + match r with + | Result.Error x -> last, Result.fail x + | Result.Ok a -> f a last + ;; + + let return x last = last, Result.return x + let fail e st = st, Result.fail e + + let ( >>| ) m f st = + match m st with + | st, Ok x -> st, Result.return (f x) + | st, Result.Error e -> st, Result.fail e + ;; + + module Syntax = struct + let ( let* ) = ( >>= ) + end + + module RMap = struct + let fold map ~init ~f = + Map.fold map ~init ~f:(fun ~key ~data acc -> + let open Syntax in + let* acc = acc in + f key data acc) + ;; + end + + let fresh : int t = + fun st -> { st with counter = st.counter + 1 }, Result.return st.counter + ;; + + let current_level : int t = fun st -> st, Result.return st.current_level + + let enter_level : unit t = + fun st -> { st with current_level = st.current_level + 1 }, Result.return () + ;; + + let leave_level : unit t = + fun st -> { st with current_level = max 0 (st.current_level - 1) }, Result.return () + ;; + + let set_var_level var lvl : unit t = + fun st -> + { st with var_levels = Map.set st.var_levels ~key:var ~data:lvl }, Result.return () + ;; + + let get_var_level var : int option t = + fun st -> st, Result.return (Map.find st.var_levels var) + ;; + + let run monad = + snd (monad { counter = 0; current_level = 0; var_levels = Map.empty (module String) }) + ;; +end + +module Type = struct + let rec occurs_in var = function + | TyVar b -> String.equal b var + | TyArrow (left, right) -> occurs_in var left || occurs_in var right + | TyTuple types -> List.exists types ~f:(occurs_in var) + | TyList ty -> occurs_in var ty + | TyOption ty -> occurs_in var ty + | TyPrim _ -> false + ;; + + let free_vars = + let rec helper acc = function + | TyVar b -> VarSet.add b acc + | TyArrow (left, right) -> helper (helper acc left) right + | TyTuple types -> List.fold_left types ~init:acc ~f:helper + | TyList ty -> helper acc ty + | TyOption ty -> helper acc ty + | TyPrim _ -> acc + in + helper VarSet.empty + ;; +end + +module Substitution : sig + type t + + val empty : t + val singleton : string -> ty -> t ResultMonad.t + val remove : t -> string -> t + val apply : t -> ty -> ty + val unify : ty -> ty -> t ResultMonad.t + val compose : t -> t -> t ResultMonad.t + val compose_all : t list -> t ResultMonad.t +end = struct + open ResultMonad + open ResultMonad.Syntax + + type t = (string, ty, String.comparator_witness) Map.t + + let empty = Map.empty (module String) + + let mapping key value = + if Type.occurs_in key value + then fail (OccursCheck (key, value)) + else + let* key_lvl = get_var_level key in + let vars = Type.free_vars value |> VarSet.elements in + let* () = + match key_lvl with + | None -> return () + | Some key_lvl -> + List.fold_left vars ~init:(return ()) ~f:(fun acc v -> + let* () = acc in + let* v_lvl = get_var_level v in + match v_lvl with + | Some v_lvl when v_lvl > key_lvl -> set_var_level v key_lvl + | _ -> return ()) + in + return (key, value) + ;; + + let singleton key value = + match value with + | TyVar v when String.equal v key -> return empty + | _ -> + let* key, value = mapping key value in + return (Map.singleton (module String) key value) + ;; + + let find = Map.find + let remove = Map.remove + + let apply subst = + let rec helper = function + | TyPrim x -> TyPrim x + | TyVar b as ty -> + (match find subst b with + | None -> ty + | Some x -> x) + | TyArrow (left, right) -> TyArrow (helper left, helper right) + | TyList ty -> TyList (helper ty) + | TyOption ty -> TyOption (helper ty) + | TyTuple types -> TyTuple (List.map ~f:helper types) + in + helper + ;; + + let rec unify left right = + match left, right with + | TyPrim l, TyPrim r when String.equal l r -> return empty + | TyPrim _, TyPrim _ -> fail (UnificationFailed (left, right)) + | TyVar l, TyVar r when String.equal l r -> return empty + | TyVar b, ty | ty, TyVar b -> singleton b ty + | TyArrow (left1, right1), TyArrow (left2, right2) -> + let* subst1 = unify left1 left2 in + let* subst2 = unify (apply subst1 right1) (apply subst1 right2) in + compose subst1 subst2 + | TyTuple types1, TyTuple types2 -> + if List.length types1 <> List.length types2 + then fail (UnificationFailed (left, right)) + else ( + let rec unify_tuples subst types1 types2 = + match types1, types2 with + | [], [] -> return subst + | t1 :: rest1, t2 :: rest2 -> + let* subst' = unify (apply subst t1) (apply subst t2) in + let* composed_subst = compose subst subst' in + unify_tuples composed_subst rest1 rest2 + | _, _ -> fail (UnificationFailed (left, right)) + in + unify_tuples empty types1 types2) + | TyList ty1, TyList ty2 -> unify ty1 ty2 + | TyOption ty1, TyOption ty2 -> unify ty1 ty2 + | _ -> fail (UnificationFailed (left, right)) + + and extend key value subst = + match find subst key with + | None -> + let value = apply subst value in + let* subst2 = singleton key value in + RMap.fold subst ~init:(return subst2) ~f:(fun key value acc -> + let value = apply subst2 value in + let* key, value = mapping key value in + return (Map.update acc key ~f:(fun _ -> value))) + | Some value2 -> + let* subst2 = unify value value2 in + compose subst subst2 + + and compose subst1 subst2 = RMap.fold subst2 ~init:(return subst1) ~f:extend + + let compose_all = + List.fold_left ~init:(return empty) ~f:(fun acc subst -> + let* acc = acc in + compose acc subst) + ;; +end + +module Scheme = struct + type t = Scheme of VarSet.t * ty + + let free_vars (Scheme (vars, ty)) = VarSet.diff (Type.free_vars ty) vars + + let apply subst (Scheme (vars, ty)) = + let subst2 = + VarSet.fold (fun key subst -> Substitution.remove subst key) vars subst + in + Scheme (vars, Substitution.apply subst2 ty) + ;; +end + +module TypeEnv = struct + type t = (ident, Scheme.t, String.comparator_witness) Map.t + + let extend env key value = Map.update env key ~f:(fun _ -> value) + + let free_vars : t -> VarSet.t = + Map.fold ~init:VarSet.empty ~f:(fun ~key:_ ~data:scheme acc -> + VarSet.union acc (Scheme.free_vars scheme)) + ;; + + let apply subst env = Map.map env ~f:(Scheme.apply subst) + let find = Map.find + + let initial_env = + let open Base.Map in + empty (module String) + |> set + ~key:"print_int" + ~data:(Scheme.Scheme (VarSet.empty, TyArrow (TyPrim "int", TyPrim "unit"))) + |> set + ~key:"print_endline" + ~data:(Scheme.Scheme (VarSet.empty, TyArrow (TyPrim "string", TyPrim "unit"))) + |> set + ~key:"print_bool" + ~data:(Scheme.Scheme (VarSet.empty, TyArrow (TyPrim "bool", TyPrim "unit"))) + ;; + + let env_with_gc = + let open Base.Map in + initial_env + |> set + ~key:"collect" + ~data:(Scheme.Scheme (VarSet.empty, TyArrow (TyPrim "unit", TyPrim "unit"))) + |> set + ~key:"print_gc_status" + ~data:(Scheme.Scheme (VarSet.empty, TyArrow (TyPrim "unit", TyPrim "unit"))) + |> set + ~key:"get_heap_start" + ~data:(Scheme.Scheme (VarSet.empty, TyArrow (TyPrim "unit", TyPrim "int"))) + |> set + ~key:"get_heap_final" + ~data:(Scheme.Scheme (VarSet.empty, TyArrow (TyPrim "unit", TyPrim "int"))) + ;; +end + +open ResultMonad +open ResultMonad.Syntax + +let fresh_var = + let* n = fresh in + let* lvl = current_level in + let name = "t" ^ Int.to_string n in + let* () = set_var_level name lvl in + return (TyVar name) +;; + +let instantiate : Scheme.t -> ty ResultMonad.t = + fun (Scheme (vars, ty)) -> + VarSet.fold + (fun var typ -> + let* typ = typ in + let* fresh_ty = fresh_var in + let* subst = Substitution.singleton var fresh_ty in + return (Substitution.apply subst typ)) + vars + (return ty) +;; + +let generalize _env ty = + let* lvl = current_level in + let vars = Type.free_vars ty |> VarSet.elements in + let* generic = + List.fold_left vars ~init:(return VarSet.empty) ~f:(fun acc v -> + let* acc = acc in + let* v_lvl = get_var_level v in + match v_lvl with + | Some v_lvl when v_lvl > lvl -> return (VarSet.add v acc) + | _ -> return acc) + in + return (Scheme.Scheme (generic, ty)) +;; + +let infer_const = function + | ConstInt _ -> TyPrim "int" + | ConstBool _ -> TyPrim "bool" + | ConstString _ -> TyPrim "string" + | ConstChar _ -> TyPrim "char" +;; + +let rec infer_pattern env = function + | PatAny -> + let* fresh = fresh_var in + return (Substitution.empty, fresh, env) + | PatConst const -> return (Substitution.empty, infer_const const, env) + | PatVariable var -> + let* fresh = fresh_var in + let env = TypeEnv.extend env var (Scheme.Scheme (VarSet.empty, fresh)) in + return (Substitution.empty, fresh, env) + | PatTuple (first_pat, second_pat, rest_pats) -> + let* sub_first, type_first, env_first = infer_pattern env first_pat in + let updated_env_second = TypeEnv.apply sub_first env_first in + let* sub_second, type_second, env_second = + infer_pattern updated_env_second second_pat + in + let process_remaining_patterns acc pat = + let open ResultMonad.Syntax in + let* current_sub, types, current_env = acc in + let* sub_new, type_new, env_new = infer_pattern current_env pat in + let* combined_sub = Substitution.compose current_sub sub_new in + return (combined_sub, type_new :: types, env_new) + in + let initial_state = return (sub_second, [ type_second; type_first ], env_second) in + let* final_sub, collected_types, final_env = + List.fold_left rest_pats ~init:initial_state ~f:process_remaining_patterns + in + let tuple_type = TyTuple (List.rev collected_types) in + return (final_sub, tuple_type, final_env) + | PatList pats -> + let* fresh_el_type = fresh_var in + let* final_sub, final_env = + List.fold_left + pats + ~init:(return (Substitution.empty, env)) + ~f:(fun acc pat -> + let open ResultMonad.Syntax in + let* sub_acc, env_acc = acc in + let* sub_cur, el_type, env_cur = infer_pattern env_acc pat in + let* unified_sub = Substitution.compose sub_acc sub_cur in + let* final_sub = + Substitution.unify (Substitution.apply sub_cur fresh_el_type) el_type + in + let* combined_sub = Substitution.compose unified_sub final_sub in + return (combined_sub, TypeEnv.apply final_sub env_cur)) + in + return (final_sub, TyList (Substitution.apply final_sub fresh_el_type), final_env) + | PatOption opt -> + let* sub, typ, env = + match opt with + | None -> + let* fresh = fresh_var in + return (Substitution.empty, fresh, env) + | Some p -> infer_pattern env p + in + return (sub, TyOption typ, env) + | PatType (pat, annotated_ty) -> + let* subst, inferred_ty, env = infer_pattern env pat in + let* unified_subst = Substitution.unify inferred_ty annotated_ty in + let* total_subst = Substitution.compose subst unified_subst in + return + ( total_subst + , Substitution.apply total_subst annotated_ty + , TypeEnv.apply total_subst env ) + | PatUnit -> return (Substitution.empty, TyPrim "unit", env) + | PatConstruct (name, opt) -> + (match name, opt with + | "()", None -> return (Substitution.empty, TyPrim "unit", env) + | "None", None -> + let* fresh = fresh_var in + return (Substitution.empty, TyOption fresh, env) + | "Some", Some p -> + let* sub, typ, env' = infer_pattern env p in + return (sub, TyOption typ, env') + | "[]", None -> + let* fresh = fresh_var in + return (Substitution.empty, TyList fresh, env) + | "::", Some (PatTuple (_, _, []) as pair_pat) -> + let* sub_pair, ty_pair, env' = infer_pattern env pair_pat in + let* fresh_hd = fresh_var in + let* fresh_tl = fresh_var in + let* sub_cons = Substitution.unify ty_pair (TyTuple [ fresh_hd; fresh_tl ]) in + let* sub_total = Substitution.compose sub_cons sub_pair in + return + ( sub_total + , Substitution.apply sub_total (TyList fresh_hd) + , TypeEnv.apply sub_total env' ) + | "::", _ -> fail (RHS "Constructor (::) expects a pair pattern") + | _ -> fail (RHS ("Unknown constructor: " ^ name))) +;; + +let infer_binop_type = function + | Equal | NotEqual | GreaterThan | GreatestEqual | LowerThan | LowestEqual -> + fresh_var >>| fun fresh_ty -> fresh_ty, fresh_ty, TyPrim "bool" + | Plus | Minus | Multiply | Division -> return (TyPrim "int", TyPrim "int", TyPrim "int") + | And | Or -> return (TyPrim "bool", TyPrim "bool", TyPrim "bool") +;; + +let rec infer_expr env = function + | ExpConst const -> return (Substitution.empty, infer_const const) + | ExpIdent var -> + (match TypeEnv.find env var with + | Some scheme -> + let* ty = instantiate scheme in + return (Substitution.empty, ty) + | None -> fail (NoVariable var)) + | ExpUnarOper (operation, expr) -> + let* subst, ty = infer_expr env expr in + let* operation_type = + match operation with + | Negative -> return (TyArrow (TyPrim "int", TyPrim "int")) + | Not -> return (TyArrow (TyPrim "bool", TyPrim "bool")) + in + let* subst2 = + match operation_type with + | TyArrow (arg, _) -> Substitution.unify ty arg + | ty -> fail (UnexpectedFunction ty) + in + let* subst2 = Substitution.compose_all [ subst2; subst ] in + (match operation_type with + | TyArrow (_, x) -> return (subst2, Substitution.apply subst2 x) + | ty -> fail (UnexpectedFunction ty)) + | ExpBinOper (op, expr1, expr2) -> + let* subst1, ty = infer_expr env expr1 in + let* subst2, ty' = infer_expr (TypeEnv.apply subst1 env) expr2 in + let* ty1_op, ty2_op, ty_res = infer_binop_type op in + let* subst3 = Substitution.unify (Substitution.apply subst2 ty) ty1_op in + let* subst4 = Substitution.unify (Substitution.apply subst3 ty') ty2_op in + let* subst = Substitution.compose_all [ subst1; subst2; subst3; subst4 ] in + return (subst, Substitution.apply subst ty_res) + | ExpBranch (cond, then_expr, else_expr) -> + let* subst1, ty1 = infer_expr env cond in + let* subst2, ty2 = infer_expr (TypeEnv.apply subst1 env) then_expr in + let* ty3 = + match else_expr with + | Some el -> + let* _, ty3 = infer_expr (TypeEnv.apply subst2 env) el in + return ty3 + | None -> return (TyPrim "unit") + in + let* subst4 = Substitution.unify ty1 (TyPrim "bool") in + let* subst5 = Substitution.unify ty2 ty3 in + let* total_subst = + match else_expr with + | Some el -> + let* subst3, _ = infer_expr (TypeEnv.apply subst2 env) el in + Substitution.compose_all [ subst5; subst4; subst3; subst2; subst1 ] + | None -> Substitution.compose_all [ subst5; subst4; subst2; subst1 ] + in + return (total_subst, Substitution.apply total_subst ty2) + | ExpTuple (expr1, expr2, exprs) -> + let* subst1, ty1 = infer_expr env expr1 in + let* subst2, ty2 = infer_expr (TypeEnv.apply subst1 env) expr2 in + let infer_tuple_elements env es = + let rec aux env = function + | [] -> return ([], []) + | e :: es' -> + let* s, t = infer_expr env e in + let* s', ts = aux (TypeEnv.apply s env) es' in + return (s' @ [ s ], t :: ts) + in + aux env es + in + let* subst3, tys = infer_tuple_elements (TypeEnv.apply subst2 env) exprs in + let* subst = Substitution.compose_all (subst3 @ [ subst2; subst1 ]) in + return (subst, TyTuple (ty1 :: ty2 :: tys)) + | ExpList exprs -> + (match exprs with + | [] -> + let* fresh = fresh_var in + return (Substitution.empty, TyList fresh) + | _ :: _ -> + let infer_list_elements env es = + let rec aux env = function + | [] -> return ([], []) + | e :: es' -> + let* s, t = infer_expr env e in + let* s', ts = aux (TypeEnv.apply s env) es' in + return (s' @ [ s ], t :: ts) + in + aux env es + in + let* subst, tys = infer_list_elements env exprs in + let* total_subst = Substitution.compose_all subst in + (match tys with + | [] -> fail (SeveralBounds "inferred empty list type") + | ty :: _ -> return (total_subst, TyList ty))) + | ExpLet (NonRec, (PatVariable x, expr1), _, expr2) -> + let* () = enter_level in + let* subst1, ty1 = infer_expr env expr1 in + let* () = leave_level in + let env2 = TypeEnv.apply subst1 env in + let* ty_gen = generalize env2 ty1 in + let env3 = TypeEnv.extend env x ty_gen in + let* subst2, ty2 = infer_expr (TypeEnv.apply subst1 env3) expr2 in + let* total_subst = Substitution.compose subst1 subst2 in + return (total_subst, ty2) + | ExpLet (NonRec, (pattern, expr1), bindings, expr2) -> + let* () = enter_level in + let* subst1, ty1 = infer_expr env expr1 in + let* () = leave_level in + let* subst2, ty_pat, env1 = infer_pattern env pattern in + let* subst = Substitution.compose subst1 subst2 in + let* unified_subst = Substitution.unify (Substitution.apply subst ty_pat) ty1 in + let initial_env = TypeEnv.apply unified_subst env1 in + let* extended_env = + List.fold_left + ~f:(fun acc_env (pattern, expr) -> + let* acc_env = acc_env in + let* subst_bind, ty_bind = infer_expr acc_env expr in + let* subst_pattern, _, env_pattern = infer_pattern acc_env pattern in + let* combined_subst = Substitution.compose subst_bind subst_pattern in + let* final_subst = + Substitution.unify (Substitution.apply combined_subst ty_pat) ty_bind + in + let updated_env = + Map.fold + ~init:(TypeEnv.apply final_subst acc_env) + ~f:(fun ~key ~data acc_env -> TypeEnv.extend acc_env key data) + (TypeEnv.apply final_subst env_pattern) + in + return updated_env) + ~init:(return initial_env) + bindings + in + let* subst3, ty2 = infer_expr extended_env expr2 in + let* total_subst = Substitution.compose_all [ subst3; unified_subst; subst ] in + return (total_subst, ty2) + | ExpLet (Rec, (PatVariable x, expr1), [], expr2) -> + let* expr1 = + match expr1 with + | ExpLambda _ -> return expr1 + | _ -> fail (RHS "Right-hand side of let rec must be a lambda expression") + in + let* tv = fresh_var in + let env2 = TypeEnv.extend env x (Scheme.Scheme (VarSet.empty, tv)) in + let* () = enter_level in + let* subst1, ty1 = infer_expr env2 expr1 in + let* () = leave_level in + let* subst2 = Substitution.unify (Substitution.apply subst1 tv) ty1 in + let* subst_total = Substitution.compose subst1 subst2 in + let env3 = TypeEnv.apply subst_total env in + let env4 = TypeEnv.apply subst1 env3 in + let* ty_gen = generalize env4 (Substitution.apply subst_total tv) in + let* subst3, ty2 = infer_expr (TypeEnv.extend env4 x ty_gen) expr2 in + let* subst_total = Substitution.compose subst_total subst3 in + return (subst_total, ty2) + | ExpLet (Rec, value_binding, value_bindings, expr2) -> + let* env_ext, subst_acc = + List.fold_left + ~f:(fun acc_env (pat, expr) -> + let* expr = + match expr with + | ExpLambda _ -> return expr + | _ -> fail (RHS "Right-hand side of let rec must be a lambda expression") + in + let* pat = + match pat with + | PatVariable _ -> return pat + | _ -> + fail (LHS "Only variables are allowed on the left-hand side of let rec") + in + let* env_acc, _ = acc_env in + let* () = enter_level in + let* subst_expr, ty_expr = infer_expr env_acc expr in + let* () = leave_level in + let* subst_pattern, ty_pat, env_pat = infer_pattern env_acc pat in + let* subst = Substitution.compose subst_expr subst_pattern in + let* unified_subst = Substitution.unify ty_expr ty_pat in + let* combined_subst = Substitution.compose subst unified_subst in + let extended_env = TypeEnv.apply combined_subst env_pat in + return (extended_env, combined_subst)) + ~init:(return (env, Substitution.empty)) + (value_binding :: value_bindings) + in + let* subst2, ty2 = infer_expr env_ext expr2 in + let* total_subst = Substitution.compose subst_acc subst2 in + return (total_subst, ty2) + | ExpLambda (pat, pats, body) -> + let patterns = pat :: pats in + let* env, pat_types = + List.fold_left + patterns + ~init:(return (env, [])) + ~f:(fun acc pat -> + let* env, pat_types = acc in + let* _, typ, env = infer_pattern env pat in + return (env, typ :: pat_types)) + in + let* subst_body, ty_body = infer_expr env body in + let arrow_type = + List.fold_right + ~f:(fun pat_type acc -> TyArrow (Substitution.apply subst_body pat_type, acc)) + ~init:ty_body + (List.rev pat_types) + in + return (subst_body, arrow_type) + | ExpApply (func, arg) -> + let* subst1, ty_func = infer_expr env func in + let* subst2, ty_arg = infer_expr (TypeEnv.apply subst1 env) arg in + let* tv = fresh_var in + let* subst3 = + Substitution.unify (Substitution.apply subst2 ty_func) (TyArrow (ty_arg, tv)) + in + let* total_subst = Substitution.compose_all [ subst3; subst2; subst1 ] in + return (total_subst, Substitution.apply total_subst tv) + | ExpFunction ((pat, body), rest_cases) -> + (match rest_cases with + | [] -> + let patterns = [ pat ] in + let* env', pat_types = + List.fold_left + patterns + ~init:(return (env, [])) + ~f:(fun acc p -> + let* env_acc, pat_types = acc in + let* _, typ, env_new = infer_pattern env_acc p in + return (env_new, typ :: pat_types)) + in + let* subst_body, ty_body = infer_expr env' body in + let arrow_type = + List.fold_right + ~f:(fun pt acc -> TyArrow (Substitution.apply subst_body pt, acc)) + ~init:ty_body + (List.rev pat_types) + in + return (subst_body, arrow_type) + | _ -> fail (RHS "Multiple function cases not yet supported")) + | ExpOption opt_expr -> + (match opt_expr with + | Some expr -> + let* subst, ty = infer_expr env expr in + return (subst, TyOption ty) + | None -> + let* tv = fresh_var in + return (Substitution.empty, TyOption tv)) + | ExpTypeAnnotation (expr, t) -> + let* subst1, ty1 = infer_expr env expr in + let* subst2 = Substitution.unify ty1 (Substitution.apply subst1 t) in + let* total_subst = Substitution.compose subst1 subst2 in + return (total_subst, Substitution.apply subst2 ty1) + | ExpMatch (scrut, (pat, expr), bind_list) -> + let* subst_scrut, ty_scrut = infer_expr env scrut in + let all_cases = (pat, expr) :: bind_list in + let* final_subst, ty_res = + List.fold_left + all_cases + ~init:(return (subst_scrut, None)) + ~f:(fun acc (pat', expr') -> + let* sub_acc, ty_res_opt = acc in + let env' = TypeEnv.apply sub_acc env in + let* sub_pat, ty_pat, env_pat = infer_pattern env' pat' in + let* sub_u = + Substitution.unify + (Substitution.apply sub_pat (Substitution.apply sub_acc ty_scrut)) + ty_pat + in + let* sub_comp = Substitution.compose sub_u sub_pat in + let* sub_expr, ty_branch = infer_expr (TypeEnv.apply sub_comp env_pat) expr' in + let* sub_total = Substitution.compose_all [ sub_expr; sub_comp; sub_acc ] in + let ty_branch' = Substitution.apply sub_total ty_branch in + match ty_res_opt with + | None -> return (sub_total, Some ty_branch') + | Some ty_prev -> + let* sub_merge = Substitution.unify ty_prev ty_branch' in + let* sub_final = Substitution.compose sub_total sub_merge in + return (sub_final, Some (Substitution.apply sub_merge ty_prev))) + in + (match ty_res with + | Some t -> return (final_subst, t) + | None -> fail (RHS "Empty match")) + | ExpConstruct (name, opt_expr) -> + (match name, opt_expr with + | "()", None -> return (Substitution.empty, TyPrim "unit") + | "None", None -> + let* tv = fresh_var in + return (Substitution.empty, TyOption tv) + | "Some", Some e -> + let* subst, ty = infer_expr env e in + return (subst, TyOption ty) + | "[]", None -> + let* tv = fresh_var in + return (Substitution.empty, TyList tv) + | "::", Some (ExpTuple (head_e, tail_e, [])) -> + let* subst_h, ty_h = infer_expr env head_e in + let* subst_t, _ty_t = infer_expr (TypeEnv.apply subst_h env) tail_e in + let ty_h = Substitution.apply subst_t ty_h in + let* subst_total = Substitution.compose_all [ subst_t; subst_h ] in + return (subst_total, Substitution.apply subst_total (TyList ty_h)) + | "::", _ -> fail (RHS "Constructor (::) expects a pair argument") + | _ -> fail (RHS ("Unknown constructor: " ^ name))) +;; + +let infer_structure_item env = function + | SEval expr -> + let* subst, _ = infer_expr env expr in + let updated_env = TypeEnv.apply subst env in + return (subst, updated_env) + | SValue (Rec, (PatVariable x, expr), []) -> + let* expr = + match expr with + | ExpLambda _ -> return expr + | _ -> fail (RHS "Right-hand side of let rec must be a lambda expression") + in + let* tv = fresh_var in + let env = TypeEnv.extend env x (Scheme.Scheme (VarSet.empty, tv)) in + let* () = enter_level in + let* subst, ty = infer_expr env expr in + let* () = leave_level in + let* subst2 = Substitution.unify (Substitution.apply subst tv) ty in + let* composed_subst = Substitution.compose subst subst2 in + let env2 = TypeEnv.apply composed_subst env in + let* generalized_ty = generalize env2 (Substitution.apply composed_subst ty) in + let env = TypeEnv.extend env2 x generalized_ty in + return (composed_subst, env) + | SValue (Rec, value_binding, value_bindings) -> + let all_bindings = value_binding :: value_bindings in + let* env_with_placeholders = + List.fold_left + ~f:(fun acc_env (pat, _) -> + let* pat = + match pat with + | PatVariable _ -> return pat + | _ -> + fail (LHS "Only variables are allowed on the left-hand side of let rec") + in + let* env_acc = acc_env in + let* subst_pat, _, env_pat = infer_pattern env_acc pat in + let extended_env = TypeEnv.apply subst_pat env_pat in + return extended_env) + ~init:(return env) + all_bindings + in + let* env_ext, subst_acc = + List.fold_left + ~f:(fun acc_env (ty_pattern, expr) -> + let* expr = + match expr with + | ExpLambda _ -> return expr + | _ -> fail (RHS "Right-hand side of let rec must be a lambda expression") + in + let* env_acc, _ = acc_env in + let* subst_expr, ty_expr = infer_expr env_acc expr in + let* subst_pat, ty_pat, env_pat = infer_pattern env_acc ty_pattern in + let* subst = Substitution.compose subst_expr subst_pat in + let* unified_subst = Substitution.unify ty_expr ty_pat in + let* combined_subst = Substitution.compose subst unified_subst in + let extended_env = TypeEnv.apply combined_subst env_pat in + return (extended_env, combined_subst)) + ~init:(return (env_with_placeholders, Substitution.empty)) + all_bindings + in + return (subst_acc, env_ext) + | SValue (NonRec, (PatVariable x, expr), _) -> + let* () = enter_level in + let* subst, ty = infer_expr env expr in + let* () = leave_level in + let env2 = TypeEnv.apply subst env in + let* generalized_ty = generalize env2 ty in + let env = TypeEnv.extend (TypeEnv.apply subst env) x generalized_ty in + return (subst, env) + | SValue (NonRec, (pattern, expr), _) -> + let* subst_expr, ty = infer_expr env expr in + let* subst_pat, ty_pat, env_pat = infer_pattern env pattern in + let* combined_subst = Substitution.compose subst_expr subst_pat in + let* unified_subst = + Substitution.unify (Substitution.apply combined_subst ty_pat) ty + in + let updated_env = TypeEnv.apply unified_subst env_pat in + let* final_subst = Substitution.compose unified_subst combined_subst in + return (final_subst, updated_env) +;; + +let infer_structure env structure = + let rec process_structure env subst = function + | [] -> return (subst, env) + | item :: rest -> + let* subst1, env1 = infer_structure_item env item in + let* composed_subst = Substitution.compose subst subst1 in + process_structure env1 composed_subst rest + in + process_structure env Substitution.empty structure +;; + +let infer_simple_expression expr = + Result.map ~f:snd (run (infer_expr TypeEnv.initial_env expr)) +;; + +let run_infer str = Result.map ~f:snd (run (infer_structure TypeEnv.initial_env str)) diff --git a/EML/lib/middleend/inferencer.mli b/EML/lib/middleend/inferencer.mli new file mode 100644 index 00000000..f501e0bf --- /dev/null +++ b/EML/lib/middleend/inferencer.mli @@ -0,0 +1,64 @@ +(** Copyright 2024-2025, Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Frontend.Ast + +type error = + | OccursCheck of string * ty + | NoVariable of string + | UnificationFailed of ty * ty + | SeveralBounds of string + | LHS of string + | RHS of string + | UnexpectedFunction of ty + +val pp_error : Format.formatter -> error -> unit + +module ResultMonad : sig + type 'a t + + val return : 'a -> 'a t + val fail : error -> 'a t + + include Base.Monad.Infix with type 'a t := 'a t + + module Syntax : sig + val ( let* ) : 'a t -> ('a -> 'b t) -> 'b t + end + + val fresh : int t + val current_level : int t + val enter_level : unit t + val leave_level : unit t + val set_var_level : string -> int -> unit t + val get_var_level : string -> int option t + val run : 'a t -> ('a, error) Result.t +end + +module Substitution : sig + type t + + val empty : t +end + +module VarSet : Stdlib.Set.S with type elt = string + +module Scheme : sig + type t = Scheme of VarSet.t * ty +end + +module TypeEnv : sig + type t = (ident, Scheme.t, Base.String.comparator_witness) Base.Map.t + + val extend : t -> ident -> Scheme.t -> t + val free_vars : t -> VarSet.t + val apply : Substitution.t -> t -> t + val find : t -> ident -> Scheme.t option + val initial_env : t + val env_with_gc : t +end + +val infer_structure : TypeEnv.t -> program -> (Substitution.t * TypeEnv.t) ResultMonad.t +val infer_simple_expression : expr -> (ty, error) Result.t +val run_infer : program -> (TypeEnv.t, error) Result.t diff --git a/EML/lib/middleend/ll.ml b/EML/lib/middleend/ll.ml new file mode 100644 index 00000000..d1b14b6e --- /dev/null +++ b/EML/lib/middleend/ll.ml @@ -0,0 +1,413 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Frontend.Ast +module StringSet = Set.Make (String) +module Map = Map.Make (String) + +module type NAMING = sig + type t + + val fresh : t -> string * t + val init : t +end + +module Default_naming : NAMING = struct + type t = int + + let init = 0 + + let fresh n = + let s = "lifted_" ^ Int.to_string n in + s, n + 1 + ;; +end + +type lift_result = + { structures : structure list + ; expr : expr + } + +let names_in_pattern p = + let rec collect = function + | PatAny -> [] + | PatVariable s -> [ s ] + | PatConst _ -> [] + | PatConstruct (_, None) -> [] + | PatConstruct (_, Some q) -> collect q + | PatType (q, _) -> collect q + | PatTuple (p1, p2, rest) -> List.concat_map collect (p1 :: p2 :: rest) + | PatUnit -> [] + | PatList ps -> List.concat_map collect ps + | PatOption p_opt -> + (match p_opt with + | None -> [] + | Some x -> collect x) + in + collect p +;; + +let rename_pattern env p = + let rec subst = function + | PatVariable s -> + let s' = + try Map.find s env with + | Not_found -> s + in + PatVariable s' + | PatConstruct (id, p_opt) -> PatConstruct (id, Option.map subst p_opt) + | PatType (p, t) -> PatType (subst p, t) + | PatList ps -> PatList (List.map subst ps) + | PatOption p_opt -> PatOption (Option.map subst p_opt) + | other -> other + in + subst p +;; + +let unique_names_in_bind_group binds = + let add_if_new (rev_list, set) id = + if StringSet.mem id set then rev_list, set else id :: rev_list, StringSet.add id set + in + let rev_list, _seen = + List.fold_left + (fun (rev_list, set) (p, _) -> + List.fold_left add_if_new (rev_list, set) (names_in_pattern p)) + ([], StringSet.empty) + binds + in + List.rev rev_list +;; + +type error = + | RecLetEmptyBinding + | SValueEmptyBinding + +let pp_error ppf = function + | RecLetEmptyBinding -> + Format.fprintf ppf "lambda_lifting: Rec let must have at least one binding" + | SValueEmptyBinding -> + Format.fprintf ppf "lambda_lifting: SValue must have at least one binding" +;; + +module Make (N : NAMING) = struct + type 'a t = N.t -> ('a * N.t, error) Result.t + + let return (x : 'a) : 'a t = fun st -> Ok (x, st) + let fail (e : error) : _ t = fun _ -> Error e + + let bind (m : 'a t) (f : 'a -> 'b t) : 'b t = + fun st -> + match m st with + | Ok (x, st') -> f x st' + | Error e -> Error e + ;; + + let ( let* ) = bind + + let take_names k : string list t = + fun st -> + let rec loop acc st' i = + if i <= 0 + then Ok (List.rev acc, st') + else ( + let name, st'' = N.fresh st' in + loop (name :: acc) st'' (i - 1)) + in + loop [] st k + ;; + + let map2 (m1 : 'a t) (m2 : 'b t) (f : 'a -> 'b -> 'c) : 'c t = + fun st -> + match m1 st with + | Error e -> Error e + | Ok (x1, st1) -> + (match m2 st1 with + | Error e -> Error e + | Ok (x2, st2) -> Ok (f x1 x2, st2)) + ;; + + let pair (m1 : lift_result t) (m2 : lift_result t) (f : expr -> expr -> expr) + : lift_result t + = + map2 m1 m2 (fun r1 r2 -> + { structures = r1.structures @ r2.structures; expr = f r1.expr r2.expr }) + ;; + + let triple + (m1 : lift_result t) + (m2 : lift_result t) + (m3 : lift_result t) + (f : expr -> expr -> expr -> expr) + : lift_result t + = + map2 + m1 + (map2 m2 m3 (fun r2 r3 -> r2, r3)) + (fun r1 (r2, r3) -> + { structures = r1.structures @ r2.structures @ r3.structures + ; expr = f r1.expr r2.expr r3.expr + }) + ;; + + let list (exprs : expr list) (m : expr -> lift_result t) + : (structure list * expr list) t + = + fun st -> + let rec loop rev_structs rev_exprs st' = function + | [] -> Ok ((List.concat (List.rev rev_structs), List.rev rev_exprs), st') + | e :: rest -> + (match m e st' with + | Error e_err -> Error e_err + | Ok (r, st'') -> + loop (r.structures :: rev_structs) (r.expr :: rev_exprs) st'' rest) + in + loop [] [] st exprs + ;; + + type context = + { renames : string Map.t + ; at_toplevel : bool + } + + let initial_renames = Map.empty + + let without_bindings renames names = + List.fold_left (fun m k -> Map.remove k m) renames names + ;; + + let inner (ctx : context) = { ctx with at_toplevel = false } + + let fold_binds (ctx : context) binds (f : context -> pattern -> expr -> lift_result t) + : (structure list * (pattern * expr) list) t + = + List.fold_left + (fun acc (p, e) -> + let* rev_structures, rev_binds = acc in + let* res = f ctx p e in + return (res.structures :: rev_structures, (p, res.expr) :: rev_binds)) + (return ([], [])) + binds + |> fun m -> + let* rev_structures, rev_binds = m in + return (List.concat (List.rev rev_structures), List.rev rev_binds) + ;; + + let rec lift_expr (ctx : context) (e : expr) : lift_result t = + match e with + | ExpIdent name -> + let name' = + try Map.find name ctx.renames with + | Not_found -> name + in + return { structures = []; expr = ExpIdent name' } + | (ExpConst _ | ExpConstruct (_, None)) as e -> return { structures = []; expr = e } + | ExpLet (NonRec, (pat, exp), more, body) -> + let* res_rhs = lift_expr (inner ctx) exp in + let* extra_structures, rest_binds = lift_binds (inner ctx) more in + let all_defs = + names_in_pattern pat @ List.concat_map (fun (p, _) -> names_in_pattern p) more + in + let body_ctx = + { (inner ctx) with renames = without_bindings ctx.renames all_defs } + in + let* res_body = lift_expr body_ctx body in + return + { structures = res_rhs.structures @ extra_structures @ res_body.structures + ; expr = ExpLet (NonRec, (pat, res_rhs.expr), rest_binds, res_body.expr) + } + | ExpLet (Rec, (pat, exp), more, body) -> + let rec_binds = (pat, exp) :: more in + let unique_ids = unique_names_in_bind_group rec_binds in + let* names = take_names (List.length unique_ids) in + let rec_ctx = + { (inner ctx) with + renames = + List.fold_left + (fun env (id, name) -> Map.add id name env) + ctx.renames + (List.combine unique_ids names) + } + in + let* inner_structures, lifted_binds = + List.fold_left + (fun acc (p, e) -> + let* structures_acc, binds_acc = acc in + let* res = lift_expr rec_ctx e in + return + ( structures_acc @ res.structures + , binds_acc @ [ rename_pattern rec_ctx.renames p, res.expr ] )) + (return ([], [])) + rec_binds + in + let* res_body = lift_expr rec_ctx body in + let* first_bind, rest_binds = + match lifted_binds with + | first :: rest -> return (first, rest) + | [] -> fail RecLetEmptyBinding + in + return + { res_body with + structures = + inner_structures + @ [ SValue (Rec, first_bind, rest_binds) ] + @ res_body.structures + } + | ExpLambda (pat, pats, body) when ctx.at_toplevel -> + let* res = lift_expr (inner ctx) body in + return { res with expr = ExpLambda (pat, pats, res.expr) } + | ExpLambda (pat, pats, body) -> + let* names = take_names 1 in + let name = List.hd names in + let args = pat :: pats in + let bound = List.concat_map names_in_pattern args in + let* res = + lift_expr { (inner ctx) with renames = without_bindings ctx.renames bound } body + in + let value_def = + SValue (NonRec, (PatVariable name, ExpLambda (pat, pats, res.expr)), []) + in + return { structures = res.structures @ [ value_def ]; expr = ExpIdent name } + | ExpApply (e1, e2) -> + pair + (lift_expr (inner ctx) e1) + (lift_expr (inner ctx) e2) + (fun e1' e2' -> ExpApply (e1', e2')) + | ExpFunction ((pat, exp), cases) when ctx.at_toplevel -> + let ctx_rhs = + { (inner ctx) with renames = without_bindings ctx.renames (names_in_pattern pat) } + in + let* res_rhs = lift_expr ctx_rhs exp in + let* case_structures, lifted_cases = + lift_binds_with_pattern_scope (inner ctx) cases + in + return + { structures = res_rhs.structures @ case_structures + ; expr = ExpFunction ((pat, res_rhs.expr), lifted_cases) + } + | ExpFunction ((pat1, exp1), cases) -> + let* names = take_names 1 in + let name = List.hd names in + let ctx_body = + { ctx with renames = without_bindings ctx.renames (names_in_pattern pat1) } + in + let* res_body = lift_expr ctx_body exp1 in + let* case_structures, lifted_cases = lift_binds_with_pattern_scope ctx cases in + let value_def = + SValue + ( NonRec + , (PatVariable name, ExpFunction ((pat1, res_body.expr), lifted_cases)) + , [] ) + in + return + { structures = res_body.structures @ case_structures @ [ value_def ] + ; expr = ExpIdent name + } + | ExpMatch (e, (pat, branch), cases) -> + let* res_scrut = lift_expr (inner ctx) e in + let ctx_branch = + { (inner ctx) with renames = without_bindings ctx.renames (names_in_pattern pat) } + in + let* res_branch = lift_expr ctx_branch branch in + let* case_structures, lifted_cases = lift_binds_with_pattern_scope ctx cases in + return + { structures = res_scrut.structures @ res_branch.structures @ case_structures + ; expr = ExpMatch (res_scrut.expr, (pat, res_branch.expr), lifted_cases) + } + | ExpBranch (e1, e2, e3_opt) -> + (match e3_opt with + | None -> + pair + (lift_expr (inner ctx) e1) + (lift_expr (inner ctx) e2) + (fun e1' e2' -> ExpBranch (e1', e2', None)) + | Some e3 -> + triple + (lift_expr (inner ctx) e1) + (lift_expr (inner ctx) e2) + (lift_expr (inner ctx) e3) + (fun e1' e2' e3' -> ExpBranch (e1', e2', Some e3'))) + | ExpConstruct (id, Some e) -> + let* res = lift_expr (inner ctx) e in + return { res with expr = ExpConstruct (id, Some res.expr) } + | ExpTypeAnnotation (e, typ) -> + let* res = lift_expr (inner ctx) e in + return { res with expr = ExpTypeAnnotation (res.expr, typ) } + | ExpBinOper (op, e1, e2) -> + pair + (lift_expr (inner ctx) e1) + (lift_expr (inner ctx) e2) + (fun e1' e2' -> ExpBinOper (op, e1', e2')) + | ExpUnarOper (op, e) -> + let* res = lift_expr (inner ctx) e in + return { res with expr = ExpUnarOper (op, res.expr) } + | ExpTuple (e1, e2, rest) -> + let* first = lift_expr (inner ctx) e1 in + let* second = lift_expr (inner ctx) e2 in + let* rest_structures, rest_exprs = list rest (lift_expr (inner ctx)) in + return + { structures = first.structures @ second.structures @ rest_structures + ; expr = ExpTuple (first.expr, second.expr, rest_exprs) + } + | ExpList es -> + let* elem_structures, lifted_elems = list es (lift_expr (inner ctx)) in + return { structures = elem_structures; expr = ExpList lifted_elems } + | ExpOption None -> return { structures = []; expr = ExpOption None } + | ExpOption (Some e) -> + let* res = lift_expr (inner ctx) e in + return { res with expr = ExpOption (Some res.expr) } + + and lift_binds (ctx : context) binds : (structure list * (pattern * expr) list) t = + fold_binds ctx binds (fun ctx _ e -> lift_expr ctx e) + + and lift_binds_with_pattern_scope (ctx : context) binds + : (structure list * (pattern * expr) list) t + = + fold_binds ctx binds (fun ctx p e -> + let ctx_binding = + { ctx with renames = without_bindings ctx.renames (names_in_pattern p) } + in + lift_expr ctx_binding e) + ;; + + let lift_structure : structure -> structure list t = function + | SEval e -> + let toplevel = { renames = initial_renames; at_toplevel = true } in + let* res = lift_expr toplevel e in + return (res.structures @ [ SEval res.expr ]) + | SValue (is_rec, bind, more) -> + let toplevel = { renames = initial_renames; at_toplevel = true } in + let* inner_structures, lifted_binds = lift_binds toplevel (bind :: more) in + (match lifted_binds with + | first :: rest -> return (inner_structures @ [ SValue (is_rec, first, rest) ]) + | [] -> fail SValueEmptyBinding) + ;; + + let run_program (program : program) (naming_init : N.t) + : (structure list * N.t, error) Result.t + = + let m = + List.fold_left + (fun acc item -> + let* rev_structure_lists = acc in + let* struct_structures = lift_structure item in + return (struct_structures :: rev_structure_lists)) + (return []) + program + in + match m naming_init with + | Ok (rev_structure_lists, st_final) -> + Ok (List.concat (List.rev rev_structure_lists), st_final) + | Error e -> Error e + ;; +end + +module Transform = Make (Default_naming) + +let lambda_lifting_result (program : Frontend.Ast.program) + : (structure list, error) Result.t + = + match Transform.run_program program Default_naming.init with + | Ok (lst, _) -> Ok lst + | Error e -> Error e +;; diff --git a/EML/lib/middleend/ll.mli b/EML/lib/middleend/ll.mli new file mode 100644 index 00000000..ba81839f --- /dev/null +++ b/EML/lib/middleend/ll.mli @@ -0,0 +1,13 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +type error = + | RecLetEmptyBinding + | SValueEmptyBinding + +val pp_error : Format.formatter -> error -> unit + +val lambda_lifting_result + : Frontend.Ast.program + -> (Frontend.Ast.structure list, error) Result.t diff --git a/EML/lib/middleend/runner.ml b/EML/lib/middleend/runner.ml new file mode 100644 index 00000000..a093518e --- /dev/null +++ b/EML/lib/middleend/runner.ml @@ -0,0 +1,46 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Format +open Frontend.Ast +open Inferencer +open Cc +open Ll +open Anf + +type error = + | Infer of Inferencer.error + | Closure of Cc.error + | Lifting of Ll.error + | Anf of string + +let pp_error ppf = function + | Infer e -> fprintf ppf "inference: %a" Inferencer.pp_error e + | Closure e -> fprintf ppf "closure conversion: %a" Cc.pp_error e + | Lifting e -> fprintf ppf "lambda lifting: %a" Ll.pp_error e + | Anf s -> fprintf ppf "ANF: %s" s +;; + +let run (program : program) (env : Inferencer.TypeEnv.t) + : (anf_program * Inferencer.TypeEnv.t, error) Result.t + = + let ( >>= ) = Result.bind in + let env' = + match Inferencer.ResultMonad.run (infer_structure env program) with + | Error (Inferencer.OccursCheck _) -> Ok env + | Error e -> Error (Infer e) + | Ok (_subst, env'') -> Ok env'' + in + env' + >>= fun env'' -> + closure_conversion_result program + |> Result.map_error (fun e -> Closure e) + >>= fun cc_ast -> + lambda_lifting_result cc_ast + |> Result.map_error (fun e -> Lifting e) + >>= fun ll_ast -> + anf_program ll_ast + |> Result.map_error (fun e -> Anf e) + >>= fun anf_ast -> Ok (anf_ast, env'') +;; diff --git a/EML/lib/middleend/runner.mli b/EML/lib/middleend/runner.mli new file mode 100644 index 00000000..7dd28ccb --- /dev/null +++ b/EML/lib/middleend/runner.mli @@ -0,0 +1,16 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Frontend.Ast +open Inferencer +open Anf + +type error = + | Infer of Inferencer.error + | Closure of Cc.error + | Lifting of Ll.error + | Anf of string + +val pp_error : Format.formatter -> error -> unit +val run : program -> TypeEnv.t -> (anf_program * TypeEnv.t, error) Result.t diff --git a/EML/lib/runtime/dune b/EML/lib/runtime/dune new file mode 100644 index 00000000..75ae5317 --- /dev/null +++ b/EML/lib/runtime/dune @@ -0,0 +1,26 @@ +(rule + (targets rv64_runtime.a) + (deps runtime.c) + (action + (progn + (run + riscv64-linux-gnu-gcc + -march=rv64gc + -mabi=lp64d + -O2 + -DEML_RISCV + -DENABLE_GC + -c + %{dep:runtime.c} + -o + rv64_runtime.o) + (run riscv64-linux-gnu-ar rcs %{targets} rv64_runtime.o)))) + +;; Optional: build LLVM runtime object for linking with .ll output. +;; Usage: dune build lib/runtime/llvm_runtime.o (with C compiler on host) + +(rule + (targets llvm_runtime.o) + (deps runtime.c) + (action + (run gcc -O2 -DEML_LLVM -DENABLE_GC -c %{dep:runtime.c} -o %{targets}))) diff --git a/EML/lib/runtime/llvm_call.S b/EML/lib/runtime/llvm_call.S new file mode 100644 index 00000000..7fa0984f --- /dev/null +++ b/EML/lib/runtime/llvm_call.S @@ -0,0 +1,65 @@ + .text + .globl llvm_call_indirect + .type llvm_call_indirect, @function +llvm_call_indirect: + pushq %rbp + movq %rsp, %rbp + movq %rdi, %rax + movq %rsi, %r10 + movq %rdx, %r11 + + cmpq $6, %r11 + jbe .Lload_regs + + pushq %r12 + movq %rax, %r12 + pushq %r13 + pushq %r14 + + leaq -6(%r11), %rcx + shlq $3, %rcx + movq %rsp, %rax + subq %rcx, %rax + andq $15, %rax + addq %rax, %rcx + movq %rcx, %r13 + subq %rcx, %rsp + + movq $6, %r8 +.Lstack_loop: + cmpq %r8, %r11 + jle .Lrestore_fn + movq (%r10,%r8,8), %rax + movq %r8, %rcx + subq $6, %rcx + movq %rax, 0(%rsp,%rcx,8) + incq %r8 + jmp .Lstack_loop + +.Lrestore_fn: + movq %r12, %rax + +.Lload_regs: + movq (%r10), %rdi + movq 8(%r10), %rsi + movq 16(%r10), %rdx + movq 24(%r10), %rcx + movq 32(%r10), %r8 + movq 40(%r10), %r9 + + movq %r11, %r14 + call *%rax + movq %r14, %r11 + + cmpq $6, %r11 + jbe .Lepilogue + addq %r13, %rsp + popq %r14 + popq %r13 + popq %r12 + +.Lepilogue: + leave + ret + .size llvm_call_indirect, .-llvm_call_indirect + .section .note.GNU-stack,"",%progbits diff --git a/EML/lib/runtime/primitives.ml b/EML/lib/runtime/primitives.ml new file mode 100644 index 00000000..3e9b959d --- /dev/null +++ b/EML/lib/runtime/primitives.ml @@ -0,0 +1,45 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +type llvm_arg = + | Ptr + | Int + | I32 + +type llvm_ret = + | RPtr + | RInt + | RVoid + +type runtime_func_sig = + { name : string + ; ret : llvm_ret + ; args : llvm_arg list + } + +let predefined_runtime_funcs : runtime_func_sig list = + [ { name = "eml_applyN"; ret = RPtr; args = [ Ptr; Int; Ptr ] } + ; { name = "create_tuple"; ret = RPtr; args = [ Int; Ptr ] } + ; { name = "alloc_closure"; ret = RPtr; args = [ Ptr; Int ] } + ; { name = "field"; ret = RPtr; args = [ Ptr; Int ] } + ; { name = "llvm_call_indirect"; ret = RPtr; args = [ Ptr; Ptr; Int ] } + ; { name = "print_int"; ret = RVoid; args = [ Int ] } + ; { name = "init_gc"; ret = RVoid; args = [] } + ; { name = "destroy_gc"; ret = RVoid; args = [] } + ; { name = "set_ptr_stack"; ret = RVoid; args = [ Ptr ] } + ; { name = "get_heap_start"; ret = RInt; args = [] } + ; { name = "get_heap_final"; ret = RInt; args = [] } + ; { name = "collect"; ret = RPtr; args = [] } + ; { name = "print_gc_status"; ret = RPtr; args = [] } + ; { name = "llvm.frameaddress.p0"; ret = RPtr; args = [ I32 ] } + ] +;; + +let runtime_primitive_arities : (string * int) list = + List.map (fun { name; args; _ } -> name, List.length args) predefined_runtime_funcs +;; + +let is_reserved (name : string) : bool = + List.exists (fun { name = n; _ } -> String.equal n name) predefined_runtime_funcs +;; diff --git a/EML/lib/runtime/primitives.mli b/EML/lib/runtime/primitives.mli new file mode 100644 index 00000000..a5c05e59 --- /dev/null +++ b/EML/lib/runtime/primitives.mli @@ -0,0 +1,23 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +type llvm_arg = + | Ptr + | Int + | I32 + +type llvm_ret = + | RPtr + | RInt + | RVoid + +type runtime_func_sig = + { name : string + ; ret : llvm_ret + ; args : llvm_arg list + } + +val predefined_runtime_funcs : runtime_func_sig list +val runtime_primitive_arities : (string * int) list +val is_reserved : string -> bool diff --git a/EML/lib/runtime/runtime.c b/EML/lib/runtime/runtime.c new file mode 100644 index 00000000..867d41ec --- /dev/null +++ b/EML/lib/runtime/runtime.c @@ -0,0 +1,413 @@ +#include +#include +#include +#include +#include +#include +#include + +typedef void *eml_value; + +static int64_t tag_int_val(int64_t n) { return (n << 1) | 1; } + +#define TO_ML_INTEGER(tagged_n) ((int64_t)(tagged_n) >> 1) + +void print_int(int64_t tagged_n) { + printf("%ld\n", (long)TO_ML_INTEGER(tagged_n)); +} + +#define TAG_TUPLE 0 +#define TAG_CLOSURE 1 +#define TAG_LAST 2 +#define HEADER_WORDS 1 + +#define SIZE_HEAP_DEFAULT 800 +#define MAX_STACK_SCAN_SLOTS (128 * 1024) + +#define IS_INT(v) ((v) & 0x1) +#define IS_PTR(v) ((v) != 0 && !IS_INT(v)) + +typedef struct { + uint8_t tag; + uint8_t _pad1; + uint16_t size; + uint32_t _pad2; +} box_header_t; + +static inline box_header_t *get_header(uint64_t *payload) { + return (box_header_t *)((uint64_t *)payload - 1); +} +static inline uint64_t *get_payload(box_header_t *hdr) { + return (uint64_t *)(hdr + 1); +} + +static const size_t TAG_SCAN_START[] = { + [TAG_TUPLE] = 1, + [TAG_CLOSURE] = 3, +}; + +typedef struct { + uint64_t *start[2]; + uint64_t *end[2]; + uint64_t *alloc_ptr; + int current_bank; + uint64_t allocations; + uint64_t collections; + uint64_t words_allocated_total; +} gc_state; + +static gc_state GC; +static uint64_t *PTR_STACK = NULL; +static uint64_t *STACK_SCAN_LOW = NULL; +static uint64_t *STACK_SCAN_HIGH = NULL; +static bool gc_enabled = false; +static size_t size_heap = SIZE_HEAP_DEFAULT; + +static inline int get_current_bank_idx(void) { return GC.current_bank; } +static inline int get_another_bank_idx(void) { return GC.current_bank ^ 1; } +static inline bool in_bank(uint64_t *ptr, int bank_idx) { + return (GC.start[bank_idx] <= ptr) && (ptr < GC.end[bank_idx]); +} + +#if defined(ENABLE_GC) + +static void mark_and_copy(uint64_t *stack_slot); + +static uint64_t *forward_or_copy(uint64_t *old_payload, bool *did_copy) { + int from_bank = get_another_bank_idx(); + if (old_payload <= (uint64_t *)GC.start[from_bank] + HEADER_WORDS - 1) { + *did_copy = false; + return old_payload; + } + box_header_t *old_header = get_header(old_payload); + if (old_header->tag >= TAG_LAST || old_header->size == 0 || + old_header->size > size_heap) { + *did_copy = false; + return old_payload; + } + uint64_t possible_forward_ptr = *((uint64_t *)old_payload - 1); + if (in_bank((uint64_t *)possible_forward_ptr, get_current_bank_idx())) { + *did_copy = false; + return (uint64_t *)possible_forward_ptr; + } + *did_copy = true; + uint16_t payload_words = old_header->size; + uint8_t object_tag = old_header->tag; + if (GC.alloc_ptr + payload_words + HEADER_WORDS > GC.end[GC.current_bank]) { + *did_copy = false; + return old_payload; + } + box_header_t *new_header = (box_header_t *)GC.alloc_ptr; + new_header->tag = object_tag; + new_header->size = payload_words; + uint64_t *new_payload = get_payload(new_header); + memcpy(new_payload, old_payload, payload_words * sizeof(uint64_t)); + GC.alloc_ptr += payload_words + HEADER_WORDS; + GC.words_allocated_total += payload_words + HEADER_WORDS; + *((uint64_t *)old_payload - 1) = (uint64_t)new_payload; + return new_payload; +} + +static void scan_object(uint64_t *obj) { + box_header_t *header = get_header(obj); + size_t start = (header->tag < TAG_LAST) ? TAG_SCAN_START[header->tag] : 0; + for (size_t i = start; i < header->size; i++) + mark_and_copy(obj + i); +} + +static void mark_and_copy(uint64_t *stack_slot) { + uint64_t raw_value = *stack_slot; + if (!IS_PTR(raw_value)) return; + uint64_t *old_object_payload = (uint64_t *)raw_value; + int another_bank = get_another_bank_idx(); + if (!in_bank(old_object_payload, another_bank)) { + if (STACK_SCAN_LOW && STACK_SCAN_HIGH) { + uint64_t *low = STACK_SCAN_LOW < STACK_SCAN_HIGH ? STACK_SCAN_LOW : STACK_SCAN_HIGH; + uint64_t *high = STACK_SCAN_LOW < STACK_SCAN_HIGH ? STACK_SCAN_HIGH : STACK_SCAN_LOW; + if (old_object_payload >= low && old_object_payload <= high) + return; + } + return; + } + if (old_object_payload < (uint64_t *)GC.start[another_bank] + HEADER_WORDS) + return; + bool object_was_copied_now; + uint64_t *new_object_payload = + forward_or_copy(old_object_payload, &object_was_copied_now); + *stack_slot = (uint64_t)new_object_payload; + if (object_was_copied_now) scan_object(new_object_payload); +} + +static void allocate_banks(void) { + for (int i = 0; i < 2; i++) { + GC.start[i] = (uint64_t *)malloc(size_heap * sizeof(uint64_t)); + if (GC.start[i] == NULL) { + fprintf(stderr, "Failed to allocate GC bank\n"); + abort(); + } + GC.end[i] = GC.start[i] + size_heap; + } +} + +eml_value collect(void) { + uint64_t dummy; + uint64_t *current_stack_top = &dummy; + if (!PTR_STACK || current_stack_top > PTR_STACK) + return (eml_value)(uintptr_t)tag_int_val(0); + STACK_SCAN_LOW = current_stack_top; + STACK_SCAN_HIGH = PTR_STACK; + GC.current_bank ^= 1; + GC.alloc_ptr = GC.start[GC.current_bank]; + { + uint64_t *stack_slot = current_stack_top; + size_t n = 0; + for (; n < MAX_STACK_SCAN_SLOTS && stack_slot <= PTR_STACK; n++, stack_slot++) + mark_and_copy(stack_slot); + } + STACK_SCAN_LOW = NULL; + STACK_SCAN_HIGH = NULL; + GC.collections++; + return (eml_value)(uintptr_t)tag_int_val(0); +} + +uint64_t *gc_alloc(size_t words, uint64_t tag) { + size_t total_words = words + HEADER_WORDS; + if (GC.alloc_ptr + total_words > GC.end[GC.current_bank]) { + collect(); + if (GC.alloc_ptr + total_words > GC.end[GC.current_bank]) { + fprintf(stderr, "Out of memory\n"); + abort(); + } + } + box_header_t *header = (box_header_t *)GC.alloc_ptr; + header->tag = (uint8_t)tag; + header->size = (uint16_t)words; + uint64_t *obj = get_payload(header); + memset(obj, 0, words * sizeof(uint64_t)); + GC.alloc_ptr += total_words; + GC.allocations++; + GC.words_allocated_total += total_words; + return obj; +} + +void init_gc(void) { + if (gc_enabled) return; + gc_enabled = true; + size_heap = SIZE_HEAP_DEFAULT; + { + const char *heap_size_env = getenv("EML_HEAP_SIZE"); + if (heap_size_env) { + int heap_size_val = atoi(heap_size_env); + if (heap_size_val >= 400 && heap_size_val <= 1024 * 1024) + size_heap = (size_t)heap_size_val; + } + } + allocate_banks(); + GC.current_bank = 0; + GC.alloc_ptr = GC.start[0]; + GC.allocations = 0; + GC.collections = 0; + GC.words_allocated_total = 0; +} + +void destroy_gc(void) { + if (!gc_enabled) return; + for (int i = 0; i < 2; i++) { + free(GC.start[i]); + GC.start[i] = NULL; + GC.end[i] = NULL; + } + GC.alloc_ptr = NULL; + PTR_STACK = NULL; + gc_enabled = false; +} + +void set_ptr_stack(uint64_t *stack_bottom) { PTR_STACK = stack_bottom; } + +eml_value print_gc_status(void) { + int bank = GC.current_bank; + ptrdiff_t current_alloc = GC.alloc_ptr - GC.start[bank]; + ptrdiff_t free_space = GC.end[bank] - GC.alloc_ptr; + printf("=== GC Status ===\n"); + printf("Current allocated: %td\n", current_alloc); + printf("Total allocated: %" PRIu64 "\n", GC.words_allocated_total); + printf("Free space: %td\n", free_space); + printf("Heap size: %zu\n", size_heap); + printf("Current bank index: %d\n", bank); + printf("GC collections: %" PRIu64 "\n", GC.collections); + printf("GC allocations: %" PRIu64 "\n", GC.allocations); + printf("=================\n"); + fflush(stdout); + return (eml_value)(uintptr_t)tag_int_val(0); +} + +static void *eml_alloc(size_t bytes, uint64_t tag) { + if (gc_enabled) { + size_t words = (bytes + sizeof(uint64_t) - 1) / sizeof(uint64_t); + return gc_alloc(words, tag); + } + (void)tag; + return malloc(bytes); +} + +int64_t get_heap_start(void) { return tag_int_val(0); } +int64_t get_heap_final(void) { return tag_int_val((int64_t)size_heap); } + +#else /* !ENABLE_GC */ + +void init_gc(void) {} +void destroy_gc(void) {} +void set_ptr_stack(uint64_t *stack_bottom) { (void)stack_bottom; } + +eml_value collect(void) { + return (eml_value)(uintptr_t)tag_int_val(0); +} + +eml_value print_gc_status(void) { + (void)printf("GC disabled\n"); + return (eml_value)(uintptr_t)tag_int_val(0); +} + +static void *eml_alloc(size_t bytes, uint64_t tag) { + (void)tag; + return malloc(bytes); +} + +int64_t get_heap_start(void) { return tag_int_val(0); } +int64_t get_heap_final(void) { return tag_int_val(0); } + +#endif /* ENABLE_GC */ + +typedef struct { + void *code; + int64_t arity; + int64_t received; + void *args[]; +} closure; + +#if defined(EML_LLVM) + +extern void *llvm_call_indirect(void *fn, void **args, int64_t n); + +static void *call_closure_full(closure *c, void **args) { + return llvm_call_indirect(c->code, args, c->arity); +} + +#else /* EML_RISCV */ + +#define RISCV_REG_ARGS 8 + +static void *call_closure_full(closure *c, void **args) { + int64_t arity = c->arity; + int64_t args_in_stack = (arity > RISCV_REG_ARGS) ? (arity - RISCV_REG_ARGS) : 0; + size_t storage_for_stack_args = (size_t)args_in_stack * sizeof(void *); + void **stack_args = (args_in_stack > 0) ? args + RISCV_REG_ARGS : NULL; + void *result; + + asm volatile( + "mv t0, %[storage_for_stack_args]\n" + "sub sp, sp, t0\n" + "beqz %[args_in_stack], .Lend_stack_push\n" + "mv t1, sp\n" + "mv t2, %[stack_args]\n" + "mv t3, %[args_in_stack]\n" + "li t4, 0\n" + ".Lloop_stack_push:\n" + "beq t4, t3, .Lend_stack_push\n" + "slli t5, t4, 3\n" + "add t6, t2, t5\n" + "ld t0, 0(t6)\n" + "sd t0, 0(t1)\n" + "addi t1, t1, 8\n" + "addi t4, t4, 1\n" + "j .Lloop_stack_push\n" + ".Lend_stack_push:\n" + "mv a0, %[a0]\n" + "mv a1, %[a1]\n" + "mv a2, %[a2]\n" + "mv a3, %[a3]\n" + "mv a4, %[a4]\n" + "mv a5, %[a5]\n" + "mv a6, %[a6]\n" + "mv a7, %[a7]\n" + "mv t6, %[fn]\n" + "jalr ra, t6, 0\n" + "mv t0, %[storage_for_stack_args]\n" + "add sp, sp, t0\n" + "mv %[result], a0\n" + : [result] "=r"(result) + : [fn] "r"(c->code), + [a0] "r"(args[0]), [a1] "r"(args[1]), [a2] "r"(args[2]), [a3] "r"(args[3]), + [a4] "r"(args[4]), [a5] "r"(args[5]), [a6] "r"(args[6]), [a7] "r"(args[7]), + [stack_args] "r"(stack_args), [args_in_stack] "r"(args_in_stack), + [storage_for_stack_args] "r"(storage_for_stack_args) + : "t0", "t1", "t2", "t3", "t4", "t5", "t6", + "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "memory"); + + return result; +} + +#endif /* EML_LLVM / EML_RISCV */ + +closure *alloc_closure(void *code, int64_t arity) { + size_t slots = (arity > 0) ? (size_t)arity : 1; + size_t sz = sizeof(closure) + slots * sizeof(void *); + closure *c = (closure *)eml_alloc(sz, TAG_CLOSURE); + c->code = code; + c->arity = arity; + c->received = 0; + memset(c->args, 0, slots * sizeof(void *)); + return c; +} + +static closure *copy_closure(const closure *src) { + size_t slots = (src->arity > 0) ? (size_t)src->arity : 1; + size_t sz = sizeof(closure) + slots * sizeof(void *); + closure *dst = (closure *)eml_alloc(sz, TAG_CLOSURE); + memcpy(dst, src, sz); + return dst; +} + +void *eml_applyN(closure *c, int64_t argc, void **argv) { + int64_t all = c->received + argc; + if (all == c->arity) { + void **all_args = (void **)eml_alloc((size_t)c->arity * sizeof(void *), TAG_CLOSURE); + for (int64_t i = 0; i < c->received; i++) all_args[i] = c->args[i]; + for (int64_t i = 0; i < argc; i++) all_args[c->received + i] = argv[i]; + void *result = call_closure_full(c, all_args); +#if !defined(ENABLE_GC) + free(all_args); +#endif + return result; + } + closure *partial = copy_closure(c); + for (int64_t i = 0; i < argc; i++) + partial->args[partial->received++] = argv[i]; + return partial; +} + +typedef struct { + int64_t arity; + void *args[]; +} tuple; + +tuple *create_tuple(int64_t argc, void **args) { + size_t words = 1 + (size_t)argc; + tuple *t = (tuple *)eml_alloc(words * sizeof(uint64_t), TAG_TUPLE); + t->arity = argc; + for (size_t i = 0; i < (size_t)argc; i++) t->args[i] = args[i]; + return t; +} + +void *field(tuple *t, long n) { return t->args[n >> 1]; } + +#if defined(EML_LLVM) && !defined(EML_LLVM_STANDALONE) +/* When linking with RTS that provides main (e.g. custom runner), call eml_main. */ +extern void eml_main(void); + +int main(void) { + eml_main(); + return 0; +} +#endif /* EML_LLVM && !EML_LLVM_STANDALONE */ + diff --git a/EML/lib/utils/monads.ml b/EML/lib/utils/monads.ml new file mode 100644 index 00000000..b9d534c8 --- /dev/null +++ b/EML/lib/utils/monads.ml @@ -0,0 +1,26 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Base + +module ANFMonad = struct + type 'a t = int -> int * ('a, string) Result.t + + let return x = fun counter -> counter, Ok x + + let ( >>= ) m f = + fun counter -> + match m counter with + | counter', Ok a -> f a counter' + | counter', Error e -> counter', Error e + ;; + + let fresh : string t = fun counter -> counter + 1, Ok ("anf_t" ^ Int.to_string counter) + let run m = m 0 |> snd + let fail msg = fun counter -> counter, Error msg + + module Syntax = struct + let ( let* ) = ( >>= ) + end +end diff --git a/EML/lib/utils/monads.mli b/EML/lib/utils/monads.mli new file mode 100644 index 00000000..b385e75d --- /dev/null +++ b/EML/lib/utils/monads.mli @@ -0,0 +1,19 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Base + +module ANFMonad : sig + type 'a t = int -> int * ('a, string) Result.t + + val return : 'a -> 'a t + val ( >>= ) : 'a t -> ('a -> 'b t) -> 'b t + val fresh : string t + val run : 'a t -> ('a, string) Result.t + val fail : string -> 'a t + + module Syntax : sig + val ( let* ) : 'a t -> ('a -> 'b t) -> 'b t + end +end diff --git a/EML/lib/utils/pretty_printer.ml b/EML/lib/utils/pretty_printer.ml new file mode 100644 index 00000000..1c46b19a --- /dev/null +++ b/EML/lib/utils/pretty_printer.ml @@ -0,0 +1,170 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Frontend.Ast + +let string_of_bin_op = function + | Plus -> "+" + | Minus -> "-" + | Multiply -> "*" + | Division -> "/" + | And -> "&&" + | Or -> "||" + | GreatestEqual -> ">=" + | LowestEqual -> "<=" + | GreaterThan -> ">" + | LowerThan -> "<" + | Equal -> "=" + | NotEqual -> "<>" +;; + +let string_of_unary_op = function + | Negative -> "-" + | Not -> "not" +;; + +let pp_bin_op ppf op = Format.fprintf ppf "%s" (string_of_bin_op op) +let pp_unary_op ppf op = Format.fprintf ppf "%s" (string_of_unary_op op) + +let pp_const ppf = function + | ConstInt i -> Format.fprintf ppf "%d" i + | ConstBool b -> Format.fprintf ppf "%b" b + | ConstString s -> Format.fprintf ppf "%S" s + | ConstChar c -> Format.fprintf ppf "'%c'" c +;; + +let rec pp_pattern ppf = function + | PatVariable v -> Format.fprintf ppf "%s" v + | PatConst c -> pp_const ppf c + | PatAny -> Format.fprintf ppf "_" + | PatUnit -> Format.fprintf ppf "()" + | PatType (p, t) -> Format.fprintf ppf "(%a : %a)" pp_pattern p pp_ty t + | PatTuple (p1, p2, rest) -> + Format.fprintf + ppf + "(%a)" + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") pp_pattern) + (p1 :: p2 :: rest) + | PatList ps -> + Format.fprintf + ppf + "[%a]" + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf "; ") pp_pattern) + ps + | PatOption None -> Format.fprintf ppf "None" + | PatOption (Some p) -> Format.fprintf ppf "Some (%a)" pp_pattern p + | PatConstruct ("[]", None) -> Format.fprintf ppf "[]" + | PatConstruct ("::", Some (PatTuple (h, t, []))) -> + Format.fprintf ppf "%a::%a" pp_pattern h pp_pattern t + | PatConstruct (id, None) -> Format.fprintf ppf "%s" id + | PatConstruct (id, Some p) -> Format.fprintf ppf "%s (%a)" id pp_pattern p + +and pp_expr ppf = function + | ExpIdent v -> Format.fprintf ppf "%s" v + | ExpConst c -> pp_const ppf c + | ExpBranch (c, t, None) -> Format.fprintf ppf "if %a then %a" pp_expr c pp_expr t + | ExpBranch (c, t, Some e) -> + Format.fprintf ppf "if %a then %a else %a" pp_expr c pp_expr t pp_expr e + | ExpBinOper (op, l, r) -> + Format.fprintf ppf "(%a %a %a)" pp_expr l pp_bin_op op pp_expr r + | ExpUnarOper (op, e) -> Format.fprintf ppf "(%a %a)" pp_unary_op op pp_expr e + | ExpTuple (e1, e2, rest) -> + Format.fprintf + ppf + "(%a)" + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") pp_expr) + (e1 :: e2 :: rest) + | ExpList es -> + Format.fprintf + ppf + "[%a]" + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf "; ") pp_expr) + es + | ExpLambda (p, ps, body) -> + Format.fprintf + ppf + "fun %a -> %a" + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf " ") pp_pattern) + (p :: ps) + pp_expr + body + | ExpTypeAnnotation (e, t) -> Format.fprintf ppf "(%a : %a)" pp_expr e pp_ty t + | ExpLet (is_rec, bind, more, body) -> + let kw = + match is_rec with + | Rec -> "let rec" + | NonRec -> "let" + in + Format.fprintf ppf "%s %a in %a" kw pp_binds (bind, more) pp_expr body + | ExpApply _ as e -> + let f, args = flatten_apply e in + Format.fprintf + ppf + "%a %a" + pp_atomic_expr + f + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf " ") pp_atomic_expr) + args + | ExpOption None -> Format.fprintf ppf "None" + | ExpOption (Some e) -> Format.fprintf ppf "Some (%a)" pp_expr e + | ExpFunction (first, more) -> + Format.fprintf + ppf + "function %a" + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf " ") pp_case) + (first :: more) + | ExpMatch (e, first, more) -> + Format.fprintf + ppf + "match %a with %a" + pp_expr + e + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf " ") pp_case) + (first :: more) + | ExpConstruct ("()", None) -> Format.fprintf ppf "()" + | ExpConstruct ("[]", None) -> Format.fprintf ppf "[]" + | ExpConstruct ("::", Some (ExpTuple (h, t, []))) -> + Format.fprintf ppf "%a::%a" pp_expr h pp_expr t + | ExpConstruct (id, None) -> Format.fprintf ppf "%s" id + | ExpConstruct (id, Some e) -> Format.fprintf ppf "%s (%a)" id pp_expr e + +and pp_atomic_expr ppf = function + | (ExpIdent _ | ExpConst _ | ExpOption None | ExpConstruct (_, None)) as e -> + pp_expr ppf e + | e -> Format.fprintf ppf "(%a)" pp_expr e + +and flatten_apply e = + let rec go f args = + match f with + | ExpApply (f', a) -> go f' (a :: args) + | _ -> f, args + in + go e [] + +and pp_case ppf (p, e) = Format.fprintf ppf "| %a -> %a" pp_pattern p pp_expr e +and pp_bind ppf (p, e) = Format.fprintf ppf "%a = %a" pp_pattern p pp_expr e + +and pp_binds ppf (first, more) = + Format.fprintf ppf "%a" pp_bind first; + List.iter (fun b -> Format.fprintf ppf "@ and %a" pp_bind b) more +;; + +let pp_structure_item ppf = function + | SEval e -> Format.fprintf ppf "%a;;" pp_expr e + | SValue (is_rec, bind, more) -> + let kw = + match is_rec with + | Rec -> "let rec" + | NonRec -> "let" + in + Format.fprintf ppf "%s %a;;" kw pp_binds (bind, more) +;; + +let pp_structure ppf (lst : structure list) = + Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.fprintf ppf "@\n") + pp_structure_item + ppf + lst +;; diff --git a/EML/lib/utils/pretty_printer.mli b/EML/lib/utils/pretty_printer.mli new file mode 100644 index 00000000..3eb6bfae --- /dev/null +++ b/EML/lib/utils/pretty_printer.mli @@ -0,0 +1,15 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Frontend.Ast + +val string_of_bin_op : bin_oper -> string +val string_of_unary_op : unar_oper -> string +val pp_bin_op : Format.formatter -> bin_oper -> unit +val pp_unary_op : Format.formatter -> unar_oper -> unit +val pp_const : Format.formatter -> const -> unit +val pp_pattern : Format.formatter -> pattern -> unit +val pp_expr : Format.formatter -> expr -> unit +val pp_structure_item : Format.formatter -> structure -> unit +val pp_structure : Format.formatter -> structure list -> unit diff --git a/EML/out.ll b/EML/out.ll new file mode 100644 index 00000000..715a3d8d --- /dev/null +++ b/EML/out.ll @@ -0,0 +1,11 @@ +; ModuleID = 'main' +source_filename = "main" +target triple = "x86_64-pc-linux-gnu" + +declare void @print_int(i64) + +define i64 @main() { +entry: + call void @print_int(i64 70) + ret i64 0 +} diff --git a/EML/tests/Makefile b/EML/tests/Makefile new file mode 100644 index 00000000..43246f72 --- /dev/null +++ b/EML/tests/Makefile @@ -0,0 +1,61 @@ +SHELL := /bin/bash + +EML_ROOT ?= $(CURDIR)/.. +EML_BIN ?= $(if $(findstring _build,$(EML_ROOT)),$(EML_ROOT)/bin/EML.exe,$(EML_ROOT)/_build/default/bin/EML.exe) +RUNTIME_A := $(if $(wildcard ../lib/runtime/rv64_runtime.a),../lib/runtime/rv64_runtime.a,../_build/default/lib/runtime/rv64_runtime.a) + +ARGS := $(filter-out compile_riscv infer compile_llvm,$(MAKECMDGOALS)) +INPUT := $(firstword $(ARGS)) +EXTRA_GOALS := $(ARGS) +GC_FLAG := $(if $(filter 1 true yes on,$(GC)),-gc,) +CFLAGS_GC := $(if $(filter 1 true yes on,$(GC)),-DENABLE_GC,) + +.PHONY: compile_riscv infer compile_llvm $(EXTRA_GOALS) + +compile_riscv: + @set -euo pipefail; \ + FILE="$(INPUT)"; \ + [[ -n "$$FILE" ]] || { echo "Usage: make compile_riscv [GC=1] " >&2; exit 1; }; \ + TMP_SRC="$$(mktemp -d)"; TMP_BIN="$$(mktemp -d)"; \ + trap 'rm -rf "$$TMP_SRC" "$$TMP_BIN"' EXIT; \ + if [[ "$$FILE" == *.s ]]; then \ + ASM_FILE="$$FILE"; \ + else \ + [[ -f "$$FILE" ]] || { echo "Source file not found: $$FILE" >&2; exit 1; }; \ + SRC="$$(realpath "$$FILE")"; \ + ASM_FILE="$$TMP_SRC/prog.s"; \ + "$(EML_BIN)" $(GC_FLAG) -fromfile "$$SRC" -o "$$ASM_FILE"; \ + fi; \ + OBJ_FILE="$$TMP_BIN/prog.o"; EXE_FILE="$$TMP_BIN/prog.exe"; \ + riscv64-linux-gnu-as -march=rv64gc "$$ASM_FILE" -o "$$OBJ_FILE"; \ + riscv64-linux-gnu-gcc "$$OBJ_FILE" "$(RUNTIME_A)" -o "$$EXE_FILE"; \ + qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu max "$$EXE_FILE" + +infer: + @set -euo pipefail; \ + FILE="$(INPUT)"; \ + [[ -n "$$FILE" ]] || { echo "Usage: make infer [GC=1] " >&2; exit 1; }; \ + [[ "$$FILE" != *.s ]] || { echo "Infer mode expects .ml source, got assembly: $$FILE" >&2; exit 1; }; \ + [[ -f "$$FILE" ]] || { echo "Source file not found: $$FILE" >&2; exit 1; }; \ + SRC="$$(realpath "$$FILE")"; \ + "$(EML_BIN)" $(GC_FLAG) -infer -fromfile "$$SRC" + +compile_llvm: + @set -euo pipefail; \ + RT_DIR=""; d="$$(pwd)"; while [ -n "$$d" ] && [ "$$d" != "/" ]; do [ -f "$$d/lib/runtime/runtime.c" ] && RT_DIR="$$(realpath "$$d/lib/runtime")" && break; d="$$(dirname "$$d")"; done; \ + [[ -n "$$RT_DIR" ]] || { echo "runtime not found (no lib/runtime/runtime.c in $$(pwd) or parents)" >&2; exit 1; }; \ + FILE="$(INPUT)"; \ + [[ -n "$$FILE" ]] || { echo "Usage: make compile_llvm [GC=1] " >&2; exit 1; }; \ + [[ -f "$$FILE" ]] || { echo "Source file not found: $$FILE" >&2; exit 1; }; \ + SRC="$$(realpath "$$FILE")"; \ + BASENAME="$$(basename "$$FILE" .ml)"; \ + TMP="$$(mktemp -d)"; trap 'rm -rf "$$TMP"' EXIT; \ + "$(EML_BIN)" $(GC_FLAG) -backend llvm -fromfile "$$SRC" -o "$$TMP/prog.ll"; \ + clang -O0 -Wno-override-module $(CFLAGS_GC) -DEML_LLVM -DEML_LLVM_STANDALONE -c "$$RT_DIR/runtime.c" -o "$$TMP/runtime.o"; \ + clang -c "$$RT_DIR/llvm_call.S" -o "$$TMP/llvm_call.o"; \ + clang -O0 -Wno-override-module "$$TMP/prog.ll" "$$TMP/runtime.o" "$$TMP/llvm_call.o" -o "$$TMP/prog.exe"; \ + [ -n "$(CFLAGS_GC)" ] && [ "$$BASENAME" = "010fibcps_ll" ] && export EML_HEAP_SIZE=1600; \ + "$$TMP/prog.exe" || true + +$(EXTRA_GOALS): + @: diff --git a/EML/tests/additional_tests/mangling_test.ml b/EML/tests/additional_tests/mangling_test.ml new file mode 100644 index 00000000..2e68a122 --- /dev/null +++ b/EML/tests/additional_tests/mangling_test.ml @@ -0,0 +1,6 @@ +let rec field n = if n <= 1 then 1 else n * field (n - 1) + +let main = + let () = print_int (field 4) in + 0 +;; diff --git a/EML/tests/anf_tests.ml b/EML/tests/anf_tests.ml new file mode 100644 index 00000000..f0caba99 --- /dev/null +++ b/EML/tests/anf_tests.ml @@ -0,0 +1,301 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open EML_lib.Frontend.Parser +open EML_lib.Middleend.Anf +open EML_lib.Middleend.Anf_pp +open EML_lib.Middleend.Runner +open EML_lib.Middleend.Inferencer + +let parse_and_anf input = + match parse input with + | Ok ast -> + (match anf_program ast with + | Ok anf_ast -> Printf.printf "%s\n" (show_anf_program anf_ast) + | Error e -> Printf.printf "ANF error: %s\n" e) + | Error e -> Printf.printf "Parsing error: %s\n" e +;; + +let parse_and_anf_pp input = + match parse input with + | Ok ast -> + (match anf_program ast with + | Ok anf_ast -> Printf.printf "%s\n" (anf_to_string anf_ast) + | Error e -> Printf.printf "ANF error: %s\n" e) + | Error e -> Printf.printf "Parsing error: %s\n" e +;; + +let anf_roundtrip_typecheck ~env program_str : (unit, string) Result.t = + let ( >>= ) = Result.bind in + parse program_str + |> Result.map_error (fun s -> "Parse error: " ^ s) + >>= fun ast -> + run ast env + |> Result.map_error (fun e -> + Format.asprintf "Middleend: %a" EML_lib.Middleend.Runner.pp_error e) + >>= fun (anf_ast, _env_after) -> + let printed = anf_to_string anf_ast in + parse printed + |> Result.map_error (fun s -> "ANF round-trip parse error: " ^ s) + >>= fun ast2 -> + ResultMonad.run (infer_structure env ast2) + |> Result.map_error (fun e -> + Format.asprintf + "ANF round-trip typecheck failed: %a" + EML_lib.Middleend.Inferencer.pp_error + e) + |> Result.map (fun _ -> ()) +;; + +let%expect_test "001.ml" = + parse_and_anf "let recfac n = if n<=1 then 1 else n * fac (n-1)"; + [%expect + {| +[(AnfValue (NonRec, + ("recfac", 1, + (AnfExpr + (ComplexLambda ([(PatVariable "n")], + (AnfLet (NonRec, "anf_t0", + (ComplexBinOper (LowestEqual, (ImmediateVar "n"), + (ImmediateConst (ConstInt 1)))), + (AnfExpr + (ComplexBranch ((ImmediateVar "anf_t0"), + (AnfExpr (ComplexImmediate (ImmediateConst (ConstInt 1)))), + (AnfLet (NonRec, "anf_t1", + (ComplexBinOper (Minus, (ImmediateVar "n"), + (ImmediateConst (ConstInt 1)))), + (AnfLet (NonRec, "anf_t2", + (ComplexApp ((ImmediateVar "fac"), + (ImmediateVar "anf_t1"), [])), + (AnfExpr + (ComplexBinOper (Multiply, (ImmediateVar "n"), + (ImmediateVar "anf_t2")))) + )) + )) + ))) + )) + )))), + [])) + ]|}] +;; + +let%expect_test "003occurs.ml" = + parse_and_anf "let fix f = (fun x -> f (fun f -> x x f)) (fun x -> f (fun f -> x x f))"; + [%expect + {| +[(AnfValue (NonRec, + ("fix", 1, + (AnfExpr + (ComplexLambda ([(PatVariable "f")], + (AnfLet (NonRec, "anf_t3", + (ComplexLambda ([(PatVariable "x")], + (AnfLet (NonRec, "anf_t1", + (ComplexLambda ([(PatVariable "f")], + (AnfExpr + (ComplexApp ((ImmediateVar "x"), + (ImmediateVar "x"), [(ImmediateVar "f")]))) + )), + (AnfExpr + (ComplexApp ((ImmediateVar "f"), + (ImmediateVar "anf_t1"), []))) + )) + )), + (AnfLet (NonRec, "anf_t7", + (ComplexLambda ([(PatVariable "x")], + (AnfLet (NonRec, "anf_t5", + (ComplexLambda ([(PatVariable "f")], + (AnfExpr + (ComplexApp ((ImmediateVar "x"), + (ImmediateVar "x"), [(ImmediateVar "f")]))) + )), + (AnfExpr + (ComplexApp ((ImmediateVar "f"), + (ImmediateVar "anf_t5"), []))) + )) + )), + (AnfExpr + (ComplexApp ((ImmediateVar "anf_t3"), + (ImmediateVar "anf_t7"), []))) + )) + )) + )))), + [])) + ]|}] +;; + +let%expect_test "004let_poly.ml" = + parse_and_anf "let temp =\n (fun f -> (f 1, f true)) (fun x -> x)"; + [%expect + {| +[(AnfValue (NonRec, + ("temp", 0, + (AnfLet (NonRec, "anf_t3", + (ComplexLambda ([(PatVariable "f")], + (AnfLet (NonRec, "anf_t0", + (ComplexApp ((ImmediateVar "f"), (ImmediateConst (ConstInt 1)), + [])), + (AnfLet (NonRec, "anf_t1", + (ComplexApp ((ImmediateVar "f"), + (ImmediateConst (ConstBool true)), [])), + (AnfExpr + (ComplexTuple ((ImmediateVar "anf_t0"), + (ImmediateVar "anf_t1"), []))) + )) + )) + )), + (AnfLet (NonRec, "anf_t4", + (ComplexLambda ([(PatVariable "x")], + (AnfExpr (ComplexImmediate (ImmediateVar "x"))))), + (AnfExpr + (ComplexApp ((ImmediateVar "anf_t3"), (ImmediateVar "anf_t4"), + []))) + )) + ))), + [])) + ]|}] +;; + +let%expect_test "002if.ml" = + parse_and_anf "let main = if true then 1 else false"; + [%expect + {| + [(AnfValue (NonRec, + ("main", 0, + (AnfExpr + (ComplexBranch ((ImmediateConst (ConstBool true)), + (AnfExpr (ComplexImmediate (ImmediateConst (ConstInt 1)))), + (AnfExpr (ComplexImmediate (ImmediateConst (ConstBool false)))))))), + [])) + ]|}] +;; + +let%expect_test "pretty_printer_test1" = + parse_and_anf_pp + "let rec fac n = if n <= 1 then 1 else n * fac (n - 1)\n let main = fac 4"; + [%expect + {| + let rec fac = fun n -> let anf_t0 = (n <= 1) in + if anf_t0 then 1 else let anf_t1 = (n - 1) in let anf_t2 = fac anf_t1 in + (n * anf_t2) + + let main = fac 4 |}] +;; + +let%expect_test "pretty_printer_test2" = + parse_and_anf_pp + "let rec fibo = fun n -> if n < 1 then 1 else fibo (n-1) + fibo (n-2)\n\ + \ let main = fibo 10"; + [%expect + {| + let rec fibo = fun n -> let anf_t0 = (n < 1) in + if anf_t0 then 1 else let anf_t1 = (n - 1) in let anf_t2 = fibo anf_t1 in + let anf_t3 = (n - 2) in let anf_t4 = fibo anf_t3 in + (anf_t2 + anf_t4) + + let main = fibo 10|}] +;; + +let%expect_test "anf_match_list_lowering_nil_cons_order" = + parse_and_anf + {| let rec h xs = + match xs with + | [] -> 0 + | hd::tl -> hd |}; + [%expect + {| +[(AnfValue (Rec, + ("h", 1, + (AnfExpr + (ComplexLambda ([(PatVariable "xs")], + (AnfLet (NonRec, "anf_t0", (ComplexImmediate (ImmediateVar "xs")), + (AnfLet (NonRec, "anf_t1", + (ComplexBinOper (Equal, (ImmediateVar "anf_t0"), + (ImmediateConst (ConstInt 0)))), + (AnfExpr + (ComplexBranch ((ImmediateVar "anf_t1"), + (AnfExpr + (ComplexImmediate (ImmediateConst (ConstInt 0)))), + (AnfLet (NonRec, "hd", + (ComplexField ((ImmediateVar "anf_t0"), 0)), + (AnfLet (NonRec, "tl", + (ComplexField ((ImmediateVar "anf_t0"), 1)), + (AnfExpr (ComplexImmediate (ImmediateVar "hd"))) + )) + )) + ))) + )) + )) + )))), + [])) + ]|}] +;; + +let%expect_test "anf_match_list_lowering_cons_nil_order" = + parse_and_anf + {| let rec h xs = + match xs with + | hd::tl -> hd + | [] -> 0 |}; + [%expect + {| +[(AnfValue (Rec, + ("h", 1, + (AnfExpr + (ComplexLambda ([(PatVariable "xs")], + (AnfLet (NonRec, "anf_t0", (ComplexImmediate (ImmediateVar "xs")), + (AnfLet (NonRec, "anf_t1", + (ComplexBinOper (Equal, (ImmediateVar "anf_t0"), + (ImmediateConst (ConstInt 0)))), + (AnfExpr + (ComplexBranch ((ImmediateVar "anf_t1"), + (AnfExpr + (ComplexImmediate (ImmediateConst (ConstInt 0)))), + (AnfLet (NonRec, "hd", + (ComplexField ((ImmediateVar "anf_t0"), 0)), + (AnfLet (NonRec, "tl", + (ComplexField ((ImmediateVar "anf_t0"), 1)), + (AnfExpr (ComplexImmediate (ImmediateVar "hd"))) + )) + )) + ))) + )) + )) + )))), + [])) + ]|}] +;; + +let%expect_test "anf_roundtrip_types_fac" = + let env = TypeEnv.initial_env in + (match + anf_roundtrip_typecheck + ~env + "let rec fac n = if n <= 1 then 1 else n * fac (n - 1)\nlet main = fac 4" + with + | Ok () -> Printf.printf "OK: types preserved after ANF round-trip\n" + | Error e -> Printf.printf "FAIL: %s\n" e); + [%expect {| OK: types preserved after ANF round-trip |}] +;; + +let%expect_test "anf_roundtrip_types_fib" = + let env = TypeEnv.initial_env in + (match + anf_roundtrip_typecheck + ~env + "let rec fib n = if n < 2 then n else fib (n - 1) + fib (n - 2)\nlet main = fib 5" + with + | Ok () -> Printf.printf "OK: types preserved after ANF round-trip\n" + | Error e -> Printf.printf "FAIL: %s\n" e); + [%expect {| OK: types preserved after ANF round-trip |}] +;; + +let%expect_test "anf_roundtrip_types_partial" = + let env = TypeEnv.initial_env in + (match + anf_roundtrip_typecheck ~env "let add x y = x + y\nlet main = let f = add 1 in f 2" + with + | Ok () -> Printf.printf "OK: types preserved after ANF round-trip\n" + | Error e -> Printf.printf "FAIL: %s\n" e); + [%expect {| OK: types preserved after ANF round-trip |}] +;; diff --git a/EML/tests/anf_tests.mli b/EML/tests/anf_tests.mli new file mode 100644 index 00000000..2101d1ea --- /dev/null +++ b/EML/tests/anf_tests.mli @@ -0,0 +1,6 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val parse_and_anf : string -> unit +val parse_and_anf_pp : string -> unit diff --git a/EML/tests/cc_tests.ml b/EML/tests/cc_tests.ml new file mode 100644 index 00000000..b38efba8 --- /dev/null +++ b/EML/tests/cc_tests.ml @@ -0,0 +1,152 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open EML_lib +open Frontend +open Middleend +open Parser +open Utils.Pretty_printer + +let run str = + match parse str with + | Error error -> Format.printf "%s" error + | Ok ast -> + (match Cc.closure_conversion_result ast with + | Error e -> Format.eprintf "%a\n" Cc.pp_error e + | Ok lst -> Format.printf "%a\n" pp_structure lst) +;; + +let%expect_test "captured_lambda_in_nonrec_let" = + run + {| + let mk_adder a = + let add b = a + b in + add + ;; + |}; + [%expect {| let mk_adder = fun a -> let add = fun a b -> (a + b) in add a;; |}] +;; + +let%expect_test "top_level_nonrec_and_group" = + run + {| + let f x = + let id y = y + and addk z = x + z in + id 3 + addk 4 + ;; + |}; + [%expect + {| + let f = fun x -> let id = fun y -> y + and addk = fun x z -> (x + z) in (id 3 + addk x 4);; |}] +;; + +let%expect_test "recursive_and_with_external_capture" = + run + {| + let solve bound = + let rec go n = if n <= bound then true else stop (n - 1) + and stop n = if n > bound then false else go (n - 1) in + go 20 + ;; + |}; + [%expect + {| + let solve = fun bound -> let rec go = fun bound n -> if (n <= bound) then true else stop bound ((n - 1)) + and stop = fun bound n -> if (n > bound) then false else go bound ((n - 1)) in go bound 20;; |}] +;; + +let%expect_test "recursive_local_function_value_capture" = + run + {| + let run x = + let rec plus y = x + y + and call c = c + plus 7 in + plus 1 + call 2 + ;; + |}; + [%expect + {| + let run = fun x -> let rec plus = fun x y -> (x + y) + and call = fun x c -> (c + plus x 7) in (plus x 1 + call x 2);; |}] +;; + +let%expect_test "nested_levels_of_captures" = + run + {| + let outer x = + let middle y = + let inner z = + let deepest w = x + y + z + w in + deepest 1 + in + inner 2 + in + middle 3 + ;; + |}; + [%expect + {| let outer = fun x -> let middle = fun x y -> let inner = fun x y z -> let deepest = fun x y z w -> (((x + y) + z) + w) in deepest x y z 1 in inner x y 2 in middle x 3;; |}] +;; + +let%expect_test "if_with_lambda_in_both_branches" = + run + {| + let choose flag base alt = + if flag then (fun v -> base + v) else (fun v -> alt + v) + |}; + [%expect + {| let choose = fun flag base alt -> if flag then (fun base v -> (base + v)) base else (fun alt v -> (alt + v)) alt;; |}] +;; + +let%expect_test "match_with_option_lambda_capture" = + run + {| + let mapper x = + match x with + | Some y -> fun z -> y + z + | None -> fun z -> z + ;; + |}; + [%expect + {| let mapper = fun x -> match x with | Some (y) -> (fun y z -> (y + z)) y | None -> fun z -> z;; |}] +;; + +let%expect_test "sequence_and_tuple_pattern_capture" = + run + {| + let consume a b = + print_int a; + let use_pair (x, y) = a + b + x + y in + use_pair (3, 4) + ;; + |}; + [%expect + {| let consume = fun a b -> let () = print_int a in let use_pair = fun a b (x, y) -> (((a + b) + x) + y) in use_pair a b ((3, 4));; |}] +;; + +let%expect_test "list_and_option_expressions" = + run + {| + let build seed = + let f x = Some (seed + x) in + [f 1; f 2] + ;; + |}; + [%expect + {| let build = fun seed -> let f = fun seed x -> Some ((seed + x)) in f seed 1::f seed 2::[];; |}] +;; + +let%expect_test "type_annotation_inside_capture" = + run + {| + let annotated base = + let g x = ((base + x) : int) in + g 5 + ;; + |}; + [%expect + {| let annotated = fun base -> let g = fun base x -> ((base + x) : int) in g base 5;; |}] +;; diff --git a/EML/tests/cc_tests.mli b/EML/tests/cc_tests.mli new file mode 100644 index 00000000..4378f996 --- /dev/null +++ b/EML/tests/cc_tests.mli @@ -0,0 +1,5 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val run : string -> unit diff --git a/EML/tests/closure_tests.t b/EML/tests/closure_tests.t new file mode 100644 index 00000000..14ba42f4 --- /dev/null +++ b/EML/tests/closure_tests.t @@ -0,0 +1,120 @@ +Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev +SPDX-License-Identifier: LGPL-3.0-or-later + + + $ make compile_riscv GC=1 gc_tests/closure/01_add5_staged_partial_gc.ml + === GC Status === + Current allocated: 18 + Total allocated: 18 + Free space: 782 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 2 + ================= + === GC Status === + Current allocated: 27 + Total allocated: 27 + Free space: 773 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 3 + ================= + === GC Status === + Current allocated: 18 + Total allocated: 45 + Free space: 782 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 3 + ================= + === GC Status === + Current allocated: 27 + Total allocated: 81 + Free space: 773 + Heap size: 800 + Current bank index: 0 + GC collections: 2 + GC allocations: 4 + ================= + 15 + + $ make compile_riscv GC=1 gc_tests/closure/02_affine_live_dead_gc.ml + === GC Status === + Current allocated: 28 + Total allocated: 28 + Free space: 772 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 4 + ================= + === GC Status === + Current allocated: 14 + Total allocated: 42 + Free space: 786 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 4 + ================= + === GC Status === + Current allocated: 28 + Total allocated: 56 + Free space: 772 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 6 + ================= + === GC Status === + Current allocated: 21 + Total allocated: 77 + Free space: 779 + Heap size: 800 + Current bank index: 0 + GC collections: 2 + GC allocations: 6 + ================= + 17 + + $ make compile_riscv GC=1 gc_tests/closure/03_add10_staged_partial_gc.ml + === GC Status === + Current allocated: 28 + Total allocated: 28 + Free space: 772 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 2 + ================= + === GC Status === + Current allocated: 42 + Total allocated: 42 + Free space: 758 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 3 + ================= + === GC Status === + Current allocated: 28 + Total allocated: 70 + Free space: 772 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 3 + ================= + === GC Status === + Current allocated: 42 + Total allocated: 126 + Free space: 758 + Heap size: 800 + Current bank index: 0 + GC collections: 2 + GC allocations: 4 + ================= + 55 diff --git a/EML/tests/dune b/EML/tests/dune new file mode 100644 index 00000000..7bf49fbb --- /dev/null +++ b/EML/tests/dune @@ -0,0 +1,56 @@ +(library + (name tests) + (libraries EML_lib) + (modules :standard) + (preprocess + (pps ppx_deriving.show ppx_expect)) + (inline_tests) + (instrumentation + (backend bisect_ppx))) + +(cram + (applies_to riscv) + (deps + (file ../bin/EML.exe) + (file Makefile) + (file ../lib/runtime/rv64_runtime.a) + (source_tree additional_tests) + (source_tree gc_tests) + (source_tree many_tests))) + +(cram + (applies_to infer) + (deps + (file ../bin/EML.exe) + (file Makefile) + (file ../lib/runtime/rv64_runtime.a) + (source_tree gc_tests) + (source_tree many_tests))) + +(cram + (applies_to closure_tests) + (deps + (file ../bin/EML.exe) + (file Makefile) + (file ../lib/runtime/rv64_runtime.a) + (source_tree gc_tests) + (source_tree many_tests))) + +(cram + (applies_to tuple_tests) + (deps + (file ../bin/EML.exe) + (file Makefile) + (file ../lib/runtime/rv64_runtime.a) + (source_tree gc_tests) + (source_tree many_tests))) + +;; LLVM tests require clang to be installed (e.g. apt-get install clang). + +(cram + (applies_to llvm) + (deps + (file ../bin/EML.exe) + (file Makefile) + (source_tree additional_tests) + (source_tree many_tests))) diff --git a/EML/tests/gc_tests/closure/01_add5_staged_partial_gc.ml b/EML/tests/gc_tests/closure/01_add5_staged_partial_gc.ml new file mode 100644 index 00000000..e4de1f35 --- /dev/null +++ b/EML/tests/gc_tests/closure/01_add5_staged_partial_gc.ml @@ -0,0 +1,14 @@ +let add5 a b c d e = a + b + c + d + e + +let main = + let p1 = add5 1 2 in + let _ = print_gc_status () in + let p2 = p1 3 in + let _ = print_gc_status () in + let _ = collect () in + let _ = print_gc_status () in + let p3 = p2 4 in + let _ = collect () in + let _ = print_gc_status () in + print_int (p3 5) +;; diff --git a/EML/tests/gc_tests/closure/02_affine_live_dead_gc.ml b/EML/tests/gc_tests/closure/02_affine_live_dead_gc.ml new file mode 100644 index 00000000..67ac8224 --- /dev/null +++ b/EML/tests/gc_tests/closure/02_affine_live_dead_gc.ml @@ -0,0 +1,14 @@ +let affine a b x = (a * x) + b + +let main = + let live = affine 2 7 in + let _dead = affine 100 1 in + let _ = print_gc_status () in + let _ = collect () in + let _ = print_gc_status () in + let _dead2 = affine 50 3 in + let _ = print_gc_status () in + let _ = collect () in + let _ = print_gc_status () in + print_int (live 5) +;; diff --git a/EML/tests/gc_tests/closure/03_add10_staged_partial_gc.ml b/EML/tests/gc_tests/closure/03_add10_staged_partial_gc.ml new file mode 100644 index 00000000..82a8f155 --- /dev/null +++ b/EML/tests/gc_tests/closure/03_add10_staged_partial_gc.ml @@ -0,0 +1,15 @@ +let add10 a b c d e f g h i j = a + b + c + d + e + f + g + h + i + j + +let main = + let c1 = add10 1 2 3 in + let _ = print_gc_status () in + let c2 = c1 4 5 in + let _ = print_gc_status () in + let _ = collect () in + let _ = print_gc_status () in + let c3 = c2 6 7 in + let _ = collect () in + let _ = print_gc_status () in + let c4 = c3 8 9 in + print_int (c4 10) +;; diff --git a/EML/tests/gc_tests/tuple_tests/01adder.ml b/EML/tests/gc_tests/tuple_tests/01adder.ml new file mode 100644 index 00000000..d0d5a21e --- /dev/null +++ b/EML/tests/gc_tests/tuple_tests/01adder.ml @@ -0,0 +1,10 @@ +let inc x = x + 1 + +let main = + let t = 41, (2, inc) in + let n, (_, f) = t in + let _ = print_gc_status () in + let _ = print_int (f n) in + let _ = collect () in + print_gc_status () +;; diff --git a/EML/tests/gc_tests/tuple_tests/02nested.ml b/EML/tests/gc_tests/tuple_tests/02nested.ml new file mode 100644 index 00000000..b1f53ddd --- /dev/null +++ b/EML/tests/gc_tests/tuple_tests/02nested.ml @@ -0,0 +1,10 @@ +let mul2 x = x * 2 + +let main = + let t = 1, 2, 3, 4, (5, (6, mul2)), 7, 8, 9 in + let a, b, c, d, (e, (f, g)), h, i, j = t in + let _ = print_gc_status () in + let _ = print_int (g (a + b + c + d + e + f + h + i + j)) in + let _ = collect () in + print_gc_status () +;; diff --git a/EML/tests/gc_tests/tuple_tests/03args.ml b/EML/tests/gc_tests/tuple_tests/03args.ml new file mode 100644 index 00000000..7d83da83 --- /dev/null +++ b/EML/tests/gc_tests/tuple_tests/03args.ml @@ -0,0 +1,11 @@ +let add3 x = x + 3 + +let main = + let pack = 10, 20, 30, 40, 50, (60, (70, add3)), 80, 90, 100, 110, 120, 130, 140 in + let a, b, c, d, e, (f, (g, h)), i, j, k, l, m, n, o = pack in + let base = a + b + c + d + e + f + g + i + j + k + l + m + n + o in + let _ = print_gc_status () in + let _ = print_int (h base) in + let _ = collect () in + print_gc_status () +;; diff --git a/EML/tests/infer.t b/EML/tests/infer.t new file mode 100644 index 00000000..08e22066 --- /dev/null +++ b/EML/tests/infer.t @@ -0,0 +1,116 @@ +Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev +SPDX-License-Identifier: LGPL-3.0-or-later + + + $ make infer many_tests/typed/001fac.ml + val fac: int -> int + val main: int + + $ make infer many_tests/typed/002fac.ml + val fac_cps: int -> (int -> int) -> int + val main: int + + $ make infer many_tests/typed/003fib.ml + val fib: int -> int + val fib_acc: int -> int -> int -> int + val main: int + + $ make infer many_tests/typed/004manyargs.ml + val main: int + val test10: int -> int -> int -> int -> int -> int -> int -> int -> int -> int -> int + val test3: int -> int -> int -> int + val wrap: t0 -> t0 + + $ make infer many_tests/typed/005fix.ml + val fac: (int -> int) -> int -> int + val fix: ((int -> int) -> int -> int) -> int -> int + val main: int + + $ make infer many_tests/typed/006partial.ml + val foo: int -> int + val main: int + + $ make infer many_tests/typed/006partial2.ml + val foo: int -> int -> int -> int + val main: int + + $ make infer many_tests/typed/006partial3.ml + val foo: int -> int -> int -> unit + val main: int + + $ make infer many_tests/typed/007order.ml + val _start: unit -> unit -> int -> unit -> int -> int -> unit -> int -> int -> int + val main: unit + + $ make infer many_tests/typed/008ascription.ml + val addi: (t2 -> bool -> int) -> (t2 -> bool) -> t2 -> int + val main: int + + $ make infer many_tests/typed/009let_poly.ml + val temp: (int * bool) + + $ make infer many_tests/typed/010fac_anf.ml + val fac: int -> int + val main: int + + $ make infer many_tests/typed/010faccps_ll.ml + val fac_cps: int -> (int -> int) -> int + val fresh_1: int -> (int -> t4) -> int -> t4 + val id: t0 -> t0 + val main: int + + $ make infer many_tests/typed/010fibcps_ll.ml + val fib: int -> (int -> int) -> int + val fresh_1: int -> (int -> t10) -> (int -> (int -> t10) -> t13) -> int -> t13 + val fresh_2: int -> (int -> t4) -> int -> t4 + val id: t0 -> t0 + val main: int + + $ make infer many_tests/typed/011mapcps.ml + val iter: (int -> unit) -> int list -> unit + val main: unit + val map: (int -> int) -> int list -> (int list -> int list) -> int list + + $ make infer many_tests/typed/012faccps.ml + val fac: int -> (int -> int) -> int + val main: unit + + $ make infer many_tests/typed/012fibcps.ml + val fib: int -> (int -> int) -> int + val main: unit + + $ make infer many_tests/typed/013foldfoldr.ml + val fold_right: (int -> (int -> int) -> int -> int) -> (int -> int) -> int list -> int -> int + val foldl: (int -> int -> int) -> int -> int list -> int + val id: t0 -> t0 + val main: unit + + $ make infer many_tests/typed/015tuples.ml + val feven: (t29 * int -> t33) -> int -> int + val fix: ((((int -> int * int -> int) -> int -> int * (int -> int * int -> int) -> int -> int) -> (int -> int * int -> int)) -> ((int -> int * int -> int) -> int -> int * (int -> int * int -> int) -> int -> int) -> (int -> int * int -> int)) -> ((int -> int * int -> int) -> int -> int * (int -> int * int -> int) -> int -> int) -> (int -> int * int -> int) + val fixpoly: ((int -> int * int -> int) -> int -> int * (int -> int * int -> int) -> int -> int) -> (int -> int * int -> int) + val fodd: (int -> t40 * t37) -> int -> int + val main: int + val map: (t9 -> t11) -> (t9 * t9) -> (t10 * t11) + val meven: int -> int + val modd: int -> int + val tie: (int -> int * int -> int) + + $ make infer many_tests/do_not_type/001.ml 2>&1 | sed -n '1p' + Inferencer error: Unbound variable 'fac'. + + $ make infer many_tests/do_not_type/002if.ml 2>&1 | sed -n '1p' + Inferencer error: Failed to unify types: int and bool. + + $ make infer many_tests/do_not_type/003occurs.ml 2>&1 | sed -n '1p' + Inferencer error: Occurs check failed. Type variable 't1' occurs inside t1 -> t3. + + $ make infer many_tests/do_not_type/004let_poly.ml 2>&1 | sed -n '1p' + Inferencer error: Failed to unify types: int and bool. + + $ make infer many_tests/do_not_type/015tuples.ml 2>&1 | sed -n '1p' + Inferencer error: Left-hand side error: Only variables are allowed on the left-hand side of let rec. + + $ make infer many_tests/do_not_type/099.ml 2>&1 | sed -n '1p' + Inferencer error: Left-hand side error: Only variables are allowed on the left-hand side of let rec. + diff --git a/EML/tests/inferencer_tests.ml b/EML/tests/inferencer_tests.ml new file mode 100644 index 00000000..78369081 --- /dev/null +++ b/EML/tests/inferencer_tests.ml @@ -0,0 +1,442 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open EML_lib.Middleend.Inferencer +open EML_lib.Frontend.Ast +open EML_lib.Frontend.Parser + +let pretty_printer_parse_and_infer s = + match parse s with + | Ok parsed -> + (match run_infer parsed with + | Ok env -> + let filtered_env = + Base.Map.filter_keys env ~f:(fun key -> + not (List.mem key [ "print_int"; "print_endline"; "print_bool" ])) + in + Base.Map.iteri filtered_env ~f:(fun ~key ~data:(Scheme.Scheme (_, ty)) -> + Format.printf "val %s: %a\n" key pp_ty ty) + | Error e -> Format.printf "Infer error. %a\n" pp_error e) + | Error e -> Format.printf "Parsing error. %s\n" e +;; + +let pretty_printer_infer_simple_expression expr = + match infer_simple_expression expr with + | Ok ty -> Format.printf "%a\n" pp_ty ty + | Error e -> Format.printf "Infer error. %a\n" pp_error e +;; + +let%expect_test "test_factorial" = + pretty_printer_parse_and_infer + {| let rec fac n = + if n <= 1 + then 1 + else let n1 = n-1 in + let m = fac n1 in + n*m + +let main = fac 4 |}; + [%expect + {| + val fac: int -> int + val main: int|}] +;; + +let%expect_test "test_primitives_and_data" = + pretty_printer_parse_and_infer + {| let a = 1 + 2 +let b = true && false +let c = if true then 1 else 2 +let d = (1, true, 'a') +let e = [1; 2; 3] +let f = Some 1 +let g = None +|}; + [%expect + {| + val a: int + val b: bool + val c: int + val d: (int * bool * char) + val e: int list + val f: int option + val g: t1 option|}] +;; + +let%expect_test "test_match_and_recursion" = + pretty_printer_parse_and_infer + {| let rec len xs = + match xs with + | [] -> 0 + | _::tl -> 1 + len tl + +let main = len [1;2;3] |}; + [%expect + {| + val len: int list -> int + val main: int|}] +;; + +let%expect_test "test_mutual_recursion" = + pretty_printer_parse_and_infer + {| let rec even n = + if n = 0 then true else odd (n - 1) +and odd n = + if n = 0 then false else even (n - 1) + +let main = even 4 |}; + [%expect + {| + val even: int -> bool + val main: bool + val odd: int -> bool|}] +;; + +let%expect_test "test_annotations" = + pretty_printer_parse_and_infer + {| let id = ((fun x -> x) : int -> int) +let main = id 10 |}; + [%expect + {| + val id: int -> int + val main: int|}] +;; + +let%expect_test "test_rec_rhs_error" = + pretty_printer_parse_and_infer {| let rec x = 1 |}; + [%expect + {|Infer error. Right-hand side error: Right-hand side of let rec must be a lambda expression.|}] +;; + +let%expect_test "test_list_constructors_and_match" = + pretty_printer_parse_and_infer + {| let rec head_or_zero xs = + match xs with + | [] -> 0 + | h::tl -> h + +let x = 1 :: [] +let main = head_or_zero x |}; + [%expect + {| + val head_or_zero: int list -> int + val main: int + val x: int list|}] +;; + +let%expect_test "test_pattern_option_and_list" = + pretty_printer_parse_and_infer + {| let f = function + | Some (h::tl) -> h +|}; + [%expect {|val f: t2 list option -> t2|}] +;; + +let%expect_test "test_annotation_mismatch_error" = + pretty_printer_parse_and_infer {| let x = (1 : bool) |}; + [%expect {|Infer error. Failed to unify types: int and bool.|}] +;; + +let%expect_test "test_unexpected_function_error_branch" = + pretty_printer_parse_and_infer {| let x = not 1 |}; + [%expect {|Infer error. Failed to unify types: int and bool.|}] +;; + +let%expect_test "test_if_without_else_returns_unit_branch" = + pretty_printer_parse_and_infer {| let x = if true then 1 |}; + [%expect {|Infer error. Failed to unify types: int and unit.|}] +;; + +let%expect_test "test_unbound_var" = + pretty_printer_parse_and_infer "let f = x"; + [%expect {|Infer error. Unbound variable 'x'.|}] +;; + +let%expect_test "test_annotate" = + pretty_printer_parse_and_infer "let sum = fun (x : int) (y : int) -> x + y"; + [%expect {|val sum: int -> int -> int|}] +;; + +let%expect_test "test_annotate_fac" = + pretty_printer_parse_and_infer + "let rec fac = fun (n : int) (acc : int) -> if n < 2 then acc else fac (n-1) (acc * \ + n);;"; + [%expect {|val fac: int -> int -> int|}] +;; + +let%expect_test "test_program_1" = + pretty_printer_parse_and_infer + "let div = fun x y -> x / y \n\ + \ let sum = fun x y -> x + y\n\ + \ let res = fun x y z -> div x (sum y z)"; + [%expect + {| + val div: int -> int -> int + val res: int -> int -> int -> int + val sum: int -> int -> int|}] +;; + +let%expect_test "test_program_2" = + pretty_printer_parse_and_infer + "let square = fun x -> x * x\n\ + \ let result = square 10"; + [%expect + {| + val result: int + val square: int -> int|}] +;; + +let%expect_test "test_annotate_error" = + pretty_printer_parse_and_infer "let sum (x : int) (y : string) = x + y"; + [%expect {|Infer error. Failed to unify types: string and int.|}] +;; + +let%expect_test "test_unification_types" = + pretty_printer_parse_and_infer "fun x -> x + true"; + [%expect {|Infer error. Failed to unify types: bool and int.|}] +;; + +let%expect_test "test_option_type_error" = + pretty_printer_parse_and_infer + "let f x = Some (x + 1) in let g y = Some (y && true) in f = g"; + [%expect {|Infer error. Failed to unify types: bool and int.|}] +;; + +let%expect_test "test_polymorphic_identity" = + pretty_printer_parse_and_infer + {| let id x = x +let a = id 1 +let b = id true |}; + [%expect + {| + val a: int + val b: bool + val id: t0 -> t0|}] +;; + +let%expect_test "test_polymorphic_tuple_use" = + pretty_printer_parse_and_infer + {| let id x = x +let pair = (id 1, id true) |}; + [%expect + {| + val id: t0 -> t0 + val pair: (int * bool)|}] +;; + +let%expect_test "test_higher_order_function" = + pretty_printer_parse_and_infer + {| let apply f x = f x +let inc x = x + 1 +let main = apply inc 10 |}; + [%expect + {| + val apply: (t1 -> t2) -> t1 -> t2 + val inc: int -> int + val main: int|}] +;; + +let%expect_test "test_lambda_returning_lambda" = + pretty_printer_parse_and_infer + {| let add x = fun y -> x + y +let f = add 5 |}; + [%expect + {| + val add: int -> int -> int + val f: int -> int|}] +;; + +let%expect_test "test_partial_application" = + pretty_printer_parse_and_infer + {| let add x y = x + y +let inc = add 1 +let main = inc 10 |}; + [%expect + {| + val add: int -> int -> int + val inc: int -> int + val main: int|}] +;; + +let%expect_test "test_tuple_pattern" = + pretty_printer_parse_and_infer + {| let sum_pair (x, y) = x + y +let main = sum_pair (3, 4) |}; + [%expect + {| + val main: int + val sum_pair: (int * int) -> int|}] +;; + +let%expect_test "test_nested_let_scope" = + pretty_printer_parse_and_infer + {| let x = 10 +let f y = + let x = y + 1 in + x + y +let main = f 5 |}; + [%expect + {| + val f: int -> int + val main: int + val x: int|}] +;; + +let%expect_test "test_function_composition" = + pretty_printer_parse_and_infer + {| let compose f g x = f (g x) +let inc x = x + 1 +let double x = x * 2 +let main = compose inc double 10 |}; + [%expect + {| + val compose: (t3 -> t4) -> (t2 -> t3) -> t2 -> t4 + val double: int -> int + val inc: int -> int + val main: int|}] +;; + +let%expect_test "test_occurs_check_error" = + pretty_printer_parse_and_infer {| fun x -> x x |}; + [%expect + {|Infer error. Occurs check failed. Type variable 't0' occurs inside t0 -> t1.|}] +;; + +let%expect_test "test_list_polymorphism" = + pretty_printer_parse_and_infer + {| let singleton x = [x] +let a = singleton 1 +let b = singleton true |}; + [%expect + {| + val a: int list + val b: bool list + val singleton: t0 -> t0 list|}] +;; + +let%expect_test "test_nonrec_tuple_pattern_binding" = + pretty_printer_parse_and_infer + {| let (x, y) = (1, true) +let main = x |}; + [%expect + {| + val main: int + val x: int + val y: bool|}] +;; + +let%expect_test "test_match_option_none_some" = + pretty_printer_parse_and_infer + {| let unwrap_or_zero o = + match o with + | None -> 0 + | Some x -> x +let main = unwrap_or_zero None |}; + [%expect + {| + val main: int + val unwrap_or_zero: int option -> int|}] +;; + +let%expect_test "test_match_list_literal_pattern" = + pretty_printer_parse_and_infer + {| let sum2 xs = + match xs with + | [a; b] -> a + b + | _ -> 0 +let main = sum2 [1; 2] |}; + [%expect + {| + val main: int + val sum2: int list -> int|}] +;; + +let%expect_test "test_lambda_wildcard_and_unit_pattern" = + pretty_printer_parse_and_infer + {| let ignore_first _ y = y +let run () = ignore_first 1 42 +let main = run () |}; + [%expect + {| + val ignore_first: t0 -> t1 -> t1 + val main: int + val run: unit -> int|}] +;; + +let%expect_test "test_rec_lhs_not_variable_error" = + pretty_printer_parse_and_infer {| let rec Some x = Some 1 |}; + [%expect + {|Infer error. Left-hand side error: Only variables are allowed on the left-hand side of let rec.|}] +;; + +let%expect_test "test_expr_let_rec_in" = + pretty_printer_parse_and_infer + {| let main = + let rec fact n = if n = 0 then 1 else n * fact (n - 1) in + fact 4 |}; + [%expect + {| + val main: int|}] +;; + +let%expect_test "test_expr_let_rec_and_in" = + pretty_printer_infer_simple_expression + (ExpLet + ( Rec + , (PatVariable "f", ExpLambda (PatVariable "x", [], ExpIdent "x")) + , [ PatVariable "g", ExpLambda (PatVariable "y", [], ExpIdent "y") ] + , ExpApply (ExpIdent "g", ExpConst (ConstInt 1)) )); + [%expect {|int|}] +;; + +let%expect_test "test_string_const_and_const_pattern" = + pretty_printer_parse_and_infer + {| let is_hi s = + match s with + | "hi" -> true + | _ -> false +let main = is_hi "hello" |}; + [%expect + {| + val is_hi: string -> bool + val main: bool|}] +;; + +let%expect_test "test_ast_exp_list_empty" = + pretty_printer_infer_simple_expression (ExpList []); + [%expect {|t0 list|}] +;; + +let%expect_test "test_ast_exp_list_non_empty" = + pretty_printer_infer_simple_expression + (ExpList [ ExpConst (ConstInt 1); ExpConst (ConstInt 2) ]); + [%expect {|int list|}] +;; + +let%expect_test "test_ast_exp_option_none" = + pretty_printer_infer_simple_expression (ExpOption None); + [%expect {|t0 option|}] +;; + +let%expect_test "test_ast_exp_option_some" = + pretty_printer_infer_simple_expression (ExpOption (Some (ExpConst (ConstInt 1)))); + [%expect {|int option|}] +;; + +let%expect_test "test_ast_pattern_option_lambda" = + pretty_printer_infer_simple_expression + (ExpLambda (PatOption (Some (PatVariable "x")), [], ExpIdent "x")); + [%expect {|t0 option -> t0|}] +;; + +let%expect_test "test_ast_pattern_list_lambda" = + pretty_printer_infer_simple_expression + (ExpLambda (PatList [ PatVariable "x"; PatVariable "y" ], [], ExpIdent "x")); + [%expect {|t1 list -> t1|}] +;; + +let%expect_test "test_ast_pattern_unit_lambda" = + pretty_printer_infer_simple_expression (ExpLambda (PatUnit, [], ExpConst (ConstInt 1))); + [%expect {|unit -> int|}] +;; diff --git a/EML/tests/inferencer_tests.mli b/EML/tests/inferencer_tests.mli new file mode 100644 index 00000000..8ed1aec6 --- /dev/null +++ b/EML/tests/inferencer_tests.mli @@ -0,0 +1,6 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val pretty_printer_parse_and_infer : string -> unit +val pretty_printer_infer_simple_expression : EML_lib.Frontend.Ast.expr -> unit diff --git a/EML/tests/ll_tests.ml b/EML/tests/ll_tests.ml new file mode 100644 index 00000000..a60f76d8 --- /dev/null +++ b/EML/tests/ll_tests.ml @@ -0,0 +1,134 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open EML_lib +open Frontend +open Middleend +open Parser +open Utils.Pretty_printer + +let run str = + match parse str with + | Error error -> Format.printf "%s" error + | Ok ast -> + (match Ll.lambda_lifting_result ast with + | Error e -> Format.eprintf "%a\n" Ll.pp_error e + | Ok lst -> Format.printf "%a\n" pp_structure lst) +;; + +let%expect_test "nonrecursive_multiple_lets" = + run + {| + let foo x = + let bar x y = x + y + and baz = 2 in + bar x 2 + baz + ;; + |}; + [%expect + {| + let lifted_0 = fun x y -> (x + y);; + let foo = fun x -> let bar = lifted_0 + and baz = 2 in (bar x 2 + baz);; |}] +;; + +let%expect_test "nonrecursive_multiple_functions" = + run + {| + let foo x = + let bar y = y + and baz x c = x + c in + bar 2 + baz x 5 + ;; + |}; + [%expect + {| + let lifted_0 = fun y -> y;; + let lifted_1 = fun x c -> (x + c);; + let foo = fun x -> let bar = lifted_0 + and baz = lifted_1 in (bar 2 + baz x 5);; |}] +;; + +let%expect_test "mutual_recursion_in_let_rec_and" = + run + {| + let foo = + let limit = 10 in + let rec is_small limit n = + if n <= limit then true else is_big limit (n - 1) + and is_big limit n = + if n > limit then false else is_small limit (n - 1) in + is_small limit 13 + ;; + |}; + [%expect + {| + let lifted_2 = fun limit n -> if (n <= limit) then true else lifted_1 limit ((n - 1));; + let lifted_3 = fun limit n -> if (n > limit) then false else lifted_0 limit ((n - 1));; + let rec lifted_0 = lifted_2 + and lifted_1 = lifted_3;; + let foo = let limit = 10 in lifted_0 limit 13;; |}] +;; + +let%expect_test "recursive_local_bindings_use_renamed_functions" = + run + {| + let foo x = + let rec bar x y = x + y + and baz x c = c + bar x 5 in + bar x 5 + baz x 6 + ;; + |}; + [%expect + {| + let lifted_2 = fun x y -> (x + y);; + let lifted_3 = fun x c -> (c + lifted_0 x 5);; + let rec lifted_0 = lifted_2 + and lifted_1 = lifted_3;; + let foo = fun x -> (lifted_0 x 5 + lifted_1 x 6);; |}] +;; + +let%expect_test "sequence_with_local_lambda" = + run + {| + let g x = + print_int x; + (let h x y = x + y in h x 10) + ;; + |}; + [%expect + {| + let lifted_0 = fun x y -> (x + y);; + let g = fun x -> let () = print_int x in let h = lifted_0 in h x 10;; |}] +;; + +let%expect_test "tuple_pattern_lambda_lifting" = + run + {| + let pair_sum a b = + let f a b (x, y) = a + b + x + y in + f a b (1, 2) + ;; + |}; + [%expect + {| + let lifted_0 = fun a b (x, y) -> (((a + b) + x) + y);; + let pair_sum = fun a b -> let f = lifted_0 in f a b ((1, 2));; |}] +;; + +let%expect_test "match_with_option_and_inline_lambdas" = + run + {| + let f x = + match x with + | Some y -> (fun y z -> y - z) y + | None -> fun z -> z + 1 + ;; + |}; + [%expect + {| + let lifted_0 = fun y z -> (y - z);; + let lifted_1 = fun z -> (z + 1);; + let f = fun x -> match x with | Some (y) -> lifted_0 y | None -> lifted_1;; |}] +;; diff --git a/EML/tests/ll_tests.mli b/EML/tests/ll_tests.mli new file mode 100644 index 00000000..4378f996 --- /dev/null +++ b/EML/tests/ll_tests.mli @@ -0,0 +1,5 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val run : string -> unit diff --git a/EML/tests/llvm.t b/EML/tests/llvm.t new file mode 100644 index 00000000..6255a5a2 --- /dev/null +++ b/EML/tests/llvm.t @@ -0,0 +1,80 @@ +Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev +SPDX-License-Identifier: LGPL-3.0-or-later + + $ make compile_llvm many_tests/typed/001fac.ml + 24 + + $ make compile_llvm many_tests/typed/002fac.ml + 24 + + $ make compile_llvm many_tests/typed/003fib.ml + 3 + 3 + + $ make compile_llvm many_tests/typed/004manyargs.ml + 1111111111 + 1 + 10 + 100 + + $ make compile_llvm many_tests/typed/005fix.ml + 720 + + $ make compile_llvm many_tests/typed/006partial.ml + 1122 + + $ make compile_llvm many_tests/typed/006partial2.ml + 1 + 2 + 3 + 7 + + $ make compile_llvm many_tests/typed/006partial3.ml + 4 + 8 + 9 + + $ make compile_llvm many_tests/typed/007order.ml + 1 + 2 + 4 + -1 + 103 + -555555 + 10000 + + $ make compile_llvm many_tests/typed/008ascription.ml + 8 + + $ make compile_llvm many_tests/typed/009let_poly.ml + + $ make compile_llvm many_tests/typed/010fac_anf.ml + + $ make compile_llvm many_tests/typed/010faccps_ll.ml + 24 + + $ make compile_llvm many_tests/typed/010fibcps_ll.ml + 8 + + $ make compile_llvm many_tests/typed/011mapcps.ml + 2 + 3 + 4 + + $ make compile_llvm many_tests/typed/012faccps.ml + 720 + + $ make compile_llvm many_tests/typed/012fibcps.ml + 8 + + $ make compile_llvm many_tests/typed/013foldfoldr.ml + 6 + + $ make compile_llvm many_tests/typed/015tuples.ml + 1 + 1 + 1 + 1 + + $ make compile_llvm additional_tests/mangling_test.ml + 24 diff --git a/EML/tests/llvm_tests.ml b/EML/tests/llvm_tests.ml new file mode 100644 index 00000000..f88257c7 --- /dev/null +++ b/EML/tests/llvm_tests.ml @@ -0,0 +1,985 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open EML_lib +open Frontend.Parser +open Middleend.Anf + +let compile_llvm src : string = + match parse src with + | Error e -> "Parse error: " ^ e + | Ok ast -> + (match anf_program ast with + | Error e -> "ANF error: " ^ e + | Ok anf -> + let buf = Buffer.create 4096 in + let ppf = Format.formatter_of_buffer buf in + (match Backend.Llvm_ir.Runner.gen_program ~enable_gc:false ppf anf with + | Ok () -> + Format.pp_print_flush ppf (); + Buffer.contents buf + | Error e -> "Codegen error: " ^ e)) +;; + +let compile_llvm_show src = Format.printf "%s" (compile_llvm src) + +let%expect_test "unary_minus" = + compile_llvm_show "let x = -5"; + [%expect + {| +; ModuleID = 'EML' +source_filename = "EML" + +declare ptr @eml_applyN(ptr, i64, ptr) + +declare ptr @create_tuple(i64, ptr) + +declare ptr @alloc_closure(ptr, i64) + +declare ptr @field(ptr, i64) + +declare ptr @llvm_call_indirect(ptr, ptr, i64) + +declare void @print_int(i64) + +declare void @init_gc() + +declare void @destroy_gc() + +declare void @set_ptr_stack(ptr) + +declare i64 @get_heap_start() + +declare i64 @get_heap_final() + +declare ptr @collect() + +declare ptr @print_gc_status() + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + +define ptr @x() { +entry: + ret ptr inttoptr (i64 -9 to ptr) +} + +define ptr @main() { +entry: + ret ptr inttoptr (i64 1 to ptr) +} + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } + +|}] +;; + +let%expect_test "unary_not" = + compile_llvm_show "let x = not true"; + [%expect + {| +; ModuleID = 'EML' +source_filename = "EML" + +declare ptr @eml_applyN(ptr, i64, ptr) + +declare ptr @create_tuple(i64, ptr) + +declare ptr @alloc_closure(ptr, i64) + +declare ptr @field(ptr, i64) + +declare ptr @llvm_call_indirect(ptr, ptr, i64) + +declare void @print_int(i64) + +declare void @init_gc() + +declare void @destroy_gc() + +declare void @set_ptr_stack(ptr) + +declare i64 @get_heap_start() + +declare i64 @get_heap_final() + +declare ptr @collect() + +declare ptr @print_gc_status() + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + +define ptr @x() { +entry: + ret ptr inttoptr (i64 2 to ptr) +} + +define ptr @main() { +entry: + ret ptr inttoptr (i64 1 to ptr) +} + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } + +|}] +;; + +let%expect_test "unit_main" = + compile_llvm_show "let main = ()"; + [%expect + {| +; ModuleID = 'EML' +source_filename = "EML" + +declare ptr @eml_applyN(ptr, i64, ptr) + +declare ptr @create_tuple(i64, ptr) + +declare ptr @alloc_closure(ptr, i64) + +declare ptr @field(ptr, i64) + +declare ptr @llvm_call_indirect(ptr, ptr, i64) + +declare void @print_int(i64) + +declare void @init_gc() + +declare void @destroy_gc() + +declare void @set_ptr_stack(ptr) + +declare i64 @get_heap_start() + +declare i64 @get_heap_final() + +declare ptr @collect() + +declare ptr @print_gc_status() + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + +define ptr @main() { +entry: + ret ptr inttoptr (i64 1 to ptr) +} + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } + +|}] +;; + +let%expect_test "mul_only" = + compile_llvm_show "let main = 7 * 8"; + [%expect + {| +; ModuleID = 'EML' +source_filename = "EML" + +declare ptr @eml_applyN(ptr, i64, ptr) + +declare ptr @create_tuple(i64, ptr) + +declare ptr @alloc_closure(ptr, i64) + +declare ptr @field(ptr, i64) + +declare ptr @llvm_call_indirect(ptr, ptr, i64) + +declare void @print_int(i64) + +declare void @init_gc() + +declare void @destroy_gc() + +declare void @set_ptr_stack(ptr) + +declare i64 @get_heap_start() + +declare i64 @get_heap_final() + +declare ptr @collect() + +declare ptr @print_gc_status() + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + +define ptr @main() { +entry: + ret ptr inttoptr (i64 113 to ptr) +} + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } + +|}] +;; + +let%expect_test "double_fn" = + compile_llvm_show + {| + let double x = x + x + let main = double 21 + |}; + [%expect + {| + ; ModuleID = 'EML' + source_filename = "EML" + + declare ptr @eml_applyN(ptr, i64, ptr) + + declare ptr @create_tuple(i64, ptr) + + declare ptr @alloc_closure(ptr, i64) + + declare ptr @field(ptr, i64) + + declare ptr @llvm_call_indirect(ptr, ptr, i64) + + declare void @print_int(i64) + + declare void @init_gc() + + declare void @destroy_gc() + + declare void @set_ptr_stack(ptr) + + declare i64 @get_heap_start() + + declare i64 @get_heap_final() + + declare ptr @collect() + + declare ptr @print_gc_status() + + ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) + declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + + define ptr @double(ptr %x) { + entry: + %raw_int = ptrtoint ptr %x to i64 + %minus1 = sub i64 %raw_int, 1 + %untagged = sdiv i64 %minus1, 2 + %raw_int1 = ptrtoint ptr %x to i64 + %minus12 = sub i64 %raw_int1, 1 + %untagged3 = sdiv i64 %minus12, 2 + %add = add i64 %untagged, %untagged3 + %twice = mul i64 %add, 2 + %tagged = add i64 %twice, 1 + %result_int = inttoptr i64 %tagged to ptr + ret ptr %result_int + } + + define ptr @main() { + entry: + %direct_double = call ptr @double(ptr inttoptr (i64 43 to ptr)) + ret ptr %direct_double + } + + attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] +;; + +let%expect_test "abs_fn" = + compile_llvm_show + {| + let abs x = if x < 0 then -x else x + let main = abs 7 + |}; + [%expect + {| + ; ModuleID = 'EML' + source_filename = "EML" + + declare ptr @eml_applyN(ptr, i64, ptr) + + declare ptr @create_tuple(i64, ptr) + + declare ptr @alloc_closure(ptr, i64) + + declare ptr @field(ptr, i64) + + declare ptr @llvm_call_indirect(ptr, ptr, i64) + + declare void @print_int(i64) + + declare void @init_gc() + + declare void @destroy_gc() + + declare void @set_ptr_stack(ptr) + + declare i64 @get_heap_start() + + declare i64 @get_heap_final() + + declare ptr @collect() + + declare ptr @print_gc_status() + + ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) + declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + + define ptr @abs(ptr %x) { + entry: + %raw_int = ptrtoint ptr %x to i64 + %minus1 = sub i64 %raw_int, 1 + %untagged = sdiv i64 %minus1, 2 + %icmp_slt = icmp slt i64 %untagged, 0 + %tagged_bool = select i1 %icmp_slt, i64 4, i64 2 + %result_bool = inttoptr i64 %tagged_bool to ptr + %raw_bool = ptrtoint ptr %result_bool to i64 + %is_true = icmp eq i64 %raw_bool, 4 + br i1 %is_true, label %then_0, label %else_0 + + then_0: ; preds = %entry + %raw_int1 = ptrtoint ptr %x to i64 + %minus12 = sub i64 %raw_int1, 1 + %untagged3 = sdiv i64 %minus12, 2 + %neg = sub i64 0, %untagged3 + %twice = mul i64 %neg, 2 + %tagged = add i64 %twice, 1 + %result_int = inttoptr i64 %tagged to ptr + br label %merge_0 + + else_0: ; preds = %entry + br label %merge_0 + + merge_0: ; preds = %else_0, %then_0 + %ite_result = phi ptr [ %result_int, %then_0 ], [ %x, %else_0 ] + ret ptr %ite_result + } + + define ptr @main() { + entry: + %direct_abs = call ptr @abs(ptr inttoptr (i64 15 to ptr)) + ret ptr %direct_abs + } + + attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] +;; + +let%expect_test "nested_calls" = + compile_llvm_show + {| + let sq x = x * x + let sum_of_squares a b = sq a + sq b + let main = sum_of_squares 3 4 + |}; + [%expect + {| + ; ModuleID = 'EML' + source_filename = "EML" + + declare ptr @eml_applyN(ptr, i64, ptr) + + declare ptr @create_tuple(i64, ptr) + + declare ptr @alloc_closure(ptr, i64) + + declare ptr @field(ptr, i64) + + declare ptr @llvm_call_indirect(ptr, ptr, i64) + + declare void @print_int(i64) + + declare void @init_gc() + + declare void @destroy_gc() + + declare void @set_ptr_stack(ptr) + + declare i64 @get_heap_start() + + declare i64 @get_heap_final() + + declare ptr @collect() + + declare ptr @print_gc_status() + + ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) + declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + + define ptr @sq(ptr %x) { + entry: + %raw_int = ptrtoint ptr %x to i64 + %minus1 = sub i64 %raw_int, 1 + %untagged = sdiv i64 %minus1, 2 + %raw_int1 = ptrtoint ptr %x to i64 + %minus12 = sub i64 %raw_int1, 1 + %untagged3 = sdiv i64 %minus12, 2 + %mul = mul i64 %untagged, %untagged3 + %twice = mul i64 %mul, 2 + %tagged = add i64 %twice, 1 + %result_int = inttoptr i64 %tagged to ptr + ret ptr %result_int + } + + define ptr @sum_of_squares(ptr %a, ptr %b) { + entry: + %direct_sq = call ptr @sq(ptr %a) + %direct_sq1 = call ptr @sq(ptr %b) + %raw_int = ptrtoint ptr %direct_sq to i64 + %minus1 = sub i64 %raw_int, 1 + %untagged = sdiv i64 %minus1, 2 + %raw_int2 = ptrtoint ptr %direct_sq1 to i64 + %minus13 = sub i64 %raw_int2, 1 + %untagged4 = sdiv i64 %minus13, 2 + %add = add i64 %untagged, %untagged4 + %twice = mul i64 %add, 2 + %tagged = add i64 %twice, 1 + %result_int = inttoptr i64 %tagged to ptr + ret ptr %result_int + } + + define ptr @main() { + entry: + %direct_sum_of_squares = call ptr @sum_of_squares(ptr inttoptr (i64 7 to ptr), ptr inttoptr (i64 9 to ptr)) + ret ptr %direct_sum_of_squares + } + + attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] +;; + +let%expect_test "fibonacci" = + compile_llvm_show + {| + let rec fib n = if n < 2 then 1 else fib (n - 1) + fib (n - 2) + let main = fib 6 + |}; + [%expect + {| + ; ModuleID = 'EML' + source_filename = "EML" + + declare ptr @eml_applyN(ptr, i64, ptr) + + declare ptr @create_tuple(i64, ptr) + + declare ptr @alloc_closure(ptr, i64) + + declare ptr @field(ptr, i64) + + declare ptr @llvm_call_indirect(ptr, ptr, i64) + + declare void @print_int(i64) + + declare void @init_gc() + + declare void @destroy_gc() + + declare void @set_ptr_stack(ptr) + + declare i64 @get_heap_start() + + declare i64 @get_heap_final() + + declare ptr @collect() + + declare ptr @print_gc_status() + + ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) + declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + + define ptr @fib(ptr %n) { + entry: + %raw_int = ptrtoint ptr %n to i64 + %minus1 = sub i64 %raw_int, 1 + %untagged = sdiv i64 %minus1, 2 + %icmp_slt = icmp slt i64 %untagged, 2 + %tagged_bool = select i1 %icmp_slt, i64 4, i64 2 + %result_bool = inttoptr i64 %tagged_bool to ptr + %raw_bool = ptrtoint ptr %result_bool to i64 + %is_true = icmp eq i64 %raw_bool, 4 + br i1 %is_true, label %then_0, label %else_0 + + then_0: ; preds = %entry + br label %merge_0 + + else_0: ; preds = %entry + %raw_int1 = ptrtoint ptr %n to i64 + %minus12 = sub i64 %raw_int1, 1 + %untagged3 = sdiv i64 %minus12, 2 + %sub = sub i64 %untagged3, 1 + %twice = mul i64 %sub, 2 + %tagged = add i64 %twice, 1 + %result_int = inttoptr i64 %tagged to ptr + %direct_fib = call ptr @fib(ptr %result_int) + %raw_int4 = ptrtoint ptr %n to i64 + %minus15 = sub i64 %raw_int4, 1 + %untagged6 = sdiv i64 %minus15, 2 + %sub7 = sub i64 %untagged6, 2 + %twice8 = mul i64 %sub7, 2 + %tagged9 = add i64 %twice8, 1 + %result_int10 = inttoptr i64 %tagged9 to ptr + %direct_fib11 = call ptr @fib(ptr %result_int10) + %raw_int12 = ptrtoint ptr %direct_fib to i64 + %minus113 = sub i64 %raw_int12, 1 + %untagged14 = sdiv i64 %minus113, 2 + %raw_int15 = ptrtoint ptr %direct_fib11 to i64 + %minus116 = sub i64 %raw_int15, 1 + %untagged17 = sdiv i64 %minus116, 2 + %add = add i64 %untagged14, %untagged17 + %twice18 = mul i64 %add, 2 + %tagged19 = add i64 %twice18, 1 + %result_int20 = inttoptr i64 %tagged19 to ptr + br label %merge_0 + + merge_0: ; preds = %else_0, %then_0 + %ite_result = phi ptr [ inttoptr (i64 3 to ptr), %then_0 ], [ %result_int20, %else_0 ] + ret ptr %ite_result + } + + define ptr @main() { + entry: + %direct_fib = call ptr @fib(ptr inttoptr (i64 13 to ptr)) + ret ptr %direct_fib + } + + attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] +;; + +let%expect_test "is_positive" = + compile_llvm_show + {| + let is_positive n = n > 0 + let main = is_positive 42 + |}; + [%expect + {| + ; ModuleID = 'EML' + source_filename = "EML" + + declare ptr @eml_applyN(ptr, i64, ptr) + + declare ptr @create_tuple(i64, ptr) + + declare ptr @alloc_closure(ptr, i64) + + declare ptr @field(ptr, i64) + + declare ptr @llvm_call_indirect(ptr, ptr, i64) + + declare void @print_int(i64) + + declare void @init_gc() + + declare void @destroy_gc() + + declare void @set_ptr_stack(ptr) + + declare i64 @get_heap_start() + + declare i64 @get_heap_final() + + declare ptr @collect() + + declare ptr @print_gc_status() + + ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) + declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + + define ptr @is_positive(ptr %n) { + entry: + %raw_int = ptrtoint ptr %n to i64 + %minus1 = sub i64 %raw_int, 1 + %untagged = sdiv i64 %minus1, 2 + %icmp_sgt = icmp sgt i64 %untagged, 0 + %tagged_bool = select i1 %icmp_sgt, i64 4, i64 2 + %result_bool = inttoptr i64 %tagged_bool to ptr + ret ptr %result_bool + } + + define ptr @main() { + entry: + %direct_is_positive = call ptr @is_positive(ptr inttoptr (i64 85 to ptr)) + ret ptr %direct_is_positive + } + + attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] +;; + +let%expect_test "mul3" = + compile_llvm_show + {| + let mul3 a b c = a * b * c + let main = mul3 2 3 4 + |}; + [%expect + {| + ; ModuleID = 'EML' + source_filename = "EML" + + declare ptr @eml_applyN(ptr, i64, ptr) + + declare ptr @create_tuple(i64, ptr) + + declare ptr @alloc_closure(ptr, i64) + + declare ptr @field(ptr, i64) + + declare ptr @llvm_call_indirect(ptr, ptr, i64) + + declare void @print_int(i64) + + declare void @init_gc() + + declare void @destroy_gc() + + declare void @set_ptr_stack(ptr) + + declare i64 @get_heap_start() + + declare i64 @get_heap_final() + + declare ptr @collect() + + declare ptr @print_gc_status() + + ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) + declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + + define ptr @mul3(ptr %a, ptr %b, ptr %c) { + entry: + %raw_int = ptrtoint ptr %a to i64 + %minus1 = sub i64 %raw_int, 1 + %untagged = sdiv i64 %minus1, 2 + %raw_int1 = ptrtoint ptr %b to i64 + %minus12 = sub i64 %raw_int1, 1 + %untagged3 = sdiv i64 %minus12, 2 + %mul = mul i64 %untagged, %untagged3 + %twice = mul i64 %mul, 2 + %tagged = add i64 %twice, 1 + %result_int = inttoptr i64 %tagged to ptr + %raw_int4 = ptrtoint ptr %result_int to i64 + %minus15 = sub i64 %raw_int4, 1 + %untagged6 = sdiv i64 %minus15, 2 + %raw_int7 = ptrtoint ptr %c to i64 + %minus18 = sub i64 %raw_int7, 1 + %untagged9 = sdiv i64 %minus18, 2 + %mul10 = mul i64 %untagged6, %untagged9 + %twice11 = mul i64 %mul10, 2 + %tagged12 = add i64 %twice11, 1 + %result_int13 = inttoptr i64 %tagged12 to ptr + ret ptr %result_int13 + } + + define ptr @main() { + entry: + %direct_mul3 = call ptr @mul3(ptr inttoptr (i64 5 to ptr), ptr inttoptr (i64 7 to ptr), ptr inttoptr (i64 9 to ptr)) + ret ptr %direct_mul3 + } + + attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] +;; + +let%expect_test "test1" = + compile_llvm_show + {| + let large x = if 0<>x then print_int 0 else print_int 1 + let main = + let x = if (if (if 0 + then 0 else (let t42 = print_int 42 in 1)) + then 0 else 1) + then 0 else 1 in + large x + |}; + [%expect + {| + ; ModuleID = 'EML' + source_filename = "EML" + + declare ptr @eml_applyN(ptr, i64, ptr) + + declare ptr @create_tuple(i64, ptr) + + declare ptr @alloc_closure(ptr, i64) + + declare ptr @field(ptr, i64) + + declare ptr @llvm_call_indirect(ptr, ptr, i64) + + declare void @print_int(i64) + + declare void @init_gc() + + declare void @destroy_gc() + + declare void @set_ptr_stack(ptr) + + declare i64 @get_heap_start() + + declare i64 @get_heap_final() + + declare ptr @collect() + + declare ptr @print_gc_status() + + ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) + declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + + define ptr @large(ptr %x) { + entry: + %raw_int = ptrtoint ptr %x to i64 + %minus1 = sub i64 %raw_int, 1 + %untagged = sdiv i64 %minus1, 2 + %icmp_ne = icmp ne i64 0, %untagged + %tagged_bool = select i1 %icmp_ne, i64 4, i64 2 + %result_bool = inttoptr i64 %tagged_bool to ptr + %raw_bool = ptrtoint ptr %result_bool to i64 + %is_true = icmp eq i64 %raw_bool, 4 + br i1 %is_true, label %then_0, label %else_0 + + then_0: ; preds = %entry + call void @print_int(i64 1) + br label %merge_0 + + else_0: ; preds = %entry + call void @print_int(i64 3) + br label %merge_0 + + merge_0: ; preds = %else_0, %then_0 + %ite_result = phi ptr [ inttoptr (i64 1 to ptr), %then_0 ], [ inttoptr (i64 1 to ptr), %else_0 ] + ret ptr %ite_result + } + + define ptr @main() { + entry: + br i1 false, label %then_1, label %else_1 + + then_1: ; preds = %entry + br label %merge_1 + + else_1: ; preds = %entry + call void @print_int(i64 85) + br label %merge_1 + + merge_1: ; preds = %else_1, %then_1 + %ite_result = phi ptr [ inttoptr (i64 1 to ptr), %then_1 ], [ inttoptr (i64 3 to ptr), %else_1 ] + %raw_bool = ptrtoint ptr %ite_result to i64 + %is_true = icmp eq i64 %raw_bool, 4 + br i1 %is_true, label %then_2, label %else_2 + + then_2: ; preds = %merge_1 + br label %merge_2 + + else_2: ; preds = %merge_1 + br label %merge_2 + + merge_2: ; preds = %else_2, %then_2 + %ite_result1 = phi ptr [ inttoptr (i64 1 to ptr), %then_2 ], [ inttoptr (i64 3 to ptr), %else_2 ] + %raw_bool2 = ptrtoint ptr %ite_result1 to i64 + %is_true3 = icmp eq i64 %raw_bool2, 4 + br i1 %is_true3, label %then_3, label %else_3 + + then_3: ; preds = %merge_2 + br label %merge_3 + + else_3: ; preds = %merge_2 + br label %merge_3 + + merge_3: ; preds = %else_3, %then_3 + %ite_result4 = phi ptr [ inttoptr (i64 1 to ptr), %then_3 ], [ inttoptr (i64 3 to ptr), %else_3 ] + %direct_large = call ptr @large(ptr %ite_result4) + ret ptr %direct_large + } + + attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] +;; + +let%expect_test "codegen closure fn with 10 arg" = + compile_llvm_show + {| + let add a b c d e f g = a + b + c + d + e + f + g + + let main = + let temp1 = add 1 1 1 1 in + let temp2 = temp1 1 1 in + let temp3 = temp2 1 1 in + print_int temp3 + ;; + |}; + [%expect + {| + ; ModuleID = 'EML' + source_filename = "EML" + + declare ptr @eml_applyN(ptr, i64, ptr) + + declare ptr @create_tuple(i64, ptr) + + declare ptr @alloc_closure(ptr, i64) + + declare ptr @field(ptr, i64) + + declare ptr @llvm_call_indirect(ptr, ptr, i64) + + declare void @print_int(i64) + + declare void @init_gc() + + declare void @destroy_gc() + + declare void @set_ptr_stack(ptr) + + declare i64 @get_heap_start() + + declare i64 @get_heap_final() + + declare ptr @collect() + + declare ptr @print_gc_status() + + ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) + declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + + define ptr @add(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr %f, ptr %g) { + entry: + %raw_int = ptrtoint ptr %a to i64 + %minus1 = sub i64 %raw_int, 1 + %untagged = sdiv i64 %minus1, 2 + %raw_int1 = ptrtoint ptr %b to i64 + %minus12 = sub i64 %raw_int1, 1 + %untagged3 = sdiv i64 %minus12, 2 + %add = add i64 %untagged, %untagged3 + %twice = mul i64 %add, 2 + %tagged = add i64 %twice, 1 + %result_int = inttoptr i64 %tagged to ptr + %raw_int4 = ptrtoint ptr %result_int to i64 + %minus15 = sub i64 %raw_int4, 1 + %untagged6 = sdiv i64 %minus15, 2 + %raw_int7 = ptrtoint ptr %c to i64 + %minus18 = sub i64 %raw_int7, 1 + %untagged9 = sdiv i64 %minus18, 2 + %add10 = add i64 %untagged6, %untagged9 + %twice11 = mul i64 %add10, 2 + %tagged12 = add i64 %twice11, 1 + %result_int13 = inttoptr i64 %tagged12 to ptr + %raw_int14 = ptrtoint ptr %result_int13 to i64 + %minus115 = sub i64 %raw_int14, 1 + %untagged16 = sdiv i64 %minus115, 2 + %raw_int17 = ptrtoint ptr %d to i64 + %minus118 = sub i64 %raw_int17, 1 + %untagged19 = sdiv i64 %minus118, 2 + %add20 = add i64 %untagged16, %untagged19 + %twice21 = mul i64 %add20, 2 + %tagged22 = add i64 %twice21, 1 + %result_int23 = inttoptr i64 %tagged22 to ptr + %raw_int24 = ptrtoint ptr %result_int23 to i64 + %minus125 = sub i64 %raw_int24, 1 + %untagged26 = sdiv i64 %minus125, 2 + %raw_int27 = ptrtoint ptr %e to i64 + %minus128 = sub i64 %raw_int27, 1 + %untagged29 = sdiv i64 %minus128, 2 + %add30 = add i64 %untagged26, %untagged29 + %twice31 = mul i64 %add30, 2 + %tagged32 = add i64 %twice31, 1 + %result_int33 = inttoptr i64 %tagged32 to ptr + %raw_int34 = ptrtoint ptr %result_int33 to i64 + %minus135 = sub i64 %raw_int34, 1 + %untagged36 = sdiv i64 %minus135, 2 + %raw_int37 = ptrtoint ptr %f to i64 + %minus138 = sub i64 %raw_int37, 1 + %untagged39 = sdiv i64 %minus138, 2 + %add40 = add i64 %untagged36, %untagged39 + %twice41 = mul i64 %add40, 2 + %tagged42 = add i64 %twice41, 1 + %result_int43 = inttoptr i64 %tagged42 to ptr + %raw_int44 = ptrtoint ptr %result_int43 to i64 + %minus145 = sub i64 %raw_int44, 1 + %untagged46 = sdiv i64 %minus145, 2 + %raw_int47 = ptrtoint ptr %g to i64 + %minus148 = sub i64 %raw_int47, 1 + %untagged49 = sdiv i64 %minus148, 2 + %add50 = add i64 %untagged46, %untagged49 + %twice51 = mul i64 %add50, 2 + %tagged52 = add i64 %twice51, 1 + %result_int53 = inttoptr i64 %tagged52 to ptr + ret ptr %result_int53 + } + + define ptr @main() { + entry: + %boxed_alloc_closure = call ptr @alloc_closure(ptr @add, i64 7) + br label %apply_step_0 + + merge_0: ; preds = %apply_step_3 + %apply_result = phi ptr [ %apply_step_310, %apply_step_3 ] + br label %apply_step_011 + + apply_step_0: ; preds = %entry + %apply_one = alloca [1 x ptr], align 8 + %one_elem = getelementptr [1 x ptr], ptr %apply_one, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem, align 8 + %apply_step_01 = call ptr @eml_applyN(ptr %boxed_alloc_closure, i64 1, ptr %one_elem) + br label %apply_step_1 + + apply_step_1: ; preds = %apply_step_0 + %cur_1 = phi ptr [ %apply_step_01, %apply_step_0 ] + %apply_one2 = alloca [1 x ptr], align 8 + %one_elem3 = getelementptr [1 x ptr], ptr %apply_one2, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem3, align 8 + %apply_step_14 = call ptr @eml_applyN(ptr %cur_1, i64 1, ptr %one_elem3) + br label %apply_step_2 + + apply_step_2: ; preds = %apply_step_1 + %cur_2 = phi ptr [ %apply_step_14, %apply_step_1 ] + %apply_one5 = alloca [1 x ptr], align 8 + %one_elem6 = getelementptr [1 x ptr], ptr %apply_one5, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem6, align 8 + %apply_step_27 = call ptr @eml_applyN(ptr %cur_2, i64 1, ptr %one_elem6) + br label %apply_step_3 + + apply_step_3: ; preds = %apply_step_2 + %cur_3 = phi ptr [ %apply_step_27, %apply_step_2 ] + %apply_one8 = alloca [1 x ptr], align 8 + %one_elem9 = getelementptr [1 x ptr], ptr %apply_one8, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem9, align 8 + %apply_step_310 = call ptr @eml_applyN(ptr %cur_3, i64 1, ptr %one_elem9) + br label %merge_0 + + merge_1: ; preds = %apply_step_112 + %apply_result20 = phi ptr [ %apply_step_119, %apply_step_112 ] + br label %apply_step_021 + + apply_step_011: ; preds = %merge_0 + %apply_one13 = alloca [1 x ptr], align 8 + %one_elem14 = getelementptr [1 x ptr], ptr %apply_one13, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem14, align 8 + %apply_step_015 = call ptr @eml_applyN(ptr %apply_result, i64 1, ptr %one_elem14) + br label %apply_step_112 + + apply_step_112: ; preds = %apply_step_011 + %cur_116 = phi ptr [ %apply_step_015, %apply_step_011 ] + %apply_one17 = alloca [1 x ptr], align 8 + %one_elem18 = getelementptr [1 x ptr], ptr %apply_one17, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem18, align 8 + %apply_step_119 = call ptr @eml_applyN(ptr %cur_116, i64 1, ptr %one_elem18) + br label %merge_1 + + merge_2: ; preds = %apply_step_122 + %apply_result30 = phi ptr [ %apply_step_129, %apply_step_122 ] + %print_int_arg = ptrtoint ptr %apply_result30 to i64 + call void @print_int(i64 %print_int_arg) + ret ptr inttoptr (i64 1 to ptr) + + apply_step_021: ; preds = %merge_1 + %apply_one23 = alloca [1 x ptr], align 8 + %one_elem24 = getelementptr [1 x ptr], ptr %apply_one23, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem24, align 8 + %apply_step_025 = call ptr @eml_applyN(ptr %apply_result20, i64 1, ptr %one_elem24) + br label %apply_step_122 + + apply_step_122: ; preds = %apply_step_021 + %cur_126 = phi ptr [ %apply_step_025, %apply_step_021 ] + %apply_one27 = alloca [1 x ptr], align 8 + %one_elem28 = getelementptr [1 x ptr], ptr %apply_one27, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem28, align 8 + %apply_step_129 = call ptr @eml_applyN(ptr %cur_126, i64 1, ptr %one_elem28) + br label %merge_2 + } + + attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] +;; diff --git a/EML/tests/llvm_tests.mli b/EML/tests/llvm_tests.mli new file mode 100644 index 00000000..acb67707 --- /dev/null +++ b/EML/tests/llvm_tests.mli @@ -0,0 +1,6 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val compile_llvm : string -> string +val compile_llvm_show : string -> unit diff --git a/EML/tests/many_tests b/EML/tests/many_tests new file mode 120000 index 00000000..0bd48791 --- /dev/null +++ b/EML/tests/many_tests @@ -0,0 +1 @@ +../../manytests \ No newline at end of file diff --git a/EML/tests/parser_tests.ml b/EML/tests/parser_tests.ml new file mode 100644 index 00000000..5af3cc27 --- /dev/null +++ b/EML/tests/parser_tests.ml @@ -0,0 +1,233 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open EML_lib.Frontend.Parser +open EML_lib.Frontend.Ast + +let parse_test input = + match parse input with + | Ok ast -> Printf.printf "%s\n" (show_program ast) + | Error fail -> Printf.printf "Ошибка: %s\n" fail +;; + +let%expect_test "factorial" = + parse_test + {| let rec fac n = + if n <= 1 + then 1 + else let n1 = n-1 in + let m = fac n1 in + n*m + +let main = fac 4 |}; + [%expect + {| + [(SValue (Rec, + ((PatVariable "fac"), + (ExpLambda ((PatVariable "n"), [], + (ExpBranch ( + (ExpBinOper (LowestEqual, (ExpIdent "n"), (ExpConst (ConstInt 1)) + )), + (ExpConst (ConstInt 1)), + (Some (ExpLet (NonRec, + ((PatVariable "n1"), + (ExpBinOper (Minus, (ExpIdent "n"), + (ExpConst (ConstInt 1))))), + [], + (ExpLet (NonRec, + ((PatVariable "m"), + (ExpApply ((ExpIdent "fac"), (ExpIdent "n1")))), + [], + (ExpBinOper (Multiply, (ExpIdent "n"), (ExpIdent "m") + )) + )) + ))) + )) + ))), + [])); + (SValue (NonRec, + ((PatVariable "main"), + (ExpApply ((ExpIdent "fac"), (ExpConst (ConstInt 4))))), + [])) + ] +|}] +;; + +let%expect_test "factorial" = + parse_test "let rec factorial n = if n < 2 then 1 else n * factorial(n - 1);;"; + [%expect + {| + [(SValue (Rec, + ((PatVariable "factorial"), + (ExpLambda ((PatVariable "n"), [], + (ExpBranch ( + (ExpBinOper (LowerThan, (ExpIdent "n"), (ExpConst (ConstInt 2)))), + (ExpConst (ConstInt 1)), + (Some (ExpBinOper (Multiply, (ExpIdent "n"), + (ExpApply ((ExpIdent "factorial"), + (ExpBinOper (Minus, (ExpIdent "n"), + (ExpConst (ConstInt 1)))) + )) + ))) + )) + ))), + [])) + ] +|}] +;; + +let%expect_test "fibonacci" = + parse_test "let rec fibo n = if n < 2 then 1 else fibo(n - 1) + fibo(n - 2) ;;"; + [%expect + {| + [(SValue (Rec, + ((PatVariable "fibo"), + (ExpLambda ((PatVariable "n"), [], + (ExpBranch ( + (ExpBinOper (LowerThan, (ExpIdent "n"), (ExpConst (ConstInt 2)))), + (ExpConst (ConstInt 1)), + (Some (ExpBinOper (Plus, + (ExpApply ((ExpIdent "fibo"), + (ExpBinOper (Minus, (ExpIdent "n"), + (ExpConst (ConstInt 1)))) + )), + (ExpApply ((ExpIdent "fibo"), + (ExpBinOper (Minus, (ExpIdent "n"), + (ExpConst (ConstInt 2)))) + )) + ))) + )) + ))), + [])) + ] +|}] +;; + +let%expect_test "lambda_test" = + parse_test "let add x = fun y -> x + y;;"; + [%expect + {| + [(SValue (NonRec, + ((PatVariable "add"), + (ExpLambda ((PatVariable "x"), [], + (ExpLambda ((PatVariable "y"), [], + (ExpBinOper (Plus, (ExpIdent "x"), (ExpIdent "y"))))) + ))), + [])) + ] +|}] +;; + +let%expect_test "test_tuple" = + parse_test "let x = (1, 2, true) in x;;"; + [%expect + {| + [(SEval + (ExpLet (NonRec, + ((PatVariable "x"), + (ExpTuple ((ExpConst (ConstInt 1)), (ExpConst (ConstInt 2)), + [(ExpConst (ConstBool true))]))), + [], (ExpIdent "x")))) + ] +|}] +;; + +let%expect_test "test_list" = + parse_test "let arr = [1;2;true]"; + [%expect + {| + [(SValue (NonRec, + ((PatVariable "arr"), + (ExpConstruct ("::", + (Some (ExpTuple ((ExpConst (ConstInt 1)), + (ExpConstruct ("::", + (Some (ExpTuple ((ExpConst (ConstInt 2)), + (ExpConstruct ("::", + (Some (ExpTuple ((ExpConst (ConstBool true)), + (ExpConstruct ("[]", None)), + []))) + )), + []))) + )), + []))) + ))), + [])) + ] +|}] +;; + +let%expect_test "test_one_element_in_tuple" = + parse_test "let x = (666)"; + [%expect + {| + [(SValue (NonRec, ((PatVariable "x"), (ExpConst (ConstInt 666))), []))] +|}] +;; + +let%expect_test "test_sum_two_args" = + parse_test "let sum x y = x + y"; + [%expect + {| +[(SValue (NonRec, + ((PatVariable "sum"), + (ExpLambda ((PatVariable "x"), [(PatVariable "y")], + (ExpBinOper (Plus, (ExpIdent "x"), (ExpIdent "y")))))), + [])) + ] +|}] +;; + +let%expect_test "test_annotate_type_1" = + parse_test "let sum (x:int) (y:int) = x + y"; + [%expect + {| +[(SValue (NonRec, + ((PatVariable "sum"), + (ExpLambda ((PatType ((PatVariable "x"), (TyPrim "int"))), + [(PatType ((PatVariable "y"), (TyPrim "int")))], + (ExpBinOper (Plus, (ExpIdent "x"), (ExpIdent "y")))))), + [])) + ] +|}] +;; + +let%expect_test "test_annotate_type_2" = + parse_test "let (a : int list) = [] "; + [%expect + {| +[(SValue (NonRec, + ((PatType ((PatVariable "a"), (TyList (TyPrim "int")))), + (ExpConstruct ("[]", None))), + [])) + ] +|}] +;; + +let%expect_test "test_minus" = + parse_test "-1 -2 - (-1) -(3)"; + [%expect + {| +[(SEval + (ExpBinOper (Minus, + (ExpBinOper (Minus, + (ExpBinOper (Minus, + (ExpUnarOper (Negative, (ExpConst (ConstInt 1)))), + (ExpConst (ConstInt 2)))), + (ExpUnarOper (Negative, (ExpConst (ConstInt 1)))))), + (ExpConst (ConstInt 3))))) + ] + |}] +;; + +let%expect_test "test_unit" = + parse_test "let () = print_int 5"; + [%expect + {| +[(SValue (NonRec, + ((PatConstruct ("()", None)), + (ExpApply ((ExpIdent "print_int"), (ExpConst (ConstInt 5))))), + [])) + ] + |}] +;; diff --git a/EML/tests/parser_tests.mli b/EML/tests/parser_tests.mli new file mode 100644 index 00000000..910bf993 --- /dev/null +++ b/EML/tests/parser_tests.mli @@ -0,0 +1,5 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val parse_test : string -> unit diff --git a/EML/tests/riscv.t b/EML/tests/riscv.t new file mode 100644 index 00000000..228b833d --- /dev/null +++ b/EML/tests/riscv.t @@ -0,0 +1,81 @@ +Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev +SPDX-License-Identifier: LGPL-3.0-or-later + + + $ make compile_riscv many_tests/typed/001fac.ml + 24 + + $ make compile_riscv many_tests/typed/002fac.ml + 24 + + $ make compile_riscv many_tests/typed/003fib.ml + 3 + 3 + + $ make compile_riscv many_tests/typed/004manyargs.ml + 1111111111 + 1 + 10 + 100 + + $ make compile_riscv many_tests/typed/005fix.ml + 720 + + $ make compile_riscv many_tests/typed/006partial.ml + 1122 + + $ make compile_riscv many_tests/typed/006partial2.ml + 1 + 2 + 3 + 7 + + $ make compile_riscv many_tests/typed/006partial3.ml + 4 + 8 + 9 + + $ make compile_riscv many_tests/typed/007order.ml + 1 + 2 + 4 + -1 + 103 + -555555 + 10000 + + $ make compile_riscv many_tests/typed/008ascription.ml + 8 + + $ make compile_riscv many_tests/typed/009let_poly.ml + + $ make compile_riscv many_tests/typed/010fac_anf.ml + + $ make compile_riscv many_tests/typed/010faccps_ll.ml + 24 + + $ make compile_riscv many_tests/typed/010fibcps_ll.ml + 8 + + $ make compile_riscv many_tests/typed/011mapcps.ml + 2 + 3 + 4 + + $ make compile_riscv many_tests/typed/012faccps.ml + 720 + + $ make compile_riscv many_tests/typed/012fibcps.ml + 8 + + $ make compile_riscv many_tests/typed/013foldfoldr.ml + 6 + + $ make compile_riscv many_tests/typed/015tuples.ml + 1 + 1 + 1 + 1 + + $ make compile_riscv additional_tests/mangling_test.ml + 24 diff --git a/EML/tests/riscv_tests.ml b/EML/tests/riscv_tests.ml new file mode 100644 index 00000000..279e862c --- /dev/null +++ b/EML/tests/riscv_tests.ml @@ -0,0 +1,694 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open EML_lib +open Frontend.Parser +open Middleend.Anf + +let compile src : string = + match parse src with + | Error e -> "Parse error: " ^ e + | Ok ast -> + (match anf_program ast with + | Error e -> "ANF error: " ^ e + | Ok anf -> + let buf = Buffer.create 1024 in + let ppf = Format.formatter_of_buffer buf in + (match Backend.Ricsv.Runner.gen_program ppf anf with + | Ok () -> + Format.pp_print_flush ppf (); + Buffer.contents buf + | Error e -> "Codegen error: " ^ e)) +;; + +let run src = Format.printf "%s" (compile src) + +let%expect_test "unary_minus" = + run "let x = -5"; + [%expect + {| +.section .text + .globl x + .type x, @function +x: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + li t0, 11 + li a0, 1 + sub a0, a0, t0 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret + + .globl main + .type main, @function +main: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + li a0, 1 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret +|}] +;; + +let%expect_test "unary_not" = + run "let x = not true"; + [%expect + {| +.section .text + .globl x + .type x, @function +x: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + li t0, 3 + xori a0, t0, 3 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret + + .globl main + .type main, @function +main: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + li a0, 1 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret +|}] +;; + +let%expect_test "unit_main" = + run "let main = ()"; + [%expect + {| +.section .text + .globl main + .type main, @function +main: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + li a0, 1 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret +|}] +;; + +let%expect_test "mul_only" = + run "let main = 7 * 8"; + [%expect + {| +.section .text + .globl main + .type main, @function +main: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + li t0, 15 + li t1, 17 + srli t0, t0, 1 + addi t1, t1, -1 + mul a0, t0, t1 + addi a0, a0, 1 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret +|}] +;; + +let%expect_test "double_fn" = + run + {| + let double x = x + x + let main = double 21 + |}; + [%expect + {| +.section .text + .globl double + .type double, @function +double: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + sd a0, -8(fp) + ld t0, -8(fp) + ld t1, -8(fp) + add a0, t0, t1 + addi a0, a0, -1 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret + + .globl main + .type main, @function +main: + addi sp, sp, -200 + sd ra, 192(sp) + sd fp, 184(sp) + addi fp, sp, 184 + li a0, 43 + call double + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret +|}] +;; + +let%expect_test "abs_fn" = + run + {| + let abs x = if x < 0 then -x else x + let main = abs 7 + |}; + [%expect + {| +.section .text + .globl abs + .type abs, @function +abs: + addi sp, sp, -32 + sd ra, 24(sp) + sd fp, 16(sp) + addi fp, sp, 16 + sd a0, -8(fp) + ld t0, -8(fp) + li t1, 1 + slt a0, t0, t1 + add a0, a0, a0 + addi a0, a0, 1 + sd a0, -16(fp) + ld t0, -16(fp) + li t1, 1 + beq t0, t1, else_0 + ld t0, -8(fp) + li a0, 1 + sub a0, a0, t0 + j end_0 +else_0: + ld a0, -8(fp) +end_0: + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret + + .globl main + .type main, @function +main: + addi sp, sp, -200 + sd ra, 192(sp) + sd fp, 184(sp) + addi fp, sp, 184 + li a0, 15 + call abs + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret +|}] +;; + +let%expect_test "nested_calls" = + run + {| + let sq x = x * x + let sum_of_squares a b = sq a + sq b + let main = sum_of_squares 3 4 + |}; + [%expect + {| +.section .text + .globl sq + .type sq, @function +sq: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + sd a0, -8(fp) + ld t0, -8(fp) + ld t1, -8(fp) + srli t0, t0, 1 + addi t1, t1, -1 + mul a0, t0, t1 + addi a0, a0, 1 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret + + .globl sum_of_squares + .type sum_of_squares, @function +sum_of_squares: + addi sp, sp, -400 + sd ra, 392(sp) + sd fp, 384(sp) + addi fp, sp, 384 + sd a0, -8(fp) + sd a1, -16(fp) + ld a0, -8(fp) + call sq + sd a0, -24(fp) + ld a0, -16(fp) + call sq + sd a0, -32(fp) + ld t0, -24(fp) + ld t1, -32(fp) + add a0, t0, t1 + addi a0, a0, -1 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret + + .globl main + .type main, @function +main: + addi sp, sp, -208 + sd ra, 200(sp) + sd fp, 192(sp) + addi fp, sp, 192 + li a0, 7 + li a1, 9 + call sum_of_squares + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret +|}] +;; + +let%expect_test "fibonacci" = + run + {| + let rec fib n = if n < 2 then 1 else fib (n - 1) + fib (n - 2) + let main = fib 6 + |}; + [%expect + {| +.section .text + .globl fib + .type fib, @function +fib: + addi sp, sp, -432 + sd ra, 424(sp) + sd fp, 416(sp) + addi fp, sp, 416 + sd a0, -8(fp) + ld t0, -8(fp) + li t1, 5 + slt a0, t0, t1 + add a0, a0, a0 + addi a0, a0, 1 + sd a0, -16(fp) + ld t0, -16(fp) + li t1, 1 + beq t0, t1, else_0 + li a0, 3 + j end_0 +else_0: + ld t0, -8(fp) + li t1, 3 + sub a0, t0, t1 + addi a0, a0, 1 + sd a0, -24(fp) + ld a0, -24(fp) + call fib + sd a0, -32(fp) + ld t0, -8(fp) + li t1, 5 + sub a0, t0, t1 + addi a0, a0, 1 + sd a0, -40(fp) + ld a0, -40(fp) + call fib + sd a0, -48(fp) + ld t0, -32(fp) + ld t1, -48(fp) + add a0, t0, t1 + addi a0, a0, -1 +end_0: + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret + + .globl main + .type main, @function +main: + addi sp, sp, -200 + sd ra, 192(sp) + sd fp, 184(sp) + addi fp, sp, 184 + li a0, 13 + call fib + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret +|}] +;; + +let%expect_test "is_positive" = + run + {| + let is_positive n = n > 0 + let main = is_positive 42 + |}; + [%expect + {| +.section .text + .globl is_positive + .type is_positive, @function +is_positive: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + sd a0, -8(fp) + ld t0, -8(fp) + li t1, 1 + slt a0, t1, t0 + add a0, a0, a0 + addi a0, a0, 1 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret + + .globl main + .type main, @function +main: + addi sp, sp, -200 + sd ra, 192(sp) + sd fp, 184(sp) + addi fp, sp, 184 + li a0, 85 + call is_positive + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret +|}] +;; + +let%expect_test "mul3" = + run + {| + let mul3 a b c = a * b * c + let main = mul3 2 3 4 + |}; + [%expect + {| +.section .text + .globl mul3 + .type mul3, @function +mul3: + addi sp, sp, -24 + sd ra, 16(sp) + sd fp, 8(sp) + addi fp, sp, 8 + sd a0, -8(fp) + sd a1, -16(fp) + sd a2, -24(fp) + ld t0, -8(fp) + ld t1, -16(fp) + srli t0, t0, 1 + addi t1, t1, -1 + mul a0, t0, t1 + addi a0, a0, 1 + sd a0, -32(fp) + ld t0, -32(fp) + ld t1, -24(fp) + srli t0, t0, 1 + addi t1, t1, -1 + mul a0, t0, t1 + addi a0, a0, 1 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret + + .globl main + .type main, @function +main: + addi sp, sp, -216 + sd ra, 208(sp) + sd fp, 200(sp) + addi fp, sp, 200 + li a0, 5 + li a1, 7 + li a2, 9 + call mul3 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret +|}] +;; + +let%expect_test "test1" = + run + {| + let large x = if 0<>x then print_int 0 else print_int 1 + let main = + let x = if (if (if 0 + then 0 else (let t42 = print_int 42 in 1)) + then 0 else 1) + then 0 else 1 in + large x + |}; + [%expect + {| + .section .text + .globl large + .type large, @function + large: + addi sp, sp, -400 + sd ra, 392(sp) + sd fp, 384(sp) + addi fp, sp, 384 + sd a0, -8(fp) + li t0, 1 + ld t1, -8(fp) + xor a0, t0, t1 + snez a0, a0 + add a0, a0, a0 + addi a0, a0, 1 + sd a0, -16(fp) + ld t0, -16(fp) + li t1, 1 + beq t0, t1, else_0 + li a0, 1 + call print_int + j end_0 + else_0: + li a0, 3 + call print_int + end_0: + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret + + .globl main + .type main, @function + main: + addi sp, sp, -440 + sd ra, 432(sp) + sd fp, 424(sp) + addi fp, sp, 424 + li t0, 1 + li t1, 1 + beq t0, t1, else_1 + li a0, 1 + j end_1 + else_1: + li a0, 85 + call print_int + sd a0, -8(fp) + li a0, 3 + end_1: + sd a0, -16(fp) + ld t0, -16(fp) + li t1, 1 + beq t0, t1, else_2 + li a0, 1 + j end_2 + else_2: + li a0, 3 + end_2: + sd a0, -24(fp) + ld t0, -24(fp) + li t1, 1 + beq t0, t1, else_3 + li a0, 1 + j end_3 + else_3: + li a0, 3 + end_3: + sd a0, -32(fp) + ld a0, -32(fp) + call large + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret +|}] +;; + +let%expect_test "codegen closure fn with 10 arg" = + run + {| + let add a b c d e f g = a + b + c + d + e + f + g + + let main = + let temp1 = add 1 1 1 1 in + let temp2 = temp1 1 1 in + let temp3 = temp2 1 1 in + print_int temp3 + ;; + |}; + [%expect + {| + .section .text + .globl add + .type add, @function + add: + addi sp, sp, -56 + sd ra, 48(sp) + sd fp, 40(sp) + addi fp, sp, 40 + sd a0, -8(fp) + sd a1, -16(fp) + sd a2, -24(fp) + sd a3, -32(fp) + sd a4, -40(fp) + sd a5, -48(fp) + sd a6, -56(fp) + ld t0, -8(fp) + ld t1, -16(fp) + add a0, t0, t1 + addi a0, a0, -1 + sd a0, -64(fp) + ld t0, -64(fp) + ld t1, -24(fp) + add a0, t0, t1 + addi a0, a0, -1 + sd a0, -72(fp) + ld t0, -72(fp) + ld t1, -32(fp) + add a0, t0, t1 + addi a0, a0, -1 + sd a0, -80(fp) + ld t0, -80(fp) + ld t1, -40(fp) + add a0, t0, t1 + addi a0, a0, -1 + sd a0, -88(fp) + ld t0, -88(fp) + ld t1, -48(fp) + add a0, t0, t1 + addi a0, a0, -1 + sd a0, -96(fp) + ld t0, -96(fp) + ld t1, -56(fp) + add a0, t0, t1 + addi a0, a0, -1 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret + + .globl main + .type main, @function + main: + addi sp, sp, -816 + sd ra, 808(sp) + sd fp, 800(sp) + addi fp, sp, 800 + la a0, add + li a1, 7 + call alloc_closure + li a1, 4 + addi sp, sp, -32 + li t0, 3 + sd t0, 0(sp) + li t0, 3 + sd t0, 8(sp) + li t0, 3 + sd t0, 16(sp) + li t0, 3 + sd t0, 24(sp) + mv a2, sp + call eml_applyN + addi sp, sp, 32 + sd a0, -8(fp) + ld a0, -8(fp) + li a1, 2 + addi sp, sp, -16 + li t0, 3 + sd t0, 0(sp) + li t0, 3 + sd t0, 8(sp) + mv a2, sp + call eml_applyN + addi sp, sp, 16 + sd a0, -16(fp) + ld a0, -16(fp) + li a1, 2 + addi sp, sp, -16 + li t0, 3 + sd t0, 0(sp) + li t0, 3 + sd t0, 8(sp) + mv a2, sp + call eml_applyN + addi sp, sp, 16 + sd a0, -24(fp) + ld a0, -24(fp) + call print_int + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret + |}] +;; diff --git a/EML/tests/riscv_tests.mli b/EML/tests/riscv_tests.mli new file mode 100644 index 00000000..f5abe362 --- /dev/null +++ b/EML/tests/riscv_tests.mli @@ -0,0 +1,6 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val compile : string -> string +val run : string -> unit diff --git a/EML/tests/tuple_tests.t b/EML/tests/tuple_tests.t new file mode 100644 index 00000000..33ef8875 --- /dev/null +++ b/EML/tests/tuple_tests.t @@ -0,0 +1,65 @@ +Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev +SPDX-License-Identifier: LGPL-3.0-or-later + + $ make compile_riscv GC=1 gc_tests/tuple_tests/01adder.ml + === GC Status === + Current allocated: 13 + Total allocated: 13 + Free space: 787 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 3 + ================= + 42 + === GC Status === + Current allocated: 13 + Total allocated: 28 + Free space: 787 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 4 + ================= + + $ make compile_riscv GC=1 gc_tests/tuple_tests/02nested.ml + === GC Status === + Current allocated: 23 + Total allocated: 23 + Free space: 777 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 4 + ================= + 90 + === GC Status === + Current allocated: 23 + Total allocated: 48 + Free space: 777 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 5 + ================= + + $ make compile_riscv GC=1 gc_tests/tuple_tests/03args.ml + === GC Status === + Current allocated: 28 + Total allocated: 28 + Free space: 772 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 4 + ================= + 1053 + === GC Status === + Current allocated: 28 + Total allocated: 58 + Free space: 772 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 5 + ================= diff --git a/pairing.md b/pairing.md index c6240f57..319d184c 100644 --- a/pairing.md +++ b/pairing.md @@ -8,4 +8,5 @@ | XML | [Гавриленко](https://github.com/qrutyy) | [Руднев-Степанян](https://github.com/Dabzelos) | [Власенко](https://github.com/spisladqo) | oMLet | [Котельникова](https://github.com/p1onerka) | [Козырева](https://github.com/sofyak0zyreva) | [Кочергин](https://github.com/VyacheslavIurevich) | DopsaML | [Нафиков] (https://github.com/SecretPersona5) | [Дьячков] (https://github.com/YazoonDinalt) +| EML | Усольцев | Островская | demo | [@Kakadu](http://github.com/Kakadu) | BOSS |