From b62e1ecf81ca99447ca2ee7ff6c49d17e9d91c55 Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Wed, 1 Oct 2025 18:06:00 +0300 Subject: [PATCH 01/74] initial commit --- EML/.gitignore | 3 ++ EML/.ocamlformat | 3 ++ EML/Makefile | 20 +++++++ EML/bin/LL.ml | 94 ++++++++++++++++++++++++++++++++ EML/bin/dune | 21 ++++++++ EML/bin/main.ml | 60 +++++++++++++++++++++ EML/bin/run.t | 20 +++++++ EML/bin/runtime.c | 10 ++++ EML/demo.opam.template | 7 +++ EML/dune-project | 33 ++++++++++++ EML/eml.opam | 34 ++++++++++++ EML/many_tests/.ocamlformat | 1 + EML/many_tests/typed/001fac.ml | 1 + EML/many_tests/typed/dune | 2 + EML/many_tests/typed/typed.t | 2 + EML/out.ll | 11 ++++ EML/test_qc/dune | 13 +++++ EML/test_qc/test_qc.ml | 98 ++++++++++++++++++++++++++++++++++ EML/test_qc/test_qc.mli | 7 +++ 19 files changed, 440 insertions(+) create mode 100644 EML/.gitignore create mode 100644 EML/.ocamlformat create mode 100644 EML/Makefile create mode 100644 EML/bin/LL.ml create mode 100644 EML/bin/dune create mode 100644 EML/bin/main.ml create mode 100644 EML/bin/run.t create mode 100644 EML/bin/runtime.c create mode 100644 EML/demo.opam.template create mode 100644 EML/dune-project create mode 100644 EML/eml.opam create mode 100644 EML/many_tests/.ocamlformat create mode 120000 EML/many_tests/typed/001fac.ml create mode 100644 EML/many_tests/typed/dune create mode 100644 EML/many_tests/typed/typed.t create mode 100644 EML/out.ll create mode 100644 EML/test_qc/dune create mode 100644 EML/test_qc/test_qc.ml create mode 100644 EML/test_qc/test_qc.mli diff --git a/EML/.gitignore b/EML/.gitignore new file mode 100644 index 00000000..0e5f1e4b --- /dev/null +++ b/EML/.gitignore @@ -0,0 +1,3 @@ +/_build +/_coverage + diff --git a/EML/.ocamlformat b/EML/.ocamlformat new file mode 100644 index 00000000..25919d0e --- /dev/null +++ b/EML/.ocamlformat @@ -0,0 +1,3 @@ +version=0.27.0 +profile=janestreet + diff --git a/EML/Makefile b/EML/Makefile new file mode 100644 index 00000000..c0615678 --- /dev/null +++ b/EML/Makefile @@ -0,0 +1,20 @@ +.PHONY: all test +all: + dune build + +test: + dune test + +TEST_COV_D = /tmp/cov +COVERAGE_OPTS = --coverage-path $(TEST_COV_D) --expect bin/ + +.PHONY: test_coverage coverage +test_coverage: coverage +coverage: + $(RM) -r $(TEST_COV_D) + mkdir -p $(TEST_COV_D) + BISECT_FILE=$(TEST_COV_D)/language dune runtest --no-print-directory \ + --instrument-with bisect_ppx --force + bisect-ppx-report html $(COVERAGE_OPTS) + bisect-ppx-report summary $(COVERAGE_OPTS) + @echo "Use 'xdg-open _coverage/index.html' to see coverage report" diff --git a/EML/bin/LL.ml b/EML/bin/LL.ml new file mode 100644 index 00000000..53331175 --- /dev/null +++ b/EML/bin/LL.ml @@ -0,0 +1,94 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2023-2024, Kakadu and contributors *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +open Llvm +open Printf + +module type S = sig + val context : Llvm.llcontext + val module_ : Llvm.llmodule + val builder : Llvm.llbuilder + val build_store : Llvm.llvalue -> Llvm.llvalue -> Llvm.llvalue + val build_call : lltype -> ?name:string -> llvalue -> llvalue list -> llvalue + val lookup_func_exn : string -> llvalue + val has_toplevel_func : string -> bool + val build_add : ?name:string -> llvalue -> llvalue -> llvalue + val build_sub : ?name:string -> llvalue -> llvalue -> llvalue + val build_mul : ?name:string -> llvalue -> llvalue -> llvalue + val build_sdiv : ?name:string -> llvalue -> llvalue -> llvalue [@@inline] + val build_icmp : ?name:string -> Icmp.t -> llvalue -> llvalue -> llvalue + + (** [set_metadata v kind fmt] sets metadata to value [v] of kind [k]. + Returns this value [v]. Useful for attaching debugging *) + val set_metadata + : llvalue + -> string + -> ('a, Format.formatter, unit, llvalue) format4 + -> 'a + + (* ?? *) + + val build_ptrtoint : ?name:string -> llvalue -> lltype -> llvalue + val build_inttoptr : ?name:string -> llvalue -> lltype -> llvalue + val build_pointercast : ?name:string -> llvalue -> lltype -> llvalue + + (** Just aliases *) + + val const_int : Llvm.lltype -> int -> Llvm.llvalue + val params : Llvm.llvalue -> Llvm.llvalue array + val pp_value : Format.formatter -> llvalue -> unit +end + +let make context builder module_ = + let module L : S = struct + let context = context + let builder = builder + let module_ = module_ + let build_store a b = Llvm.build_store a b builder + + let build_call typ ?(name = "") f args = + build_call typ f (Array.of_list args) name builder + ;; + + let has_toplevel_func fname = + match lookup_function fname module_ with + | Some _ -> true + | None -> false + ;; + + let lookup_func_exn fname = + match lookup_function fname module_ with + | Some f -> f + | None -> failwith (sprintf "Function '%s' not found" fname) + ;; + + let build_add ?(name = "") l r = build_add l r name builder + let build_sub ?(name = "") l r = build_sub l r name builder + let build_mul ?(name = "") l r = build_mul l r name builder + let build_sdiv ?(name = "") l r = build_sdiv l r name builder + let build_icmp ?(name = "") op l r = build_icmp op l r name builder + let build_ptrtoint ?(name = "") e typ = Llvm.build_ptrtoint e typ name builder + let build_inttoptr ?(name = "") e typ = Llvm.build_inttoptr e typ name builder + let build_pointercast ?(name = "") f typ = Llvm.build_pointercast f typ name builder + + let set_metadata v kind fmt = + Format.kasprintf + (fun s -> + Llvm.set_metadata v (Llvm.mdkind_id context kind) (Llvm.mdstring context s); + v) + fmt + ;; + + (* Aliases *) + let const_int = Llvm.const_int + let params = Llvm.params + let pp_value ppf x = Format.fprintf ppf "%s" (Llvm.string_of_llvalue x) + end + in + (module L : S) +;; diff --git a/EML/bin/dune b/EML/bin/dune new file mode 100644 index 00000000..dfd728f1 --- /dev/null +++ b/EML/bin/dune @@ -0,0 +1,21 @@ +(library + (name LL) + (public_name eml.LL) + (modules LL) + (wrapped false) + (libraries + llvm + llvm.analysis + ; + )) + +(executable + (public_name eml) + (name main) + (modules main) + (libraries LL) + (instrumentation + (backend bisect_ppx))) + +(cram + (deps ./main.exe runtime.c)) diff --git a/EML/bin/main.ml b/EML/bin/main.ml new file mode 100644 index 00000000..80a095bc --- /dev/null +++ b/EML/bin/main.ml @@ -0,0 +1,60 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2023-2025, Kakadu and contributors *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +let () = + let context = Llvm.global_context () in + let builder = Llvm.builder context in + (* let () = assert (Llvm_executionengine.initialize ()) in *) + let the_module = Llvm.create_module context "main" in + Llvm.set_target_triple "x86_64-pc-linux-gnu" the_module; + (* let _the_execution_engine = Llvm_executionengine.create the_module in *) + let module LL = (val LL.make context builder the_module) in + let i64_type = Llvm.i64_type context in + let void_type = Llvm.void_type context in + let ptr_type = Llvm.pointer_type context in + let prepare_main () = + let ft = + (* TODO main has special args *) + let args = Array.make 0 ptr_type in + Llvm.function_type i64_type args + in + let the_function = Llvm.declare_function "main" ft the_module in + (* Create a new basic block to start insertion into. *) + let bb = Llvm.append_block context "entry" the_function in + Llvm.position_at_end bb builder; + (* Add all arguments to the symbol table and create their allocas. *) + (* Finish off the function. *) + let (_ : Llvm.llvalue) = + LL.build_call + (Llvm.function_type void_type [| i64_type |]) + LL.(lookup_func_exn "print_int") + [ Llvm.const_int i64_type 70 ] + in + let (_ : Llvm.llvalue) = Llvm.build_ret (Llvm.const_int i64_type 0) builder in + (* Validate the generated code, checking for consistency. *) + (match Llvm_analysis.verify_function the_function with + | true -> () + | false -> + Stdlib.Format.printf + "invalid function generated\n%s\n" + (Llvm.string_of_llvalue the_function); + Llvm_analysis.assert_valid_function the_function); + (* Optimize the function. *) + (* let (_ : bool) = Llvm.PassManager.run_function the_function the_fpm in *) + (* Llvm.dump_value the_function; *) + () + in + let _ = + Llvm.declare_function + "print_int" + (Llvm.function_type (Llvm.void_type context) [| i64_type |]) + the_module + in + prepare_main (); + Llvm.print_module "out.ll" the_module +;; diff --git a/EML/bin/run.t b/EML/bin/run.t new file mode 100644 index 00000000..c5569b68 --- /dev/null +++ b/EML/bin/run.t @@ -0,0 +1,20 @@ + $ clang-18 -c runtime.c -o runtime.o + $ ./main.exe + $ ls + main.exe + out.ll + runtime.c + runtime.o + $ cat out.ll | grep -E 'source_filename|target datalayout|ModuleID' --invert-match + target triple = "x86_64-pc-linux-gnu" + + declare void @print_int(i64) + + define i64 @main() { + entry: + call void @print_int(i64 70) + ret i64 0 + } + $ clang-18 out.ll runtime.o -o demo1.exe + $ echo "Press $(./demo1.exe) to pay respect" + Press F to pay respect diff --git a/EML/bin/runtime.c b/EML/bin/runtime.c new file mode 100644 index 00000000..9c00aa03 --- /dev/null +++ b/EML/bin/runtime.c @@ -0,0 +1,10 @@ +/* Copyright 2023-2024, Kakadu and contributors */ +/* SPDX-License-Identifier: LGPL-3.0-or-later */ + +#include +#include + +void print_int(int64_t n) { + putchar(n); + fflush(stdout); +} \ No newline at end of file diff --git a/EML/demo.opam.template b/EML/demo.opam.template new file mode 100644 index 00000000..f4e537bf --- /dev/null +++ b/EML/demo.opam.template @@ -0,0 +1,7 @@ +depexts: [ + [ "llvm-18-dev" "clang-18" "gcc-riscv64-linux-gnu" "g++-riscv64-linux-gnu" "qemu-user"] {os-distribution = "ubuntu"} +] +pin-depends: [ + ["ppx_deriving_qcheck.0.6" "git+https://github.com/c-cube/qcheck.git#42429bf06ba12373cad02b1404f50d0ad6238af5"] +] +# Don't edit '*.opam' file manually. Use 'dune b @install' diff --git a/EML/dune-project b/EML/dune-project new file mode 100644 index 00000000..db563c41 --- /dev/null +++ b/EML/dune-project @@ -0,0 +1,33 @@ +(lang dune 3.8) + +(name eml) + +(generate_opam_files true) + +(source + (github Kakadu/comp24)) + +(authors "Victoria Ostrovskaya & Danil Usoltsev") + +(maintainers "Victoria Ostrovskaya & Danil Usoltsev") + +(license "LGPL-3.0-or-later WITH OCaml-LGPL-linking-exception") + +(package + (name eml) + (synopsis "A short LLVM demo") + (depends + ocaml + (dune + (= "3.19.1")) + (angstrom + (= "0.16.0")) + qcheck + bisect_ppx + (llvm + (= "18-shared")) + (qcheck :with-tests) + (ppx_deriving_qcheck + (= "0.6")))) + +; See the complete stanza docs at https://dune.readthedocs.io/en/stable/dune-files.html#dune-project diff --git a/EML/eml.opam b/EML/eml.opam new file mode 100644 index 00000000..c9479309 --- /dev/null +++ b/EML/eml.opam @@ -0,0 +1,34 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: "A short LLVM demo" +maintainer: ["Victoria Ostrovskaya & Danil Usoltsev"] +authors: ["Victoria Ostrovskaya & Danil Usoltsev"] +license: "LGPL-3.0-or-later WITH OCaml-LGPL-linking-exception" +homepage: "https://github.com/Kakadu/comp24" +bug-reports: "https://github.com/Kakadu/comp24/issues" +depends: [ + "ocaml" + "dune" {>= "3.8" & = "3.19.1"} + "angstrom" {= "0.16.0"} + "qcheck" + "bisect_ppx" + "llvm" {= "18-shared"} + "qcheck" {with-tests} + "ppx_deriving_qcheck" {= "0.6"} + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/Kakadu/comp24.git" diff --git a/EML/many_tests/.ocamlformat b/EML/many_tests/.ocamlformat new file mode 100644 index 00000000..e3346c16 --- /dev/null +++ b/EML/many_tests/.ocamlformat @@ -0,0 +1 @@ +disable=true diff --git a/EML/many_tests/typed/001fac.ml b/EML/many_tests/typed/001fac.ml new file mode 120000 index 00000000..219cccf7 --- /dev/null +++ b/EML/many_tests/typed/001fac.ml @@ -0,0 +1 @@ +../../../manytests/typed/001fac.ml \ No newline at end of file diff --git a/EML/many_tests/typed/dune b/EML/many_tests/typed/dune new file mode 100644 index 00000000..f66331bb --- /dev/null +++ b/EML/many_tests/typed/dune @@ -0,0 +1,2 @@ +(cram + (deps ./001fac.ml)) diff --git a/EML/many_tests/typed/typed.t b/EML/many_tests/typed/typed.t new file mode 100644 index 00000000..ab349b10 --- /dev/null +++ b/EML/many_tests/typed/typed.t @@ -0,0 +1,2 @@ + $ wc 001fac.ml + 6 30 105 001fac.ml diff --git a/EML/out.ll b/EML/out.ll new file mode 100644 index 00000000..715a3d8d --- /dev/null +++ b/EML/out.ll @@ -0,0 +1,11 @@ +; ModuleID = 'main' +source_filename = "main" +target triple = "x86_64-pc-linux-gnu" + +declare void @print_int(i64) + +define i64 @main() { +entry: + call void @print_int(i64 70) + ret i64 0 +} diff --git a/EML/test_qc/dune b/EML/test_qc/dune new file mode 100644 index 00000000..9ca61266 --- /dev/null +++ b/EML/test_qc/dune @@ -0,0 +1,13 @@ +(executable + (name test_qc) + (modules test_qc) + (libraries qcheck angstrom) + (preprocess + (pps ppx_deriving_qcheck ppx_deriving.show))) + +(rule + (alias runtest) + (deps + (:< test_qc.exe)) + (action + (run %{<}))) diff --git a/EML/test_qc/test_qc.ml b/EML/test_qc/test_qc.ml new file mode 100644 index 00000000..fa125b7f --- /dev/null +++ b/EML/test_qc/test_qc.ml @@ -0,0 +1,98 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2023-2024, Kakadu and contributors *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +(* run this test via `dune test --force` *) + +module AST = struct + type t = + | Const of (int[@gen QCheck.Gen.return 1]) + | Add of t * t + [@@deriving qcheck, show { with_path = false }] +end + +module PP = struct + let rec pp ppf = function + | AST.Const n -> Format.fprintf ppf "%d" n + (* | Add (l, r) -> Format.fprintf ppf "%a+%a" pp l pp r *) + | Add (l, r) -> Format.fprintf ppf "(%a+%a)" pp l pp r + ;; +end + +module Parser = struct + open Angstrom + + let prio expr table = + let len = Array.length table in + let rec helper level = + if level >= len + then expr + else ( + let xs = table.(level) in + return (List.fold_left (fun acc (op, r) -> op acc r)) + <*> helper (level + 1) + <*> many + (choice + (List.map + (fun (op, f) -> op *> helper (level + 1) >>= fun r -> return (f, r)) + xs))) + in + helper 0 + ;; + + let expr_small = + let code0 = Char.code '0' in + Angstrom.satisfy (function + | '0' .. '9' -> true + | _ -> false) + >>| fun c -> AST.Const (Char.code c - code0) + ;; + + let expr = + fix (fun self -> + let add a b = AST.Add (a, b) in + prio (expr_small <|> (char '(' *> self <* char ')')) [| [ char '+', add ] |]) + ;; +end + +let rec shrink_expr = + let open QCheck.Iter in + (* fun _ -> empty *) + function + | AST.Const _ -> empty + | Add (l, r) -> + of_list [ l; r ] + <+> (shrink_expr l >>= fun l -> return (AST.Add (l, r))) + <+> (shrink_expr r >>= fun r -> return (AST.Add (l, r))) +;; + +let arbitrary_expr = + (* let open QCheck.Iter in *) + QCheck.make AST.gen ~print:(Format.asprintf "%a" PP.pp) ~shrink:shrink_expr +;; + +let _ = + QCheck_runner.run_tests + [ QCheck.( + Test.make arbitrary_expr (fun l -> + match + Angstrom.parse_string + ~consume:Angstrom.Consume.All + Parser.expr + (Format.asprintf "%a" PP.pp l) + with + | Result.Ok after when after = l -> true + | Result.Ok after -> + Format.printf "before : %a\n%!" AST.pp l; + (* Format.printf " : `%a`\n%!" PP.pp l; *) + Format.printf "`%a`\n%!" AST.pp after; + false + | Result.Error _ -> + (* Format.printf "failed on : %a\n%!" Lam.pp l; *) + false)) + ] +;; diff --git a/EML/test_qc/test_qc.mli b/EML/test_qc/test_qc.mli new file mode 100644 index 00000000..a65c69d1 --- /dev/null +++ b/EML/test_qc/test_qc.mli @@ -0,0 +1,7 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2023-2024, Kakadu and contributors *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] From 3168f60b0d75b33ca188bb263095db4e4d6ea7fe Mon Sep 17 00:00:00 2001 From: Danil Usoltsev <117384050+Sibiri4ok@users.noreply.github.com> Date: Wed, 1 Oct 2025 18:19:15 +0300 Subject: [PATCH 02/74] add names to pairing --- pairing.md | 1 + 1 file changed, 1 insertion(+) diff --git a/pairing.md b/pairing.md index 85677108..07f18ddb 100644 --- a/pairing.md +++ b/pairing.md @@ -5,4 +5,5 @@ | akaML | [Заикин](https://github.com/Friend-zva) | [Родионов](https://github.com/RodionovMaxim05) | ZOVML | Орешин | Дмитриевцев | PudgeWithMoML | [Насретдинов](https://github.com/Ycyken) | [Комбаев](https://github.com/homka122/) +| AML | Усольцев | Островская | demo | [@Kakadu](http://github.com/Kakadu) | BOSS | From 7fa7459aec0490cab0245fd15d1bf5f2b7d9658b Mon Sep 17 00:00:00 2001 From: Danil Usoltsev <117384050+Sibiri4ok@users.noreply.github.com> Date: Wed, 1 Oct 2025 18:20:03 +0300 Subject: [PATCH 03/74] update name directory --- pairing.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pairing.md b/pairing.md index 07f18ddb..f5a1d2e4 100644 --- a/pairing.md +++ b/pairing.md @@ -5,5 +5,5 @@ | akaML | [Заикин](https://github.com/Friend-zva) | [Родионов](https://github.com/RodionovMaxim05) | ZOVML | Орешин | Дмитриевцев | PudgeWithMoML | [Насретдинов](https://github.com/Ycyken) | [Комбаев](https://github.com/homka122/) -| AML | Усольцев | Островская +| EML | Усольцев | Островская | demo | [@Kakadu](http://github.com/Kakadu) | BOSS | From f1643274595127852dce9627abdbc7836674030a Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Mon, 6 Oct 2025 16:08:26 +0300 Subject: [PATCH 04/74] Add: ast, parser & inferencer --- EML/dune-project | 1 + EML/eml.opam | 1 + EML/lib/dune | 11 + EML/lib/frontend/ast.ml | 107 ++++++ EML/lib/frontend/inferencer.ml | 657 +++++++++++++++++++++++++++++++++ EML/lib/frontend/parser.ml | 313 ++++++++++++++++ EML/test_qc/dune | 2 + EML/tests/dune | 9 + EML/tests/inferencer_tests.ml | 52 +++ EML/tests/parser_tests.ml | 51 +++ 10 files changed, 1204 insertions(+) create mode 100644 EML/lib/dune create mode 100644 EML/lib/frontend/ast.ml create mode 100644 EML/lib/frontend/inferencer.ml create mode 100644 EML/lib/frontend/parser.ml create mode 100644 EML/tests/dune create mode 100644 EML/tests/inferencer_tests.ml create mode 100644 EML/tests/parser_tests.ml diff --git a/EML/dune-project b/EML/dune-project index db563c41..feede8b2 100644 --- a/EML/dune-project +++ b/EML/dune-project @@ -24,6 +24,7 @@ (= "0.16.0")) qcheck bisect_ppx + ppx_expect (llvm (= "18-shared")) (qcheck :with-tests) diff --git a/EML/eml.opam b/EML/eml.opam index c9479309..f98afd94 100644 --- a/EML/eml.opam +++ b/EML/eml.opam @@ -12,6 +12,7 @@ depends: [ "angstrom" {= "0.16.0"} "qcheck" "bisect_ppx" + "ppx_expect" "llvm" {= "18-shared"} "qcheck" {with-tests} "ppx_deriving_qcheck" {= "0.6"} diff --git a/EML/lib/dune b/EML/lib/dune new file mode 100644 index 00000000..8aa4d2b1 --- /dev/null +++ b/EML/lib/dune @@ -0,0 +1,11 @@ +(include_subdirs qualified) + +(library + (name EML_lib) + (public_name eml.lib) + (modules :standard) + (libraries base angstrom) + (preprocess + (pps ppx_deriving.show)) + (instrumentation + (backend bisect_ppx))) diff --git a/EML/lib/frontend/ast.ml b/EML/lib/frontend/ast.ml new file mode 100644 index 00000000..7d03e1b4 --- /dev/null +++ b/EML/lib/frontend/ast.ml @@ -0,0 +1,107 @@ +(** Copyright 2024-2025, Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Format + +type ident = string [@@deriving show { with_path = false }] + +type is_rec = + | NonRec + | Rec +[@@deriving show { with_path = false }] + +type bin_oper = + | Plus (* [+] *) + | Minus (* [-] *) + | Multiply (* [*] *) + | Division (* [/] *) + | And (* [&&] *) + | Or (* [||] *) + | GretestEqual (* [>=] *) + | LowestEqual (* [<=] *) + | GreaterThan (* [>] *) + | LowerThan (* [<] *) + | Equal (* [=] *) + | NotEqual (* [<>] *) +[@@deriving show { with_path = false }] + +type unar_oper = + | Negative (* [-x] *) + | Not (* [not x]*) +[@@deriving show { with_path = false }] + +type const = + | ConstInt of int (* Integer constant: Example - [21] *) + | ConstBool of bool (* Boolean constant: Example - [true] or [false] *) + | ConstString of string (* String constant: Example - "I like OCaml!" *) +[@@deriving show { with_path = false }] + +type binder = int [@@deriving show { with_path = false }] + +type ty = + | TyVar of binder + | TyPrim of string + | TyArrow of ty * ty + | TyList of ty + | TyTuple of ty list + | TyOption of ty +[@@deriving show { with_path = false }] + +type pattern = + | PatVariable of ident (* [x] *) + | PatConst of const (* [21] or [true] or [false] *) + | PatTuple of pattern * pattern * pattern list (* (x1; x2 ... xn) *) + | PatAny + | PatType of pattern * ty + | PatUnit + | PatList of pattern list + | PatOption of pattern option +[@@deriving show { with_path = false }] + +type expr = + | ExpIdent of ident (* ExpIdent "x" *) + | ExpConst of const (* ExpConst (ConstInt 666) *) + | ExpBranch of expr * expr * expr option + | ExpBinOper of bin_oper * expr * expr (* ExpBinOper(Plus, 1, 2) *) + | ExpUnarOper of unar_oper * expr (* ExpUnarOper(not, x)*) + | ExpTuple of expr * expr * expr list (* ExpTuple[x1; x2 .. xn] *) + | ExpList of expr list (* ExpList[x1; x2 .. xn] *) + | ExpLambda of pattern list * expr (* ExpLambda([x;y;z], x+y+z)*) + | ExpTypeAnnotation of expr * ty + | ExpLet of is_rec * bind * bind list * expr + | ExpFunction of expr * expr (* ExpFunction(x, y)*) + | ExpOption of expr option +[@@deriving show { with_path = false }] + +and bind = pattern * expr [@@deriving show { with_path = false }] + +type structure = + | SEval of expr + | SValue of is_rec * bind * bind list +[@@deriving show { with_path = false }] + +type program = structure list [@@deriving show { with_path = false }] + +let rec pp_ty fmt = function + | TyPrim x -> fprintf fmt "%s" x + | TyVar x -> fprintf fmt "'%d" x + | TyArrow (l, r) -> + (match l, r with + | TyArrow _, _ -> fprintf fmt "(%a) -> %a" pp_ty l pp_ty r + | _, _ -> fprintf fmt "%a -> %a" pp_ty l pp_ty r) + | TyTuple elems -> + fprintf + fmt + "(%a)" + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt " * ") pp_ty) + elems + | TyList ty -> + (match ty with + | TyArrow _ | TyTuple _ -> fprintf fmt "(%a) list" pp_ty ty + | _ -> fprintf fmt "%a list" pp_ty ty) + | TyOption ty -> + (match ty with + | TyArrow _ | TyTuple _ -> fprintf fmt "(%a) option" pp_ty ty + | _ -> fprintf fmt "%a option" pp_ty ty) +;; diff --git a/EML/lib/frontend/inferencer.ml b/EML/lib/frontend/inferencer.ml new file mode 100644 index 00000000..9b1b3352 --- /dev/null +++ b/EML/lib/frontend/inferencer.ml @@ -0,0 +1,657 @@ +(** Copyright 2024-2025, Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +(* Template: https://gitlab.com/Kakadu/fp2020course-materials/-/tree/master/code/miniml?ref_type=heads*) + +open Base +open Ast +open Stdlib.Format + +type error = + | OccursCheck of int * ty + | NoVariable of string + | UnificationFailed of ty * ty + | SeveralBounds of string + | LHS of string + | RHS of string + | UnexpectedFunction of ty + +let pp_error fmt = function + | OccursCheck (id, ty) -> + fprintf fmt "Occurs check failed. Type variable '%d occurs inside %a." id pp_ty ty + | NoVariable name -> fprintf fmt "Unbound variable '%s'." name + | UnificationFailed (ty1, ty2) -> + fprintf fmt "Failed to unify types: %a and %a." pp_ty ty1 pp_ty ty2 + | SeveralBounds name -> fprintf fmt "Multiple bounds for variable '%s'." name + | LHS msg -> fprintf fmt "Left-hand side error: %s." msg + | RHS msg -> fprintf fmt "Right-hand side error: %s." msg + | UnexpectedFunction ty1 -> fprintf fmt "UnexpectedFunction error: %a" pp_ty ty1 +;; + +module IntSet = struct + include Stdlib.Set.Make (Int) +end + +module ResultMonad : sig + type 'a t + + val return : 'a -> 'a t + val fail : error -> 'a t + + include Monad.Infix with type 'a t := 'a t + + module Syntax : sig + val ( let* ) : 'a t -> ('a -> 'b t) -> 'b t + end + + val fresh : int t + val run : 'a t -> ('a, error) Result.t + + module RMap : sig + val fold : ('a, 'b, 'c) Map.t -> init:'d t -> f:('a -> 'b -> 'd -> 'd t) -> 'd t + end +end = struct + type 'a t = int -> int * ('a, error) Result.t + + let ( >>= ) m f state = + let last, r = m state in + match r with + | Result.Error x -> last, Result.fail x + | Result.Ok a -> f a last + ;; + + let return x last = last, Result.return x + let fail e st = st, Result.fail e + + let ( >>| ) m f st = + match m st with + | st, Ok x -> st, Result.return (f x) + | st, Result.Error e -> st, Result.fail e + ;; + + module Syntax = struct + let ( let* ) = ( >>= ) + end + + module RMap = struct + let fold map ~init ~f = + Map.fold map ~init ~f:(fun ~key ~data acc -> + let open Syntax in + let* acc = acc in + f key data acc) + ;; + end + + let fresh : int t = fun last -> last + 1, Result.return last + let run monad = snd (monad 0) +end + +module Type = struct + let rec occurs_in var = function + | TyVar b -> b = var + | TyArrow (left, right) -> occurs_in var left || occurs_in var right + | TyTuple types -> List.exists types ~f:(occurs_in var) + | TyList ty -> occurs_in var ty + | TyOption ty -> occurs_in var ty + | TyPrim _ -> false + ;; + + let free_vars = + let rec helper acc = function + | TyVar b -> IntSet.add b acc + | TyArrow (left, right) -> helper (helper acc left) right + | TyTuple types -> List.fold_left types ~init:acc ~f:helper + | TyList ty -> helper acc ty + | TyOption ty -> helper acc ty + | TyPrim _ -> acc + in + helper IntSet.empty + ;; +end + +module Substitution : sig + type t + + val empty : t + val singleton : int -> ty -> t ResultMonad.t + val remove : t -> int -> t + val apply : t -> ty -> ty + val unify : ty -> ty -> t ResultMonad.t + val compose : t -> t -> t ResultMonad.t + val compose_all : t list -> t ResultMonad.t +end = struct + open ResultMonad + open ResultMonad.Syntax + + type t = (int, ty, Int.comparator_witness) Map.t + + let empty = Map.empty (module Int) + + let mapping key value = + if Type.occurs_in key value + then fail (OccursCheck (key, value)) + else return (key, value) + ;; + + let singleton key value = + let* key, value = mapping key value in + return (Map.singleton (module Int) key value) + ;; + + let find = Map.find + let remove = Map.remove + + let apply subst = + let rec helper = function + | TyPrim x -> TyPrim x + | TyVar b as ty -> + (match find subst b with + | None -> ty + | Some x -> x) + | TyArrow (left, right) -> TyArrow (helper left, helper right) + | TyList ty -> TyList (helper ty) + | TyOption ty -> TyOption (helper ty) + | TyTuple types -> TyTuple (List.map ~f:helper types) + in + helper + ;; + + let rec unify left right = + match left, right with + | TyPrim l, TyPrim r when String.equal l r -> return empty + | TyPrim _, TyPrim _ -> fail (UnificationFailed (left, right)) + | TyVar l, TyVar r when l = r -> return empty + | TyVar b, ty | ty, TyVar b -> singleton b ty + | TyArrow (left1, right1), TyArrow (left2, right2) -> + let* subst1 = unify left1 left2 in + let* subst2 = unify (apply subst1 right1) (apply subst1 right2) in + compose subst1 subst2 + | TyTuple types1, TyTuple types2 -> + if List.length types1 <> List.length types2 + then fail (UnificationFailed (left, right)) + else ( + let rec unify_tuples subst types1 types2 = + match types1, types2 with + | [], [] -> return subst + | t1 :: rest1, t2 :: rest2 -> + let* subst' = unify (apply subst t1) (apply subst t2) in + let* composed_subst = compose subst subst' in + unify_tuples composed_subst rest1 rest2 + | _, _ -> fail (UnificationFailed (left, right)) + in + unify_tuples empty types1 types2) + | TyList ty1, TyList ty2 -> unify ty1 ty2 + | TyOption ty1, TyOption ty2 -> unify ty1 ty2 + | _ -> fail (UnificationFailed (left, right)) + + and extend key value subst = + match find subst key with + | None -> + let value = apply subst value in + let* subst2 = singleton key value in + RMap.fold subst ~init:(return subst2) ~f:(fun key value acc -> + let value = apply subst2 value in + let* key, value = mapping key value in + return (Map.update acc key ~f:(fun _ -> value))) + | Some value2 -> + let* subst2 = unify value value2 in + compose subst subst2 + + and compose subst1 subst2 = RMap.fold subst2 ~init:(return subst1) ~f:extend + + let compose_all = + List.fold_left ~init:(return empty) ~f:(fun acc subst -> + let* acc = acc in + compose acc subst) + ;; +end + +module Scheme = struct + type t = S of IntSet.t * ty + + let free_vars (S (vars, ty)) = IntSet.diff (Type.free_vars ty) vars + + let apply subst (S (vars, ty)) = + let subst2 = + IntSet.fold (fun key subst -> Substitution.remove subst key) vars subst + in + S (vars, Substitution.apply subst2 ty) + ;; +end + +module TypeEnv = struct + type t = (ident, Scheme.t, String.comparator_witness) Map.t + + let extend env key value = Map.update env key ~f:(fun _ -> value) + + let free_vars : t -> IntSet.t = + Map.fold ~init:IntSet.empty ~f:(fun ~key:_ ~data:scheme acc -> + IntSet.union acc (Scheme.free_vars scheme)) + ;; + + let apply subst env = Map.map env ~f:(Scheme.apply subst) + let find = Map.find + + let initial_env = + let open Base.Map in + empty (module String) + |> set + ~key:"print_int" + ~data:(Scheme.S (IntSet.empty, TyArrow (TyPrim "int", TyPrim "unit"))) + |> set + ~key:"print_endline" + ~data:(Scheme.S (IntSet.empty, TyArrow (TyPrim "string", TyPrim "unit"))) + |> set + ~key:"print_bool" + ~data:(Scheme.S (IntSet.empty, TyArrow (TyPrim "bool", TyPrim "unit"))) + ;; +end + +open ResultMonad +open ResultMonad.Syntax + +let fresh_var = fresh >>| fun n -> TyVar n + +let instantiate : Scheme.t -> ty ResultMonad.t = + fun (S (vars, ty)) -> + IntSet.fold + (fun var typ -> + let* typ = typ in + let* fresh_ty = fresh_var in + let* subst = Substitution.singleton var fresh_ty in + return (Substitution.apply subst typ)) + vars + (return ty) +;; + +let generalize env ty = + let free = IntSet.diff (Type.free_vars ty) (TypeEnv.free_vars env) in + Scheme.S (free, ty) +;; + +let infer_const = function + | ConstInt _ -> TyPrim "int" + | ConstBool _ -> TyPrim "bool" + | ConstString _ -> TyPrim "string" +;; + +let rec infer_pattern env = function + | PatAny -> + let* fresh = fresh_var in + return (Substitution.empty, fresh, env) + | PatConst const -> return (Substitution.empty, infer_const const, env) + | PatVariable var -> + let* fresh = fresh_var in + let env = TypeEnv.extend env var (Scheme.S (IntSet.empty, fresh)) in + return (Substitution.empty, fresh, env) + | PatTuple (first_pat, second_pat, rest_pats) -> + let* sub_first, type_first, env_first = infer_pattern env first_pat in + let updated_env_second = TypeEnv.apply sub_first env_first in + let* sub_second, type_second, env_second = + infer_pattern updated_env_second second_pat + in + let process_remaining_patterns acc pat = + let open ResultMonad.Syntax in + let* current_sub, types, current_env = acc in + let* sub_new, type_new, env_new = infer_pattern current_env pat in + let* combined_sub = Substitution.compose current_sub sub_new in + return (combined_sub, type_new :: types, env_new) + in + let initial_state = return (sub_second, [ type_second; type_first ], env_second) in + let* final_sub, collected_types, final_env = + List.fold_left rest_pats ~init:initial_state ~f:process_remaining_patterns + in + let tuple_type = TyTuple (List.rev collected_types) in + return (final_sub, tuple_type, final_env) + | PatList pats -> + let* fresh_el_type = fresh_var in + let* final_sub, final_env = + List.fold_left + pats + ~init:(return (Substitution.empty, env)) + ~f:(fun acc pat -> + let open ResultMonad.Syntax in + let* sub_acc, env_acc = acc in + let* sub_cur, el_type, env_cur = infer_pattern env_acc pat in + let* unified_sub = Substitution.compose sub_acc sub_cur in + let* final_sub = + Substitution.unify (Substitution.apply sub_cur fresh_el_type) el_type + in + let* combined_sub = Substitution.compose unified_sub final_sub in + return (combined_sub, TypeEnv.apply final_sub env_cur)) + in + return (final_sub, TyList (Substitution.apply final_sub fresh_el_type), final_env) + | PatOption opt -> + let* sub, typ, env = + match opt with + | None -> + let* fresh = fresh_var in + return (Substitution.empty, fresh, env) + | Some p -> infer_pattern env p + in + return (sub, TyOption typ, env) + | PatType (pat, annotated_ty) -> + let* subst, inferred_ty, env = infer_pattern env pat in + let* unified_subst = Substitution.unify inferred_ty annotated_ty in + let* total_subst = Substitution.compose subst unified_subst in + return + ( total_subst + , Substitution.apply total_subst annotated_ty + , TypeEnv.apply total_subst env ) + | PatUnit -> return (Substitution.empty, TyPrim "unit", env) +;; + +let infer_binop_type = function + | Equal | NotEqual | GreaterThan | GretestEqual | LowerThan | LowestEqual -> + fresh_var >>| fun fresh_ty -> fresh_ty, fresh_ty, TyPrim "bool" + | Plus | Minus | Multiply | Division -> return (TyPrim "int", TyPrim "int", TyPrim "int") + | And | Or -> return (TyPrim "bool", TyPrim "bool", TyPrim "bool") +;; + +let rec infer_expr env = function + | ExpConst const -> return (Substitution.empty, infer_const const) + | ExpIdent var -> + (match TypeEnv.find env var with + | Some scheme -> + let* ty = instantiate scheme in + return (Substitution.empty, ty) + | None -> fail (NoVariable var)) + | ExpUnarOper (operation, expr) -> + let* subst, ty = infer_expr env expr in + let* operation_type = + match operation with + | Negative -> return (TyArrow (TyPrim "int", TyPrim "int")) + | Not -> return (TyArrow (TyPrim "bool", TyPrim "bool")) + in + let* subst2 = + match operation_type with + | TyArrow (arg, _) -> Substitution.unify ty arg + | ty -> fail (UnexpectedFunction ty) + in + let* subst2 = Substitution.compose_all [ subst2; subst ] in + (match operation_type with + | TyArrow (_, x) -> return (subst2, Substitution.apply subst2 x) + | ty -> fail (UnexpectedFunction ty)) + | ExpBinOper (op, expr1, expr2) -> + let* subst1, ty = infer_expr env expr1 in + let* subst2, ty' = infer_expr (TypeEnv.apply subst1 env) expr2 in + let* ty1_op, ty2_op, ty_res = infer_binop_type op in + let* subst3 = Substitution.unify (Substitution.apply subst2 ty) ty1_op in + let* subst4 = Substitution.unify (Substitution.apply subst3 ty') ty2_op in + let* subst = Substitution.compose_all [ subst1; subst2; subst3; subst4 ] in + return (subst, Substitution.apply subst ty_res) + | ExpBranch (cond, then_expr, else_expr) -> + let* subst1, ty1 = infer_expr env cond in + let* subst2, ty2 = infer_expr (TypeEnv.apply subst1 env) then_expr in + let* ty3 = + match else_expr with + | Some el -> + let* _, ty3 = infer_expr (TypeEnv.apply subst2 env) el in + return ty3 + | None -> return (TyPrim "unit") + in + let* subst4 = Substitution.unify ty1 (TyPrim "bool") in + let* subst5 = Substitution.unify ty2 ty3 in + let* total_subst = + match else_expr with + | Some el -> + let* subst3, _ = infer_expr (TypeEnv.apply subst2 env) el in + Substitution.compose_all [ subst5; subst4; subst3; subst2; subst1 ] + | None -> Substitution.compose_all [ subst5; subst4; subst2; subst1 ] + in + return (total_subst, Substitution.apply total_subst ty2) + | ExpTuple (expr1, expr2, exprs) -> + let* subst1, ty1 = infer_expr env expr1 in + let* subst2, ty2 = infer_expr (TypeEnv.apply subst1 env) expr2 in + let infer_tuple_elements env es = + let rec aux env = function + | [] -> return ([], []) + | e :: es' -> + let* s, t = infer_expr env e in + let* s', ts = aux (TypeEnv.apply s env) es' in + return (s' @ [ s ], t :: ts) + in + aux env es + in + let* subst3, tys = infer_tuple_elements (TypeEnv.apply subst2 env) exprs in + let* subst = Substitution.compose_all (subst3 @ [ subst2; subst1 ]) in + return (subst, TyTuple (ty1 :: ty2 :: tys)) + | ExpList exprs -> + (match exprs with + | [] -> + let* fresh = fresh_var in + return (Substitution.empty, TyList fresh) + | _ :: _ -> + let infer_list_elements env es = + let rec aux env = function + | [] -> return ([], []) + | e :: es' -> + let* s, t = infer_expr env e in + let* s', ts = aux (TypeEnv.apply s env) es' in + return (s' @ [ s ], t :: ts) + in + aux env es + in + let* subst, tys = infer_list_elements env exprs in + let* total_subst = Substitution.compose_all subst in + (match tys with + | [] -> fail (SeveralBounds "inferred empty list type") + | ty :: _ -> return (total_subst, TyList ty))) + | ExpLet (NonRec, (PatVariable x, expr1), _, expr2) -> + let* subst1, ty1 = infer_expr env expr1 in + let env2 = TypeEnv.apply subst1 env in + let ty_gen = generalize env2 ty1 in + let env3 = TypeEnv.extend env x ty_gen in + let* subst2, ty2 = infer_expr (TypeEnv.apply subst1 env3) expr2 in + let* total_subst = Substitution.compose subst1 subst2 in + return (total_subst, ty2) + | ExpLet (NonRec, (pattern, expr1), bindings, expr2) -> + let* subst1, ty1 = infer_expr env expr1 in + let* subst2, ty_pat, env1 = infer_pattern env pattern in + let* subst = Substitution.compose subst1 subst2 in + let* unified_subst = Substitution.unify (Substitution.apply subst ty_pat) ty1 in + let initial_env = TypeEnv.apply unified_subst env1 in + let* extended_env = + List.fold_left + ~f:(fun acc_env (pattern, expr) -> + let* acc_env = acc_env in + let* subst_bind, ty_bind = infer_expr acc_env expr in + let* subst_pattern, _, env_pattern = infer_pattern acc_env pattern in + let* combined_subst = Substitution.compose subst_bind subst_pattern in + let* final_subst = + Substitution.unify (Substitution.apply combined_subst ty_pat) ty_bind + in + let updated_env = + Map.fold + ~init:(TypeEnv.apply final_subst acc_env) + ~f:(fun ~key ~data acc_env -> TypeEnv.extend acc_env key data) + (TypeEnv.apply final_subst env_pattern) + in + return updated_env) + ~init:(return initial_env) + bindings + in + let* subst3, ty2 = infer_expr extended_env expr2 in + let* total_subst = Substitution.compose_all [ subst3; unified_subst; subst ] in + return (total_subst, ty2) + | ExpLet (Rec, (PatVariable x, expr1), [], expr2) -> + let* expr1 = + match expr1 with + | ExpLambda _ -> return expr1 + | _ -> fail (RHS "Right-hand side of let rec must be a lambda expression") + in + let* tv = fresh_var in + let env2 = TypeEnv.extend env x (S (IntSet.empty, tv)) in + let* subst1, ty1 = infer_expr env2 expr1 in + let* subst2 = Substitution.unify (Substitution.apply subst1 tv) ty1 in + let* subst_total = Substitution.compose subst1 subst2 in + let env3 = TypeEnv.apply subst_total env in + let env4 = TypeEnv.apply subst1 env3 in + let ty_gen = generalize env4 (Substitution.apply subst_total tv) in + let* subst3, ty2 = infer_expr (TypeEnv.extend env4 x ty_gen) expr2 in + let* subst_total = Substitution.compose subst_total subst3 in + return (subst_total, ty2) + | ExpLet (Rec, value_binding, value_bindings, expr2) -> + let* env_ext, subst_acc = + List.fold_left + ~f:(fun acc_env (pat, expr) -> + let* expr = + match expr with + | ExpLambda _ -> return expr + | _ -> fail (RHS "Right-hand side of let rec must be a lambda expression") + in + let* pat = + match pat with + | PatVariable _ -> return pat + | _ -> + fail (LHS "Only variables are allowed on the left-hand side of let rec") + in + let* env_acc, _ = acc_env in + let* subst_expr, ty_expr = infer_expr env_acc expr in + let* subst_pattern, ty_pat, env_pat = infer_pattern env_acc pat in + let* subst = Substitution.compose subst_expr subst_pattern in + let* unified_subst = Substitution.unify ty_expr ty_pat in + let* combined_subst = Substitution.compose subst unified_subst in + let extended_env = TypeEnv.apply combined_subst env_pat in + return (extended_env, combined_subst)) + ~init:(return (env, Substitution.empty)) + (value_binding :: value_bindings) + in + let* subst2, ty2 = infer_expr env_ext expr2 in + let* total_subst = Substitution.compose subst_acc subst2 in + return (total_subst, ty2) + | ExpLambda (patterns, body) -> + let* env, pat_types = + List.fold_left + patterns + ~init:(return (env, [])) + ~f:(fun acc pat -> + let* env, pat_types = acc in + let* _, typ, env = infer_pattern env pat in + return (env, typ :: pat_types)) + in + let* subst_body, ty_body = infer_expr env body in + let arrow_type = + List.fold_right + ~f:(fun pat_type acc -> TyArrow (Substitution.apply subst_body pat_type, acc)) + ~init:ty_body + (List.rev pat_types) + in + return (subst_body, arrow_type) + | ExpFunction (param, body) -> + let* subst1, ty1 = infer_expr env param in + let* subst2, ty2 = infer_expr (TypeEnv.apply subst1 env) body in + let* tv = fresh_var in + let* subst3 = + Substitution.unify (Substitution.apply subst2 ty1) (TyArrow (ty2, tv)) + in + let* total_subst = Substitution.compose_all [ subst3; subst2; subst1 ] in + return (total_subst, Substitution.apply total_subst tv) + | ExpOption opt_expr -> + (match opt_expr with + | Some expr -> + let* subst, ty = infer_expr env expr in + return (subst, TyOption ty) + | None -> + let* tv = fresh_var in + return (Substitution.empty, TyOption tv)) + | ExpTypeAnnotation (expr, t) -> + let* subst1, ty1 = infer_expr env expr in + let* subst2 = Substitution.unify ty1 (Substitution.apply subst1 t) in + let* total_subst = Substitution.compose subst1 subst2 in + return (total_subst, Substitution.apply subst2 ty1) +;; + +let infer_structure_item env = function + | SEval expr -> + let* subst, _ = infer_expr env expr in + let updated_env = TypeEnv.apply subst env in + return (subst, updated_env) + | SValue (Rec, (PatVariable x, expr), []) -> + let* expr = + match expr with + | ExpLambda _ -> return expr + | _ -> fail (RHS "Right-hand side of let rec must be a lambda expression") + in + let* tv = fresh_var in + let env = TypeEnv.extend env x (S (IntSet.empty, tv)) in + let* subst, ty = infer_expr env expr in + let* subst2 = Substitution.unify (Substitution.apply subst tv) ty in + let* composed_subst = Substitution.compose subst subst2 in + let env2 = TypeEnv.apply composed_subst env in + let generalized_ty = generalize env2 (Substitution.apply composed_subst ty) in + let env = TypeEnv.extend env2 x generalized_ty in + return (composed_subst, env) + | SValue (Rec, value_binding, value_bindings) -> + let all_bindings = value_binding :: value_bindings in + let* env_with_placeholders = + List.fold_left + ~f:(fun acc_env (pat, _) -> + let* pat = + match pat with + | PatVariable _ -> return pat + | _ -> + fail (LHS "Only variables are allowed on the left-hand side of let rec") + in + let* env_acc = acc_env in + let* subst_pat, _, env_pat = infer_pattern env_acc pat in + let extended_env = TypeEnv.apply subst_pat env_pat in + return extended_env) + ~init:(return env) + all_bindings + in + let* env_ext, subst_acc = + List.fold_left + ~f:(fun acc_env (ty_pattern, expr) -> + let* expr = + match expr with + | ExpLambda _ -> return expr + | _ -> fail (RHS "Right-hand side of let rec must be a lambda expression") + in + let* env_acc, _ = acc_env in + let* subst_expr, ty_expr = infer_expr env_acc expr in + let* subst_pat, ty_pat, env_pat = infer_pattern env_acc ty_pattern in + let* subst = Substitution.compose subst_expr subst_pat in + let* unified_subst = Substitution.unify ty_expr ty_pat in + let* combined_subst = Substitution.compose subst unified_subst in + let extended_env = TypeEnv.apply combined_subst env_pat in + return (extended_env, combined_subst)) + ~init:(return (env_with_placeholders, Substitution.empty)) + all_bindings + in + return (subst_acc, env_ext) + | SValue (NonRec, (PatVariable x, expr), _) -> + let* subst, ty = infer_expr env expr in + let env2 = TypeEnv.apply subst env in + let generalized_ty = generalize env2 ty in + let env = TypeEnv.extend (TypeEnv.apply subst env) x generalized_ty in + return (subst, env) + | SValue (NonRec, (pattern, expr), _) -> + let* subst_expr, ty = infer_expr env expr in + let* subst_pat, ty_pat, env_pat = infer_pattern env pattern in + let* combined_subst = Substitution.compose subst_expr subst_pat in + let* unified_subst = + Substitution.unify (Substitution.apply combined_subst ty_pat) ty + in + let updated_env = TypeEnv.apply unified_subst env_pat in + let* final_subst = Substitution.compose unified_subst combined_subst in + return (final_subst, updated_env) +;; + +let infer_structure env structure = + let rec process_structure env subst = function + | [] -> return (subst, env) + | item :: rest -> + let* subst1, env1 = infer_structure_item env item in + let* composed_subst = Substitution.compose subst subst1 in + process_structure env1 composed_subst rest + in + process_structure env Substitution.empty structure +;; + +let infer_simple_expression expr = + Result.map ~f:snd (run (infer_expr TypeEnv.initial_env expr)) +;; + +let run_infer str = Result.map ~f:snd (run (infer_structure TypeEnv.initial_env str)) diff --git a/EML/lib/frontend/parser.ml b/EML/lib/frontend/parser.ml new file mode 100644 index 00000000..8b3b968f --- /dev/null +++ b/EML/lib/frontend/parser.ml @@ -0,0 +1,313 @@ +(** Copyright 2024-2025, Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Ast +open Base +open Angstrom + +let is_keyword = function + | "let" + | "match" + | "in" + | "if" + | "then" + | "else" + | "fun" + | "rec" + | "true" + | "false" + | "Some" + | "and" -> true + | _ -> false +;; + +let is_lowercase = function + | 'a' .. 'z' -> true + | _ -> false +;; + +let is_uppercase = function + | 'A' .. 'Z' -> true + | _ -> false +;; + +let is_digit = function + | '0' .. '9' -> true + | _ -> false +;; + +let white_space = take_while Char.is_whitespace +let token s = white_space *> string s +let token1 s = white_space *> s +let parse_parens p = token "(" *> p <* token ")" + +let parse_const_int = + let sign = choice [ token "" ] in + let num = take_while1 Char.is_digit in + lift2 (fun s n -> ConstInt (Int.of_string (s ^ n))) sign num +;; + +let parse_const_bool = + choice + [ token "true" *> return (ConstBool true); token "false" *> return (ConstBool false) ] +;; + +let parse_const_string = + token "\"" *> take_till (Char.equal '\"') <* token "\"" >>| fun s -> ConstString s +;; + +let parse_const = choice [ parse_const_int; parse_const_bool; parse_const_string ] +let parse_unar_oper = choice [ token "-" *> return Negative; token "not" *> return Not ] + +let parse_ident = + let parse_first_char = + satisfy (fun ch -> is_lowercase ch || is_uppercase ch || Char.equal ch '_') + >>| Char.escaped + in + let parse_other_chars = + take_while (fun ch -> + is_lowercase ch || is_uppercase ch || is_digit ch || Char.equal ch '_') + in + token1 @@ lift2 ( ^ ) parse_first_char parse_other_chars + >>= fun s -> if is_keyword s then fail "It is not identifier" else return s +;; + +let parse_base_type = + choice + [ token "int" *> return (TyPrim "int") + ; token "bool" *> return (TyPrim "bool") + ; token "string" *> return (TyPrim "string") + ; token "unit" *> return (TyPrim "unit") + ] +;; + +let rec parse_type_list t = + let* base = t in + white_space + *> token "list" + *> (parse_type_list (return (TyList base)) <|> return (TyList base)) +;; + +let parse_type = + let base_type = parse_base_type in + let list_type = parse_type_list base_type <|> base_type in + list_type +;; + +let parse_pattern_with_type parse_pattern = + let* pat = white_space *> token "(" *> parse_pattern in + let* constr = + white_space *> token ":" *> white_space *> parse_type <* white_space <* token ")" + in + return (PatType (pat, constr)) +;; + +let parse_pattern_var = parse_ident >>| fun id -> PatVariable id +let parse_pattern_const = parse_const >>| fun c -> PatConst c +let parse_pattern_any = token "_" *> return PatAny + +let parse_pattern_tuple parse_pattern = + let parse_unparenthesized = + lift3 + (fun p1 p2 rest -> PatTuple (p1, p2, rest)) + parse_pattern + (token "," *> parse_pattern) + (many (token "," *> parse_pattern)) + <* white_space + in + parse_parens parse_unparenthesized <|> parse_unparenthesized +;; + +let parse_pattern_list parse_pattern = + let semicols = token ";" in + token "[" *> (sep_by semicols parse_pattern >>| fun patterns -> PatList patterns) + <* token "]" +;; + +let parse_pattern_empty = token "()" *> return PatUnit + +let parse_pattern_option parse_pattern = + lift + (fun e -> PatOption e) + (token "Some" *> parse_pattern + >>| (fun e -> Some e) + <|> (token "None" >>| fun _ -> None)) +;; + +let parse_pattern = + fix (fun pat -> + let atom = + choice + [ parse_pattern_var + ; parse_pattern_any + ; parse_pattern_const + ; parse_pattern_empty + ; parse_pattern_with_type pat + ; parse_parens pat + ; parse_pattern_option pat + ] + in + let tuple = parse_pattern_tuple atom <|> atom in + let lst = parse_pattern_list tuple <|> tuple in + lst) +;; + +let parse_left_associative expr oper = + let rec go acc = lift2 (fun f x -> f acc x) oper expr >>= go <|> return acc in + expr >>= go +;; + +let parse_expr_bin_oper parse_bin_op tkn = + token tkn *> return (fun e1 e2 -> ExpBinOper (parse_bin_op, e1, e2)) +;; + +let multiply = parse_expr_bin_oper Multiply "*" +let division = parse_expr_bin_oper Division "/" +let plus = parse_expr_bin_oper Plus "+" +let minus = parse_expr_bin_oper Minus "-" + +let compare = + choice + [ parse_expr_bin_oper Equal "=" + ; parse_expr_bin_oper NotEqual "<>" + ; parse_expr_bin_oper LowestEqual "<=" + ; parse_expr_bin_oper LowerThan "<" + ; parse_expr_bin_oper GretestEqual ">=" + ; parse_expr_bin_oper GreaterThan ">" + ] +;; + +let and_op = parse_expr_bin_oper And "&&" +let or_op = parse_expr_bin_oper Or "||" +let parse_expr_ident = parse_ident >>| fun x -> ExpIdent x +let parse_expr_const = parse_const >>| fun c -> ExpConst c + +let parse_expr_with_type parse_expr = + let parse_annotated_type = token ":" *> parse_type in + lift2 (fun expr t -> ExpTypeAnnotation (expr, t)) parse_expr parse_annotated_type +;; + +let parse_expr_branch parse_expr = + lift3 + (fun cond t f -> ExpBranch (cond, t, f)) + (token "if" *> parse_expr) + (token "then" *> parse_expr) + (option None (token "else" *> parse_expr >>| Option.some)) +;; + +let parse_expr_option parse_expr = + choice + [ token "None" *> return (ExpOption None) + ; (token "Some" *> choice [ parse_parens parse_expr; parse_expr ] + >>| fun e -> ExpOption (Some e)) + ] +;; + +let parse_expr_unar_oper parse_expr = + parse_unar_oper >>= fun op -> parse_expr >>= fun expr -> return (ExpUnarOper (op, expr)) +;; + +let parse_expr_list parse_expr = + let parse_elements = sep_by (token ";") parse_expr in + token "[" *> parse_elements <* token "]" >>| fun elements -> ExpList elements +;; + +let parse_expr_function e = + parse_left_associative e (return (fun e1 e2 -> ExpFunction (e1, e2))) +;; + +let parse_expr_lambda parse_expr = + token "fun" *> sep_by1 white_space parse_pattern + <* token "->" + >>= fun params -> parse_expr >>| fun body -> ExpLambda (params, body) +;; + +let parse_expr_tuple parse_expr = + let commas = token "," in + let tuple = + lift3 + (fun e1 e2 rest -> ExpTuple (e1, e2, rest)) + (parse_expr <* commas) + parse_expr + (many (commas *> parse_expr)) + <* white_space + in + parse_parens tuple <|> tuple +;; + +let parse_body parse_expr = + many1 parse_pattern + >>= fun patterns -> token "=" *> parse_expr >>| fun body -> ExpLambda (patterns, body) +;; + +let parse_expr_let parse_expr = + token "let" + *> lift4 + (fun rec_flag value_bindings and_bindings body -> + ExpLet (rec_flag, value_bindings, and_bindings, body)) + (token "rec" *> (take_while1 Char.is_whitespace *> return Rec) <|> return NonRec) + (lift2 + (fun pat expr -> pat, expr) + parse_pattern + (token "=" *> parse_expr <|> parse_body parse_expr)) + (many + (token "and" + *> lift2 + (fun pat expr -> pat, expr) + parse_pattern + (token "=" *> parse_expr <|> parse_body parse_expr))) + (token "in" *> parse_expr) +;; + +let parse_expr = + fix (fun expr -> + let term = + choice + [ parse_expr_ident + ; parse_expr_const + ; parse_expr_list expr + ; parse_parens expr + ; parse_parens (parse_expr_with_type expr) + ] + in + let func = parse_expr_function term in + let cons = parse_expr_option func <|> func in + let ife = parse_expr_branch expr <|> cons in + let unops = parse_expr_unar_oper ife <|> ife in + let ops1 = parse_left_associative unops (multiply <|> division) in + let ops2 = parse_left_associative ops1 (plus <|> minus) in + let cmp = parse_left_associative ops2 compare in + let boolean = parse_left_associative cmp (and_op <|> or_op) in + let tuple = parse_expr_tuple boolean <|> boolean in + let lambda = parse_expr_lambda expr <|> tuple in + choice [ parse_expr_let expr; parse_expr_lambda expr; lambda ]) +;; + +let parse_structure = + let parse_eval = parse_expr >>| fun e -> SEval e in + let parse_value = + token "let" + *> lift3 + (fun r id id_list -> SValue (r, id, id_list)) + (token "rec" *> (take_while1 Char.is_whitespace *> return Rec) <|> return NonRec) + (lift2 + (fun pat expr -> pat, expr) + parse_pattern + (token "=" *> parse_expr <|> parse_body parse_expr)) + (many + (token "and" + *> lift2 + (fun pat expr -> pat, expr) + parse_pattern + (token "=" *> parse_expr <|> parse_body parse_expr))) + in + choice [ parse_eval; parse_value ] +;; + +let parse_program = + let definitions_or_exprs = many parse_structure <* option () (token ";;" >>| ignore) in + definitions_or_exprs <* white_space +;; + +let parse input = parse_string ~consume:All parse_program input diff --git a/EML/test_qc/dune b/EML/test_qc/dune index 9ca61266..311ba09f 100644 --- a/EML/test_qc/dune +++ b/EML/test_qc/dune @@ -1,5 +1,6 @@ (executable (name test_qc) + (enabled_if false) (modules test_qc) (libraries qcheck angstrom) (preprocess @@ -7,6 +8,7 @@ (rule (alias runtest) + (enabled_if false) (deps (:< test_qc.exe)) (action diff --git a/EML/tests/dune b/EML/tests/dune new file mode 100644 index 00000000..0572289f --- /dev/null +++ b/EML/tests/dune @@ -0,0 +1,9 @@ +(library + (name tests) + (libraries EML_lib) + (modules :standard) + (preprocess + (pps ppx_deriving.show ppx_expect)) + (inline_tests) + (instrumentation + (backend bisect_ppx))) diff --git a/EML/tests/inferencer_tests.ml b/EML/tests/inferencer_tests.ml new file mode 100644 index 00000000..20f8621f --- /dev/null +++ b/EML/tests/inferencer_tests.ml @@ -0,0 +1,52 @@ +(** Copyright 2024-2025, Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open EML_lib.Frontend.Inferencer +open EML_lib.Frontend.Ast +open EML_lib.Frontend.Parser + +let pretty_printer_parse_and_infer s = + match parse s with + | Ok parsed -> + (match run_infer parsed with + | Ok env -> + let filtered_env = + Base.Map.filter_keys env ~f:(fun key -> + not (List.mem key [ "print_int"; "print_endline"; "print_bool" ])) + in + Base.Map.iteri filtered_env ~f:(fun ~key ~data:(S (_, ty)) -> + Format.printf "val %s: %a\n" key pp_ty ty) + | Error e -> Format.printf "Infer error. %a\n" pp_error e) + | Error e -> Format.printf "Parsing error. %s\n" e +;; + +(* let pretty_printer_parse_and_infer_simple s = + match parse s with + | Ok parsed -> + (match parsed with + | [ SEval expr ] -> + (match infer_simple_expression expr with + | Ok ty -> Format.printf "%a\n" pp_ty ty + | Error e -> Format.printf "Infer error. %a\n" pp_error e) + | _ -> + Format.printf + "Expected a single expression, but got a program with multiple structures.\n") + | Error e -> Format.printf "Parsing error. %s\n" e +;; *) + +let%expect_test "test_factorial" = + pretty_printer_parse_and_infer + {| let rec fac n = + if n <= 1 + then 1 + else let n1 = n-1 in + let m = fac n1 in + n*m + +let main = fac 4 |}; + [%expect + {| + val fac: int -> int + val main: int|}] +;; diff --git a/EML/tests/parser_tests.ml b/EML/tests/parser_tests.ml new file mode 100644 index 00000000..ac0880da --- /dev/null +++ b/EML/tests/parser_tests.ml @@ -0,0 +1,51 @@ +open EML_lib.Frontend.Parser +open EML_lib.Frontend.Ast + +let parse_test input = + match parse input with + | Ok ast -> Printf.printf "%s\n" (show_program ast) + | Error fail -> Printf.printf "Ошибка: %s\n" fail +;; + +let%expect_test "factorial" = + parse_test + {| let rec fac n = + if n <= 1 + then 1 + else let n1 = n-1 in + let m = fac n1 in + n*m + +let main = fac 4 |}; + [%expect + {| + [(SValue (Rec, + ((PatVariable "fac"), + (ExpLambda ([(PatVariable "n")], + (ExpBranch ( + (ExpBinOper (LowestEqual, (ExpIdent "n"), (ExpConst (ConstInt 1)) + )), + (ExpConst (ConstInt 1)), + (Some (ExpLet (NonRec, + ((PatVariable "n1"), + (ExpBinOper (Minus, (ExpIdent "n"), + (ExpConst (ConstInt 1))))), + [], + (ExpLet (NonRec, + ((PatVariable "m"), + (ExpFunction ((ExpIdent "fac"), (ExpIdent "n1")))), + [], + (ExpBinOper (Multiply, (ExpIdent "n"), (ExpIdent "m") + )) + )) + ))) + )) + ))), + [])); + (SValue (NonRec, + ((PatVariable "main"), + (ExpFunction ((ExpIdent "fac"), (ExpConst (ConstInt 4))))), + [])) + ] +|}] +;; From 39319da144a62bd16b7aaaecafc55fa32dd5c61f Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Mon, 6 Oct 2025 16:23:19 +0300 Subject: [PATCH 05/74] Upd: add vicitory as maintainer --- EML/lib/frontend/ast.ml | 2 +- EML/lib/frontend/inferencer.ml | 2 +- EML/lib/frontend/parser.ml | 2 +- EML/tests/inferencer_tests.ml | 2 +- EML/tests/parser_tests.ml | 4 ++++ 5 files changed, 8 insertions(+), 4 deletions(-) diff --git a/EML/lib/frontend/ast.ml b/EML/lib/frontend/ast.ml index 7d03e1b4..0bc9eba2 100644 --- a/EML/lib/frontend/ast.ml +++ b/EML/lib/frontend/ast.ml @@ -1,4 +1,4 @@ -(** Copyright 2024-2025, Danil Usoltsev *) +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) (** SPDX-License-Identifier: LGPL-3.0-or-later *) diff --git a/EML/lib/frontend/inferencer.ml b/EML/lib/frontend/inferencer.ml index 9b1b3352..62ccce8a 100644 --- a/EML/lib/frontend/inferencer.ml +++ b/EML/lib/frontend/inferencer.ml @@ -1,4 +1,4 @@ -(** Copyright 2024-2025, Danil Usoltsev *) +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) (** SPDX-License-Identifier: LGPL-3.0-or-later *) diff --git a/EML/lib/frontend/parser.ml b/EML/lib/frontend/parser.ml index 8b3b968f..6d2a1d3b 100644 --- a/EML/lib/frontend/parser.ml +++ b/EML/lib/frontend/parser.ml @@ -1,4 +1,4 @@ -(** Copyright 2024-2025, Danil Usoltsev *) +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) (** SPDX-License-Identifier: LGPL-3.0-or-later *) diff --git a/EML/tests/inferencer_tests.ml b/EML/tests/inferencer_tests.ml index 20f8621f..06c0921a 100644 --- a/EML/tests/inferencer_tests.ml +++ b/EML/tests/inferencer_tests.ml @@ -1,4 +1,4 @@ -(** Copyright 2024-2025, Danil Usoltsev *) +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) (** SPDX-License-Identifier: LGPL-3.0-or-later *) diff --git a/EML/tests/parser_tests.ml b/EML/tests/parser_tests.ml index ac0880da..f203c74e 100644 --- a/EML/tests/parser_tests.ml +++ b/EML/tests/parser_tests.ml @@ -1,3 +1,7 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + open EML_lib.Frontend.Parser open EML_lib.Frontend.Ast From 5624cdc104e625ec97cee3d825129d376f035f70 Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Mon, 6 Oct 2025 20:06:09 +0300 Subject: [PATCH 06/74] Fix: some fix --- EML/{eml.opam => EML.opam} | 2 -- EML/{demo.opam.template => EML.opam.template} | 0 EML/bin/dune | 5 +++-- EML/dune-project | 22 +++---------------- EML/lib/dune | 2 +- EML/lib/frontend/inferencer.ml | 2 +- 6 files changed, 8 insertions(+), 25 deletions(-) rename EML/{eml.opam => EML.opam} (88%) rename EML/{demo.opam.template => EML.opam.template} (100%) diff --git a/EML/eml.opam b/EML/EML.opam similarity index 88% rename from EML/eml.opam rename to EML/EML.opam index f98afd94..85857a71 100644 --- a/EML/eml.opam +++ b/EML/EML.opam @@ -1,4 +1,3 @@ -# This file is generated by dune, edit dune-project instead opam-version: "2.0" synopsis: "A short LLVM demo" maintainer: ["Victoria Ostrovskaya & Danil Usoltsev"] @@ -15,7 +14,6 @@ depends: [ "ppx_expect" "llvm" {= "18-shared"} "qcheck" {with-tests} - "ppx_deriving_qcheck" {= "0.6"} "odoc" {with-doc} ] build: [ diff --git a/EML/demo.opam.template b/EML/EML.opam.template similarity index 100% rename from EML/demo.opam.template rename to EML/EML.opam.template diff --git a/EML/bin/dune b/EML/bin/dune index dfd728f1..accab2e5 100644 --- a/EML/bin/dune +++ b/EML/bin/dune @@ -1,6 +1,6 @@ (library (name LL) - (public_name eml.LL) + (public_name EML.LL) (modules LL) (wrapped false) (libraries @@ -10,7 +10,7 @@ )) (executable - (public_name eml) + (public_name EML) (name main) (modules main) (libraries LL) @@ -18,4 +18,5 @@ (backend bisect_ppx))) (cram + (package EML) (deps ./main.exe runtime.c)) diff --git a/EML/dune-project b/EML/dune-project index feede8b2..8d153d2c 100644 --- a/EML/dune-project +++ b/EML/dune-project @@ -1,8 +1,8 @@ (lang dune 3.8) -(name eml) +(name EML) -(generate_opam_files true) +(generate_opam_files false) (source (github Kakadu/comp24)) @@ -13,22 +13,6 @@ (license "LGPL-3.0-or-later WITH OCaml-LGPL-linking-exception") -(package - (name eml) - (synopsis "A short LLVM demo") - (depends - ocaml - (dune - (= "3.19.1")) - (angstrom - (= "0.16.0")) - qcheck - bisect_ppx - ppx_expect - (llvm - (= "18-shared")) - (qcheck :with-tests) - (ppx_deriving_qcheck - (= "0.6")))) + ; See the complete stanza docs at https://dune.readthedocs.io/en/stable/dune-files.html#dune-project diff --git a/EML/lib/dune b/EML/lib/dune index 8aa4d2b1..99d5ce41 100644 --- a/EML/lib/dune +++ b/EML/lib/dune @@ -2,7 +2,7 @@ (library (name EML_lib) - (public_name eml.lib) + (public_name EML.lib) (modules :standard) (libraries base angstrom) (preprocess diff --git a/EML/lib/frontend/inferencer.ml b/EML/lib/frontend/inferencer.ml index 62ccce8a..9b1b3352 100644 --- a/EML/lib/frontend/inferencer.ml +++ b/EML/lib/frontend/inferencer.ml @@ -1,4 +1,4 @@ -(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) +(** Copyright 2024-2025, Danil Usoltsev *) (** SPDX-License-Identifier: LGPL-3.0-or-later *) From 6994f2859b59bc4e00db3d47568add438bb1b3ff Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Sun, 12 Oct 2025 20:47:35 +0300 Subject: [PATCH 07/74] add Anf --- EML/bin/dune | 2 +- EML/bin/fact | 4 + EML/bin/main.ml | 65 ++----------- EML/bin/run.t | 20 ---- EML/lib/middleend/anf.ml | 200 +++++++++++++++++++++++++++++++++++++++ EML/lib/utils/monads.ml | 24 +++++ EML/lib/utils/utils.ml | 28 ++++++ EML/tests/anf_tests.ml | 167 ++++++++++++++++++++++++++++++++ 8 files changed, 434 insertions(+), 76 deletions(-) create mode 100644 EML/bin/fact create mode 100644 EML/lib/middleend/anf.ml create mode 100644 EML/lib/utils/monads.ml create mode 100644 EML/lib/utils/utils.ml create mode 100644 EML/tests/anf_tests.ml diff --git a/EML/bin/dune b/EML/bin/dune index accab2e5..5e119f12 100644 --- a/EML/bin/dune +++ b/EML/bin/dune @@ -13,7 +13,7 @@ (public_name EML) (name main) (modules main) - (libraries LL) + (libraries LL EML_lib) (instrumentation (backend bisect_ppx))) diff --git a/EML/bin/fact b/EML/bin/fact new file mode 100644 index 00000000..dfe17f61 --- /dev/null +++ b/EML/bin/fact @@ -0,0 +1,4 @@ +let rec factorial n = + if n <= 1 then 1 else n * factorial (n - 1) +in +factorial 5 \ No newline at end of file diff --git a/EML/bin/main.ml b/EML/bin/main.ml index 80a095bc..302529ed 100644 --- a/EML/bin/main.ml +++ b/EML/bin/main.ml @@ -1,60 +1,15 @@ -[@@@ocaml.text "/*"] - -(** Copyright 2023-2025, Kakadu and contributors *) +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) (** SPDX-License-Identifier: LGPL-3.0-or-later *) -[@@@ocaml.text "/*"] +open EML_lib.Frontend.Parser +open EML_lib.Frontend.Ast +open Printf -let () = - let context = Llvm.global_context () in - let builder = Llvm.builder context in - (* let () = assert (Llvm_executionengine.initialize ()) in *) - let the_module = Llvm.create_module context "main" in - Llvm.set_target_triple "x86_64-pc-linux-gnu" the_module; - (* let _the_execution_engine = Llvm_executionengine.create the_module in *) - let module LL = (val LL.make context builder the_module) in - let i64_type = Llvm.i64_type context in - let void_type = Llvm.void_type context in - let ptr_type = Llvm.pointer_type context in - let prepare_main () = - let ft = - (* TODO main has special args *) - let args = Array.make 0 ptr_type in - Llvm.function_type i64_type args - in - let the_function = Llvm.declare_function "main" ft the_module in - (* Create a new basic block to start insertion into. *) - let bb = Llvm.append_block context "entry" the_function in - Llvm.position_at_end bb builder; - (* Add all arguments to the symbol table and create their allocas. *) - (* Finish off the function. *) - let (_ : Llvm.llvalue) = - LL.build_call - (Llvm.function_type void_type [| i64_type |]) - LL.(lookup_func_exn "print_int") - [ Llvm.const_int i64_type 70 ] - in - let (_ : Llvm.llvalue) = Llvm.build_ret (Llvm.const_int i64_type 0) builder in - (* Validate the generated code, checking for consistency. *) - (match Llvm_analysis.verify_function the_function with - | true -> () - | false -> - Stdlib.Format.printf - "invalid function generated\n%s\n" - (Llvm.string_of_llvalue the_function); - Llvm_analysis.assert_valid_function the_function); - (* Optimize the function. *) - (* let (_ : bool) = Llvm.PassManager.run_function the_function the_fpm in *) - (* Llvm.dump_value the_function; *) - () - in - let _ = - Llvm.declare_function - "print_int" - (Llvm.function_type (Llvm.void_type context) [| i64_type |]) - the_module - in - prepare_main (); - Llvm.print_module "out.ll" the_module +let parse_program input = + match parse input with + | Ok ast -> printf "%s\n" (show_program ast) + | Error fail -> printf "Ошибка: %s\n" fail ;; + +let () = parse_program "let x = 1 + 2" \ No newline at end of file diff --git a/EML/bin/run.t b/EML/bin/run.t index c5569b68..e69de29b 100644 --- a/EML/bin/run.t +++ b/EML/bin/run.t @@ -1,20 +0,0 @@ - $ clang-18 -c runtime.c -o runtime.o - $ ./main.exe - $ ls - main.exe - out.ll - runtime.c - runtime.o - $ cat out.ll | grep -E 'source_filename|target datalayout|ModuleID' --invert-match - target triple = "x86_64-pc-linux-gnu" - - declare void @print_int(i64) - - define i64 @main() { - entry: - call void @print_int(i64 70) - ret i64 0 - } - $ clang-18 out.ll runtime.o -o demo1.exe - $ echo "Press $(./demo1.exe) to pay respect" - Press F to pay respect diff --git a/EML/lib/middleend/anf.ml b/EML/lib/middleend/anf.ml new file mode 100644 index 00000000..74810d73 --- /dev/null +++ b/EML/lib/middleend/anf.ml @@ -0,0 +1,200 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Frontend +open Ast +open Base +open Utils + +module ANFMonad = struct + type 'a t = int -> int * ('a, string) Result.t + + let return x = fun counter -> (counter, Ok x) + + let ( >>= ) m f = fun counter -> + match m counter with + | (counter', Ok a) -> f a counter' + | (counter', Error e) -> (counter', Error e) + + let fresh : string t = fun counter -> + (counter + 1, Ok ("anf_t" ^ Int.to_string counter)) + + let run m = m 0 |> snd + + let fail msg = fun counter -> (counter, Error msg) + + (** Монадические операторы *) + module Syntax = struct + let ( let* ) = ( >>= ) + end +end + +open ANFMonad +open ANFMonad.Syntax + +(** ANF представления *) +type immediate = + | ImmediateConst of const + | ImmediateVar of ident +[@@deriving show { with_path = false }] + +type complex_expr = + | ComplexImmediate of immediate + | ComplexBinOper of bin_oper * immediate * immediate + | ComplexUnarOper of unar_oper * immediate + | ComplexTuple of immediate * immediate * immediate list + | ComplexList of immediate list + | ComplexOption of immediate option + | ComplexApp of immediate * immediate * immediate list + | ComplexLambda of pattern list * anf_expr + | ComplexBranch of immediate * anf_expr * anf_expr +[@@deriving show { with_path = false }] + +and anf_expr = + | AnfLet of is_rec * ident * complex_expr * anf_expr + | AnfExpr of complex_expr +[@@deriving show { with_path = false }] + +type anf_bind = ident * anf_expr [@@deriving show { with_path = false }] +type anf_structure = + | AnfEval of anf_expr + | AnfValue of is_rec * anf_bind * anf_bind list +[@@deriving show { with_path = false }] + +type anf_program = anf_structure list [@@deriving show { with_path = false }] + +(** Вспомогательные функции *) +let optimize_anf_let rf name1 v body = (* Optimize let x = y in x -> y*) + match rf, body with + | NonRec, AnfExpr (ComplexImmediate (ImmediateVar name2)) when String.equal name1 name2 -> AnfExpr v + | _ -> AnfLet (rf, name1, v, body) +;; + + +(** Основные функции преобразования *) +let rec anf (e : expr) (k : immediate -> anf_expr t) : anf_expr t = + match e with + | ExpConst c -> k (ImmediateConst c) + | ExpIdent x -> k (ImmediateVar x) + + | ExpUnarOper (op, e) -> + anf e (fun immediate -> + let* var_name = fresh in + let* cont_expr = k (ImmediateVar var_name) in + return (AnfLet (NonRec, var_name, ComplexUnarOper (op, immediate), cont_expr))) + + | ExpBinOper (op, e1, e2) -> + anf e1 (fun immediate1 -> + anf e2 (fun immediate2 -> + let* var_name = fresh in + let* cont_expr = k (ImmediateVar var_name) in + return (AnfLet (NonRec, var_name, ComplexBinOper (op, immediate1, immediate2), cont_expr)))) + + | ExpTuple (e1, e2, rest) -> + let all_exprs = e1 :: e2 :: rest in + anf_list all_exprs (fun imms -> + match imms with + | i1 :: i2 :: rest_imm -> + let* var_name = fresh in + let* cont_expr = k (ImmediateVar var_name) in + return (AnfLet (NonRec, var_name, ComplexTuple (i1, i2, rest_imm), cont_expr)) + | _ -> fail "Invalid tuple") + + | ExpList exprs -> + anf_list exprs (fun imms -> + let* var_name = fresh in + let* cont_expr = k (ImmediateVar var_name) in + return (AnfLet (NonRec, var_name, ComplexList imms, cont_expr))) + + | ExpOption opt_expr -> + let* var_name = fresh in + let* cont_expr = k (ImmediateVar var_name) in + (match opt_expr with + | None -> + return (AnfLet (NonRec, var_name, ComplexOption None, cont_expr)) + | Some expr -> + anf expr (fun immediate -> + return (AnfLet (NonRec, var_name, ComplexOption (Some immediate), cont_expr)))) + + | ExpBranch (cond, then_expr, else_expr) -> + anf cond (fun immediate_cond -> + let* then_anf = anf then_expr (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) in + let* var_name = fresh in + let* cont_expr = k (ImmediateVar var_name) in + let* else_anf = + match else_expr with + | Some e -> anf e (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) + | None -> return (AnfExpr (ComplexImmediate (ImmediateConst (ConstBool false)))) + in return (AnfLet (NonRec, var_name, ComplexBranch (immediate_cond, then_anf, else_anf), cont_expr))) + + | ExpLet (rec_flag, (pat, e1), _, e2) -> + let* e1_anf = anf e1 (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) in + let* e2_anf = anf e2 k in + let* complex_expr_body = + match e1_anf with + | AnfExpr c -> return c + | _ -> fail "Expected complex_expr" + in + if is_simple_pattern pat then + (match pattern_to_ident pat with + | Some name -> + return (AnfLet (rec_flag, name, complex_expr_body, e2_anf)) + | None -> + let* var_name = fresh in + return (AnfLet (NonRec, var_name, complex_expr_body, e2_anf))) + else + fail "Complex patterns in let bindings not yet supported" + + | ExpLambda (patterns, body) -> + let* body_anf = anf body (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) in + let* var_name = fresh in + let* cont_expr = k (ImmediateVar var_name) in + return (AnfLet (NonRec, var_name, ComplexLambda (patterns, body_anf), cont_expr)) + + | ExpFunction (func, arg) -> + anf func (fun immediate_func -> + anf arg (fun immediate_arg -> + let* var_name = fresh in + let* cont_expr = k (ImmediateVar var_name) in + return (AnfLet (NonRec, var_name, ComplexApp (immediate_func, immediate_arg, []), cont_expr)))) + + | ExpTypeAnnotation (e, _) -> + (* Игнорируем аннотации типов в ANF *) + anf e k + + +and anf_list (exprs : expr list) (k : immediate list -> anf_expr t) : anf_expr t = + match exprs with + | [] -> k [] + | hd :: tl -> + anf hd (fun immediate_hd -> + anf_list tl (fun immediate_tl -> k (immediate_hd :: immediate_tl))) + +(** Преобразование структуры *) +let anf_structure_item (item : structure) : anf_structure t = + match item with + | SEval expr -> + let* result = anf expr (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) in + return (AnfEval result) + + | SValue (rec_flag, (pat, expr), _) -> + if is_simple_pattern pat then + let* anf_expr_body = anf expr (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) in + match pattern_to_ident pat with + | Some name -> + return (AnfValue (rec_flag, (name, anf_expr_body), [])) + | None -> + return (AnfValue (rec_flag, ("_", anf_expr_body), [])) + else + fail "Complex patterns in top-level bindings not yet supported" + +(** Преобразование всей программы *) +let anf_program (program : program) : (anf_program, string) Result.t = + let program' = + List.fold_right program ~init:(return []) ~f:(fun item acc -> + let* acc_list = acc in + let* item_anf = anf_structure_item item in + return (item_anf :: acc_list)) + in + run program' \ No newline at end of file diff --git a/EML/lib/utils/monads.ml b/EML/lib/utils/monads.ml new file mode 100644 index 00000000..7f39135b --- /dev/null +++ b/EML/lib/utils/monads.ml @@ -0,0 +1,24 @@ + + +module ANFMonad = struct + type 'a t = int -> int * ('a, string) Result.t + + let return x = fun counter -> (counter, Ok x) + + let ( >>= ) m f = fun counter -> + match m counter with + | (counter', Ok a) -> f a counter' + | (counter', Error e) -> (counter', Error e) + + let fresh : string t = fun counter -> + (counter + 1, Ok ("anf_t" ^ Int.to_string counter)) + + let run m = m 0 |> snd + + let fail msg = fun counter -> (counter, Error msg) + + (** Монадические операторы *) + module Syntax = struct + let ( let* ) = ( >>= ) + end +end \ No newline at end of file diff --git a/EML/lib/utils/utils.ml b/EML/lib/utils/utils.ml new file mode 100644 index 00000000..efd4c99d --- /dev/null +++ b/EML/lib/utils/utils.ml @@ -0,0 +1,28 @@ +open Base +open Frontend +open Ast + +let is_simple_pattern = function + | PatVariable _ | PatAny | PatUnit -> true + | _ -> false +;; + +let is_tuple_pattern = function + | PatTuple (_, _, _) -> true + | _ -> false +;; + +let rec extract_tuple_pattern_idents acc = function + | PatVariable x -> x :: acc + | PatTuple (p1, p2, rest) -> + let acc' = extract_tuple_pattern_idents acc p1 in + let acc'' = extract_tuple_pattern_idents acc' p2 in + List.fold_left rest ~f:(fun current_acc pat -> extract_tuple_pattern_idents current_acc pat) ~init:acc'' + | PatAny -> "_" :: acc + | _ -> acc +;; + +let pattern_to_ident = function + | PatVariable x -> Some x + | _ -> None +;; \ No newline at end of file diff --git a/EML/tests/anf_tests.ml b/EML/tests/anf_tests.ml new file mode 100644 index 00000000..6b663f19 --- /dev/null +++ b/EML/tests/anf_tests.ml @@ -0,0 +1,167 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open EML_lib.Frontend.Parser +open EML_lib.Middleend.Anf + +let parse_and_anf input = + match parse input with + | Ok ast -> + (match anf_program ast with + | Ok anf_ast -> Printf.printf "%s\n" (show_anf_program anf_ast) + | Error e -> Printf.printf "ANF error: %s\n" e) + | Error e -> Printf.printf "Parsing error: %s\n" e +;; + +let%expect_test "001.ml" = + parse_and_anf "let recfac n = if n<=1 then 1 else n * fac (n-1)"; + [%expect + {| +[(AnfValue (NonRec, + ("recfac", + (AnfLet (NonRec, "anf_t5", + (ComplexLambda ([(PatVariable "n")], + (AnfLet (NonRec, "anf_t0", + (ComplexBinOper (LowestEqual, (ImmediateVar "n"), + (ImmediateConst (ConstInt 1)))), + (AnfLet (NonRec, "anf_t1", + (ComplexBranch ((ImmediateVar "anf_t0"), + (AnfExpr (ComplexImmediate (ImmediateConst (ConstInt 1)))), + (AnfLet (NonRec, "anf_t2", + (ComplexBinOper (Minus, (ImmediateVar "n"), + (ImmediateConst (ConstInt 1)))), + (AnfLet (NonRec, "anf_t3", + (ComplexApp ((ImmediateVar "fac"), + (ImmediateVar "anf_t2"), [])), + (AnfLet (NonRec, "anf_t4", + (ComplexBinOper (Multiply, (ImmediateVar "n"), + (ImmediateVar "anf_t3"))), + (AnfExpr + (ComplexImmediate (ImmediateVar "anf_t4"))) + )) + )) + )) + )), + (AnfExpr (ComplexImmediate (ImmediateVar "anf_t1"))))) + )) + )), + (AnfExpr (ComplexImmediate (ImmediateVar "anf_t5")))))), + [])) + ]|}] +;; + + +let%expect_test "003occurs.ml" = + parse_and_anf "let fix f = (fun x -> f (fun f -> x x f)) (fun x -> f (fun f -> x x f))"; + [%expect + {| +[(AnfValue (NonRec, + ("fix", + (AnfLet (NonRec, "anf_t11", + (ComplexLambda ([(PatVariable "f")], + (AnfLet (NonRec, "anf_t4", + (ComplexLambda ([(PatVariable "x")], + (AnfLet (NonRec, "anf_t2", + (ComplexLambda ([(PatVariable "f")], + (AnfLet (NonRec, "anf_t0", + (ComplexApp ((ImmediateVar "x"), + (ImmediateVar "x"), [])), + (AnfLet (NonRec, "anf_t1", + (ComplexApp ((ImmediateVar "anf_t0"), + (ImmediateVar "f"), [])), + (AnfExpr + (ComplexImmediate (ImmediateVar "anf_t1"))) + )) + )) + )), + (AnfLet (NonRec, "anf_t3", + (ComplexApp ((ImmediateVar "f"), + (ImmediateVar "anf_t2"), [])), + (AnfExpr (ComplexImmediate (ImmediateVar "anf_t3"))))) + )) + )), + (AnfLet (NonRec, "anf_t9", + (ComplexLambda ([(PatVariable "x")], + (AnfLet (NonRec, "anf_t7", + (ComplexLambda ([(PatVariable "f")], + (AnfLet (NonRec, "anf_t5", + (ComplexApp ((ImmediateVar "x"), + (ImmediateVar "x"), [])), + (AnfLet (NonRec, "anf_t6", + (ComplexApp ((ImmediateVar "anf_t5"), + (ImmediateVar "f"), [])), + (AnfExpr + (ComplexImmediate (ImmediateVar "anf_t6"))) + )) + )) + )), + (AnfLet (NonRec, "anf_t8", + (ComplexApp ((ImmediateVar "f"), + (ImmediateVar "anf_t7"), [])), + (AnfExpr (ComplexImmediate (ImmediateVar "anf_t8"))) + )) + )) + )), + (AnfLet (NonRec, "anf_t10", + (ComplexApp ((ImmediateVar "anf_t4"), + (ImmediateVar "anf_t9"), [])), + (AnfExpr (ComplexImmediate (ImmediateVar "anf_t10"))))) + )) + )) + )), + (AnfExpr (ComplexImmediate (ImmediateVar "anf_t11")))))), + [])) + ]|}] +;; + +let%expect_test "004let_poly.ml" = + parse_and_anf "let temp = + (fun f -> (f 1, f true)) (fun x -> x)"; + [%expect + {| +[(AnfValue (NonRec, + ("temp", + (AnfLet (NonRec, "anf_t3", + (ComplexLambda ([(PatVariable "f")], + (AnfLet (NonRec, "anf_t0", + (ComplexApp ((ImmediateVar "f"), (ImmediateConst (ConstInt 1)), + [])), + (AnfLet (NonRec, "anf_t1", + (ComplexApp ((ImmediateVar "f"), + (ImmediateConst (ConstBool true)), [])), + (AnfLet (NonRec, "anf_t2", + (ComplexTuple ((ImmediateVar "anf_t0"), + (ImmediateVar "anf_t1"), [])), + (AnfExpr (ComplexImmediate (ImmediateVar "anf_t2"))))) + )) + )) + )), + (AnfLet (NonRec, "anf_t4", + (ComplexLambda ([(PatVariable "x")], + (AnfExpr (ComplexImmediate (ImmediateVar "x"))))), + (AnfLet (NonRec, "anf_t5", + (ComplexApp ((ImmediateVar "anf_t3"), (ImmediateVar "anf_t4"), + [])), + (AnfExpr (ComplexImmediate (ImmediateVar "anf_t5"))))) + )) + ))), + [])) + ]|}] +;; + +let%expect_test "002if.ml" = + parse_and_anf "let main = if true then 1 else false"; + [%expect + {| + [(AnfValue (NonRec, + ("main", + (AnfLet (NonRec, "anf_t0", + (ComplexBranch ((ImmediateConst (ConstBool true)), + (AnfExpr (ComplexImmediate (ImmediateConst (ConstInt 1)))), + (AnfExpr (ComplexImmediate (ImmediateConst (ConstBool false)))))), + (AnfExpr (ComplexImmediate (ImmediateVar "anf_t0")))))), + [])) + ]|}] +;; + From 080534e6d51100bac2b922b1de6787e6e453b1c8 Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Tue, 18 Nov 2025 00:26:33 +0300 Subject: [PATCH 08/74] fix ocamlformat --- EML/lib/middleend/anf.ml | 221 +++++++++++++++++++-------------------- EML/lib/utils/monads.ml | 23 ++-- EML/lib/utils/utils.ml | 11 +- EML/tests/anf_tests.ml | 5 +- 4 files changed, 126 insertions(+), 134 deletions(-) diff --git a/EML/lib/middleend/anf.ml b/EML/lib/middleend/anf.ml index 74810d73..4f2071d7 100644 --- a/EML/lib/middleend/anf.ml +++ b/EML/lib/middleend/anf.ml @@ -1,7 +1,3 @@ -(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) - -(** SPDX-License-Identifier: LGPL-3.0-or-later *) - open Frontend open Ast open Base @@ -10,21 +6,19 @@ open Utils module ANFMonad = struct type 'a t = int -> int * ('a, string) Result.t - let return x = fun counter -> (counter, Ok x) - - let ( >>= ) m f = fun counter -> - match m counter with - | (counter', Ok a) -> f a counter' - | (counter', Error e) -> (counter', Error e) + let return x = fun counter -> counter, Ok x - let fresh : string t = fun counter -> - (counter + 1, Ok ("anf_t" ^ Int.to_string counter)) + let ( >>= ) m f = + fun counter -> + match m counter with + | counter', Ok a -> f a counter' + | counter', Error e -> counter', Error e + ;; + let fresh : string t = fun counter -> counter + 1, Ok ("anf_t" ^ Int.to_string counter) let run m = m 0 |> snd + let fail msg = fun counter -> counter, Error msg - let fail msg = fun counter -> (counter, Error msg) - - (** Монадические операторы *) module Syntax = struct let ( let* ) = ( >>= ) end @@ -33,7 +27,6 @@ end open ANFMonad open ANFMonad.Syntax -(** ANF представления *) type immediate = | ImmediateConst of const | ImmediateVar of ident @@ -57,139 +50,140 @@ and anf_expr = [@@deriving show { with_path = false }] type anf_bind = ident * anf_expr [@@deriving show { with_path = false }] -type anf_structure = + +type anf_structure = | AnfEval of anf_expr | AnfValue of is_rec * anf_bind * anf_bind list [@@deriving show { with_path = false }] type anf_program = anf_structure list [@@deriving show { with_path = false }] -(** Вспомогательные функции *) -let optimize_anf_let rf name1 v body = (* Optimize let x = y in x -> y*) +let optimize_anf_let rf name1 v body = match rf, body with - | NonRec, AnfExpr (ComplexImmediate (ImmediateVar name2)) when String.equal name1 name2 -> AnfExpr v + | NonRec, AnfExpr (ComplexImmediate (ImmediateVar name2)) when String.equal name1 name2 + -> AnfExpr v | _ -> AnfLet (rf, name1, v, body) ;; - -(** Основные функции преобразования *) let rec anf (e : expr) (k : immediate -> anf_expr t) : anf_expr t = match e with | ExpConst c -> k (ImmediateConst c) | ExpIdent x -> k (ImmediateVar x) - | ExpUnarOper (op, e) -> - anf e (fun immediate -> + anf e (fun immediate -> + let* var_name = fresh in + let* cont_expr = k (ImmediateVar var_name) in + return (AnfLet (NonRec, var_name, ComplexUnarOper (op, immediate), cont_expr))) + | ExpBinOper (op, e1, e2) -> + anf e1 (fun immediate1 -> + anf e2 (fun immediate2 -> let* var_name = fresh in let* cont_expr = k (ImmediateVar var_name) in - return (AnfLet (NonRec, var_name, ComplexUnarOper (op, immediate), cont_expr))) - - | ExpBinOper (op, e1, e2) -> - anf e1 (fun immediate1 -> - anf e2 (fun immediate2 -> - let* var_name = fresh in - let* cont_expr = k (ImmediateVar var_name) in - return (AnfLet (NonRec, var_name, ComplexBinOper (op, immediate1, immediate2), cont_expr)))) - + return + (AnfLet + (NonRec, var_name, ComplexBinOper (op, immediate1, immediate2), cont_expr)))) | ExpTuple (e1, e2, rest) -> - let all_exprs = e1 :: e2 :: rest in - anf_list all_exprs (fun imms -> - match imms with - | i1 :: i2 :: rest_imm -> - let* var_name = fresh in - let* cont_expr = k (ImmediateVar var_name) in - return (AnfLet (NonRec, var_name, ComplexTuple (i1, i2, rest_imm), cont_expr)) - | _ -> fail "Invalid tuple") - - | ExpList exprs -> - anf_list exprs (fun imms -> + let all_exprs = e1 :: e2 :: rest in + anf_list all_exprs (fun imms -> + match imms with + | i1 :: i2 :: rest_imm -> let* var_name = fresh in let* cont_expr = k (ImmediateVar var_name) in - return (AnfLet (NonRec, var_name, ComplexList imms, cont_expr))) - + return (AnfLet (NonRec, var_name, ComplexTuple (i1, i2, rest_imm), cont_expr)) + | _ -> fail "Invalid tuple") + | ExpList exprs -> + anf_list exprs (fun imms -> + let* var_name = fresh in + let* cont_expr = k (ImmediateVar var_name) in + return (AnfLet (NonRec, var_name, ComplexList imms, cont_expr))) | ExpOption opt_expr -> - let* var_name = fresh in + let* var_name = fresh in let* cont_expr = k (ImmediateVar var_name) in - (match opt_expr with - | None -> - return (AnfLet (NonRec, var_name, ComplexOption None, cont_expr)) - | Some expr -> - anf expr (fun immediate -> - return (AnfLet (NonRec, var_name, ComplexOption (Some immediate), cont_expr)))) - + (match opt_expr with + | None -> return (AnfLet (NonRec, var_name, ComplexOption None, cont_expr)) + | Some expr -> + anf expr (fun immediate -> + return (AnfLet (NonRec, var_name, ComplexOption (Some immediate), cont_expr)))) | ExpBranch (cond, then_expr, else_expr) -> - anf cond (fun immediate_cond -> - let* then_anf = anf then_expr (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) in - let* var_name = fresh in - let* cont_expr = k (ImmediateVar var_name) in - let* else_anf = - match else_expr with - | Some e -> anf e (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) - | None -> return (AnfExpr (ComplexImmediate (ImmediateConst (ConstBool false)))) - in return (AnfLet (NonRec, var_name, ComplexBranch (immediate_cond, then_anf, else_anf), cont_expr))) - - | ExpLet (rec_flag, (pat, e1), _, e2) -> - let* e1_anf = anf e1 (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) in - let* e2_anf = anf e2 k in - let* complex_expr_body = - match e1_anf with - | AnfExpr c -> return c - | _ -> fail "Expected complex_expr" + anf cond (fun immediate_cond -> + let* then_anf = + anf then_expr (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) in - if is_simple_pattern pat then - (match pattern_to_ident pat with - | Some name -> - return (AnfLet (rec_flag, name, complex_expr_body, e2_anf)) - | None -> - let* var_name = fresh in - return (AnfLet (NonRec, var_name, complex_expr_body, e2_anf))) - else - fail "Complex patterns in let bindings not yet supported" - - | ExpLambda (patterns, body) -> - let* body_anf = anf body (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) in let* var_name = fresh in let* cont_expr = k (ImmediateVar var_name) in - return (AnfLet (NonRec, var_name, ComplexLambda (patterns, body_anf), cont_expr)) - + let* else_anf = + match else_expr with + | Some e -> anf e (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) + | None -> return (AnfExpr (ComplexImmediate (ImmediateConst (ConstBool false)))) + in + return + (AnfLet + ( NonRec + , var_name + , ComplexBranch (immediate_cond, then_anf, else_anf) + , cont_expr ))) + | ExpLet (rec_flag, (pat, e1), _, e2) -> + let* e1_anf = + anf e1 (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) + in + let* e2_anf = anf e2 k in + let* complex_expr_body = + match e1_anf with + | AnfExpr c -> return c + | _ -> fail "Expected complex_expr" + in + if is_simple_pattern pat + then ( + match pattern_to_ident pat with + | Some name -> return (AnfLet (rec_flag, name, complex_expr_body, e2_anf)) + | None -> + let* var_name = fresh in + return (AnfLet (NonRec, var_name, complex_expr_body, e2_anf))) + else fail "Complex patterns in let bindings not yet supported" + | ExpLambda (patterns, body) -> + let* body_anf = + anf body (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) + in + let* var_name = fresh in + let* cont_expr = k (ImmediateVar var_name) in + return (AnfLet (NonRec, var_name, ComplexLambda (patterns, body_anf), cont_expr)) | ExpFunction (func, arg) -> - anf func (fun immediate_func -> - anf arg (fun immediate_arg -> - let* var_name = fresh in - let* cont_expr = k (ImmediateVar var_name) in - return (AnfLet (NonRec, var_name, ComplexApp (immediate_func, immediate_arg, []), cont_expr)))) - - | ExpTypeAnnotation (e, _) -> - (* Игнорируем аннотации типов в ANF *) - anf e k - + anf func (fun immediate_func -> + anf arg (fun immediate_arg -> + let* var_name = fresh in + let* cont_expr = k (ImmediateVar var_name) in + return + (AnfLet + (NonRec, var_name, ComplexApp (immediate_func, immediate_arg, []), cont_expr)))) + | ExpTypeAnnotation (e, _) -> anf e k and anf_list (exprs : expr list) (k : immediate list -> anf_expr t) : anf_expr t = match exprs with | [] -> k [] | hd :: tl -> - anf hd (fun immediate_hd -> - anf_list tl (fun immediate_tl -> k (immediate_hd :: immediate_tl))) + anf hd (fun immediate_hd -> + anf_list tl (fun immediate_tl -> k (immediate_hd :: immediate_tl))) +;; -(** Преобразование структуры *) let anf_structure_item (item : structure) : anf_structure t = match item with | SEval expr -> - let* result = anf expr (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) in - return (AnfEval result) - + let* result = + anf expr (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) + in + return (AnfEval result) | SValue (rec_flag, (pat, expr), _) -> - if is_simple_pattern pat then - let* anf_expr_body = anf expr (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) in - match pattern_to_ident pat with - | Some name -> - return (AnfValue (rec_flag, (name, anf_expr_body), [])) - | None -> - return (AnfValue (rec_flag, ("_", anf_expr_body), [])) - else - fail "Complex patterns in top-level bindings not yet supported" - -(** Преобразование всей программы *) + if is_simple_pattern pat + then + let* anf_expr_body = + anf expr (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) + in + match pattern_to_ident pat with + | Some name -> return (AnfValue (rec_flag, (name, anf_expr_body), [])) + | None -> return (AnfValue (rec_flag, ("_", anf_expr_body), [])) + else fail "Complex patterns in top-level bindings not yet supported" +;; + let anf_program (program : program) : (anf_program, string) Result.t = let program' = List.fold_right program ~init:(return []) ~f:(fun item acc -> @@ -197,4 +191,5 @@ let anf_program (program : program) : (anf_program, string) Result.t = let* item_anf = anf_structure_item item in return (item_anf :: acc_list)) in - run program' \ No newline at end of file + run program' +;; diff --git a/EML/lib/utils/monads.ml b/EML/lib/utils/monads.ml index 7f39135b..b8322df4 100644 --- a/EML/lib/utils/monads.ml +++ b/EML/lib/utils/monads.ml @@ -1,24 +1,21 @@ - - module ANFMonad = struct type 'a t = int -> int * ('a, string) Result.t - let return x = fun counter -> (counter, Ok x) - - let ( >>= ) m f = fun counter -> - match m counter with - | (counter', Ok a) -> f a counter' - | (counter', Error e) -> (counter', Error e) + let return x = fun counter -> counter, Ok x - let fresh : string t = fun counter -> - (counter + 1, Ok ("anf_t" ^ Int.to_string counter)) + let ( >>= ) m f = + fun counter -> + match m counter with + | counter', Ok a -> f a counter' + | counter', Error e -> counter', Error e + ;; + let fresh : string t = fun counter -> counter + 1, Ok ("anf_t" ^ Int.to_string counter) let run m = m 0 |> snd - - let fail msg = fun counter -> (counter, Error msg) + let fail msg = fun counter -> counter, Error msg (** Монадические операторы *) module Syntax = struct let ( let* ) = ( >>= ) end -end \ No newline at end of file +end diff --git a/EML/lib/utils/utils.ml b/EML/lib/utils/utils.ml index efd4c99d..38c6ec9e 100644 --- a/EML/lib/utils/utils.ml +++ b/EML/lib/utils/utils.ml @@ -15,9 +15,12 @@ let is_tuple_pattern = function let rec extract_tuple_pattern_idents acc = function | PatVariable x -> x :: acc | PatTuple (p1, p2, rest) -> - let acc' = extract_tuple_pattern_idents acc p1 in - let acc'' = extract_tuple_pattern_idents acc' p2 in - List.fold_left rest ~f:(fun current_acc pat -> extract_tuple_pattern_idents current_acc pat) ~init:acc'' + let acc' = extract_tuple_pattern_idents acc p1 in + let acc'' = extract_tuple_pattern_idents acc' p2 in + List.fold_left + rest + ~f:(fun current_acc pat -> extract_tuple_pattern_idents current_acc pat) + ~init:acc'' | PatAny -> "_" :: acc | _ -> acc ;; @@ -25,4 +28,4 @@ let rec extract_tuple_pattern_idents acc = function let pattern_to_ident = function | PatVariable x -> Some x | _ -> None -;; \ No newline at end of file +;; diff --git a/EML/tests/anf_tests.ml b/EML/tests/anf_tests.ml index 6b663f19..0d390c6b 100644 --- a/EML/tests/anf_tests.ml +++ b/EML/tests/anf_tests.ml @@ -51,7 +51,6 @@ let%expect_test "001.ml" = ]|}] ;; - let%expect_test "003occurs.ml" = parse_and_anf "let fix f = (fun x -> f (fun f -> x x f)) (fun x -> f (fun f -> x x f))"; [%expect @@ -116,8 +115,7 @@ let%expect_test "003occurs.ml" = ;; let%expect_test "004let_poly.ml" = - parse_and_anf "let temp = - (fun f -> (f 1, f true)) (fun x -> x)"; + parse_and_anf "let temp =\n (fun f -> (f 1, f true)) (fun x -> x)"; [%expect {| [(AnfValue (NonRec, @@ -164,4 +162,3 @@ let%expect_test "002if.ml" = [])) ]|}] ;; - From 2d89e3aa8082a7a6fa4ef4645580d2c7c14680fa Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Tue, 18 Nov 2025 00:52:25 +0300 Subject: [PATCH 09/74] fix ocamlformat --- EML/bin/main.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/EML/bin/main.ml b/EML/bin/main.ml index 302529ed..9fc57907 100644 --- a/EML/bin/main.ml +++ b/EML/bin/main.ml @@ -12,4 +12,4 @@ let parse_program input = | Error fail -> printf "Ошибка: %s\n" fail ;; -let () = parse_program "let x = 1 + 2" \ No newline at end of file +let () = parse_program "let x = 1 + 2" From 5e8d63865a52f0a1c38c8667a3f6ee0c7d824070 Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Wed, 19 Nov 2025 22:24:27 +0300 Subject: [PATCH 10/74] Add anf_pretty_printer --- EML/lib/middleend/anf.ml | 155 ++++++++++++++++++++++++++++++++++++ EML/lib/middleend/anf_pp.ml | 149 ++++++++++++++++++++++++++++++++++ EML/tests/anf_tests.ml | 40 ++++++++++ 3 files changed, 344 insertions(+) create mode 100644 EML/lib/middleend/anf_pp.ml diff --git a/EML/lib/middleend/anf.ml b/EML/lib/middleend/anf.ml index 4f2071d7..d9366e90 100644 --- a/EML/lib/middleend/anf.ml +++ b/EML/lib/middleend/anf.ml @@ -58,6 +58,151 @@ type anf_structure = type anf_program = anf_structure list [@@deriving show { with_path = false }] +(* Pretty-printer for ANF expressions *) +open Stdlib.Format + +let pp_ty = Frontend.Ast.pp_ty + +let rec pp_immediate fmt = function + | ImmediateConst c -> + (match c with + | ConstInt n -> fprintf fmt "%d" n + | ConstBool b -> fprintf fmt "%b" b + | ConstString s -> fprintf fmt "\"%s\"" s) + | ImmediateVar x -> fprintf fmt "%s" x + +and pp_complex_expr fmt = function + | ComplexImmediate imm -> pp_immediate fmt imm + | ComplexBinOper (op, e1, e2) -> + let op_str = + match op with + | Plus -> "+" + | Minus -> "-" + | Multiply -> "*" + | Division -> "/" + | And -> "&&" + | Or -> "||" + | GretestEqual -> ">=" + | LowestEqual -> "<=" + | GreaterThan -> ">" + | LowerThan -> "<" + | Equal -> "=" + | NotEqual -> "<>" + in + fprintf fmt "(%a %s %a)" pp_immediate e1 op_str pp_immediate e2 + | ComplexUnarOper (op, e) -> + let op_str = + match op with + | Negative -> "-" + | Not -> "not" + in + fprintf fmt "(%s %a)" op_str pp_immediate e + | ComplexTuple (e1, e2, rest) -> + let all_exprs = e1 :: e2 :: rest in + fprintf + fmt + "(%a)" + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "; ") pp_immediate) + all_exprs + | ComplexList exprs -> + fprintf + fmt + "[%a]" + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "; ") pp_immediate) + exprs + | ComplexOption None -> fprintf fmt "None" + | ComplexOption (Some e) -> fprintf fmt "Some %a" pp_immediate e + | ComplexApp (f, arg, args) -> + let all_args = arg :: args in + fprintf + fmt + "%a %a" + pp_immediate + f + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt " ") pp_immediate) + all_args + | ComplexLambda (patterns, body) -> + let pp_pattern fmt pat = + match pat with + | PatVariable x -> fprintf fmt "%s" x + | PatConst c -> + (match c with + | ConstInt n -> fprintf fmt "%d" n + | ConstBool b -> fprintf fmt "%b" b + | ConstString s -> fprintf fmt "\"%s\"" s) + | PatTuple (p1, p2, rest) -> + let all_pats = p1 :: p2 :: rest in + fprintf + fmt + "(%a)" + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "; ") pp_pattern) + all_pats + | PatAny -> fprintf fmt "_" + | PatType (p, t) -> fprintf fmt "%a : %a" pp_pattern p pp_ty t + | PatUnit -> fprintf fmt "()" + | PatList pats -> + fprintf + fmt + "[%a]" + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "; ") pp_pattern) + pats + | PatOption None -> fprintf fmt "None" + | PatOption (Some p) -> fprintf fmt "Some %a" pp_pattern p + in + fprintf + fmt + "fun %a -> %a" + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt " ") pp_pattern) + patterns + pp_anf_expr + body + | ComplexBranch (cond, then_expr, else_expr) -> + fprintf + fmt + "if %a then %a else %a" + pp_immediate + cond + pp_anf_expr + then_expr + pp_anf_expr + else_expr + +and pp_anf_expr fmt = function + | AnfLet (rf, name, v, body) -> + let rec_flag = + match rf with + | Rec -> "rec " + | NonRec -> "" + in + fprintf fmt "let %s%s = %a in@ %a" rec_flag name pp_complex_expr v pp_anf_expr body + | AnfExpr e -> pp_complex_expr fmt e + +and pp_anf_bind fmt (name, expr) = fprintf fmt "%s = %a" name pp_anf_expr expr + +and pp_anf_structure fmt = function + | AnfEval expr -> fprintf fmt "%a" pp_anf_expr expr + | AnfValue (rf, bind, binds) -> + let rec_flag = + match rf with + | Rec -> "rec " + | NonRec -> "" + in + let all_binds = bind :: binds in + fprintf + fmt + "let %s%a" + rec_flag + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "@ and ") pp_anf_bind) + all_binds + +and pp_anf_program fmt program = + fprintf + fmt + "%a" + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "@\n\n") pp_anf_structure) + program +;; + let optimize_anf_let rf name1 v body = match rf, body with | NonRec, AnfExpr (ComplexImmediate (ImmediateVar name2)) when String.equal name1 name2 @@ -193,3 +338,13 @@ let anf_program (program : program) : (anf_program, string) Result.t = in run program' ;; + +(* Function to convert ANF expression to string using the pretty-printer *) +let anf_to_string anf_program = Stdlib.Format.asprintf "%a" pp_anf_program anf_program +let string_of_anf_expr anf_expr = Stdlib.Format.asprintf "%a" pp_anf_expr anf_expr + +let string_of_complex_expr complex_expr = + Stdlib.Format.asprintf "%a" pp_complex_expr complex_expr +;; + +let string_of_immediate immediate = Stdlib.Format.asprintf "%a" pp_immediate immediate diff --git a/EML/lib/middleend/anf_pp.ml b/EML/lib/middleend/anf_pp.ml new file mode 100644 index 00000000..796a54b4 --- /dev/null +++ b/EML/lib/middleend/anf_pp.ml @@ -0,0 +1,149 @@ +(* Pretty-printer for ANF expressions *) +open Stdlib.Format +open Frontend +open Ast +open Anf + +let pp_ty = Frontend.Ast.pp_ty + +let rec pp_immediate fmt = function + | ImmediateConst c -> + (match c with + | ConstInt n -> fprintf fmt "%d" n + | ConstBool b -> fprintf fmt "%b" b + | ConstString s -> fprintf fmt "\"%s\"" s) + | ImmediateVar x -> fprintf fmt "%s" x + +and pp_complex_expr fmt = function + | ComplexImmediate imm -> pp_immediate fmt imm + | ComplexBinOper (op, e1, e2) -> + let op_str = + match op with + | Plus -> "+" + | Minus -> "-" + | Multiply -> "*" + | Division -> "/" + | And -> "&&" + | Or -> "||" + | GretestEqual -> ">=" + | LowestEqual -> "<=" + | GreaterThan -> ">" + | LowerThan -> "<" + | Equal -> "=" + | NotEqual -> "<>" + in + fprintf fmt "(%a %s %a)" pp_immediate e1 op_str pp_immediate e2 + | ComplexUnarOper (op, e) -> + let op_str = + match op with + | Negative -> "-" + | Not -> "not" + in + fprintf fmt "(%s %a)" op_str pp_immediate e + | ComplexTuple (e1, e2, rest) -> + let all_exprs = e1 :: e2 :: rest in + fprintf + fmt + "(%a)" + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "; ") pp_immediate) + all_exprs + | ComplexList exprs -> + fprintf + fmt + "[%a]" + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "; ") pp_immediate) + exprs + | ComplexOption None -> fprintf fmt "None" + | ComplexOption (Some e) -> fprintf fmt "Some %a" pp_immediate e + | ComplexApp (f, arg, args) -> + let all_args = arg :: args in + fprintf + fmt + "%a %a" + pp_immediate + f + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt " ") pp_immediate) + all_args + | ComplexLambda (patterns, body) -> + let pp_pattern fmt pat = + match pat with + | PatVariable x -> fprintf fmt "%s" x + | PatConst c -> + (match c with + | ConstInt n -> fprintf fmt "%d" n + | ConstBool b -> fprintf fmt "%b" b + | ConstString s -> fprintf fmt "\"%s\"" s) + | PatTuple (p1, p2, rest) -> + let all_pats = p1 :: p2 :: rest in + fprintf + fmt + "(%a)" + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "; ") pp_pattern) + all_pats + | PatAny -> fprintf fmt "_" + | PatType (p, t) -> fprintf fmt "%a : %a" pp_pattern p pp_ty t + | PatUnit -> fprintf fmt "()" + | PatList pats -> + fprintf + fmt + "[%a]" + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "; ") pp_pattern) + pats + | PatOption None -> fprintf fmt "None" + | PatOption (Some p) -> fprintf fmt "Some %a" pp_pattern p + in + fprintf + fmt + "fun %a -> %a" + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt " ") pp_pattern) + patterns + pp_anf_expr + body + | ComplexBranch (cond, then_expr, else_expr) -> + fprintf + fmt + "if %a then %a else %a" + pp_immediate + cond + pp_anf_expr + then_expr + pp_anf_expr + else_expr + +and pp_anf_expr fmt = function + | AnfLet (rf, name, v, body) -> + let rec_flag = + match rf with + | Rec -> "rec " + | NonRec -> "" + in + fprintf fmt "let %s%s = %a in@ %a" rec_flag name pp_complex_expr v pp_anf_expr body + | AnfExpr e -> pp_complex_expr fmt e + +and pp_anf_bind fmt (name, expr) = fprintf fmt "%s = %a" name pp_anf_expr expr + +and pp_anf_structure fmt = function + | AnfEval expr -> fprintf fmt "%a" pp_anf_expr expr + | AnfValue (rf, bind, binds) -> + let rec_flag = + match rf with + | Rec -> "rec " + | NonRec -> "" + in + let all_binds = bind :: binds in + fprintf + fmt + "let %s%a" + rec_flag + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "@ and ") pp_anf_bind) + all_binds + +and pp_anf_program fmt program = + fprintf + fmt + "%a" + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "@\n\n") pp_anf_structure) + program +;; + +let anf_to_string anf_program = Stdlib.Format.asprintf "%a" pp_anf_program anf_program diff --git a/EML/tests/anf_tests.ml b/EML/tests/anf_tests.ml index 0d390c6b..c17645e9 100644 --- a/EML/tests/anf_tests.ml +++ b/EML/tests/anf_tests.ml @@ -4,6 +4,7 @@ open EML_lib.Frontend.Parser open EML_lib.Middleend.Anf +open EML_lib.Middleend.Anf_pp let parse_and_anf input = match parse input with @@ -14,6 +15,15 @@ let parse_and_anf input = | Error e -> Printf.printf "Parsing error: %s\n" e ;; +let parse_and_anf_pp input = + match parse input with + | Ok ast -> + (match anf_program ast with + | Ok anf_ast -> Printf.printf "%s\n" (anf_to_string anf_ast) + | Error e -> Printf.printf "ANF error: %s\n" e) + | Error e -> Printf.printf "Parsing error: %s\n" e +;; + let%expect_test "001.ml" = parse_and_anf "let recfac n = if n<=1 then 1 else n * fac (n-1)"; [%expect @@ -162,3 +172,33 @@ let%expect_test "002if.ml" = [])) ]|}] ;; + +let%expect_test "pretty_printer_test1" = + parse_and_anf_pp + "let rec fac n = if n <= 1 then 1 else n * fac (n - 1)\n let main = fac 4"; + [%expect + {| + let rec fac = let anf_t6 = fun n -> let anf_t1 = (n <= 1) in + let anf_t2 = if anf_t1 then 1 else let anf_t3 = (n - 1) in + let anf_t4 = fac anf_t3 in let anf_t5 = (n * anf_t4) in anf_t5 in anf_t2 in + anf_t6 + + let main = let anf_t0 = fac 4 in + anf_t0 |}] +;; + +let%expect_test "pretty_printer_test2" = + parse_and_anf_pp + "let rec fibo = fun n -> if n < 1 then 1 else fibo (n-1) + fibo (n-2)\n\ + \ let main = fibo 10"; + [%expect + {| + let rec fibo = let anf_t8 = fun n -> let anf_t1 = (n < 1) in + let anf_t2 = if anf_t1 then 1 else let anf_t3 = (n - 1) in + let anf_t4 = fibo anf_t3 in let anf_t5 = (n - 2) in + let anf_t6 = fibo anf_t5 in let anf_t7 = (anf_t4 + anf_t6) in anf_t7 in + anf_t2 in anf_t8 + + let main = let anf_t0 = fibo 10 in + anf_t0|}] +;; From fde450279c4397fe4bb54f34aadca21293084b75 Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Mon, 9 Feb 2026 19:59:22 +0300 Subject: [PATCH 11/74] add new constructions in ast --- EML/lib/frontend/ast.ml | 19 +- EML/lib/frontend/inferencer.ml | 152 +++++++++---- EML/lib/frontend/parser.ml | 390 ++++++++++++++++++++++++++------- EML/lib/middleend/anf.ml | 47 +++- EML/lib/middleend/anf_pp.ml | 10 +- EML/tests/inferencer_tests.ml | 2 +- EML/tests/parser_tests.ml | 6 +- 7 files changed, 489 insertions(+), 137 deletions(-) diff --git a/EML/lib/frontend/ast.ml b/EML/lib/frontend/ast.ml index 0bc9eba2..e43ac7cb 100644 --- a/EML/lib/frontend/ast.ml +++ b/EML/lib/frontend/ast.ml @@ -35,12 +35,13 @@ type const = | ConstInt of int (* Integer constant: Example - [21] *) | ConstBool of bool (* Boolean constant: Example - [true] or [false] *) | ConstString of string (* String constant: Example - "I like OCaml!" *) + | ConstChar of char (* Character constant: Example - ['a'] *) [@@deriving show { with_path = false }] type binder = int [@@deriving show { with_path = false }] type ty = - | TyVar of binder + | TyVar of ident | TyPrim of string | TyArrow of ty * ty | TyList of ty @@ -57,6 +58,7 @@ type pattern = | PatUnit | PatList of pattern list | PatOption of pattern option + | PatConstruct of ident * pattern option [@@deriving show { with_path = false }] type expr = @@ -67,11 +69,14 @@ type expr = | ExpUnarOper of unar_oper * expr (* ExpUnarOper(not, x)*) | ExpTuple of expr * expr * expr list (* ExpTuple[x1; x2 .. xn] *) | ExpList of expr list (* ExpList[x1; x2 .. xn] *) - | ExpLambda of pattern list * expr (* ExpLambda([x;y;z], x+y+z)*) + | ExpLambda of pattern * pattern list * expr | ExpTypeAnnotation of expr * ty | ExpLet of is_rec * bind * bind list * expr - | ExpFunction of expr * expr (* ExpFunction(x, y)*) + | ExpApply of expr * expr | ExpOption of expr option + | ExpFunction of bind * bind list + | ExpMatch of expr * bind * bind list + | ExpConstruct of ident * expr option [@@deriving show { with_path = false }] and bind = pattern * expr [@@deriving show { with_path = false }] @@ -83,9 +88,15 @@ type structure = type program = structure list [@@deriving show { with_path = false }] +let bin_op_list = [ "*"; "/"; "+"; "-"; "^"; ">="; "<="; "<>"; "="; ">"; "<"; "&&"; "||" ] +let is_bin_op op = List.mem op bin_op_list +let is_operator opr = List.exists (fun s -> String.equal s opr) bin_op_list +let is_unary_minus op = op = "~-" + + let rec pp_ty fmt = function | TyPrim x -> fprintf fmt "%s" x - | TyVar x -> fprintf fmt "'%d" x + | TyVar x -> fprintf fmt "%s" x | TyArrow (l, r) -> (match l, r with | TyArrow _, _ -> fprintf fmt "(%a) -> %a" pp_ty l pp_ty r diff --git a/EML/lib/frontend/inferencer.ml b/EML/lib/frontend/inferencer.ml index 9b1b3352..ca7ecc79 100644 --- a/EML/lib/frontend/inferencer.ml +++ b/EML/lib/frontend/inferencer.ml @@ -9,7 +9,7 @@ open Ast open Stdlib.Format type error = - | OccursCheck of int * ty + | OccursCheck of string * ty | NoVariable of string | UnificationFailed of ty * ty | SeveralBounds of string @@ -19,7 +19,7 @@ type error = let pp_error fmt = function | OccursCheck (id, ty) -> - fprintf fmt "Occurs check failed. Type variable '%d occurs inside %a." id pp_ty ty + fprintf fmt "Occurs check failed. Type variable '%s' occurs inside %a." id pp_ty ty | NoVariable name -> fprintf fmt "Unbound variable '%s'." name | UnificationFailed (ty1, ty2) -> fprintf fmt "Failed to unify types: %a and %a." pp_ty ty1 pp_ty ty2 @@ -29,8 +29,8 @@ let pp_error fmt = function | UnexpectedFunction ty1 -> fprintf fmt "UnexpectedFunction error: %a" pp_ty ty1 ;; -module IntSet = struct - include Stdlib.Set.Make (Int) +module VarSet = struct + include Stdlib.Set.Make (String) end module ResultMonad : sig @@ -89,7 +89,7 @@ end module Type = struct let rec occurs_in var = function - | TyVar b -> b = var + | TyVar b -> String.equal b var | TyArrow (left, right) -> occurs_in var left || occurs_in var right | TyTuple types -> List.exists types ~f:(occurs_in var) | TyList ty -> occurs_in var ty @@ -99,14 +99,14 @@ module Type = struct let free_vars = let rec helper acc = function - | TyVar b -> IntSet.add b acc + | TyVar b -> VarSet.add b acc | TyArrow (left, right) -> helper (helper acc left) right | TyTuple types -> List.fold_left types ~init:acc ~f:helper | TyList ty -> helper acc ty | TyOption ty -> helper acc ty | TyPrim _ -> acc in - helper IntSet.empty + helper VarSet.empty ;; end @@ -114,8 +114,8 @@ module Substitution : sig type t val empty : t - val singleton : int -> ty -> t ResultMonad.t - val remove : t -> int -> t + val singleton : string -> ty -> t ResultMonad.t + val remove : t -> string -> t val apply : t -> ty -> ty val unify : ty -> ty -> t ResultMonad.t val compose : t -> t -> t ResultMonad.t @@ -124,9 +124,9 @@ end = struct open ResultMonad open ResultMonad.Syntax - type t = (int, ty, Int.comparator_witness) Map.t + type t = (string, ty, String.comparator_witness) Map.t - let empty = Map.empty (module Int) + let empty = Map.empty (module String) let mapping key value = if Type.occurs_in key value @@ -136,7 +136,7 @@ end = struct let singleton key value = let* key, value = mapping key value in - return (Map.singleton (module Int) key value) + return (Map.singleton (module String) key value) ;; let find = Map.find @@ -161,7 +161,7 @@ end = struct match left, right with | TyPrim l, TyPrim r when String.equal l r -> return empty | TyPrim _, TyPrim _ -> fail (UnificationFailed (left, right)) - | TyVar l, TyVar r when l = r -> return empty + | TyVar l, TyVar r when String.equal l r -> return empty | TyVar b, ty | ty, TyVar b -> singleton b ty | TyArrow (left1, right1), TyArrow (left2, right2) -> let* subst1 = unify left1 left2 in @@ -208,15 +208,15 @@ end = struct end module Scheme = struct - type t = S of IntSet.t * ty + type t = Scheme of VarSet.t * ty - let free_vars (S (vars, ty)) = IntSet.diff (Type.free_vars ty) vars + let free_vars (Scheme (vars, ty)) = VarSet.diff (Type.free_vars ty) vars - let apply subst (S (vars, ty)) = + let apply subst (Scheme (vars, ty)) = let subst2 = - IntSet.fold (fun key subst -> Substitution.remove subst key) vars subst + VarSet.fold (fun key subst -> Substitution.remove subst key) vars subst in - S (vars, Substitution.apply subst2 ty) + Scheme (vars, Substitution.apply subst2 ty) ;; end @@ -225,9 +225,9 @@ module TypeEnv = struct let extend env key value = Map.update env key ~f:(fun _ -> value) - let free_vars : t -> IntSet.t = - Map.fold ~init:IntSet.empty ~f:(fun ~key:_ ~data:scheme acc -> - IntSet.union acc (Scheme.free_vars scheme)) + let free_vars : t -> VarSet.t = + Map.fold ~init:VarSet.empty ~f:(fun ~key:_ ~data:scheme acc -> + VarSet.union acc (Scheme.free_vars scheme)) ;; let apply subst env = Map.map env ~f:(Scheme.apply subst) @@ -238,24 +238,24 @@ module TypeEnv = struct empty (module String) |> set ~key:"print_int" - ~data:(Scheme.S (IntSet.empty, TyArrow (TyPrim "int", TyPrim "unit"))) + ~data:(Scheme.Scheme (VarSet.empty, TyArrow (TyPrim "int", TyPrim "unit"))) |> set ~key:"print_endline" - ~data:(Scheme.S (IntSet.empty, TyArrow (TyPrim "string", TyPrim "unit"))) + ~data:(Scheme.Scheme (VarSet.empty, TyArrow (TyPrim "string", TyPrim "unit"))) |> set ~key:"print_bool" - ~data:(Scheme.S (IntSet.empty, TyArrow (TyPrim "bool", TyPrim "unit"))) + ~data:(Scheme.Scheme (VarSet.empty, TyArrow (TyPrim "bool", TyPrim "unit"))) ;; end open ResultMonad open ResultMonad.Syntax -let fresh_var = fresh >>| fun n -> TyVar n +let fresh_var = fresh >>| fun n -> TyVar ("t" ^ Int.to_string n) let instantiate : Scheme.t -> ty ResultMonad.t = - fun (S (vars, ty)) -> - IntSet.fold + fun (Scheme (vars, ty)) -> + VarSet.fold (fun var typ -> let* typ = typ in let* fresh_ty = fresh_var in @@ -266,14 +266,15 @@ let instantiate : Scheme.t -> ty ResultMonad.t = ;; let generalize env ty = - let free = IntSet.diff (Type.free_vars ty) (TypeEnv.free_vars env) in - Scheme.S (free, ty) + let free = VarSet.diff (Type.free_vars ty) (TypeEnv.free_vars env) in + Scheme.Scheme (free, ty) ;; let infer_const = function | ConstInt _ -> TyPrim "int" | ConstBool _ -> TyPrim "bool" | ConstString _ -> TyPrim "string" + | ConstChar _ -> TyPrim "char" ;; let rec infer_pattern env = function @@ -283,7 +284,7 @@ let rec infer_pattern env = function | PatConst const -> return (Substitution.empty, infer_const const, env) | PatVariable var -> let* fresh = fresh_var in - let env = TypeEnv.extend env var (Scheme.S (IntSet.empty, fresh)) in + let env = TypeEnv.extend env var (Scheme.Scheme (VarSet.empty, fresh)) in return (Substitution.empty, fresh, env) | PatTuple (first_pat, second_pat, rest_pats) -> let* sub_first, type_first, env_first = infer_pattern env first_pat in @@ -340,6 +341,15 @@ let rec infer_pattern env = function , Substitution.apply total_subst annotated_ty , TypeEnv.apply total_subst env ) | PatUnit -> return (Substitution.empty, TyPrim "unit", env) + | PatConstruct (name, opt) -> + (match name, opt with + | "None", None -> + let* fresh = fresh_var in + return (Substitution.empty, TyOption fresh, env) + | "Some", Some p -> + let* sub, typ, env' = infer_pattern env p in + return (sub, TyOption typ, env') + | _ -> fail (RHS ("Unknown constructor: " ^ name))) ;; let infer_binop_type = function @@ -482,7 +492,7 @@ let rec infer_expr env = function | _ -> fail (RHS "Right-hand side of let rec must be a lambda expression") in let* tv = fresh_var in - let env2 = TypeEnv.extend env x (S (IntSet.empty, tv)) in + let env2 = TypeEnv.extend env x (Scheme.Scheme (VarSet.empty, tv)) in let* subst1, ty1 = infer_expr env2 expr1 in let* subst2 = Substitution.unify (Substitution.apply subst1 tv) ty1 in let* subst_total = Substitution.compose subst1 subst2 in @@ -521,7 +531,8 @@ let rec infer_expr env = function let* subst2, ty2 = infer_expr env_ext expr2 in let* total_subst = Substitution.compose subst_acc subst2 in return (total_subst, ty2) - | ExpLambda (patterns, body) -> + | ExpLambda (pat, pats, body) -> + let patterns = pat :: pats in let* env, pat_types = List.fold_left patterns @@ -539,15 +550,39 @@ let rec infer_expr env = function (List.rev pat_types) in return (subst_body, arrow_type) - | ExpFunction (param, body) -> - let* subst1, ty1 = infer_expr env param in - let* subst2, ty2 = infer_expr (TypeEnv.apply subst1 env) body in + | ExpApply (func, arg) -> + let* subst1, ty_func = infer_expr env func in + let* subst2, ty_arg = infer_expr (TypeEnv.apply subst1 env) arg in let* tv = fresh_var in let* subst3 = - Substitution.unify (Substitution.apply subst2 ty1) (TyArrow (ty2, tv)) + Substitution.unify + (Substitution.apply subst2 ty_func) + (TyArrow (ty_arg, tv)) in let* total_subst = Substitution.compose_all [ subst3; subst2; subst1 ] in return (total_subst, Substitution.apply total_subst tv) + | ExpFunction ((pat, body), rest_cases) -> + (match rest_cases with + | [] -> + let patterns = [ pat ] in + let* env', pat_types = + List.fold_left + patterns + ~init:(return (env, [])) + ~f:(fun acc p -> + let* env_acc, pat_types = acc in + let* _, typ, env_new = infer_pattern env_acc p in + return (env_new, typ :: pat_types)) + in + let* subst_body, ty_body = infer_expr env' body in + let arrow_type = + List.fold_right + ~f:(fun pt acc -> TyArrow (Substitution.apply subst_body pt, acc)) + ~init:ty_body + (List.rev pat_types) + in + return (subst_body, arrow_type) + | _ -> fail (RHS "Multiple function cases not yet supported")) | ExpOption opt_expr -> (match opt_expr with | Some expr -> @@ -561,6 +596,49 @@ let rec infer_expr env = function let* subst2 = Substitution.unify ty1 (Substitution.apply subst1 t) in let* total_subst = Substitution.compose subst1 subst2 in return (total_subst, Substitution.apply subst2 ty1) + | ExpMatch (scrut, (pat, expr), bind_list) -> + let* subst_scrut, ty_scrut = infer_expr env scrut in + let all_cases = (pat, expr) :: bind_list in + let* final_subst, ty_res = + List.fold_left + all_cases + ~init:(return (subst_scrut, None)) + ~f:(fun acc (pat', expr') -> + let* sub_acc, ty_res_opt = acc in + let env' = TypeEnv.apply sub_acc env in + let* sub_pat, ty_pat, env_pat = infer_pattern env' pat' in + let* sub_u = + Substitution.unify + (Substitution.apply sub_pat (Substitution.apply sub_acc ty_scrut)) + ty_pat + in + let* sub_comp = Substitution.compose sub_u sub_pat in + let* sub_expr, ty_branch = + infer_expr (TypeEnv.apply sub_comp env_pat) expr' + in + let* sub_total = + Substitution.compose_all [ sub_expr; sub_comp; sub_acc ] + in + let ty_branch' = Substitution.apply sub_total ty_branch in + (match ty_res_opt with + | None -> return (sub_total, Some ty_branch') + | Some ty_prev -> + let* sub_merge = Substitution.unify ty_prev ty_branch' in + let* sub_final = Substitution.compose sub_total sub_merge in + return (sub_final, Some (Substitution.apply sub_merge ty_prev)))) + in + (match ty_res with + | Some t -> return (final_subst, t) + | None -> fail (RHS "Empty match")) + | ExpConstruct (name, opt_expr) -> + (match name, opt_expr with + | "None", None -> + let* tv = fresh_var in + return (Substitution.empty, TyOption tv) + | "Some", Some e -> + let* subst, ty = infer_expr env e in + return (subst, TyOption ty) + | _ -> fail (RHS ("Unknown constructor: " ^ name))) ;; let infer_structure_item env = function @@ -575,7 +653,7 @@ let infer_structure_item env = function | _ -> fail (RHS "Right-hand side of let rec must be a lambda expression") in let* tv = fresh_var in - let env = TypeEnv.extend env x (S (IntSet.empty, tv)) in + let env = TypeEnv.extend env x (Scheme.Scheme (VarSet.empty, tv)) in let* subst, ty = infer_expr env expr in let* subst2 = Substitution.unify (Substitution.apply subst tv) ty in let* composed_subst = Substitution.compose subst subst2 in diff --git a/EML/lib/frontend/parser.ml b/EML/lib/frontend/parser.ml index 6d2a1d3b..473d779e 100644 --- a/EML/lib/frontend/parser.ml +++ b/EML/lib/frontend/parser.ml @@ -18,7 +18,10 @@ let is_keyword = function | "true" | "false" | "Some" - | "and" -> true + | "and" + | "function" + | "None" + | "with" -> true | _ -> false ;; @@ -42,12 +45,52 @@ let token s = white_space *> string s let token1 s = white_space *> s let parse_parens p = token "(" *> p <* token ")" +let is_separator = function + | ')' + | '(' + | '<' + | '>' + | '@' + | ',' + | ';' + | ':' + | '\\' + | '"' + | '/' + | '[' + | ']' + | '?' + | '=' + | '{' + | '}' + | ' ' + | '\r' + | '\t' + | '\n' + | '*' + | '-' -> true + | _ -> false +;; + +let token2 str = + token str + *> peek_char + >>= function + | Some c when is_separator c -> return str <* white_space + | None -> return str <* white_space + | _ -> fail (Printf.sprintf "There is no separator after %S." str) +;; + let parse_const_int = let sign = choice [ token "" ] in let num = take_while1 Char.is_digit in lift2 (fun s n -> ConstInt (Int.of_string (s ^ n))) sign num ;; +let parse_const_char = + string "\'" *> any_char <* string "\'" >>| fun char_value -> ConstChar char_value +;; + let parse_const_bool = choice [ token "true" *> return (ConstBool true); token "false" *> return (ConstBool false) ] @@ -57,8 +100,13 @@ let parse_const_string = token "\"" *> take_till (Char.equal '\"') <* token "\"" >>| fun s -> ConstString s ;; -let parse_const = choice [ parse_const_int; parse_const_bool; parse_const_string ] -let parse_unar_oper = choice [ token "-" *> return Negative; token "not" *> return Not ] + +let parse_const = + white_space *> choice [ parse_const_int; parse_const_char; parse_const_string; parse_const_bool ] +;; +let parse_unar_oper = + choice [ token "-" *> return Negative; token "not" *> return Not ] + let parse_ident = let parse_first_char = @@ -69,8 +117,16 @@ let parse_ident = take_while (fun ch -> is_lowercase ch || is_uppercase ch || is_digit ch || Char.equal ch '_') in - token1 @@ lift2 ( ^ ) parse_first_char parse_other_chars - >>= fun s -> if is_keyword s then fail "It is not identifier" else return s + let parse_regular_ident = + token1 @@ lift2 ( ^ ) parse_first_char parse_other_chars + >>= fun s -> if is_keyword s then fail "It is not identifier" else return s + in + let parse_op_ident = + white_space *> char '(' *> white_space + *> choice (List.map Ast.bin_op_list ~f:(fun opr -> token opr *> return opr)) + <* white_space <* char ')' + in + parse_regular_ident <|> parse_op_ident ;; let parse_base_type = @@ -79,6 +135,7 @@ let parse_base_type = ; token "bool" *> return (TyPrim "bool") ; token "string" *> return (TyPrim "string") ; token "unit" *> return (TyPrim "unit") + ; token "char" *> return (TyPrim "char") ] ;; @@ -89,10 +146,25 @@ let rec parse_type_list t = *> (parse_type_list (return (TyList base)) <|> return (TyList base)) ;; +let parse_tuple_type parse_type = + let* fst_type = parse_type in + let* snd_type = token "*" *> parse_type in + let* type_list = many (token "*" *> parse_type) in + return (TyTuple (fst_type :: snd_type :: type_list)) +;; + +let rec parse_arrow_type parse_type = + let* type1 = parse_type in + let* type2 = token "->" *> (parse_arrow_type parse_type <|> parse_type) in + return (TyArrow (type1, type2)) +;; + + let parse_type = let base_type = parse_base_type in let list_type = parse_type_list base_type <|> base_type in - list_type + let tuple_type = parse_tuple_type list_type <|> list_type in + parse_arrow_type tuple_type <|> tuple_type ;; let parse_pattern_with_type parse_pattern = @@ -105,7 +177,7 @@ let parse_pattern_with_type parse_pattern = let parse_pattern_var = parse_ident >>| fun id -> PatVariable id let parse_pattern_const = parse_const >>| fun c -> PatConst c -let parse_pattern_any = token "_" *> return PatAny +let parse_pattern_any = token2 "_" *> return PatAny let parse_pattern_tuple parse_pattern = let parse_unparenthesized = @@ -119,40 +191,69 @@ let parse_pattern_tuple parse_pattern = parse_parens parse_unparenthesized <|> parse_unparenthesized ;; -let parse_pattern_list parse_pattern = - let semicols = token ";" in - token "[" *> (sep_by semicols parse_pattern >>| fun patterns -> PatList patterns) - <* token "]" +let parse_keyword = + choice [ token "true"; token "false"; token "None"; token "()" ] +;; + +let parse_option parse = + let* tag = token2 "Some" in + let* opt = parse >>| Option.some in + return (tag, opt) ;; -let parse_pattern_empty = token "()" *> return PatUnit -let parse_pattern_option parse_pattern = - lift - (fun e -> PatOption e) - (token "Some" *> parse_pattern - >>| (fun e -> Some e) - <|> (token "None" >>| fun _ -> None)) + +let parse_construct parse construct func = + token "[" *> sep_by (token ";") parse <* token "]" + >>| List.fold_right ~init:(construct ("[]", None)) ~f:func +;; + +let parse_list parse construct tuple = + let rec go acc = + (token "::" *> parse >>= fun elem -> go elem >>| fun rest -> construct ("::", Some (tuple (acc, rest, [])))) + <|> return acc + in + parse >>= go +;; + +let parse_pattern_construct parse_elem parse_pat = + choice + [ parse_option (parse_elem <|> parse_parens parse_pat) + >>| (fun (t, p) -> PatConstruct (t, p)) + ; parse_construct parse_elem (fun (t, p) -> PatConstruct (t, p)) + (fun p acc -> PatConstruct ("::", Some (PatTuple (p, acc, [])))) + ; parse_list parse_elem (fun (t, p) -> PatConstruct (t, p)) (fun (a, b, c) -> PatTuple (a, b, c)) + ] +;; + + +let parse_base_pat = + choice + [ parse_pattern_any + ; parse_pattern_var + ; parse_pattern_const + ; parse_keyword >>| (fun tag -> PatConstruct (tag, None)) + ] ;; let parse_pattern = - fix (fun pat -> - let atom = - choice - [ parse_pattern_var - ; parse_pattern_any - ; parse_pattern_const - ; parse_pattern_empty - ; parse_pattern_with_type pat - ; parse_parens pat - ; parse_pattern_option pat - ] - in - let tuple = parse_pattern_tuple atom <|> atom in - let lst = parse_pattern_list tuple <|> tuple in - lst) + white_space + *> fix (fun pat -> + let atom = + choice + [ parse_base_pat + ; parse_pattern_construct parse_base_pat pat + ; parse_pattern_with_type pat + ; parse_parens pat + ] + in + let tuple = parse_pattern_construct atom pat <|> atom in + let lst = parse_pattern_construct tuple pat <|> tuple in + parse_pattern_tuple lst <|> lst) ;; + + let parse_left_associative expr oper = let rec go acc = lift2 (fun f x -> f acc x) oper expr >>= go <|> return acc in expr >>= go @@ -196,31 +297,49 @@ let parse_expr_branch parse_expr = (option None (token "else" *> parse_expr >>| Option.some)) ;; -let parse_expr_option parse_expr = - choice - [ token "None" *> return (ExpOption None) - ; (token "Some" *> choice [ parse_parens parse_expr; parse_expr ] - >>| fun e -> ExpOption (Some e)) - ] -;; - let parse_expr_unar_oper parse_expr = parse_unar_oper >>= fun op -> parse_expr >>= fun expr -> return (ExpUnarOper (op, expr)) ;; let parse_expr_list parse_expr = - let parse_elements = sep_by (token ";") parse_expr in - token "[" *> parse_elements <* token "]" >>| fun elements -> ExpList elements + parse_list + parse_expr + (fun (tag, exp_opt) -> ExpConstruct (tag, exp_opt)) + (fun (fst_exp, snd_exp, exp_list) -> ExpTuple (fst_exp, snd_exp, exp_list)) ;; -let parse_expr_function e = - parse_left_associative e (return (fun e1 e2 -> ExpFunction (e1, e2))) +let parse_expr_apply e = + parse_left_associative e (return (fun e1 e2 -> ExpApply (e1, e2))) ;; let parse_expr_lambda parse_expr = - token "fun" *> sep_by1 white_space parse_pattern + token2 "fun" + *> sep_by1 white_space parse_pattern <* token "->" - >>= fun params -> parse_expr >>| fun body -> ExpLambda (params, body) + >>= fun params -> + parse_expr + >>| fun body -> + match params with + | pat :: pats -> ExpLambda (pat, pats, body) + | [] -> body +;; + +let parse_case parse_expr = + white_space *> option () (token "|" *> return ()) + *> lift2 (fun pat exp -> (pat, exp)) parse_pattern (token "->" *> parse_expr) +;; + +let parse_expr_function parse_expr = + token2 "function" + *> + let* case_list = sep_by1 (token "|") (parse_case parse_expr) in + return (ExpFunction (List.hd_exn case_list, List.drop case_list 1)) +;; + +let parse_expr_match parse_expr = + let* exp = token2 "match" *> parse_expr <* token2 "with" in + let* case_list = sep_by1 (token "|") (parse_case parse_expr) in + return (ExpMatch (exp, List.hd_exn case_list, List.drop case_list 1)) ;; let parse_expr_tuple parse_expr = @@ -238,9 +357,95 @@ let parse_expr_tuple parse_expr = let parse_body parse_expr = many1 parse_pattern - >>= fun patterns -> token "=" *> parse_expr >>| fun body -> ExpLambda (patterns, body) + >>= fun patterns -> + token "=" *> parse_expr + >>| fun body -> + match patterns with + | pat :: pats -> ExpLambda (pat, pats, body) + | [] -> body +;; + + +let parse_expr_sequence parse_expr = + parse_left_associative + parse_expr + (token ";" *> return (fun exp1 exp2 -> + ExpLet (NonRec, (PatUnit, exp1), [], exp2))) +;; + +let parse_expr_construct parse_expr = + let cons_one exp acc = ExpConstruct ("::", Some (ExpTuple (exp, acc, []))) in + let rec unfold_sequence = function + | ExpLet (NonRec, (PatUnit, e1), [], e2) -> + let rest, last = unfold_sequence e2 in + (e1 :: rest, last) + | e -> ([], e) + in + let rec fold_elem (from_parens, exp) acc = + if from_parens then cons_one exp acc + else + match exp with + | ExpLet (NonRec, (PatUnit, e1), [], e2) -> + let rest, last = unfold_sequence e2 in + let acc' = fold_elem (false, last) acc in + let acc'' = List.fold_right rest ~init:acc' ~f:(fun e a -> fold_elem (false, e) a) in + fold_elem (false, e1) acc'' + | _ -> cons_one exp acc + in + let elem_parser = + (parse_parens (parse_expr_sequence parse_expr) >>| fun exp -> true, exp) + <|> (parse_expr >>| fun exp -> false, exp) + in + parse_construct elem_parser (fun (t, e) -> ExpConstruct (t, e)) fold_elem +;; + +let parse_annotated_rhs parse_expr opr = + (token ":" *> parse_type) >>= fun t -> + (token opr *> parse_expr) >>| fun expr -> ExpTypeAnnotation (expr, t) +;; + +let parse_fun_binding parse_expr = + let* id = parse_pattern_var in + let* params = many1 parse_pattern in + let pat = List.hd_exn params and pats = List.drop params 1 in + let mk_body body = ExpLambda (pat, pats, body) in + choice + [ (parse_annotated_rhs parse_expr "=" >>= function + | ExpTypeAnnotation (expr, t) -> + return (PatType (id, t), mk_body expr) + | _ -> fail "expected type annotation") + ; token "=" *> parse_expr >>| fun expr -> (id, mk_body expr) + ] +;; + +let parse_simple_binding parse_expr = + let* pat = parse_pattern in + choice + [ (parse_annotated_rhs parse_expr "=" >>= function + | ExpTypeAnnotation (expr, t) -> + return (PatType (pat, t), expr) + | _ -> fail "expected type annotation") + ; token "=" *> parse_expr >>| fun expr -> (pat, expr) + ] +;; + +let parse_value_binding_list parse_expr = + let parse_binding = parse_fun_binding parse_expr <|> parse_simple_binding parse_expr in + sep_by1 (token2 "and") (white_space *> parse_binding) +;; + + +let parse_base_expr = + choice [ parse_expr_ident; parse_expr_const; parse_keyword >>| fun tag -> ExpConstruct (tag, None) ] +;; + +let parse_expr_construct_keyword_some parse_expr = + parse_option (parse_base_expr <|> parse_parens parse_expr) + >>| fun (tag, exp_opt) -> ExpConstruct (tag, exp_opt) ;; + + let parse_expr_let parse_expr = token "let" *> lift4 @@ -260,53 +465,72 @@ let parse_expr_let parse_expr = (token "in" *> parse_expr) ;; +let parse_top_expr parse_expr = + choice + [ parse_expr_let parse_expr + ; parse_expr_function parse_expr + ; parse_expr_lambda parse_expr + ; parse_expr_match parse_expr + ; parse_expr_branch parse_expr + ] +;; + +let parse_exp_apply e = + let app = parse_expr_apply e in + let app = parse_expr_unar_oper app <|> app in + let ops1 = parse_left_associative app (multiply <|> division) in + let ops2 = parse_left_associative ops1 (plus <|> minus) in + let cmp = parse_left_associative ops2 compare in + parse_left_associative cmp (and_op <|> or_op) +;; + let parse_expr = - fix (fun expr -> - let term = + white_space + *> fix (fun expr -> + let term = + choice + [ parse_base_expr + ; parse_expr_construct_keyword_some expr + ; parse_parens (parse_expr_with_type expr) + ; parse_expr_construct expr + ; parse_top_expr expr + ; parse_parens expr + ] + in + let func = parse_exp_apply term <|> term in + let lst = parse_expr_list func <|> func in + let tuple = parse_expr_tuple lst <|> lst in + let seq = parse_expr_sequence tuple <|> tuple in + let lambda = parse_expr_lambda expr <|> seq in choice - [ parse_expr_ident - ; parse_expr_const - ; parse_expr_list expr - ; parse_parens expr - ; parse_parens (parse_expr_with_type expr) - ] - in - let func = parse_expr_function term in - let cons = parse_expr_option func <|> func in - let ife = parse_expr_branch expr <|> cons in - let unops = parse_expr_unar_oper ife <|> ife in - let ops1 = parse_left_associative unops (multiply <|> division) in - let ops2 = parse_left_associative ops1 (plus <|> minus) in - let cmp = parse_left_associative ops2 compare in - let boolean = parse_left_associative cmp (and_op <|> or_op) in - let tuple = parse_expr_tuple boolean <|> boolean in - let lambda = parse_expr_lambda expr <|> tuple in - choice [ parse_expr_let expr; parse_expr_lambda expr; lambda ]) + [ parse_expr_let expr + ; parse_expr_function expr + ; parse_expr_lambda expr + ; parse_expr_match expr + ; parse_expr_branch expr + ; lambda + ]) ;; let parse_structure = let parse_eval = parse_expr >>| fun e -> SEval e in let parse_value = token "let" - *> lift3 - (fun r id id_list -> SValue (r, id, id_list)) - (token "rec" *> (take_while1 Char.is_whitespace *> return Rec) <|> return NonRec) - (lift2 - (fun pat expr -> pat, expr) - parse_pattern - (token "=" *> parse_expr <|> parse_body parse_expr)) - (many - (token "and" - *> lift2 - (fun pat expr -> pat, expr) - parse_pattern - (token "=" *> parse_expr <|> parse_body parse_expr))) + *> lift2 + (fun r id_list -> + SValue (r, List.hd_exn id_list, List.drop id_list 1)) + (token "rec" *> (take_while1 Char.is_whitespace *> return Rec) + <|> return NonRec) + (parse_value_binding_list parse_expr) in - choice [ parse_eval; parse_value ] + let parse_structure_item = choice [ parse_eval; parse_value ] in + parse_structure_item <* option () (token ";;" >>| ignore) ;; let parse_program = - let definitions_or_exprs = many parse_structure <* option () (token ";;" >>| ignore) in + let definitions_or_exprs = + white_space *> many parse_structure <* option () (token ";;" >>| ignore) + in definitions_or_exprs <* white_space ;; diff --git a/EML/lib/middleend/anf.ml b/EML/lib/middleend/anf.ml index d9366e90..38a4a9ce 100644 --- a/EML/lib/middleend/anf.ml +++ b/EML/lib/middleend/anf.ml @@ -68,7 +68,8 @@ let rec pp_immediate fmt = function (match c with | ConstInt n -> fprintf fmt "%d" n | ConstBool b -> fprintf fmt "%b" b - | ConstString s -> fprintf fmt "\"%s\"" s) + | ConstString s -> fprintf fmt "\"%s\"" s + | ConstChar ch -> fprintf fmt "'%s'" (Char.escaped ch)) | ImmediateVar x -> fprintf fmt "%s" x and pp_complex_expr fmt = function @@ -129,7 +130,8 @@ and pp_complex_expr fmt = function (match c with | ConstInt n -> fprintf fmt "%d" n | ConstBool b -> fprintf fmt "%b" b - | ConstString s -> fprintf fmt "\"%s\"" s) + | ConstString s -> fprintf fmt "\"%s\"" s + | ConstChar ch -> fprintf fmt "'%s'" (Char.escaped ch)) | PatTuple (p1, p2, rest) -> let all_pats = p1 :: p2 :: rest in fprintf @@ -148,6 +150,10 @@ and pp_complex_expr fmt = function pats | PatOption None -> fprintf fmt "None" | PatOption (Some p) -> fprintf fmt "Some %a" pp_pattern p + | PatConstruct (name, opt) -> + (match opt with + | None -> fprintf fmt "%s" name + | Some p -> fprintf fmt "%s %a" name pp_pattern p) in fprintf fmt @@ -285,21 +291,48 @@ let rec anf (e : expr) (k : immediate -> anf_expr t) : anf_expr t = let* var_name = fresh in return (AnfLet (NonRec, var_name, complex_expr_body, e2_anf))) else fail "Complex patterns in let bindings not yet supported" - | ExpLambda (patterns, body) -> + | ExpLambda (pat, pats, body) -> + let patterns = pat :: pats in let* body_anf = anf body (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) in let* var_name = fresh in let* cont_expr = k (ImmediateVar var_name) in return (AnfLet (NonRec, var_name, ComplexLambda (patterns, body_anf), cont_expr)) - | ExpFunction (func, arg) -> - anf func (fun immediate_func -> - anf arg (fun immediate_arg -> + | ExpFunction ((pat, body), rest_cases) -> + (match rest_cases with + | [] -> + let patterns = [ pat ] in + let* body_anf = + anf body (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) + in + let* var_name = fresh in + let* cont_expr = k (ImmediateVar var_name) in + return (AnfLet (NonRec, var_name, ComplexLambda (patterns, body_anf), cont_expr)) + | _ -> fail "ExpFunction: multiple cases not yet supported") + | ExpApply (func, arg) -> + anf func (fun imm_f -> + anf arg (fun imm_arg -> let* var_name = fresh in let* cont_expr = k (ImmediateVar var_name) in return (AnfLet - (NonRec, var_name, ComplexApp (immediate_func, immediate_arg, []), cont_expr)))) + (NonRec, var_name, ComplexApp (imm_f, imm_arg, []), cont_expr)))) + | ExpMatch _ -> fail "ExpMatch not yet supported" + | ExpConstruct (name, opt_expr) -> + (match name, opt_expr with + | "None", None -> + let* var_name = fresh in + let* cont_expr = k (ImmediateVar var_name) in + return (AnfLet (NonRec, var_name, ComplexOption None, cont_expr)) + | "Some", Some e -> + anf e (fun immediate -> + let* var_name = fresh in + let* cont_expr = k (ImmediateVar var_name) in + return + (AnfLet + (NonRec, var_name, ComplexOption (Some immediate), cont_expr))) + | _ -> fail "ExpConstruct: only None/Some supported") | ExpTypeAnnotation (e, _) -> anf e k and anf_list (exprs : expr list) (k : immediate list -> anf_expr t) : anf_expr t = diff --git a/EML/lib/middleend/anf_pp.ml b/EML/lib/middleend/anf_pp.ml index 796a54b4..2ef63f9b 100644 --- a/EML/lib/middleend/anf_pp.ml +++ b/EML/lib/middleend/anf_pp.ml @@ -11,7 +11,8 @@ let rec pp_immediate fmt = function (match c with | ConstInt n -> fprintf fmt "%d" n | ConstBool b -> fprintf fmt "%b" b - | ConstString s -> fprintf fmt "\"%s\"" s) + | ConstString s -> fprintf fmt "\"%s\"" s + | ConstChar ch -> fprintf fmt "'%s'" (Char.escaped ch)) | ImmediateVar x -> fprintf fmt "%s" x and pp_complex_expr fmt = function @@ -72,7 +73,8 @@ and pp_complex_expr fmt = function (match c with | ConstInt n -> fprintf fmt "%d" n | ConstBool b -> fprintf fmt "%b" b - | ConstString s -> fprintf fmt "\"%s\"" s) + | ConstString s -> fprintf fmt "\"%s\"" s + | ConstChar ch -> fprintf fmt "'%s'" (Char.escaped ch)) | PatTuple (p1, p2, rest) -> let all_pats = p1 :: p2 :: rest in fprintf @@ -91,6 +93,10 @@ and pp_complex_expr fmt = function pats | PatOption None -> fprintf fmt "None" | PatOption (Some p) -> fprintf fmt "Some %a" pp_pattern p + | PatConstruct (name, opt) -> + (match opt with + | None -> fprintf fmt "%s" name + | Some p -> fprintf fmt "%s %a" name pp_pattern p) in fprintf fmt diff --git a/EML/tests/inferencer_tests.ml b/EML/tests/inferencer_tests.ml index 06c0921a..2b4f49e4 100644 --- a/EML/tests/inferencer_tests.ml +++ b/EML/tests/inferencer_tests.ml @@ -15,7 +15,7 @@ let pretty_printer_parse_and_infer s = Base.Map.filter_keys env ~f:(fun key -> not (List.mem key [ "print_int"; "print_endline"; "print_bool" ])) in - Base.Map.iteri filtered_env ~f:(fun ~key ~data:(S (_, ty)) -> + Base.Map.iteri filtered_env ~f:(fun ~key ~data:(Scheme.Scheme (_, ty)) -> Format.printf "val %s: %a\n" key pp_ty ty) | Error e -> Format.printf "Infer error. %a\n" pp_error e) | Error e -> Format.printf "Parsing error. %s\n" e diff --git a/EML/tests/parser_tests.ml b/EML/tests/parser_tests.ml index f203c74e..67e6349d 100644 --- a/EML/tests/parser_tests.ml +++ b/EML/tests/parser_tests.ml @@ -25,7 +25,7 @@ let main = fac 4 |}; {| [(SValue (Rec, ((PatVariable "fac"), - (ExpLambda ([(PatVariable "n")], + (ExpLambda ((PatVariable "n"), [], (ExpBranch ( (ExpBinOper (LowestEqual, (ExpIdent "n"), (ExpConst (ConstInt 1)) )), @@ -37,7 +37,7 @@ let main = fac 4 |}; [], (ExpLet (NonRec, ((PatVariable "m"), - (ExpFunction ((ExpIdent "fac"), (ExpIdent "n1")))), + (ExpApply ((ExpIdent "fac"), (ExpIdent "n1")))), [], (ExpBinOper (Multiply, (ExpIdent "n"), (ExpIdent "m") )) @@ -48,7 +48,7 @@ let main = fac 4 |}; [])); (SValue (NonRec, ((PatVariable "main"), - (ExpFunction ((ExpIdent "fac"), (ExpConst (ConstInt 4))))), + (ExpApply ((ExpIdent "fac"), (ExpConst (ConstInt 4))))), [])) ] |}] From c3a6f28bffb988bf0d77b875330c875eb1c4d089 Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Sun, 15 Feb 2026 14:48:18 +0300 Subject: [PATCH 12/74] update anf --- EML/lib/middleend/anf.ml | 442 +++++++++++++----------------------- EML/lib/middleend/anf_pp.ml | 2 + EML/tests/anf_tests.ml | 154 ++++++------- 3 files changed, 225 insertions(+), 373 deletions(-) diff --git a/EML/lib/middleend/anf.ml b/EML/lib/middleend/anf.ml index 38a4a9ce..1b5dd6f9 100644 --- a/EML/lib/middleend/anf.ml +++ b/EML/lib/middleend/anf.ml @@ -1,7 +1,13 @@ -open Frontend -open Ast +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +open Frontend.Ast open Base -open Utils module ANFMonad = struct type 'a t = int -> int * ('a, string) Result.t @@ -26,7 +32,6 @@ end open ANFMonad open ANFMonad.Syntax - type immediate = | ImmediateConst of const | ImmediateVar of ident @@ -34,9 +39,11 @@ type immediate = type complex_expr = | ComplexImmediate of immediate + | ComplexUnit | ComplexBinOper of bin_oper * immediate * immediate | ComplexUnarOper of unar_oper * immediate | ComplexTuple of immediate * immediate * immediate list + | ComplexField of immediate * int | ComplexList of immediate list | ComplexOption of immediate option | ComplexApp of immediate * immediate * immediate list @@ -58,282 +65,147 @@ type anf_structure = type anf_program = anf_structure list [@@deriving show { with_path = false }] -(* Pretty-printer for ANF expressions *) -open Stdlib.Format -let pp_ty = Frontend.Ast.pp_ty -let rec pp_immediate fmt = function - | ImmediateConst c -> - (match c with - | ConstInt n -> fprintf fmt "%d" n - | ConstBool b -> fprintf fmt "%b" b - | ConstString s -> fprintf fmt "\"%s\"" s - | ConstChar ch -> fprintf fmt "'%s'" (Char.escaped ch)) - | ImmediateVar x -> fprintf fmt "%s" x -and pp_complex_expr fmt = function - | ComplexImmediate imm -> pp_immediate fmt imm - | ComplexBinOper (op, e1, e2) -> - let op_str = - match op with - | Plus -> "+" - | Minus -> "-" - | Multiply -> "*" - | Division -> "/" - | And -> "&&" - | Or -> "||" - | GretestEqual -> ">=" - | LowestEqual -> "<=" - | GreaterThan -> ">" - | LowerThan -> "<" - | Equal -> "=" - | NotEqual -> "<>" - in - fprintf fmt "(%a %s %a)" pp_immediate e1 op_str pp_immediate e2 - | ComplexUnarOper (op, e) -> - let op_str = - match op with - | Negative -> "-" - | Not -> "not" - in - fprintf fmt "(%s %a)" op_str pp_immediate e - | ComplexTuple (e1, e2, rest) -> - let all_exprs = e1 :: e2 :: rest in - fprintf - fmt - "(%a)" - (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "; ") pp_immediate) - all_exprs - | ComplexList exprs -> - fprintf - fmt - "[%a]" - (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "; ") pp_immediate) - exprs - | ComplexOption None -> fprintf fmt "None" - | ComplexOption (Some e) -> fprintf fmt "Some %a" pp_immediate e - | ComplexApp (f, arg, args) -> - let all_args = arg :: args in - fprintf - fmt - "%a %a" - pp_immediate - f - (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt " ") pp_immediate) - all_args - | ComplexLambda (patterns, body) -> - let pp_pattern fmt pat = - match pat with - | PatVariable x -> fprintf fmt "%s" x - | PatConst c -> - (match c with - | ConstInt n -> fprintf fmt "%d" n - | ConstBool b -> fprintf fmt "%b" b - | ConstString s -> fprintf fmt "\"%s\"" s - | ConstChar ch -> fprintf fmt "'%s'" (Char.escaped ch)) - | PatTuple (p1, p2, rest) -> - let all_pats = p1 :: p2 :: rest in - fprintf - fmt - "(%a)" - (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "; ") pp_pattern) - all_pats - | PatAny -> fprintf fmt "_" - | PatType (p, t) -> fprintf fmt "%a : %a" pp_pattern p pp_ty t - | PatUnit -> fprintf fmt "()" - | PatList pats -> - fprintf - fmt - "[%a]" - (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "; ") pp_pattern) - pats - | PatOption None -> fprintf fmt "None" - | PatOption (Some p) -> fprintf fmt "Some %a" pp_pattern p - | PatConstruct (name, opt) -> - (match opt with - | None -> fprintf fmt "%s" name - | Some p -> fprintf fmt "%s %a" name pp_pattern p) - in - fprintf - fmt - "fun %a -> %a" - (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt " ") pp_pattern) - patterns - pp_anf_expr - body - | ComplexBranch (cond, then_expr, else_expr) -> - fprintf - fmt - "if %a then %a else %a" - pp_immediate - cond - pp_anf_expr - then_expr - pp_anf_expr - else_expr +let optimize_anf_let (is_rec, name1, expr, body) = + match is_rec, body with + | NonRec, AnfExpr (ComplexImmediate (ImmediateVar name2)) when String.equal name1 name2 + -> AnfExpr expr + | _, AnfLet (is_rec', orig_name, ComplexImmediate (ImmediateVar name2), body') + when String.equal name1 name2 + -> AnfLet (is_rec', orig_name, expr, body') + | _ -> AnfLet (is_rec, name1, expr, body) +;; -and pp_anf_expr fmt = function - | AnfLet (rf, name, v, body) -> - let rec_flag = - match rf with - | Rec -> "rec " - | NonRec -> "" - in - fprintf fmt "let %s%s = %a in@ %a" rec_flag name pp_complex_expr v pp_anf_expr body - | AnfExpr e -> pp_complex_expr fmt e +let bind_complex_expr complex_expr k = + let* var = fresh in + let* body_expr = k (ImmediateVar var) in + return (optimize_anf_let (NonRec, var, complex_expr, body_expr)) +;; -and pp_anf_bind fmt (name, expr) = fprintf fmt "%s = %a" name pp_anf_expr expr +let get_var = function + | PatVariable id -> return id + | _ -> fresh +;; -and pp_anf_structure fmt = function - | AnfEval expr -> fprintf fmt "%a" pp_anf_expr expr - | AnfValue (rf, bind, binds) -> - let rec_flag = - match rf with - | Rec -> "rec " - | NonRec -> "" +let rec destructure_tuple_pat tuple_var indices_pats empty nested_empty add + = + match indices_pats with + | [] -> return empty + | (i, pat) :: rest -> + let* var = get_var pat in + let* rest_result = destructure_tuple_pat tuple_var rest empty nested_empty add in + let* inner_result = + (match pat with + | PatTuple (ip1, ip2, irest) -> + destructure_tuple_pat var + (List.mapi (ip1 :: ip2 :: irest) ~f:(fun j p -> j, p)) + (nested_empty rest_result) + nested_empty add + | _ -> return (nested_empty rest_result)) in - let all_binds = bind :: binds in - fprintf - fmt - "let %s%a" - rec_flag - (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "@ and ") pp_anf_bind) - all_binds + return (add var i tuple_var inner_result rest_result) +;; -and pp_anf_program fmt program = - fprintf - fmt - "%a" - (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "@\n\n") pp_anf_structure) - program +let build_tuple_lets tuple_var indices_pats body = + destructure_tuple_pat tuple_var indices_pats body (fun x -> x) (fun bind_id i tv inner _rest -> + AnfLet (NonRec, bind_id, ComplexField (ImmediateVar tv, i), inner)) ;; -let optimize_anf_let rf name1 v body = - match rf, body with - | NonRec, AnfExpr (ComplexImmediate (ImmediateVar name2)) when String.equal name1 name2 - -> AnfExpr v - | _ -> AnfLet (rf, name1, v, body) +let build_tuple_top_level_bindings tuple_var indices_pats = + destructure_tuple_pat tuple_var indices_pats [] (fun _ -> []) (fun bind_id i tv inner rest -> + (bind_id, AnfExpr (ComplexField (ImmediateVar tv, i))) :: inner @ rest) ;; -let rec anf (e : expr) (k : immediate -> anf_expr t) : anf_expr t = - match e with +let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = + match expr with | ExpConst c -> k (ImmediateConst c) | ExpIdent x -> k (ImmediateVar x) - | ExpUnarOper (op, e) -> - anf e (fun immediate -> - let* var_name = fresh in - let* cont_expr = k (ImmediateVar var_name) in - return (AnfLet (NonRec, var_name, ComplexUnarOper (op, immediate), cont_expr))) - | ExpBinOper (op, e1, e2) -> - anf e1 (fun immediate1 -> - anf e2 (fun immediate2 -> - let* var_name = fresh in - let* cont_expr = k (ImmediateVar var_name) in - return - (AnfLet - (NonRec, var_name, ComplexBinOper (op, immediate1, immediate2), cont_expr)))) - | ExpTuple (e1, e2, rest) -> - let all_exprs = e1 :: e2 :: rest in - anf_list all_exprs (fun imms -> - match imms with - | i1 :: i2 :: rest_imm -> - let* var_name = fresh in - let* cont_expr = k (ImmediateVar var_name) in - return (AnfLet (NonRec, var_name, ComplexTuple (i1, i2, rest_imm), cont_expr)) - | _ -> fail "Invalid tuple") - | ExpList exprs -> - anf_list exprs (fun imms -> - let* var_name = fresh in - let* cont_expr = k (ImmediateVar var_name) in - return (AnfLet (NonRec, var_name, ComplexList imms, cont_expr))) - | ExpOption opt_expr -> - let* var_name = fresh in - let* cont_expr = k (ImmediateVar var_name) in - (match opt_expr with - | None -> return (AnfLet (NonRec, var_name, ComplexOption None, cont_expr)) - | Some expr -> - anf expr (fun immediate -> - return (AnfLet (NonRec, var_name, ComplexOption (Some immediate), cont_expr)))) - | ExpBranch (cond, then_expr, else_expr) -> - anf cond (fun immediate_cond -> - let* then_anf = - anf then_expr (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) + | ExpUnarOper (op, expr) -> + anf expr (fun imm -> + bind_complex_expr (ComplexUnarOper (op, imm)) k) + | ExpBinOper (op, exp1, exp2) -> + anf exp1 (fun imm1 -> + anf exp2 (fun imm2 -> + bind_complex_expr (ComplexBinOper (op, imm1, imm2)) k)) + + | ExpBranch (cond, then_exp, else_exp_opt) -> + anf cond (fun imm_cond -> + let* then_aexp = anf then_exp (fun imm -> return (AnfExpr (ComplexImmediate imm))) in + let* else_aexp = + match else_exp_opt with + | None -> return (AnfExpr ComplexUnit) + | Some else_exp -> anf else_exp (fun imm -> return (AnfExpr (ComplexImmediate imm))) in - let* var_name = fresh in - let* cont_expr = k (ImmediateVar var_name) in - let* else_anf = - match else_expr with - | Some e -> anf e (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) - | None -> return (AnfExpr (ComplexImmediate (ImmediateConst (ConstBool false)))) + bind_complex_expr (ComplexBranch (imm_cond, then_aexp, else_aexp)) k) + + | ExpLet (flag, (pat, expr), _, body) -> + (match pat with + | PatAny | PatConstruct ("()", None) -> anf expr (fun _ -> anf body k) + | PatTuple (p1, p2, rest) -> + let pats = p1 :: p2 :: rest in + anf expr (fun tuple_imm -> + let* tuple_var = fresh in + let* body_anf_expr = anf body k in + let* with_lets = + build_tuple_lets tuple_var (List.mapi pats ~f:(fun i p -> i, p)) body_anf_expr + in + return (AnfLet (flag, tuple_var, ComplexImmediate tuple_imm, with_lets))) + | PatVariable _ | PatConst _ -> + anf expr (fun imm -> + let* body_anf_expr = anf body k in + let* var = get_var pat in + return (AnfLet (flag, var, ComplexImmediate imm, body_anf_expr))) + | _ -> fail "Complex patterns in let not supported") + + | ExpApply (exp1, exp2) -> + let func, args_list = + let rec collect_args acc = function + | ExpApply (f, arg) -> collect_args (arg :: acc) f + | f -> f, acc in - return - (AnfLet - ( NonRec - , var_name - , ComplexBranch (immediate_cond, then_anf, else_anf) - , cont_expr ))) - | ExpLet (rec_flag, (pat, e1), _, e2) -> - let* e1_anf = - anf e1 (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) + collect_args [] (ExpApply (exp1, exp2)) in - let* e2_anf = anf e2 k in - let* complex_expr_body = - match e1_anf with - | AnfExpr c -> return c - | _ -> fail "Expected complex_expr" - in - if is_simple_pattern pat - then ( - match pattern_to_ident pat with - | Some name -> return (AnfLet (rec_flag, name, complex_expr_body, e2_anf)) - | None -> - let* var_name = fresh in - return (AnfLet (NonRec, var_name, complex_expr_body, e2_anf))) - else fail "Complex patterns in let bindings not yet supported" - | ExpLambda (pat, pats, body) -> - let patterns = pat :: pats in - let* body_anf = - anf body (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) + anf func (fun immediate_func -> + anf_list args_list (function + | arg1 :: arg_tl -> bind_complex_expr (ComplexApp (immediate_func, arg1, arg_tl)) k + | [] -> fail "application with no arguments")) + | ExpTuple (exp1, exp2, exp_list) -> + let all_exprs = exp1 :: exp2 :: exp_list in + anf_list all_exprs (fun imm_list -> + match imm_list with + | imm1 :: imm2 :: rest -> + bind_complex_expr (ComplexTuple (imm1, imm2, rest)) k + | _ -> fail "Invalid tuple") + | ExpLambda (pat, pat_list, body) -> + let params = pat :: pat_list in + let* body_anf_expr = anf body (fun imm -> bind_complex_expr (ComplexImmediate imm) k) in + let rec wrap_params current_body = function + | [] -> return current_body + | (PatVariable _ | PatConst _) as param :: remaining_params -> + let* body_with_rest = wrap_params current_body remaining_params in + return (AnfExpr (ComplexLambda ([ param ], body_with_rest))) + | PatTuple (p1, p2, rest_pats) :: remaining_params -> + let* body_with_rest = wrap_params current_body remaining_params in + let* var = fresh in + let* body_with_tuple_destructured = + build_tuple_lets var + (List.mapi (p1 :: p2 :: rest_pats) ~f:(fun i p -> i, p)) + body_with_rest + in + return (AnfExpr (ComplexLambda ([ PatVariable var ], body_with_tuple_destructured))) + | _ -> fail "Only variable, constant and tuple patterns in lambda" in - let* var_name = fresh in - let* cont_expr = k (ImmediateVar var_name) in - return (AnfLet (NonRec, var_name, ComplexLambda (patterns, body_anf), cont_expr)) - | ExpFunction ((pat, body), rest_cases) -> - (match rest_cases with - | [] -> - let patterns = [ pat ] in - let* body_anf = - anf body (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) - in - let* var_name = fresh in - let* cont_expr = k (ImmediateVar var_name) in - return (AnfLet (NonRec, var_name, ComplexLambda (patterns, body_anf), cont_expr)) - | _ -> fail "ExpFunction: multiple cases not yet supported") - | ExpApply (func, arg) -> - anf func (fun imm_f -> - anf arg (fun imm_arg -> - let* var_name = fresh in - let* cont_expr = k (ImmediateVar var_name) in - return - (AnfLet - (NonRec, var_name, ComplexApp (imm_f, imm_arg, []), cont_expr)))) - | ExpMatch _ -> fail "ExpMatch not yet supported" - | ExpConstruct (name, opt_expr) -> - (match name, opt_expr with - | "None", None -> - let* var_name = fresh in - let* cont_expr = k (ImmediateVar var_name) in - return (AnfLet (NonRec, var_name, ComplexOption None, cont_expr)) - | "Some", Some e -> - anf e (fun immediate -> - let* var_name = fresh in - let* cont_expr = k (ImmediateVar var_name) in - return - (AnfLet - (NonRec, var_name, ComplexOption (Some immediate), cont_expr))) - | _ -> fail "ExpConstruct: only None/Some supported") + wrap_params body_anf_expr params + | ExpConstruct ("()", None) -> bind_complex_expr ComplexUnit k | ExpTypeAnnotation (e, _) -> anf e k + | ExpList exprs -> + anf_list exprs (fun imm_list -> + bind_complex_expr (ComplexList imm_list) k) + | ExpOption None -> bind_complex_expr ComplexUnit k + | ExpOption (Some e) -> anf e k + | _ -> fail "Exp: Not implemented" and anf_list (exprs : expr list) (k : immediate list -> anf_expr t) : anf_expr t = match exprs with @@ -343,41 +215,43 @@ and anf_list (exprs : expr list) (k : immediate list -> anf_expr t) : anf_expr t anf_list tl (fun immediate_tl -> k (immediate_hd :: immediate_tl))) ;; -let anf_structure_item (item : structure) : anf_structure t = +let anf_structure_item (item : structure) : anf_structure list t = match item with | SEval expr -> let* result = anf expr (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) in - return (AnfEval result) - | SValue (rec_flag, (pat, expr), _) -> - if is_simple_pattern pat - then + return [ AnfEval result ] + | SValue (rec_flag, (pat, expr), binds) -> + let bindings = (pat, expr) :: binds in + List.fold_left bindings ~init:(return []) ~f:(fun acc (pat, expr) -> + let* acc_list = acc in let* anf_expr_body = anf expr (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) in - match pattern_to_ident pat with - | Some name -> return (AnfValue (rec_flag, (name, anf_expr_body), [])) - | None -> return (AnfValue (rec_flag, ("_", anf_expr_body), [])) - else fail "Complex patterns in top-level bindings not yet supported" + match pat with + | PatTuple (p1, p2, rest) -> + let* tuple_var = fresh in + let* component_bindings = + build_tuple_top_level_bindings tuple_var + (List.mapi (p1 :: p2 :: rest) ~f:(fun i p -> i, p)) + in + let one_value (id, e) = AnfValue (NonRec, (id, e), []) in + let new_items = + AnfValue (rec_flag, (tuple_var, anf_expr_body), []) + :: List.map component_bindings ~f:one_value + in + return (acc_list @ new_items) + | _ -> + let* var = get_var pat in + return (acc_list @ [ AnfValue (rec_flag, (var, anf_expr_body), []) ])) ;; let anf_program (program : program) : (anf_program, string) Result.t = let program' = - List.fold_right program ~init:(return []) ~f:(fun item acc -> + List.fold_left program ~init:(return []) ~f:(fun acc item -> let* acc_list = acc in let* item_anf = anf_structure_item item in - return (item_anf :: acc_list)) + return (acc_list @ item_anf)) in - run program' -;; - -(* Function to convert ANF expression to string using the pretty-printer *) -let anf_to_string anf_program = Stdlib.Format.asprintf "%a" pp_anf_program anf_program -let string_of_anf_expr anf_expr = Stdlib.Format.asprintf "%a" pp_anf_expr anf_expr - -let string_of_complex_expr complex_expr = - Stdlib.Format.asprintf "%a" pp_complex_expr complex_expr -;; - -let string_of_immediate immediate = Stdlib.Format.asprintf "%a" pp_immediate immediate + ANFMonad.run program' \ No newline at end of file diff --git a/EML/lib/middleend/anf_pp.ml b/EML/lib/middleend/anf_pp.ml index 2ef63f9b..1c42af78 100644 --- a/EML/lib/middleend/anf_pp.ml +++ b/EML/lib/middleend/anf_pp.ml @@ -17,6 +17,8 @@ let rec pp_immediate fmt = function and pp_complex_expr fmt = function | ComplexImmediate imm -> pp_immediate fmt imm + | ComplexUnit -> fprintf fmt "()" + | ComplexField (imm, i) -> fprintf fmt "%a.%d" pp_immediate imm i | ComplexBinOper (op, e1, e2) -> let op_str = match op with diff --git a/EML/tests/anf_tests.ml b/EML/tests/anf_tests.ml index c17645e9..c897f9cc 100644 --- a/EML/tests/anf_tests.ml +++ b/EML/tests/anf_tests.ml @@ -30,33 +30,28 @@ let%expect_test "001.ml" = {| [(AnfValue (NonRec, ("recfac", - (AnfLet (NonRec, "anf_t5", + (AnfExpr (ComplexLambda ([(PatVariable "n")], (AnfLet (NonRec, "anf_t0", (ComplexBinOper (LowestEqual, (ImmediateVar "n"), (ImmediateConst (ConstInt 1)))), - (AnfLet (NonRec, "anf_t1", + (AnfExpr (ComplexBranch ((ImmediateVar "anf_t0"), (AnfExpr (ComplexImmediate (ImmediateConst (ConstInt 1)))), - (AnfLet (NonRec, "anf_t2", + (AnfLet (NonRec, "anf_t1", (ComplexBinOper (Minus, (ImmediateVar "n"), (ImmediateConst (ConstInt 1)))), - (AnfLet (NonRec, "anf_t3", + (AnfLet (NonRec, "anf_t2", (ComplexApp ((ImmediateVar "fac"), - (ImmediateVar "anf_t2"), [])), - (AnfLet (NonRec, "anf_t4", + (ImmediateVar "anf_t1"), [])), + (AnfExpr (ComplexBinOper (Multiply, (ImmediateVar "n"), - (ImmediateVar "anf_t3"))), - (AnfExpr - (ComplexImmediate (ImmediateVar "anf_t4"))) - )) + (ImmediateVar "anf_t2")))) )) )) - )), - (AnfExpr (ComplexImmediate (ImmediateVar "anf_t1"))))) + ))) )) - )), - (AnfExpr (ComplexImmediate (ImmediateVar "anf_t5")))))), + )))), [])) ]|}] ;; @@ -67,59 +62,45 @@ let%expect_test "003occurs.ml" = {| [(AnfValue (NonRec, ("fix", - (AnfLet (NonRec, "anf_t11", + (AnfExpr (ComplexLambda ([(PatVariable "f")], - (AnfLet (NonRec, "anf_t4", + (AnfExpr (ComplexLambda ([(PatVariable "x")], - (AnfLet (NonRec, "anf_t2", + (AnfExpr (ComplexLambda ([(PatVariable "f")], - (AnfLet (NonRec, "anf_t0", + (AnfLet (NonRec, "anf_t1", (ComplexApp ((ImmediateVar "x"), - (ImmediateVar "x"), [])), - (AnfLet (NonRec, "anf_t1", - (ComplexApp ((ImmediateVar "anf_t0"), - (ImmediateVar "f"), [])), + (ImmediateVar "x"), [(ImmediateVar "f")])), + (AnfLet (NonRec, "anf_t3", + (ComplexApp ((ImmediateVar "f"), + (ImmediateVar "anf_t1"), [])), (AnfExpr - (ComplexImmediate (ImmediateVar "anf_t1"))) + (ComplexLambda ([(PatVariable "x")], + (AnfExpr + (ComplexLambda ([(PatVariable "f")], + (AnfLet (NonRec, "anf_t5", + (ComplexApp ((ImmediateVar "x"), + (ImmediateVar "x"), + [(ImmediateVar "f")])), + (AnfLet (NonRec, "anf_t7", + (ComplexApp ( + (ImmediateVar "f"), + (ImmediateVar "anf_t5"), + [])), + (AnfExpr + (ComplexApp ( + (ImmediateVar "anf_t3"), + (ImmediateVar "anf_t7"), + []))) + )) + )) + ))) + ))) )) )) - )), - (AnfLet (NonRec, "anf_t3", - (ComplexApp ((ImmediateVar "f"), - (ImmediateVar "anf_t2"), [])), - (AnfExpr (ComplexImmediate (ImmediateVar "anf_t3"))))) - )) - )), - (AnfLet (NonRec, "anf_t9", - (ComplexLambda ([(PatVariable "x")], - (AnfLet (NonRec, "anf_t7", - (ComplexLambda ([(PatVariable "f")], - (AnfLet (NonRec, "anf_t5", - (ComplexApp ((ImmediateVar "x"), - (ImmediateVar "x"), [])), - (AnfLet (NonRec, "anf_t6", - (ComplexApp ((ImmediateVar "anf_t5"), - (ImmediateVar "f"), [])), - (AnfExpr - (ComplexImmediate (ImmediateVar "anf_t6"))) - )) - )) - )), - (AnfLet (NonRec, "anf_t8", - (ComplexApp ((ImmediateVar "f"), - (ImmediateVar "anf_t7"), [])), - (AnfExpr (ComplexImmediate (ImmediateVar "anf_t8"))) - )) - )) - )), - (AnfLet (NonRec, "anf_t10", - (ComplexApp ((ImmediateVar "anf_t4"), - (ImmediateVar "anf_t9"), [])), - (AnfExpr (ComplexImmediate (ImmediateVar "anf_t10"))))) - )) - )) - )), - (AnfExpr (ComplexImmediate (ImmediateVar "anf_t11")))))), + ))) + ))) + )))), [])) ]|}] ;; @@ -130,7 +111,7 @@ let%expect_test "004let_poly.ml" = {| [(AnfValue (NonRec, ("temp", - (AnfLet (NonRec, "anf_t3", + (AnfExpr (ComplexLambda ([(PatVariable "f")], (AnfLet (NonRec, "anf_t0", (ComplexApp ((ImmediateVar "f"), (ImmediateConst (ConstInt 1)), @@ -138,22 +119,22 @@ let%expect_test "004let_poly.ml" = (AnfLet (NonRec, "anf_t1", (ComplexApp ((ImmediateVar "f"), (ImmediateConst (ConstBool true)), [])), - (AnfLet (NonRec, "anf_t2", + (AnfLet (NonRec, "anf_t3", (ComplexTuple ((ImmediateVar "anf_t0"), (ImmediateVar "anf_t1"), [])), - (AnfExpr (ComplexImmediate (ImmediateVar "anf_t2"))))) + (AnfExpr + (ComplexLambda ([(PatVariable "x")], + (AnfLet (NonRec, "anf_t4", + (ComplexImmediate (ImmediateVar "x")), + (AnfExpr + (ComplexApp ((ImmediateVar "anf_t3"), + (ImmediateVar "anf_t4"), []))) + )) + ))) + )) )) )) - )), - (AnfLet (NonRec, "anf_t4", - (ComplexLambda ([(PatVariable "x")], - (AnfExpr (ComplexImmediate (ImmediateVar "x"))))), - (AnfLet (NonRec, "anf_t5", - (ComplexApp ((ImmediateVar "anf_t3"), (ImmediateVar "anf_t4"), - [])), - (AnfExpr (ComplexImmediate (ImmediateVar "anf_t5"))))) - )) - ))), + )))), [])) ]|}] ;; @@ -164,11 +145,10 @@ let%expect_test "002if.ml" = {| [(AnfValue (NonRec, ("main", - (AnfLet (NonRec, "anf_t0", + (AnfExpr (ComplexBranch ((ImmediateConst (ConstBool true)), (AnfExpr (ComplexImmediate (ImmediateConst (ConstInt 1)))), - (AnfExpr (ComplexImmediate (ImmediateConst (ConstBool false)))))), - (AnfExpr (ComplexImmediate (ImmediateVar "anf_t0")))))), + (AnfExpr (ComplexImmediate (ImmediateConst (ConstBool false)))))))), [])) ]|}] ;; @@ -178,13 +158,11 @@ let%expect_test "pretty_printer_test1" = "let rec fac n = if n <= 1 then 1 else n * fac (n - 1)\n let main = fac 4"; [%expect {| - let rec fac = let anf_t6 = fun n -> let anf_t1 = (n <= 1) in - let anf_t2 = if anf_t1 then 1 else let anf_t3 = (n - 1) in - let anf_t4 = fac anf_t3 in let anf_t5 = (n * anf_t4) in anf_t5 in anf_t2 in - anf_t6 + let rec fac = fun n -> let anf_t0 = (n <= 1) in + if anf_t0 then 1 else let anf_t1 = (n - 1) in let anf_t2 = fac anf_t1 in + (n * anf_t2) - let main = let anf_t0 = fac 4 in - anf_t0 |}] + let main = fac 4 |}] ;; let%expect_test "pretty_printer_test2" = @@ -193,12 +171,10 @@ let%expect_test "pretty_printer_test2" = \ let main = fibo 10"; [%expect {| - let rec fibo = let anf_t8 = fun n -> let anf_t1 = (n < 1) in - let anf_t2 = if anf_t1 then 1 else let anf_t3 = (n - 1) in - let anf_t4 = fibo anf_t3 in let anf_t5 = (n - 2) in - let anf_t6 = fibo anf_t5 in let anf_t7 = (anf_t4 + anf_t6) in anf_t7 in - anf_t2 in anf_t8 + let rec fibo = fun n -> let anf_t0 = (n < 1) in + if anf_t0 then 1 else let anf_t1 = (n - 1) in let anf_t2 = fibo anf_t1 in + let anf_t3 = (n - 2) in let anf_t4 = fibo anf_t3 in + (anf_t2 + anf_t4) - let main = let anf_t0 = fibo 10 in - anf_t0|}] + let main = fibo 10|}] ;; From 16b3647507e863ce6d80f9db85be1d0ac1d6b214 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 24 Feb 2026 23:42:54 +0300 Subject: [PATCH 13/74] fmt in ast, inferencer, parser Signed-off-by: Victoria Ostrovskaya --- EML/lib/frontend/ast.ml | 1 - EML/lib/frontend/inferencer.ml | 24 ++-- EML/lib/frontend/parser.ml | 197 +++++++++++++++++---------------- 3 files changed, 109 insertions(+), 113 deletions(-) diff --git a/EML/lib/frontend/ast.ml b/EML/lib/frontend/ast.ml index e43ac7cb..a7388e64 100644 --- a/EML/lib/frontend/ast.ml +++ b/EML/lib/frontend/ast.ml @@ -93,7 +93,6 @@ let is_bin_op op = List.mem op bin_op_list let is_operator opr = List.exists (fun s -> String.equal s opr) bin_op_list let is_unary_minus op = op = "~-" - let rec pp_ty fmt = function | TyPrim x -> fprintf fmt "%s" x | TyVar x -> fprintf fmt "%s" x diff --git a/EML/lib/frontend/inferencer.ml b/EML/lib/frontend/inferencer.ml index ca7ecc79..b7266f2c 100644 --- a/EML/lib/frontend/inferencer.ml +++ b/EML/lib/frontend/inferencer.ml @@ -555,9 +555,7 @@ let rec infer_expr env = function let* subst2, ty_arg = infer_expr (TypeEnv.apply subst1 env) arg in let* tv = fresh_var in let* subst3 = - Substitution.unify - (Substitution.apply subst2 ty_func) - (TyArrow (ty_arg, tv)) + Substitution.unify (Substitution.apply subst2 ty_func) (TyArrow (ty_arg, tv)) in let* total_subst = Substitution.compose_all [ subst3; subst2; subst1 ] in return (total_subst, Substitution.apply total_subst tv) @@ -613,19 +611,15 @@ let rec infer_expr env = function ty_pat in let* sub_comp = Substitution.compose sub_u sub_pat in - let* sub_expr, ty_branch = - infer_expr (TypeEnv.apply sub_comp env_pat) expr' - in - let* sub_total = - Substitution.compose_all [ sub_expr; sub_comp; sub_acc ] - in + let* sub_expr, ty_branch = infer_expr (TypeEnv.apply sub_comp env_pat) expr' in + let* sub_total = Substitution.compose_all [ sub_expr; sub_comp; sub_acc ] in let ty_branch' = Substitution.apply sub_total ty_branch in - (match ty_res_opt with - | None -> return (sub_total, Some ty_branch') - | Some ty_prev -> - let* sub_merge = Substitution.unify ty_prev ty_branch' in - let* sub_final = Substitution.compose sub_total sub_merge in - return (sub_final, Some (Substitution.apply sub_merge ty_prev)))) + match ty_res_opt with + | None -> return (sub_total, Some ty_branch') + | Some ty_prev -> + let* sub_merge = Substitution.unify ty_prev ty_branch' in + let* sub_final = Substitution.compose sub_total sub_merge in + return (sub_final, Some (Substitution.apply sub_merge ty_prev))) in (match ty_res with | Some t -> return (final_subst, t) diff --git a/EML/lib/frontend/parser.ml b/EML/lib/frontend/parser.ml index 473d779e..bfd9847b 100644 --- a/EML/lib/frontend/parser.ml +++ b/EML/lib/frontend/parser.ml @@ -18,7 +18,7 @@ let is_keyword = function | "true" | "false" | "Some" - | "and" + | "and" | "function" | "None" | "with" -> true @@ -73,8 +73,7 @@ let is_separator = function ;; let token2 str = - token str - *> peek_char + token str *> peek_char >>= function | Some c when is_separator c -> return str <* white_space | None -> return str <* white_space @@ -100,13 +99,12 @@ let parse_const_string = token "\"" *> take_till (Char.equal '\"') <* token "\"" >>| fun s -> ConstString s ;; - let parse_const = - white_space *> choice [ parse_const_int; parse_const_char; parse_const_string; parse_const_bool ] + white_space + *> choice [ parse_const_int; parse_const_char; parse_const_string; parse_const_bool ] ;; -let parse_unar_oper = - choice [ token "-" *> return Negative; token "not" *> return Not ] +let parse_unar_oper = choice [ token "-" *> return Negative; token "not" *> return Not ] let parse_ident = let parse_first_char = @@ -122,9 +120,12 @@ let parse_ident = >>= fun s -> if is_keyword s then fail "It is not identifier" else return s in let parse_op_ident = - white_space *> char '(' *> white_space + white_space + *> char '(' + *> white_space *> choice (List.map Ast.bin_op_list ~f:(fun opr -> token opr *> return opr)) - <* white_space <* char ')' + <* white_space + <* char ')' in parse_regular_ident <|> parse_op_ident ;; @@ -159,7 +160,6 @@ let rec parse_arrow_type parse_type = return (TyArrow (type1, type2)) ;; - let parse_type = let base_type = parse_base_type in let list_type = parse_type_list base_type <|> base_type in @@ -191,9 +191,7 @@ let parse_pattern_tuple parse_pattern = parse_parens parse_unparenthesized <|> parse_unparenthesized ;; -let parse_keyword = - choice [ token "true"; token "false"; token "None"; token "()" ] -;; +let parse_keyword = choice [ token "true"; token "false"; token "None"; token "()" ] let parse_option parse = let* tag = token2 "Some" in @@ -201,16 +199,17 @@ let parse_option parse = return (tag, opt) ;; - - let parse_construct parse construct func = - token "[" *> sep_by (token ";") parse <* token "]" + token "[" *> sep_by (token ";") parse + <* token "]" >>| List.fold_right ~init:(construct ("[]", None)) ~f:func ;; let parse_list parse construct tuple = let rec go acc = - (token "::" *> parse >>= fun elem -> go elem >>| fun rest -> construct ("::", Some (tuple (acc, rest, [])))) + token "::" *> parse + >>= (fun elem -> + go elem >>| fun rest -> construct ("::", Some (tuple (acc, rest, [])))) <|> return acc in parse >>= go @@ -218,42 +217,44 @@ let parse_list parse construct tuple = let parse_pattern_construct parse_elem parse_pat = choice - [ parse_option (parse_elem <|> parse_parens parse_pat) - >>| (fun (t, p) -> PatConstruct (t, p)) - ; parse_construct parse_elem (fun (t, p) -> PatConstruct (t, p)) + [ (parse_option (parse_elem <|> parse_parens parse_pat) + >>| fun (t, p) -> PatConstruct (t, p)) + ; parse_construct + parse_elem + (fun (t, p) -> PatConstruct (t, p)) (fun p acc -> PatConstruct ("::", Some (PatTuple (p, acc, [])))) - ; parse_list parse_elem (fun (t, p) -> PatConstruct (t, p)) (fun (a, b, c) -> PatTuple (a, b, c)) + ; parse_list + parse_elem + (fun (t, p) -> PatConstruct (t, p)) + (fun (a, b, c) -> PatTuple (a, b, c)) ] ;; - let parse_base_pat = choice [ parse_pattern_any ; parse_pattern_var ; parse_pattern_const - ; parse_keyword >>| (fun tag -> PatConstruct (tag, None)) + ; (parse_keyword >>| fun tag -> PatConstruct (tag, None)) ] ;; let parse_pattern = white_space *> fix (fun pat -> - let atom = - choice - [ parse_base_pat - ; parse_pattern_construct parse_base_pat pat - ; parse_pattern_with_type pat - ; parse_parens pat - ] - in - let tuple = parse_pattern_construct atom pat <|> atom in - let lst = parse_pattern_construct tuple pat <|> tuple in - parse_pattern_tuple lst <|> lst) + let atom = + choice + [ parse_base_pat + ; parse_pattern_construct parse_base_pat pat + ; parse_pattern_with_type pat + ; parse_parens pat + ] + in + let tuple = parse_pattern_construct atom pat <|> atom in + let lst = parse_pattern_construct tuple pat <|> tuple in + parse_pattern_tuple lst <|> lst) ;; - - let parse_left_associative expr oper = let rec go acc = lift2 (fun f x -> f acc x) oper expr >>= go <|> return acc in expr >>= go @@ -313,8 +314,7 @@ let parse_expr_apply e = ;; let parse_expr_lambda parse_expr = - token2 "fun" - *> sep_by1 white_space parse_pattern + token2 "fun" *> sep_by1 white_space parse_pattern <* token "->" >>= fun params -> parse_expr @@ -325,8 +325,9 @@ let parse_expr_lambda parse_expr = ;; let parse_case parse_expr = - white_space *> option () (token "|" *> return ()) - *> lift2 (fun pat exp -> (pat, exp)) parse_pattern (token "->" *> parse_expr) + white_space + *> option () (token "|" *> return ()) + *> lift2 (fun pat exp -> pat, exp) parse_pattern (token "->" *> parse_expr) ;; let parse_expr_function parse_expr = @@ -365,67 +366,70 @@ let parse_body parse_expr = | [] -> body ;; - let parse_expr_sequence parse_expr = parse_left_associative parse_expr - (token ";" *> return (fun exp1 exp2 -> - ExpLet (NonRec, (PatUnit, exp1), [], exp2))) + (token ";" *> return (fun exp1 exp2 -> ExpLet (NonRec, (PatUnit, exp1), [], exp2))) ;; let parse_expr_construct parse_expr = let cons_one exp acc = ExpConstruct ("::", Some (ExpTuple (exp, acc, []))) in let rec unfold_sequence = function | ExpLet (NonRec, (PatUnit, e1), [], e2) -> - let rest, last = unfold_sequence e2 in - (e1 :: rest, last) - | e -> ([], e) + let rest, last = unfold_sequence e2 in + e1 :: rest, last + | e -> [], e in let rec fold_elem (from_parens, exp) acc = - if from_parens then cons_one exp acc - else + if from_parens + then cons_one exp acc + else ( match exp with | ExpLet (NonRec, (PatUnit, e1), [], e2) -> - let rest, last = unfold_sequence e2 in - let acc' = fold_elem (false, last) acc in - let acc'' = List.fold_right rest ~init:acc' ~f:(fun e a -> fold_elem (false, e) a) in - fold_elem (false, e1) acc'' - | _ -> cons_one exp acc + let rest, last = unfold_sequence e2 in + let acc' = fold_elem (false, last) acc in + let acc'' = + List.fold_right rest ~init:acc' ~f:(fun e a -> fold_elem (false, e) a) + in + fold_elem (false, e1) acc'' + | _ -> cons_one exp acc) in let elem_parser = - (parse_parens (parse_expr_sequence parse_expr) >>| fun exp -> true, exp) + parse_parens (parse_expr_sequence parse_expr) + >>| (fun exp -> true, exp) <|> (parse_expr >>| fun exp -> false, exp) in parse_construct elem_parser (fun (t, e) -> ExpConstruct (t, e)) fold_elem ;; let parse_annotated_rhs parse_expr opr = - (token ":" *> parse_type) >>= fun t -> - (token opr *> parse_expr) >>| fun expr -> ExpTypeAnnotation (expr, t) + token ":" *> parse_type + >>= fun t -> token opr *> parse_expr >>| fun expr -> ExpTypeAnnotation (expr, t) ;; let parse_fun_binding parse_expr = let* id = parse_pattern_var in let* params = many1 parse_pattern in - let pat = List.hd_exn params and pats = List.drop params 1 in + let pat = List.hd_exn params + and pats = List.drop params 1 in let mk_body body = ExpLambda (pat, pats, body) in choice - [ (parse_annotated_rhs parse_expr "=" >>= function - | ExpTypeAnnotation (expr, t) -> - return (PatType (id, t), mk_body expr) - | _ -> fail "expected type annotation") - ; token "=" *> parse_expr >>| fun expr -> (id, mk_body expr) + [ (parse_annotated_rhs parse_expr "=" + >>= function + | ExpTypeAnnotation (expr, t) -> return (PatType (id, t), mk_body expr) + | _ -> fail "expected type annotation") + ; (token "=" *> parse_expr >>| fun expr -> id, mk_body expr) ] ;; let parse_simple_binding parse_expr = let* pat = parse_pattern in choice - [ (parse_annotated_rhs parse_expr "=" >>= function - | ExpTypeAnnotation (expr, t) -> - return (PatType (pat, t), expr) - | _ -> fail "expected type annotation") - ; token "=" *> parse_expr >>| fun expr -> (pat, expr) + [ (parse_annotated_rhs parse_expr "=" + >>= function + | ExpTypeAnnotation (expr, t) -> return (PatType (pat, t), expr) + | _ -> fail "expected type annotation") + ; (token "=" *> parse_expr >>| fun expr -> pat, expr) ] ;; @@ -434,9 +438,12 @@ let parse_value_binding_list parse_expr = sep_by1 (token2 "and") (white_space *> parse_binding) ;; - let parse_base_expr = - choice [ parse_expr_ident; parse_expr_const; parse_keyword >>| fun tag -> ExpConstruct (tag, None) ] + choice + [ parse_expr_ident + ; parse_expr_const + ; (parse_keyword >>| fun tag -> ExpConstruct (tag, None)) + ] ;; let parse_expr_construct_keyword_some parse_expr = @@ -444,8 +451,6 @@ let parse_expr_construct_keyword_some parse_expr = >>| fun (tag, exp_opt) -> ExpConstruct (tag, exp_opt) ;; - - let parse_expr_let parse_expr = token "let" *> lift4 @@ -487,29 +492,29 @@ let parse_exp_apply e = let parse_expr = white_space *> fix (fun expr -> - let term = - choice - [ parse_base_expr - ; parse_expr_construct_keyword_some expr - ; parse_parens (parse_expr_with_type expr) - ; parse_expr_construct expr - ; parse_top_expr expr - ; parse_parens expr - ] - in - let func = parse_exp_apply term <|> term in - let lst = parse_expr_list func <|> func in - let tuple = parse_expr_tuple lst <|> lst in - let seq = parse_expr_sequence tuple <|> tuple in - let lambda = parse_expr_lambda expr <|> seq in + let term = choice - [ parse_expr_let expr - ; parse_expr_function expr - ; parse_expr_lambda expr - ; parse_expr_match expr - ; parse_expr_branch expr - ; lambda - ]) + [ parse_base_expr + ; parse_expr_construct_keyword_some expr + ; parse_parens (parse_expr_with_type expr) + ; parse_expr_construct expr + ; parse_top_expr expr + ; parse_parens expr + ] + in + let func = parse_exp_apply term <|> term in + let lst = parse_expr_list func <|> func in + let tuple = parse_expr_tuple lst <|> lst in + let seq = parse_expr_sequence tuple <|> tuple in + let lambda = parse_expr_lambda expr <|> seq in + choice + [ parse_expr_let expr + ; parse_expr_function expr + ; parse_expr_lambda expr + ; parse_expr_match expr + ; parse_expr_branch expr + ; lambda + ]) ;; let parse_structure = @@ -517,10 +522,8 @@ let parse_structure = let parse_value = token "let" *> lift2 - (fun r id_list -> - SValue (r, List.hd_exn id_list, List.drop id_list 1)) - (token "rec" *> (take_while1 Char.is_whitespace *> return Rec) - <|> return NonRec) + (fun r id_list -> SValue (r, List.hd_exn id_list, List.drop id_list 1)) + (token "rec" *> (take_while1 Char.is_whitespace *> return Rec) <|> return NonRec) (parse_value_binding_list parse_expr) in let parse_structure_item = choice [ parse_eval; parse_value ] in From 341eb797e9391ac6157afa0b52cc6582e6505b8c Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 24 Feb 2026 23:44:13 +0300 Subject: [PATCH 14/74] change utils folder name to util Signed-off-by: Victoria Ostrovskaya --- EML/lib/{utils => util}/monads.ml | 2 ++ EML/lib/util/monads.mli | 15 +++++++++++++++ EML/lib/{utils => util}/utils.ml | 0 3 files changed, 17 insertions(+) rename EML/lib/{utils => util}/monads.ml (98%) create mode 100644 EML/lib/util/monads.mli rename EML/lib/{utils => util}/utils.ml (100%) diff --git a/EML/lib/utils/monads.ml b/EML/lib/util/monads.ml similarity index 98% rename from EML/lib/utils/monads.ml rename to EML/lib/util/monads.ml index b8322df4..2b27f737 100644 --- a/EML/lib/utils/monads.ml +++ b/EML/lib/util/monads.ml @@ -1,3 +1,5 @@ +open Base + module ANFMonad = struct type 'a t = int -> int * ('a, string) Result.t diff --git a/EML/lib/util/monads.mli b/EML/lib/util/monads.mli new file mode 100644 index 00000000..e0997779 --- /dev/null +++ b/EML/lib/util/monads.mli @@ -0,0 +1,15 @@ +open Base + +module ANFMonad : sig + type 'a t = int -> int * ('a, string) Result.t + + val return : 'a -> 'a t + val ( >>= ) : 'a t -> ('a -> 'b t) -> 'b t + val fresh : string t + val run : 'a t -> ('a, string) Result.t + val fail : string -> 'a t + + module Syntax : sig + val ( let* ) : 'a t -> ('a -> 'b t) -> 'b t + end +end diff --git a/EML/lib/utils/utils.ml b/EML/lib/util/utils.ml similarity index 100% rename from EML/lib/utils/utils.ml rename to EML/lib/util/utils.ml From 6cdfbe456b3b37b31b964e8a0ba38fa31a5ede1a Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 24 Feb 2026 23:50:19 +0300 Subject: [PATCH 15/74] replace ANFMonad at util Signed-off-by: Victoria Ostrovskaya --- EML/lib/middleend/anf.ml | 115 ++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 63 deletions(-) diff --git a/EML/lib/middleend/anf.ml b/EML/lib/middleend/anf.ml index 1b5dd6f9..2b63fd93 100644 --- a/EML/lib/middleend/anf.ml +++ b/EML/lib/middleend/anf.ml @@ -8,30 +8,9 @@ open Frontend.Ast open Base +open Util.Monads.ANFMonad +open Util.Monads.ANFMonad.Syntax -module ANFMonad = struct - type 'a t = int -> int * ('a, string) Result.t - - let return x = fun counter -> counter, Ok x - - let ( >>= ) m f = - fun counter -> - match m counter with - | counter', Ok a -> f a counter' - | counter', Error e -> counter', Error e - ;; - - let fresh : string t = fun counter -> counter + 1, Ok ("anf_t" ^ Int.to_string counter) - let run m = m 0 |> snd - let fail msg = fun counter -> counter, Error msg - - module Syntax = struct - let ( let* ) = ( >>= ) - end -end - -open ANFMonad -open ANFMonad.Syntax type immediate = | ImmediateConst of const | ImmediateVar of ident @@ -65,16 +44,12 @@ type anf_structure = type anf_program = anf_structure list [@@deriving show { with_path = false }] - - - let optimize_anf_let (is_rec, name1, expr, body) = match is_rec, body with | NonRec, AnfExpr (ComplexImmediate (ImmediateVar name2)) when String.equal name1 name2 -> AnfExpr expr | _, AnfLet (is_rec', orig_name, ComplexImmediate (ImmediateVar name2), body') - when String.equal name1 name2 - -> AnfLet (is_rec', orig_name, expr, body') + when String.equal name1 name2 -> AnfLet (is_rec', orig_name, expr, body') | _ -> AnfLet (is_rec, name1, expr, body) ;; @@ -86,36 +61,47 @@ let bind_complex_expr complex_expr k = let get_var = function | PatVariable id -> return id - | _ -> fresh + | _ -> fresh ;; -let rec destructure_tuple_pat tuple_var indices_pats empty nested_empty add - = +let rec destructure_tuple_pat tuple_var indices_pats empty nested_empty add = match indices_pats with | [] -> return empty | (i, pat) :: rest -> let* var = get_var pat in let* rest_result = destructure_tuple_pat tuple_var rest empty nested_empty add in let* inner_result = - (match pat with - | PatTuple (ip1, ip2, irest) -> - destructure_tuple_pat var - (List.mapi (ip1 :: ip2 :: irest) ~f:(fun j p -> j, p)) - (nested_empty rest_result) - nested_empty add - | _ -> return (nested_empty rest_result)) + match pat with + | PatTuple (ip1, ip2, irest) -> + destructure_tuple_pat + var + (List.mapi (ip1 :: ip2 :: irest) ~f:(fun j p -> j, p)) + (nested_empty rest_result) + nested_empty + add + | _ -> return (nested_empty rest_result) in return (add var i tuple_var inner_result rest_result) ;; let build_tuple_lets tuple_var indices_pats body = - destructure_tuple_pat tuple_var indices_pats body (fun x -> x) (fun bind_id i tv inner _rest -> - AnfLet (NonRec, bind_id, ComplexField (ImmediateVar tv, i), inner)) + destructure_tuple_pat + tuple_var + indices_pats + body + (fun x -> x) + (fun bind_id i tv inner _rest -> + AnfLet (NonRec, bind_id, ComplexField (ImmediateVar tv, i), inner)) ;; -let build_tuple_top_level_bindings tuple_var indices_pats = - destructure_tuple_pat tuple_var indices_pats [] (fun _ -> []) (fun bind_id i tv inner rest -> - (bind_id, AnfExpr (ComplexField (ImmediateVar tv, i))) :: inner @ rest) +let build_tuple_top_level_bindings tuple_var indices_pats = + destructure_tuple_pat + tuple_var + indices_pats + [] + (fun _ -> []) + (fun bind_id i tv inner rest -> + ((bind_id, AnfExpr (ComplexField (ImmediateVar tv, i))) :: inner) @ rest) ;; let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = @@ -123,23 +109,22 @@ let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = | ExpConst c -> k (ImmediateConst c) | ExpIdent x -> k (ImmediateVar x) | ExpUnarOper (op, expr) -> - anf expr (fun imm -> - bind_complex_expr (ComplexUnarOper (op, imm)) k) + anf expr (fun imm -> bind_complex_expr (ComplexUnarOper (op, imm)) k) | ExpBinOper (op, exp1, exp2) -> anf exp1 (fun imm1 -> - anf exp2 (fun imm2 -> - bind_complex_expr (ComplexBinOper (op, imm1, imm2)) k)) - + anf exp2 (fun imm2 -> bind_complex_expr (ComplexBinOper (op, imm1, imm2)) k)) | ExpBranch (cond, then_exp, else_exp_opt) -> anf cond (fun imm_cond -> - let* then_aexp = anf then_exp (fun imm -> return (AnfExpr (ComplexImmediate imm))) in + let* then_aexp = + anf then_exp (fun imm -> return (AnfExpr (ComplexImmediate imm))) + in let* else_aexp = match else_exp_opt with | None -> return (AnfExpr ComplexUnit) - | Some else_exp -> anf else_exp (fun imm -> return (AnfExpr (ComplexImmediate imm))) + | Some else_exp -> + anf else_exp (fun imm -> return (AnfExpr (ComplexImmediate imm))) in bind_complex_expr (ComplexBranch (imm_cond, then_aexp, else_aexp)) k) - | ExpLet (flag, (pat, expr), _, body) -> (match pat with | PatAny | PatConstruct ("()", None) -> anf expr (fun _ -> anf body k) @@ -158,7 +143,6 @@ let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = let* var = get_var pat in return (AnfLet (flag, var, ComplexImmediate imm, body_anf_expr))) | _ -> fail "Complex patterns in let not supported") - | ExpApply (exp1, exp2) -> let func, args_list = let rec collect_args acc = function @@ -169,40 +153,43 @@ let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = in anf func (fun immediate_func -> anf_list args_list (function - | arg1 :: arg_tl -> bind_complex_expr (ComplexApp (immediate_func, arg1, arg_tl)) k + | arg1 :: arg_tl -> + bind_complex_expr (ComplexApp (immediate_func, arg1, arg_tl)) k | [] -> fail "application with no arguments")) | ExpTuple (exp1, exp2, exp_list) -> let all_exprs = exp1 :: exp2 :: exp_list in anf_list all_exprs (fun imm_list -> match imm_list with - | imm1 :: imm2 :: rest -> - bind_complex_expr (ComplexTuple (imm1, imm2, rest)) k + | imm1 :: imm2 :: rest -> bind_complex_expr (ComplexTuple (imm1, imm2, rest)) k | _ -> fail "Invalid tuple") | ExpLambda (pat, pat_list, body) -> let params = pat :: pat_list in - let* body_anf_expr = anf body (fun imm -> bind_complex_expr (ComplexImmediate imm) k) in + let* body_anf_expr = + anf body (fun imm -> bind_complex_expr (ComplexImmediate imm) k) + in let rec wrap_params current_body = function | [] -> return current_body - | (PatVariable _ | PatConst _) as param :: remaining_params -> + | ((PatVariable _ | PatConst _) as param) :: remaining_params -> let* body_with_rest = wrap_params current_body remaining_params in return (AnfExpr (ComplexLambda ([ param ], body_with_rest))) | PatTuple (p1, p2, rest_pats) :: remaining_params -> let* body_with_rest = wrap_params current_body remaining_params in let* var = fresh in let* body_with_tuple_destructured = - build_tuple_lets var + build_tuple_lets + var (List.mapi (p1 :: p2 :: rest_pats) ~f:(fun i p -> i, p)) body_with_rest in - return (AnfExpr (ComplexLambda ([ PatVariable var ], body_with_tuple_destructured))) + return + (AnfExpr (ComplexLambda ([ PatVariable var ], body_with_tuple_destructured))) | _ -> fail "Only variable, constant and tuple patterns in lambda" in wrap_params body_anf_expr params | ExpConstruct ("()", None) -> bind_complex_expr ComplexUnit k | ExpTypeAnnotation (e, _) -> anf e k | ExpList exprs -> - anf_list exprs (fun imm_list -> - bind_complex_expr (ComplexList imm_list) k) + anf_list exprs (fun imm_list -> bind_complex_expr (ComplexList imm_list) k) | ExpOption None -> bind_complex_expr ComplexUnit k | ExpOption (Some e) -> anf e k | _ -> fail "Exp: Not implemented" @@ -233,7 +220,8 @@ let anf_structure_item (item : structure) : anf_structure list t = | PatTuple (p1, p2, rest) -> let* tuple_var = fresh in let* component_bindings = - build_tuple_top_level_bindings tuple_var + build_tuple_top_level_bindings + tuple_var (List.mapi (p1 :: p2 :: rest) ~f:(fun i p -> i, p)) in let one_value (id, e) = AnfValue (NonRec, (id, e), []) in @@ -254,4 +242,5 @@ let anf_program (program : program) : (anf_program, string) Result.t = let* item_anf = anf_structure_item item in return (acc_list @ item_anf)) in - ANFMonad.run program' \ No newline at end of file + ANFMonad.run program' +;; From 1ebc7a4596ab6f8902dc3b27ccb564b125e5116e Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 24 Feb 2026 23:52:14 +0300 Subject: [PATCH 16/74] add arity type and anf_fun_bind Signed-off-by: Victoria Ostrovskaya --- EML/lib/middleend/anf.ml | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/EML/lib/middleend/anf.ml b/EML/lib/middleend/anf.ml index 2b63fd93..46cee973 100644 --- a/EML/lib/middleend/anf.ml +++ b/EML/lib/middleend/anf.ml @@ -35,15 +35,26 @@ and anf_expr = | AnfExpr of complex_expr [@@deriving show { with_path = false }] +type arity = int + +let pp_arity ppf (n : arity) = Stdlib.Format.pp_print_int ppf n + type anf_bind = ident * anf_expr [@@deriving show { with_path = false }] +type anf_fun_bind = ident * arity * anf_expr [@@deriving show { with_path = false }] type anf_structure = | AnfEval of anf_expr - | AnfValue of is_rec * anf_bind * anf_bind list + | AnfValue of is_rec * anf_fun_bind * anf_fun_bind list [@@deriving show { with_path = false }] type anf_program = anf_structure list [@@deriving show { with_path = false }] +let rec anf_expr_arity = function + | AnfExpr (ComplexLambda (pat_list, body)) -> List.length pat_list + anf_expr_arity body + | AnfLet (_, _, _, body) -> anf_expr_arity body + | _ -> 0 +;; + let optimize_anf_let (is_rec, name1, expr, body) = match is_rec, body with | NonRec, AnfExpr (ComplexImmediate (ImmediateVar name2)) when String.equal name1 name2 @@ -242,5 +253,5 @@ let anf_program (program : program) : (anf_program, string) Result.t = let* item_anf = anf_structure_item item in return (acc_list @ item_anf)) in - ANFMonad.run program' + run program' ;; From afd9c7e0a06528b6921a0937e5093db4fba4d1f9 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 24 Feb 2026 23:52:56 +0300 Subject: [PATCH 17/74] use arity in conversion to ANF Signed-off-by: Victoria Ostrovskaya --- EML/lib/middleend/anf.ml | 27 ++++++++++++++++----------- EML/lib/middleend/anf_pp.ml | 3 ++- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/EML/lib/middleend/anf.ml b/EML/lib/middleend/anf.ml index 46cee973..f8d129a6 100644 --- a/EML/lib/middleend/anf.ml +++ b/EML/lib/middleend/anf.ml @@ -138,7 +138,7 @@ let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = bind_complex_expr (ComplexBranch (imm_cond, then_aexp, else_aexp)) k) | ExpLet (flag, (pat, expr), _, body) -> (match pat with - | PatAny | PatConstruct ("()", None) -> anf expr (fun _ -> anf body k) + | PatAny | PatUnit | PatConstruct ("()", None) -> anf expr (fun _ -> anf body k) | PatTuple (p1, p2, rest) -> let pats = p1 :: p2 :: rest in anf expr (fun tuple_imm -> @@ -175,9 +175,7 @@ let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = | _ -> fail "Invalid tuple") | ExpLambda (pat, pat_list, body) -> let params = pat :: pat_list in - let* body_anf_expr = - anf body (fun imm -> bind_complex_expr (ComplexImmediate imm) k) - in + let* body_anf_expr = anf body (fun imm -> return (AnfExpr (ComplexImmediate imm))) in let rec wrap_params current_body = function | [] -> return current_body | ((PatVariable _ | PatConst _) as param) :: remaining_params -> @@ -196,14 +194,19 @@ let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = (AnfExpr (ComplexLambda ([ PatVariable var ], body_with_tuple_destructured))) | _ -> fail "Only variable, constant and tuple patterns in lambda" in - wrap_params body_anf_expr params + let* lambda_anf = wrap_params body_anf_expr params in + (match lambda_anf with + | AnfExpr (ComplexLambda (pats, body)) -> + bind_complex_expr (ComplexLambda (pats, body)) k + | _ -> fail "ExpLambda: wrap_params must return ComplexLambda") | ExpConstruct ("()", None) -> bind_complex_expr ComplexUnit k | ExpTypeAnnotation (e, _) -> anf e k | ExpList exprs -> anf_list exprs (fun imm_list -> bind_complex_expr (ComplexList imm_list) k) | ExpOption None -> bind_complex_expr ComplexUnit k | ExpOption (Some e) -> anf e k - | _ -> fail "Exp: Not implemented" + | ExpFunction _ | ExpMatch _ -> fail "Match/function cases not implemented" + | ExpConstruct _ -> fail "Constructors not implemented" and anf_list (exprs : expr list) (k : immediate list -> anf_expr t) : anf_expr t = match exprs with @@ -213,6 +216,8 @@ and anf_list (exprs : expr list) (k : immediate list -> anf_expr t) : anf_expr t anf_list tl (fun immediate_tl -> k (immediate_hd :: immediate_tl))) ;; +let to_fun_bind (id, e) = id, anf_expr_arity e, e + let anf_structure_item (item : structure) : anf_structure list t = match item with | SEval expr -> @@ -235,20 +240,20 @@ let anf_structure_item (item : structure) : anf_structure list t = tuple_var (List.mapi (p1 :: p2 :: rest) ~f:(fun i p -> i, p)) in - let one_value (id, e) = AnfValue (NonRec, (id, e), []) in + let one_value (id, e) = AnfValue (NonRec, to_fun_bind (id, e), []) in let new_items = - AnfValue (rec_flag, (tuple_var, anf_expr_body), []) + AnfValue (rec_flag, to_fun_bind (tuple_var, anf_expr_body), []) :: List.map component_bindings ~f:one_value in return (acc_list @ new_items) | _ -> let* var = get_var pat in - return (acc_list @ [ AnfValue (rec_flag, (var, anf_expr_body), []) ])) + return (acc_list @ [ AnfValue (rec_flag, to_fun_bind (var, anf_expr_body), []) ])) ;; -let anf_program (program : program) : (anf_program, string) Result.t = +let anf_program (ast : program) : (anf_program, string) Result.t = let program' = - List.fold_left program ~init:(return []) ~f:(fun acc item -> + List.fold_left ast ~init:(return []) ~f:(fun acc item -> let* acc_list = acc in let* item_anf = anf_structure_item item in return (acc_list @ item_anf)) diff --git a/EML/lib/middleend/anf_pp.ml b/EML/lib/middleend/anf_pp.ml index 1c42af78..9fbaf68a 100644 --- a/EML/lib/middleend/anf_pp.ml +++ b/EML/lib/middleend/anf_pp.ml @@ -129,6 +129,7 @@ and pp_anf_expr fmt = function | AnfExpr e -> pp_complex_expr fmt e and pp_anf_bind fmt (name, expr) = fprintf fmt "%s = %a" name pp_anf_expr expr +and pp_anf_fun_bind fmt (name, _arity, expr) = fprintf fmt "%s = %a" name pp_anf_expr expr and pp_anf_structure fmt = function | AnfEval expr -> fprintf fmt "%a" pp_anf_expr expr @@ -143,7 +144,7 @@ and pp_anf_structure fmt = function fmt "let %s%a" rec_flag - (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "@ and ") pp_anf_bind) + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "@ and ") pp_anf_fun_bind) all_binds and pp_anf_program fmt program = From 62ac16ab96ebe15f677f7f7bd0af550aa9af71ac Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 24 Feb 2026 23:53:44 +0300 Subject: [PATCH 18/74] add architecture (ISA, regs, instr) Signed-off-by: Victoria Ostrovskaya --- EML/lib/backend/ricsv/architecture.ml | 172 ++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 EML/lib/backend/ricsv/architecture.ml diff --git a/EML/lib/backend/ricsv/architecture.ml b/EML/lib/backend/ricsv/architecture.ml new file mode 100644 index 00000000..fe7ecf96 --- /dev/null +++ b/EML/lib/backend/ricsv/architecture.ml @@ -0,0 +1,172 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +(** RISC-V: ISA, platform config, and codegen API in one module. *) + +open Base + +module Riscv_backend = struct + type reg = + | Zero + | RA + | SP + | A of int + | T of int + | S of int + [@@deriving eq] + + type offset = reg * int + + type location = + | Loc_reg of reg + | Loc_mem of offset + + type instr = + | Addi of reg * reg * int (* сложение с константой: rd = rs + imm *) + | Ld of reg * offset (* загрузка 8 байт из памяти: rd = mem[base + offset] *) + | Sd of reg * offset (* сохранение 8 байт в память: mem[base + offset] = rd *) + | Mv of reg * reg (* копирование регистра: rd = rs *) + | Li of reg * int (* загрузить константу: rd = imm *) + | Add of reg * reg * reg (* сложение: rd = rs1 + rs2 *) + | Sub of reg * reg * reg (* вычитание: rd = rs1 - rs2 *) + | Call of string (* вызов функции по имени *) + | Ret (* возврат из функции *) + | Beq of + reg * reg * string (* переход если равно: если rs1 == rs2, переход на метку *) + | J of string (* безусловный переход на метку *) + | Label of string (* метка: именованная точка в коде, цель для Beq/J *) + | La of reg * string (* загрузить адрес: rd = адрес метки *) + (* Сравнения и логика *) + | Slt of reg * reg * reg (* записать в rd 1 если rs1 < rs2, иначе 0 *) + | Seqz of reg * reg (* записать в rd 1 если rs == 0, иначе 0 *) + | Snez of reg * reg (* записать в rd 1 если rs != 0, иначе 0 *) + | Xori of reg * reg * int (* xor регистра с константой: rd = rs ^ imm *) + | Xor of reg * reg * reg (* xor двух регистров: rd = rs1 ^ rs2 *) + | Mul of reg * reg * reg (* умножение: rd = rs1 * rs2 *) + | Srli of reg * reg * int (* логический сдвиг вправо на константу: rd = rs >>> imm *) + + let pp_reg ppf = + let open Stdlib.Format in + function + | Zero -> fprintf ppf "zero" + | RA -> fprintf ppf "ra" + | SP -> fprintf ppf "sp" + | A n -> fprintf ppf "a%d" n + | T n -> fprintf ppf "t%d" n + | S 0 -> fprintf ppf "fp" + | S n -> fprintf ppf "s%d" n + ;; + + let pp_offset ppf offset = + Stdlib.Format.fprintf ppf "%d(%a)" (snd offset) pp_reg (fst offset) + ;; + + let pp_instr ppf = + let open Stdlib.Format in + function + | Addi (rd, rs, imm) -> fprintf ppf "addi %a, %a, %d" pp_reg rd pp_reg rs imm + | Add (rd, rs1, rs2) -> fprintf ppf "add %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Sub (rd, rs1, rs2) -> fprintf ppf "sub %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Mul (rd, rs1, rs2) -> fprintf ppf "mul %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Srli (rd, rs1, imm) -> fprintf ppf "srli %a, %a, %d" pp_reg rd pp_reg rs1 imm + | Xori (rd, rs1, imm) -> fprintf ppf "xori %a, %a, %d" pp_reg rd pp_reg rs1 imm + | Xor (rd, rs1, rs2) -> fprintf ppf "xor %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Slt (rd, rs1, rs2) -> fprintf ppf "slt %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Seqz (rd, rs) -> fprintf ppf "seqz %a, %a" pp_reg rd pp_reg rs + | Snez (rd, rs) -> fprintf ppf "snez %a, %a" pp_reg rd pp_reg rs + | Li (rd, imm) -> fprintf ppf "li %a, %d" pp_reg rd imm + | La (rd, s) -> fprintf ppf "la %a, %s" pp_reg rd s + | Mv (rd, rs) -> fprintf ppf "mv %a, %a" pp_reg rd pp_reg rs + | Ld (rd, ofs) -> fprintf ppf "ld %a, %a" pp_reg rd pp_offset ofs + | Sd (rs, ofs) -> fprintf ppf "sd %a, %a" pp_reg rs pp_offset ofs + | Beq (rs1, rs2, s) -> fprintf ppf "beq %a, %a, %s" pp_reg rs1 pp_reg rs2 s + | J s -> fprintf ppf "j %s" s + | Label s -> fprintf ppf "%s:" s + | Call s -> fprintf ppf "call %s" s + | Ret -> fprintf ppf "ret" + ;; + + let tag_int n = 1 + (n lsl 1) + let fp = S 0 + let sp = SP + let ra = RA + let zero = Zero + let a0 = A 0 + let a1 = A 1 + let a2 = A 2 + let a3 = A 3 + let a4 = A 4 + let a5 = A 5 + let a6 = A 6 + let a7 = A 7 + let t0 = T 0 + let t1 = T 1 + + (* ----- Platform (RISC-V layout) ----- *) + let arg_regs = [ a0; a1; a2; a3; a4; a5; a6; a7 ] + let candidate_regs_for_spill = arg_regs + let arg_regs_count = 8 + let word_size = 8 + let frame_header_size = 2 * word_size + let saved_fp_offset = 0 + let saved_ra_offset = word_size + let result_reg = a0 + + let is_caller_saved = function + | A _ | T _ -> true + | Zero | RA | SP | S _ -> false + ;; + + (* *) + let addi rd rs imm = [ Addi (rd, rs, imm) ] + let ld rd ofs = [ Ld (rd, ofs) ] + let sd rs ofs = [ Sd (rs, ofs) ] + let mv rd rs = [ Mv (rd, rs) ] + let li rd imm = [ Li (rd, imm) ] + let add rd rs1 rs2 = [ Add (rd, rs1, rs2) ] + let sub rd rs1 rs2 = [ Sub (rd, rs1, rs2) ] + let call s = [ Call s ] + let ret () = [ Ret ] + let beq rs1 rs2 lbl = [ Beq (rs1, rs2, lbl) ] + let j lbl = [ J lbl ] + let label s = [ Label s ] + let la rd s = [ La (rd, s) ] + let slt rd rs1 rs2 = [ Slt (rd, rs1, rs2) ] + let seqz rd rs = [ Seqz (rd, rs) ] + let snez rd rs = [ Snez (rd, rs) ] + let xori rd rs imm = [ Xori (rd, rs, imm) ] + let xor rd rs1 rs2 = [ Xor (rd, rs1, rs2) ] + let mul rd rs1 rs2 = [ Mul (rd, rs1, rs2) ] + let srli rd rs imm = [ Srli (rd, rs, imm) ] + let add_tag_items dst delta = [ Addi (dst, dst, delta) ] + + let prologue ~name ~stack_size = + let ra_slot = sp, stack_size - saved_ra_offset in + let fp_slot = sp, stack_size - frame_header_size in + let base = + label name + @ addi sp sp (-stack_size) + @ sd ra ra_slot + @ sd fp fp_slot + @ addi fp sp (stack_size - frame_header_size) + in + base + ;; + + let epilogue ~is_main = + let base = + addi sp fp frame_header_size + @ ld ra (fp, saved_ra_offset) + @ ld fp (fp, saved_fp_offset) + in + if is_main then base @ li a0 0 @ ret () else base @ ret () + ;; + + let format_item ppf i = + (match i with + | Label _ -> Stdlib.Format.fprintf ppf "%a" pp_instr i + | _ -> Stdlib.Format.fprintf ppf " %a" pp_instr i); + Stdlib.Format.fprintf ppf "\n" + ;; +end From 6a81f7f1f9543f5fd49c6c1bec6bcfe6c05b8b3c Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 24 Feb 2026 23:54:24 +0300 Subject: [PATCH 19/74] add config with runtime primitives Signed-off-by: Victoria Ostrovskaya --- EML/lib/backend/ricsv/config.ml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 EML/lib/backend/ricsv/config.ml diff --git a/EML/lib/backend/ricsv/config.ml b/EML/lib/backend/ricsv/config.ml new file mode 100644 index 00000000..ce535dd6 --- /dev/null +++ b/EML/lib/backend/ricsv/config.ml @@ -0,0 +1,14 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +(** Primitives the generated code can call. *) + +type primitive = + { name : string + ; arity : int + } + +let primitive_arities : primitive list = + [ { name = "print_int"; arity = 1 }; { name = "print_endline"; arity = 1 } ] +;; From 714b6193598c9f3e2394776f303bf9ce804f9d4a Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 24 Feb 2026 23:54:53 +0300 Subject: [PATCH 20/74] add generator state monad Signed-off-by: Victoria Ostrovskaya --- EML/lib/backend/ricsv/generator_state.ml | 67 ++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 EML/lib/backend/ricsv/generator_state.ml diff --git a/EML/lib/backend/ricsv/generator_state.ml b/EML/lib/backend/ricsv/generator_state.ml new file mode 100644 index 00000000..19b4f14f --- /dev/null +++ b/EML/lib/backend/ricsv/generator_state.ml @@ -0,0 +1,67 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Base +open Frontend.Ast +open Architecture.Riscv_backend + +type env = (ident, location, String.comparator_witness) Map.t + +type state = + { frame_offset : int + ; fresh_id : int + ; arity_map : (ident, int, String.comparator_witness) Map.t + ; env : env + ; instr_buffer : instr list + } + +type 'a t = state -> ('a * state, string) Result.t + +let return x st = Ok (x, st) +let fail e = fun _ -> Error e + +let bind m f = + fun state -> + match m state with + | Ok (x, st') -> f x st' + | Error e -> Error e +;; + +let ( let* ) = bind +let get st = Ok (st, st) +let put st = fun _ -> Ok ((), st) + +let modify f = + let* st = get in + put (f st) +;; + +let modify_env f = modify (fun st -> { st with env = f st.env }) + +let get_env = + let* st = get in + return st.env +;; + +let set_env env = modify (fun st -> { st with env }) + +let fresh = + let modify_fresh_id f = modify (fun st -> { st with fresh_id = f st.fresh_id }) in + let* st = get in + let* () = modify_fresh_id Int.succ in + return st.fresh_id +;; + +let run m init = m init + +let append (items : instr list) = + let modify_instr_buffer f = + modify (fun st -> { st with instr_buffer = f st.instr_buffer }) + in + if List.is_empty items + then return () + else + modify_instr_buffer (fun l -> + List.fold_left items ~init:l ~f:(fun acc it -> it :: acc)) +;; From fb2d67903625ad475f4e76a3cda5258ff3f480bb Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 24 Feb 2026 23:55:40 +0300 Subject: [PATCH 21/74] add function_layout, analysis before generation Signed-off-by: Victoria Ostrovskaya --- EML/lib/backend/ricsv/analysis.ml | 83 +++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 EML/lib/backend/ricsv/analysis.ml diff --git a/EML/lib/backend/ricsv/analysis.ml b/EML/lib/backend/ricsv/analysis.ml new file mode 100644 index 00000000..fdfb5dbc --- /dev/null +++ b/EML/lib/backend/ricsv/analysis.ml @@ -0,0 +1,83 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Base +open Frontend.Ast +open Middleend.Anf + +type function_layout = + { func_name : string + ; params : immediate list + ; body : anf_expr + ; slots_count : int + } + +type analysis_result = + { arity_map : (string, int, String.comparator_witness) Map.t + ; functions : function_layout list + } + +let rec slots_in_imm = function + | ImmediateVar _ | ImmediateConst _ -> 0 + +and slots_in_cexpr = function + | ComplexImmediate imm -> slots_in_imm imm + | ComplexUnit -> 0 + | ComplexBinOper (_, left, right) -> slots_in_imm left + slots_in_imm right + | ComplexUnarOper (_, imm) -> slots_in_imm imm + | ComplexTuple (first, second, rest) -> + List.fold_left (first :: second :: rest) ~init:0 ~f:(fun acc e -> + acc + slots_in_imm e) + | ComplexField (imm, _) -> slots_in_imm imm + | ComplexList imm_list -> + List.fold_left imm_list ~init:0 ~f:(fun acc e -> acc + slots_in_imm e) + | ComplexApp (first, second, rest) -> + List.fold_left (first :: second :: rest) ~init:0 ~f:(fun acc e -> + acc + slots_in_imm e) + | ComplexOption None -> 0 + | ComplexOption (Some imm) -> slots_in_imm imm + | ComplexLambda (_, body) -> slots_in_anf body + | ComplexBranch (cond, then_e, else_e) -> + slots_in_imm cond + slots_in_anf then_e + slots_in_anf else_e + +and slots_in_anf = function + | AnfExpr cexp -> slots_in_cexpr cexp + | AnfLet (_, _, cexp, cont) -> 1 + slots_in_cexpr cexp + slots_in_anf cont +;; + +let rec params_of_anf = function + | AnfExpr (ComplexLambda (pats, body)) -> + let imms = + List.filter_map pats ~f:(function + | PatVariable id -> Some (ImmediateVar id) + | _ -> None) + in + let rest, inner = params_of_anf body in + imms @ rest, inner + | other -> [], other +;; + +let arity_map_of_program (program : anf_program) = + List.fold + program + ~init:(Map.empty (module String)) + ~f:(fun map -> function + | AnfValue (_, (fid, arity, _), and_binds) -> + let map = Map.set map ~key:fid ~data:arity in + List.fold and_binds ~init:map ~f:(fun acc (id, arity, _) -> + Map.set acc ~key:id ~data:arity) + | _ -> map) +;; + +let analyze (program : anf_program) = + let arity_map = arity_map_of_program program in + let functions = + List.filter_map program ~f:(function + | AnfValue (_, (func_name, _arity, body), _) -> + let params, body = params_of_anf body in + Some { func_name; params; body; slots_count = slots_in_anf body } + | AnfEval _ -> None) + in + { arity_map; functions } +;; From e721796d6430088886db2bc947fbd48386f1cbfa Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 24 Feb 2026 23:56:24 +0300 Subject: [PATCH 22/74] add auxillary Signed-off-by: Victoria Ostrovskaya --- EML/lib/backend/ricsv/auxillary.ml | 87 ++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 EML/lib/backend/ricsv/auxillary.ml diff --git a/EML/lib/backend/ricsv/auxillary.ml b/EML/lib/backend/ricsv/auxillary.ml new file mode 100644 index 00000000..90a9e966 --- /dev/null +++ b/EML/lib/backend/ricsv/auxillary.ml @@ -0,0 +1,87 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Base +open Frontend.Ast +open Architecture.Riscv_backend +open Middleend.Anf + +let compare_ordering dst r1 r2 ~invert = + let base = slt dst r1 r2 in + if invert then base @ xori dst dst 1 else base +;; + +let compare_eq_ne dst r1 r2 ~is_eq = + let base = xor dst r1 r2 in + if is_eq then base @ seqz dst dst else base @ snez dst dst +;; + +let bin_op dst op r1 r2 = + match op with + | "+" -> add dst r1 r2 @ add_tag_items dst (-1) + | "-" -> sub dst r1 r2 @ add_tag_items dst 1 + | "*" -> srli r1 r1 1 @ addi r2 r2 (-1) @ mul dst r1 r2 @ add_tag_items dst 1 + | "<" -> compare_ordering dst r1 r2 ~invert:false + | ">" -> compare_ordering dst r2 r1 ~invert:false + | "<=" -> compare_ordering dst r2 r1 ~invert:true + | ">=" -> compare_ordering dst r1 r2 ~invert:true + | "=" -> compare_eq_ne dst r1 r2 ~is_eq:true + | "<>" -> compare_eq_ne dst r1 r2 ~is_eq:false + | _ -> failwith ("unsupported binary operator: " ^ op) +;; + +let bin_oper_to_string = function + | Plus -> "+" + | Minus -> "-" + | Multiply -> "*" + | Division -> "/" + | And -> "&&" + | Or -> "||" + | Equal -> "=" + | NotEqual -> "<>" + | GreaterThan -> ">" + | LowerThan -> "<" + | GretestEqual -> ">=" + | LowestEqual -> "<=" +;; + +let vars_in_caller_saved_regs env = + Map.to_alist env + |> List.filter_map ~f:(fun (name, loc) -> + match loc with + | Loc_reg r when is_caller_saved r -> Some (name, r) + | _ -> None) +;; + +type call_style = + | Nullary of string + | Curry_chain of + { fname : string + ; arity : int + ; first_args : immediate list + ; rest_args : immediate list + } + | Direct of + { fname : string + ; args : immediate list + } + | Via_apply_nargs of + { fname : string + ; nargs : int + ; args : immediate list + } + +let classify_call ~nargs ~callee_arity_opt ~fname ~args : call_style = + match callee_arity_opt with + | Some 0 when nargs = 1 -> Nullary fname + | Some arity when nargs > arity -> + Curry_chain + { fname + ; arity + ; first_args = List.take args arity + ; rest_args = List.drop args arity + } + | Some arity when nargs = arity -> Direct { fname; args } + | _ -> Via_apply_nargs { fname; nargs; args } +;; From d298d24022ce2b6cdac4e92ec7a6a9e5780bc0b3 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 24 Feb 2026 23:56:41 +0300 Subject: [PATCH 23/74] add code generator Signed-off-by: Victoria Ostrovskaya --- EML/lib/backend/ricsv/generator.ml | 364 +++++++++++++++++++++++++++++ 1 file changed, 364 insertions(+) create mode 100644 EML/lib/backend/ricsv/generator.ml diff --git a/EML/lib/backend/ricsv/generator.ml b/EML/lib/backend/ricsv/generator.ml new file mode 100644 index 00000000..5a567378 --- /dev/null +++ b/EML/lib/backend/ricsv/generator.ml @@ -0,0 +1,364 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Base +open Stdlib.Format +open Frontend.Ast +open Architecture.Riscv_backend +open Analysis +open Generator_state +open Auxillary +open Middleend.Anf + +let alloc_frame_slot = + let modify_frame_offset f = + modify (fun st -> { st with frame_offset = f st.frame_offset }) + in + let* () = modify_frame_offset (fun n -> n + word_size) in + let* st = get in + return (fp, -st.frame_offset) +;; + +let store_reg_into_frame reg = + let* slot = alloc_frame_slot in + let* () = append (sd reg slot) in + return (Loc_mem slot) +;; + +let load_into_reg dst_reg loc = + let instrs = + match loc with + | Loc_reg src_reg when equal_reg src_reg dst_reg -> [] + | Loc_reg src_reg -> mv dst_reg src_reg + | Loc_mem ofs -> ld dst_reg ofs + in + let* () = append instrs in + return () +;; + +let caller_saved_locs env = + Map.to_alist env + |> List.filter_map ~f:(fun (name, loc) -> + match loc with + | Loc_reg r when is_caller_saved r -> Some (name, r) + | _ -> None) +;; + +let spill_caller_saved_vars_to_frame = + let* env = get_env in + let vars = vars_in_caller_saved_regs env in + let frame_bytes = List.length vars * word_size in + let* () = if frame_bytes > 0 then append (addi sp sp (-frame_bytes)) else return () in + let rec spill env = function + | [] -> return env + | (name, r) :: rest -> + let* new_loc = store_reg_into_frame r in + spill (Map.set env ~key:name ~data:new_loc) rest + in + let* new_env = spill env vars in + set_env new_env +;; + +let evacuate_reg dst = + let is_reg_used env r = + Map.exists env ~f:(function + | Loc_reg r' -> equal_reg r r' + | Loc_mem _ -> false) + in + let rewrite_loc_in_env env ~from_reg ~to_loc = + Map.map env ~f:(function + | Loc_reg r when equal_reg r from_reg -> to_loc + | loc -> loc) + in + let* env = get_env in + if not (is_reg_used env dst) + then return () + else ( + match List.find candidate_regs_for_spill ~f:(fun r -> not (is_reg_used env r)) with + | Some new_reg -> + let* () = append (mv new_reg dst) in + let new_env = rewrite_loc_in_env env ~from_reg:dst ~to_loc:(Loc_reg new_reg) in + set_env new_env + | None -> + let* new_loc = store_reg_into_frame dst in + let new_env = rewrite_loc_in_env env ~from_reg:dst ~to_loc:new_loc in + set_env new_env) +;; + +let gen_imm dst = function + | ImmediateConst (ConstInt n) -> append (li dst (tag_int n)) + | ImmediateConst (ConstBool b) -> append (li dst (if b then tag_int 1 else tag_int 0)) + | ImmediateConst (ConstChar c) -> append (li dst (tag_int (Char.to_int c))) + | ImmediateConst (ConstString _) -> fail "String constants not yet supported in codegen" + | ImmediateVar name -> + let* env = get_env in + (match Map.find env name with + | Some loc -> load_into_reg dst loc + | None -> + let* state = get in + (match Map.find state.arity_map name with + | Some 0 -> append (call name) + | Some arity -> + let* () = append (la result_reg name) in + let* () = append (li (List.nth_exn arg_regs 1) arity) in + append (call "alloc_closure") + | _ -> fail ("unbound variable: " ^ name))) +;; + +let copy_result_to dst = + if equal_reg dst result_reg then return () else append (mv dst result_reg) +;; + +let indices_of_args_to_spill state exps = + let is_rewrites_result_regs state = function + | ImmediateConst _ -> false + | ImmediateVar id -> Map.mem state.arity_map id + in + List.filter_mapi exps ~f:(fun i arg -> + if is_rewrites_result_regs state arg then Some i else None) +;; + +let spill_dangerous_args state exps = + let dangerous_idxs = indices_of_args_to_spill state exps in + let spill_slots = List.length dangerous_idxs * word_size in + let* () = if spill_slots > 0 then append (addi sp sp (-spill_slots)) else return () in + List.foldi exps ~init:(return Map.Poly.empty) ~f:(fun i acc arg -> + let* spilled = acc in + if List.mem dangerous_idxs i ~equal:Int.equal + then + let* () = gen_imm result_reg arg in + let* loc = store_reg_into_frame result_reg in + return (Map.set spilled ~key:i ~data:loc) + else return spilled) +;; + +let load_exps_into_regs spilled_locs arg_regs exps = + let n = min (List.length exps) (List.length arg_regs) in + List.foldi (List.take exps n) ~init:(return ()) ~f:(fun i acc arg -> + let* () = acc in + let reg = List.nth_exn arg_regs i in + match Map.find spilled_locs i with + | Some loc -> load_into_reg reg loc + | None -> gen_imm reg arg) +;; + +let push_stack_args stack_args = + let n = List.length stack_args in + if n = 0 + then return 0 + else ( + let stack_bytes = n * word_size in + let* () = append (addi sp sp (-stack_bytes)) in + let* () = + List.foldi stack_args ~init:(return ()) ~f:(fun i acc arg -> + let* () = acc in + let offset = i * word_size in + let* () = gen_imm t0 arg in + append (sd t0 (sp, offset))) + in + return stack_bytes) +;; + +let gen_call_with_regs dst regs args spilled symbol = + let* () = load_exps_into_regs spilled regs args in + let stack_args = List.drop args (List.length regs) in + let* reserved = push_stack_args stack_args in + let* () = append (call symbol) in + let* () = copy_result_to dst in + if reserved > 0 then append (addi sp sp reserved) else return () +;; + +(* let foo = ... in + foo () *) +let gen_nullary dst fname = + let* () = append (call fname) in + copy_result_to dst +;; + +let gen_direct_call dst fname args spilled = + gen_call_with_regs dst arg_regs args spilled fname +;; + +let gen_via_apply_nargs dst fname nargs args spilled = + let* () = gen_imm (List.nth_exn arg_regs 0) (ImmediateVar fname) in + let* () = append (li (List.nth_exn arg_regs 1) nargs) in + let regs = List.drop arg_regs 2 in + gen_call_with_regs dst regs args spilled "apply_nargs" +;; + +let rec gen_invocation dst fname args = + let* () = spill_caller_saved_vars_to_frame in + let* state = get in + let* spilled = spill_dangerous_args state args in + let nargs = List.length args in + let callee_arity_opt = Map.find state.arity_map fname in + let style = classify_call ~nargs ~callee_arity_opt ~fname ~args in + match style with + | Nullary name -> gen_nullary dst name + | Curry_chain { fname = fn; arity; first_args; rest_args } -> + gen_curried_call dst fn arity first_args rest_args + | Direct { fname = fn; args = a } -> gen_direct_call dst fn a spilled + | Via_apply_nargs { fname = fn; nargs = n; args = a } -> + gen_via_apply_nargs dst fn n a spilled + +and gen_curried_call dst fname _arity first_args rest_args = + let fresh_partial_name = + let* id = fresh in + return (Printf.sprintf "part_%d" id) + in + let* part_name = fresh_partial_name in + let* () = + gen_cexpr + dst + (ComplexApp (ImmediateVar fname, List.hd_exn first_args, List.tl_exn first_args)) + in + let* loc = store_reg_into_frame dst in + let* () = modify_env (Map.set ~key:part_name ~data:loc) in + gen_cexpr + dst + (ComplexApp (ImmediateVar part_name, List.hd_exn rest_args, List.tl_exn rest_args)) + +and gen_unit dst = append (li dst (tag_int 0)) +and gen_imm dst imm = gen_imm dst imm + +and gen_neg dst op = + let* () = gen_imm t0 op in + let* () = append (li dst (tag_int 0)) in + append (sub dst dst t0) + +and gen_not dst op = + let* () = gen_imm t0 op in + append (xori dst t0 (tag_int 1)) + +and gen_binop dst op left right = + let* () = gen_imm t0 left in + let* () = gen_imm t1 right in + let* () = evacuate_reg dst in + append (bin_op dst (bin_oper_to_string op) t0 t1) + +and gen_branch dst cond then_e else_e = + let fresh_branch_labels = + let* id = fresh in + return (Printf.sprintf "else_%d" id, Printf.sprintf "end_%d" id) + in + let* () = gen_imm t0 cond in + let* else_lbl, end_lbl = fresh_branch_labels in + let* () = append (beq t0 zero else_lbl) in + let* st_before_then = get in + let frame_before_then = st_before_then.frame_offset in + let* () = gen_anf dst then_e in + let* () = append (j end_lbl) in + let* st_after_then = get in + let* () = + put + { st_before_then with + frame_offset = frame_before_then + ; instr_buffer = st_after_then.instr_buffer + } + in + let* () = append (label else_lbl) in + let* () = gen_anf dst else_e in + append (label end_lbl) + +and gen_app dst fname first rest = gen_invocation dst fname (first :: rest) + +and gen_cexpr dst = function + | ComplexUnit -> gen_unit dst + | ComplexImmediate imm -> gen_imm dst imm + | ComplexUnarOper (Negative, op) -> gen_neg dst op + | ComplexUnarOper (Not, op) -> gen_not dst op + | ComplexBinOper (op, left, right) -> gen_binop dst op left right + | ComplexBranch (cond, then_e, else_e) -> gen_branch dst cond then_e else_e + | ComplexApp (ImmediateVar name, first, rest) -> gen_app dst name first rest + | ComplexApp (_, _, _) -> fail "ComplexApp: function must be a variable" + | ComplexLambda _ | ComplexList _ | ComplexOption _ | ComplexField _ | ComplexTuple _ -> + fail "gen_cexpr: Lambda/List/Option/Tuple not implemented" + +and gen_anf dst = function + | AnfExpr cexp -> gen_cexpr dst cexp + | AnfLet (_, name, rhs, cont) -> + let* () = gen_cexpr result_reg rhs in + let* loc = store_reg_into_frame result_reg in + let* () = modify_env (Map.set ~key:name ~data:loc) in + gen_anf dst cont +;; + +let bind_param_to_reg env i = function + | ImmediateVar name -> + let r = List.nth_exn arg_regs i in + return (Map.set env ~key:name ~data:(Loc_reg r)) + | _ -> fail "unsupported pattern" +;; + +let bind_param_to_stack env i = function + | ImmediateVar name -> + let off = (i + 2) * word_size in + return (Map.set env ~key:name ~data:(Loc_mem (fp, off))) + | _ -> fail "unsupported pattern" +;; + +let flush_instr_buffer ppf = + let get_instr_buffer = + let* st = get in + return st.instr_buffer + in + let clear_instr_buffer = modify (fun st -> { st with instr_buffer = [] }) in + let* buf = get_instr_buffer in + let* () = clear_instr_buffer in + let () = List.iter (List.rev buf) ~f:(fun item -> format_item ppf item) in + return () +;; + +let gen_func func_name params body frame_sz ppf = + fprintf ppf "\n .globl %s\n .type %s, @function\n" func_name func_name; + let args = List.length params in + let params_reg, params_stack = List.split_n params (min args arg_regs_count) in + let env0 = Map.empty (module String) in + let* env = + List.foldi params_reg ~init:(return env0) ~f:(fun i acc p -> + let* e = acc in + bind_param_to_reg e i p) + in + let* env = + List.foldi params_stack ~init:(return env) ~f:(fun i acc p -> + let* e = acc in + bind_param_to_stack e i p) + in + let* () = set_env env in + let* () = append (prologue ~name:func_name ~stack_size:frame_sz) in + let* st = get in + let* () = put { st with frame_offset = 0 } in + let* () = gen_anf result_reg body in + let* () = append (epilogue ~is_main:(String.equal func_name "main")) in + let* () = flush_instr_buffer ppf in + return () +;; + +let gen_program ppf (analysis : analysis_result) = + fprintf ppf ".section .text"; + let base = Config.primitive_arities in + let arity_map = + List.fold base ~init:analysis.arity_map ~f:(fun map prim -> + Map.set map ~key:prim.name ~data:prim.arity) + in + let init = + { frame_offset = 0 + ; fresh_id = 0 + ; arity_map + ; env = Map.empty (module String) + ; instr_buffer = [] + } + in + let comp = + List.fold analysis.functions ~init:(return ()) ~f:(fun acc fn -> + let frame_sz = (2 + fn.slots_count) * word_size in + let* () = acc in + gen_func fn.func_name fn.params fn.body frame_sz ppf) + in + match run comp init with + | Ok ((), _) -> pp_print_flush ppf () + | Error msg -> + Stdlib.Format.eprintf "Codegen error: %s\n%!" msg; + Stdlib.exit 1 +;; From 1d7452ababcb22ea11ae3ce5e0c8d750c8885a8f Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 24 Feb 2026 23:57:31 +0300 Subject: [PATCH 24/74] code generator entry point Signed-off-by: Victoria Ostrovskaya --- EML/lib/backend/ricsv/runner.ml | 11 +++++++++++ EML/lib/backend/ricsv/runner.mli | 5 +++++ 2 files changed, 16 insertions(+) create mode 100644 EML/lib/backend/ricsv/runner.ml create mode 100644 EML/lib/backend/ricsv/runner.mli diff --git a/EML/lib/backend/ricsv/runner.ml b/EML/lib/backend/ricsv/runner.ml new file mode 100644 index 00000000..9b5907c0 --- /dev/null +++ b/EML/lib/backend/ricsv/runner.ml @@ -0,0 +1,11 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Middleend.Anf +open Analysis + +let gen_program ppf (program : anf_program) = + let analysis = analyze program in + Generator.gen_program ppf analysis +;; diff --git a/EML/lib/backend/ricsv/runner.mli b/EML/lib/backend/ricsv/runner.mli new file mode 100644 index 00000000..9742e73c --- /dev/null +++ b/EML/lib/backend/ricsv/runner.mli @@ -0,0 +1,5 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val gen_program : Format.formatter -> Middleend.Anf.anf_program -> unit From 69b30420b7ddd1a40ed5b06f67a41102710e191d Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Wed, 25 Feb 2026 10:03:36 +0300 Subject: [PATCH 25/74] add ppx_deriving.eq Signed-off-by: Victoria Ostrovskaya --- EML/lib/dune | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/EML/lib/dune b/EML/lib/dune index 99d5ce41..fb8425e4 100644 --- a/EML/lib/dune +++ b/EML/lib/dune @@ -6,6 +6,6 @@ (modules :standard) (libraries base angstrom) (preprocess - (pps ppx_deriving.show)) + (pps ppx_deriving.show ppx_deriving.eq)) (instrumentation (backend bisect_ppx))) From ba2391af1a587fb71f33a7d28484712b15b8136a Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Wed, 25 Feb 2026 10:04:01 +0300 Subject: [PATCH 26/74] update tests for arity and anf_fun_bind Signed-off-by: Victoria Ostrovskaya --- EML/tests/anf_tests.ml | 95 +++++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 51 deletions(-) diff --git a/EML/tests/anf_tests.ml b/EML/tests/anf_tests.ml index c897f9cc..113b2361 100644 --- a/EML/tests/anf_tests.ml +++ b/EML/tests/anf_tests.ml @@ -29,7 +29,7 @@ let%expect_test "001.ml" = [%expect {| [(AnfValue (NonRec, - ("recfac", + ("recfac", 1, (AnfExpr (ComplexLambda ([(PatVariable "n")], (AnfLet (NonRec, "anf_t0", @@ -61,45 +61,40 @@ let%expect_test "003occurs.ml" = [%expect {| [(AnfValue (NonRec, - ("fix", + ("fix", 1, (AnfExpr (ComplexLambda ([(PatVariable "f")], - (AnfExpr + (AnfLet (NonRec, "anf_t3", (ComplexLambda ([(PatVariable "x")], - (AnfExpr + (AnfLet (NonRec, "anf_t1", (ComplexLambda ([(PatVariable "f")], - (AnfLet (NonRec, "anf_t1", + (AnfExpr (ComplexApp ((ImmediateVar "x"), - (ImmediateVar "x"), [(ImmediateVar "f")])), - (AnfLet (NonRec, "anf_t3", - (ComplexApp ((ImmediateVar "f"), - (ImmediateVar "anf_t1"), [])), - (AnfExpr - (ComplexLambda ([(PatVariable "x")], - (AnfExpr - (ComplexLambda ([(PatVariable "f")], - (AnfLet (NonRec, "anf_t5", - (ComplexApp ((ImmediateVar "x"), - (ImmediateVar "x"), - [(ImmediateVar "f")])), - (AnfLet (NonRec, "anf_t7", - (ComplexApp ( - (ImmediateVar "f"), - (ImmediateVar "anf_t5"), - [])), - (AnfExpr - (ComplexApp ( - (ImmediateVar "anf_t3"), - (ImmediateVar "anf_t7"), - []))) - )) - )) - ))) - ))) - )) - )) - ))) - ))) + (ImmediateVar "x"), [(ImmediateVar "f")]))) + )), + (AnfExpr + (ComplexApp ((ImmediateVar "f"), + (ImmediateVar "anf_t1"), []))) + )) + )), + (AnfLet (NonRec, "anf_t7", + (ComplexLambda ([(PatVariable "x")], + (AnfLet (NonRec, "anf_t5", + (ComplexLambda ([(PatVariable "f")], + (AnfExpr + (ComplexApp ((ImmediateVar "x"), + (ImmediateVar "x"), [(ImmediateVar "f")]))) + )), + (AnfExpr + (ComplexApp ((ImmediateVar "f"), + (ImmediateVar "anf_t5"), []))) + )) + )), + (AnfExpr + (ComplexApp ((ImmediateVar "anf_t3"), + (ImmediateVar "anf_t7"), []))) + )) + )) )))), [])) ]|}] @@ -110,8 +105,8 @@ let%expect_test "004let_poly.ml" = [%expect {| [(AnfValue (NonRec, - ("temp", - (AnfExpr + ("temp", 0, + (AnfLet (NonRec, "anf_t3", (ComplexLambda ([(PatVariable "f")], (AnfLet (NonRec, "anf_t0", (ComplexApp ((ImmediateVar "f"), (ImmediateConst (ConstInt 1)), @@ -119,22 +114,20 @@ let%expect_test "004let_poly.ml" = (AnfLet (NonRec, "anf_t1", (ComplexApp ((ImmediateVar "f"), (ImmediateConst (ConstBool true)), [])), - (AnfLet (NonRec, "anf_t3", + (AnfExpr (ComplexTuple ((ImmediateVar "anf_t0"), - (ImmediateVar "anf_t1"), [])), - (AnfExpr - (ComplexLambda ([(PatVariable "x")], - (AnfLet (NonRec, "anf_t4", - (ComplexImmediate (ImmediateVar "x")), - (AnfExpr - (ComplexApp ((ImmediateVar "anf_t3"), - (ImmediateVar "anf_t4"), []))) - )) - ))) - )) + (ImmediateVar "anf_t1"), []))) )) )) - )))), + )), + (AnfLet (NonRec, "anf_t4", + (ComplexLambda ([(PatVariable "x")], + (AnfExpr (ComplexImmediate (ImmediateVar "x"))))), + (AnfExpr + (ComplexApp ((ImmediateVar "anf_t3"), (ImmediateVar "anf_t4"), + []))) + )) + ))), [])) ]|}] ;; @@ -144,7 +137,7 @@ let%expect_test "002if.ml" = [%expect {| [(AnfValue (NonRec, - ("main", + ("main", 0, (AnfExpr (ComplexBranch ((ImmediateConst (ConstBool true)), (AnfExpr (ComplexImmediate (ImmediateConst (ConstInt 1)))), From ab59721b0bdeeb238187b7ee9f2c8d9f22f970e2 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Wed, 25 Feb 2026 10:04:30 +0300 Subject: [PATCH 27/74] add riscv backend tests Signed-off-by: Victoria Ostrovskaya --- EML/tests/riscv_tests.ml | 538 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 538 insertions(+) create mode 100644 EML/tests/riscv_tests.ml diff --git a/EML/tests/riscv_tests.ml b/EML/tests/riscv_tests.ml new file mode 100644 index 00000000..d60b0504 --- /dev/null +++ b/EML/tests/riscv_tests.ml @@ -0,0 +1,538 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +(** RISC-V codegen tests. *) + +open EML_lib +open Frontend.Parser +open Middleend.Anf + +let compile src : string = + match parse src with + | Error e -> "Parse error: " ^ e + | Ok ast -> + (match anf_program ast with + | Error e -> "ANF error: " ^ e + | Ok anf -> + let buf = Buffer.create 1024 in + let ppf = Format.formatter_of_buffer buf in + Backend.Ricsv.Runner.gen_program ppf anf; + Format.pp_print_flush ppf (); + Buffer.contents buf) +;; + +let run src = Format.printf "%s" (compile src) + +let%expect_test "unary_minus" = + run "let x = -5"; + [%expect + {| +.section .text + .globl x + .type x, @function +x: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + li t0, 11 + li a0, 1 + sub a0, a0, t0 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret +|}] +;; + +let%expect_test "unary_not" = + run "let x = not true"; + [%expect + {| +.section .text + .globl x + .type x, @function +x: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + li t0, 3 + xori a0, t0, 3 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret +|}] +;; + +let%expect_test "unit_main" = + run "let main = ()"; + [%expect + {| +.section .text + .globl main + .type main, @function +main: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + li a0, 1 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret +|}] +;; + +let%expect_test "mul_only" = + run "let main = 7 * 8"; + [%expect + {| +.section .text + .globl main + .type main, @function +main: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + li t0, 15 + li t1, 17 + srli t0, t0, 1 + addi t1, t1, -1 + mul a0, t0, t1 + addi a0, a0, 1 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret +|}] +;; + +let%expect_test "double_fn" = + run + {| + let double x = x + x + let main = double 21 + |}; + [%expect + {| +.section .text + .globl double + .type double, @function +double: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + mv t0, a0 + mv t1, a0 + mv a1, a0 + add a0, t0, t1 + addi a0, a0, -1 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret + + .globl main + .type main, @function +main: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + li a0, 43 + call double + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret +|}] +;; + +let%expect_test "abs_fn" = + run + {| + let abs x = if x < 0 then -x else x + let main = abs 7 + |}; + [%expect + {| +.section .text + .globl abs + .type abs, @function +abs: + addi sp, sp, -24 + sd ra, 16(sp) + sd fp, 8(sp) + addi fp, sp, 8 + mv t0, a0 + li t1, 1 + mv a1, a0 + slt a0, t0, t1 + sd a0, -8(fp) + ld t0, -8(fp) + beq t0, zero, else_0 + mv t0, a1 + li a0, 1 + sub a0, a0, t0 + j end_0 +else_0: + mv a0, a1 +end_0: + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret + + .globl main + .type main, @function +main: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + li a0, 15 + call abs + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret +|}] +;; + +let%expect_test "nested_calls" = + run + {| + let sq x = x * x + let sum_of_squares a b = sq a + sq b + let main = sum_of_squares 3 4 + |}; + [%expect + {| +.section .text + .globl sq + .type sq, @function +sq: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + mv t0, a0 + mv t1, a0 + mv a1, a0 + srli t0, t0, 1 + addi t1, t1, -1 + mul a0, t0, t1 + addi a0, a0, 1 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret + + .globl sum_of_squares + .type sum_of_squares, @function +sum_of_squares: + addi sp, sp, -32 + sd ra, 24(sp) + sd fp, 16(sp) + addi fp, sp, 16 + addi sp, sp, -16 + sd a0, -8(fp) + sd a1, -16(fp) + ld a0, -8(fp) + call sq + sd a0, -24(fp) + ld a0, -16(fp) + call sq + sd a0, -32(fp) + ld t0, -24(fp) + ld t1, -32(fp) + add a0, t0, t1 + addi a0, a0, -1 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret + + .globl main + .type main, @function +main: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + li a0, 7 + li a1, 9 + call sum_of_squares + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret +|}] +;; + +let%expect_test "fibonacci" = + run + {| + let rec fib n = if n < 2 then 1 else fib (n - 1) + fib (n - 2) + let main = fib 6 + |}; + [%expect + {| +.section .text + .globl fib + .type fib, @function +fib: + addi sp, sp, -56 + sd ra, 48(sp) + sd fp, 40(sp) + addi fp, sp, 40 + mv t0, a0 + li t1, 5 + mv a1, a0 + slt a0, t0, t1 + sd a0, -8(fp) + ld t0, -8(fp) + beq t0, zero, else_0 + li a0, 3 + j end_0 +else_0: + mv t0, a1 + li t1, 3 + sub a0, t0, t1 + addi a0, a0, 1 + sd a0, -16(fp) + addi sp, sp, -8 + sd a1, -24(fp) + ld a0, -16(fp) + call fib + sd a0, -32(fp) + ld t0, -24(fp) + li t1, 5 + sub a0, t0, t1 + addi a0, a0, 1 + sd a0, -40(fp) + ld a0, -40(fp) + call fib + sd a0, -48(fp) + ld t0, -32(fp) + ld t1, -48(fp) + add a0, t0, t1 + addi a0, a0, -1 +end_0: + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret + + .globl main + .type main, @function +main: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + li a0, 13 + call fib + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret +|}] +;; + +let%expect_test "is_positive" = + run + {| + let is_positive n = n > 0 + let main = is_positive 42 + |}; + [%expect + {| +.section .text + .globl is_positive + .type is_positive, @function +is_positive: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + mv t0, a0 + li t1, 1 + mv a1, a0 + slt a0, t1, t0 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret + + .globl main + .type main, @function +main: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + li a0, 85 + call is_positive + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret +|}] +;; + +let%expect_test "mul3" = + run + {| + let mul3 a b c = a * b * c + let main = mul3 2 3 4 + |}; + [%expect + {| +.section .text + .globl mul3 + .type mul3, @function +mul3: + addi sp, sp, -24 + sd ra, 16(sp) + sd fp, 8(sp) + addi fp, sp, 8 + mv t0, a0 + mv t1, a1 + mv a3, a0 + srli t0, t0, 1 + addi t1, t1, -1 + mul a0, t0, t1 + addi a0, a0, 1 + sd a0, -8(fp) + ld t0, -8(fp) + mv t1, a2 + srli t0, t0, 1 + addi t1, t1, -1 + mul a0, t0, t1 + addi a0, a0, 1 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret + + .globl main + .type main, @function +main: + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 0 + li a0, 5 + li a1, 7 + li a2, 9 + call mul3 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret +|}] +;; + +let%expect_test "test1" = + run + {| + let large x = if 0<>x then print_int 0 else print_int 1 + let main = + let x = if (if (if 0 + then 0 else (let t42 = print_int 42 in 1)) + then 0 else 1) + then 0 else 1 in + large x + |}; + [%expect + {| + .section .text + .globl large + .type large, @function + large: + addi sp, sp, -24 + sd ra, 16(sp) + sd fp, 8(sp) + addi fp, sp, 8 + li t0, 1 + mv t1, a0 + mv a1, a0 + xor a0, t0, t1 + snez a0, a0 + sd a0, -8(fp) + ld t0, -8(fp) + beq t0, zero, else_0 + addi sp, sp, -8 + sd a1, -16(fp) + li a0, 1 + call print_int + j end_0 + else_0: + addi sp, sp, -8 + sd a1, -16(fp) + li a0, 3 + call print_int + end_0: + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret + + .globl main + .type main, @function + main: + addi sp, sp, -48 + sd ra, 40(sp) + sd fp, 32(sp) + addi fp, sp, 32 + li t0, 1 + beq t0, zero, else_1 + li a0, 1 + j end_1 + else_1: + li a0, 85 + call print_int + sd a0, -8(fp) + li a0, 3 + end_1: + sd a0, -16(fp) + ld t0, -16(fp) + beq t0, zero, else_2 + li a0, 1 + j end_2 + else_2: + li a0, 3 + end_2: + sd a0, -24(fp) + ld t0, -24(fp) + beq t0, zero, else_3 + li a0, 1 + j end_3 + else_3: + li a0, 3 + end_3: + sd a0, -32(fp) + ld a0, -32(fp) + call large + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret +|}] +;; From d83a985816a8861ffe70dda79acd12b0c87cf3ed Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Wed, 25 Feb 2026 12:28:13 +0300 Subject: [PATCH 28/74] rewrite gen_via_apply_nargs and fix bug with inf recursion Signed-off-by: Victoria Ostrovskaya --- EML/lib/backend/ricsv/generator.ml | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/EML/lib/backend/ricsv/generator.ml b/EML/lib/backend/ricsv/generator.ml index 5a567378..9422cd3b 100644 --- a/EML/lib/backend/ricsv/generator.ml +++ b/EML/lib/backend/ricsv/generator.ml @@ -181,10 +181,26 @@ let gen_direct_call dst fname args spilled = ;; let gen_via_apply_nargs dst fname nargs args spilled = - let* () = gen_imm (List.nth_exn arg_regs 0) (ImmediateVar fname) in - let* () = append (li (List.nth_exn arg_regs 1) nargs) in - let regs = List.drop arg_regs 2 in - gen_call_with_regs dst regs args spilled "apply_nargs" + let argv_bytes = nargs * word_size in + let* () = gen_imm a0 (ImmediateVar fname) in + let* () = append (li a1 nargs) in + let* () = append (addi sp sp (-argv_bytes)) in + let* () = + List.foldi args ~init:(return ()) ~f:(fun i acc arg -> + let* () = acc in + let offset = i * word_size in + let src = + match Map.find spilled i with + | Some loc -> load_into_reg t0 loc + | None -> gen_imm t0 arg + in + let* () = src in + append (sd t0 (sp, offset))) + in + let* () = append (mv a2 sp) in + let* () = append (call "eml_applyN") in + let* () = copy_result_to dst in + append (addi sp sp argv_bytes) ;; let rec gen_invocation dst fname args = @@ -220,7 +236,6 @@ and gen_curried_call dst fname _arity first_args rest_args = (ComplexApp (ImmediateVar part_name, List.hd_exn rest_args, List.tl_exn rest_args)) and gen_unit dst = append (li dst (tag_int 0)) -and gen_imm dst imm = gen_imm dst imm and gen_neg dst op = let* () = gen_imm t0 op in From e41cd5933f52b69179e0bf90420e2094fb484d10 Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Sat, 28 Feb 2026 21:28:23 +0300 Subject: [PATCH 29/74] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=B8?= =?UTF-8?q?=D0=BB=20runtime?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- EML/bin/dune | 2 +- EML/bin/main.ml | 52 +++++++++-- EML/bin/runtime.c | 10 -- EML/lib/backend/ricsv/config.ml | 6 +- EML/lib/backend/ricsv/generator.ml | 2 +- EML/lib/runtime/dune | 7 ++ EML/lib/runtime/runtime.c | 143 +++++++++++++++++++++++++++++ EML/tests/dune | 7 ++ EML/tests/riscv.t | 22 +++++ EML/tests/riscv_tests.ml | 114 +++++++++++++++++++++++ 10 files changed, 345 insertions(+), 20 deletions(-) delete mode 100644 EML/bin/runtime.c create mode 100644 EML/lib/runtime/dune create mode 100644 EML/lib/runtime/runtime.c create mode 100644 EML/tests/riscv.t diff --git a/EML/bin/dune b/EML/bin/dune index 5e119f12..57cdf378 100644 --- a/EML/bin/dune +++ b/EML/bin/dune @@ -19,4 +19,4 @@ (cram (package EML) - (deps ./main.exe runtime.c)) + (deps ./main.exe)) diff --git a/EML/bin/main.ml b/EML/bin/main.ml index 9fc57907..c52db13f 100644 --- a/EML/bin/main.ml +++ b/EML/bin/main.ml @@ -3,13 +3,51 @@ (** SPDX-License-Identifier: LGPL-3.0-or-later *) open EML_lib.Frontend.Parser -open EML_lib.Frontend.Ast -open Printf +open EML_lib.Middleend.Anf +open EML_lib.Backend.Ricsv.Runner -let parse_program input = - match parse input with - | Ok ast -> printf "%s\n" (show_program ast) - | Error fail -> printf "Ошибка: %s\n" fail +let compile_to_asm src out_ppf = + match parse src with + | Error e -> + Printf.eprintf "Parse error: %s\n%!" e; + exit 1 + | Ok ast -> + (match anf_program ast with + | Error e -> + Printf.eprintf "ANF error: %s\n%!" e; + exit 1 + | Ok anf -> gen_program out_ppf anf) ;; -let () = parse_program "let x = 1 + 2" +let () = + let args = Sys.argv in + let nargs = Array.length args in + (* Parse command line: [EML.exe] [-fromfile ] [-o ] *) + let src_file = ref None in + let out_file = ref None in + let i = ref 1 in + while !i < nargs do + (match args.(!i) with + | "-fromfile" -> + incr i; + if !i < nargs then src_file := Some args.(!i) + | "-o" -> + incr i; + if !i < nargs then out_file := Some args.(!i) + | _ -> ()); + incr i + done; + let src = + match !src_file with + | Some path -> In_channel.(with_open_text path input_all) + | None -> In_channel.input_all stdin + in + match !out_file with + | None -> compile_to_asm src Format.std_formatter + | Some path -> + let oc = open_out path in + let ppf = Format.formatter_of_out_channel oc in + compile_to_asm src ppf; + Format.pp_print_flush ppf (); + close_out oc +;; diff --git a/EML/bin/runtime.c b/EML/bin/runtime.c deleted file mode 100644 index 9c00aa03..00000000 --- a/EML/bin/runtime.c +++ /dev/null @@ -1,10 +0,0 @@ -/* Copyright 2023-2024, Kakadu and contributors */ -/* SPDX-License-Identifier: LGPL-3.0-or-later */ - -#include -#include - -void print_int(int64_t n) { - putchar(n); - fflush(stdout); -} \ No newline at end of file diff --git a/EML/lib/backend/ricsv/config.ml b/EML/lib/backend/ricsv/config.ml index ce535dd6..35187904 100644 --- a/EML/lib/backend/ricsv/config.ml +++ b/EML/lib/backend/ricsv/config.ml @@ -10,5 +10,9 @@ type primitive = } let primitive_arities : primitive list = - [ { name = "print_int"; arity = 1 }; { name = "print_endline"; arity = 1 } ] + [ { name = "print_int"; arity = 1 } + ; { name = "print_endline"; arity = 1 } + ; { name = "alloc_closure"; arity = 2 } + ; { name = "eml_applyN"; arity = 3 } + ] ;; diff --git a/EML/lib/backend/ricsv/generator.ml b/EML/lib/backend/ricsv/generator.ml index 9422cd3b..7e60f65f 100644 --- a/EML/lib/backend/ricsv/generator.ml +++ b/EML/lib/backend/ricsv/generator.ml @@ -376,4 +376,4 @@ let gen_program ppf (analysis : analysis_result) = | Error msg -> Stdlib.Format.eprintf "Codegen error: %s\n%!" msg; Stdlib.exit 1 -;; +;; \ No newline at end of file diff --git a/EML/lib/runtime/dune b/EML/lib/runtime/dune new file mode 100644 index 00000000..aba9047d --- /dev/null +++ b/EML/lib/runtime/dune @@ -0,0 +1,7 @@ +(rule + (targets rv64_runtime.a) + (deps runtime.c) + (action + (progn + (run riscv64-linux-gnu-gcc -march=rv64gc -mabi=lp64d -O2 -c %{dep:runtime.c} -o rv64_runtime.o) + (run riscv64-linux-gnu-ar rcs %{targets} rv64_runtime.o)))) diff --git a/EML/lib/runtime/runtime.c b/EML/lib/runtime/runtime.c new file mode 100644 index 00000000..caad3b6a --- /dev/null +++ b/EML/lib/runtime/runtime.c @@ -0,0 +1,143 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define TO_ML_INTEGER(n) ((uint64_t)((uint64_t)(n) >> 1)) + +void print_int(long n) { printf("%ld", TO_ML_INTEGER(n)); } + +#define TAG_CLOSURE 247 +#define RISCV_REG_ARGS 8 + + + +static void *eml_alloc(size_t size_in_bytes, uint64_t tag) { +#ifdef ENABLE_GC + uint64_t size_in_words = + ((uint64_t)size_in_bytes + sizeof(uint64_t) - 1) / sizeof(uint64_t); + return gc_alloc(size_in_words, tag); +#else + return malloc(size_in_bytes); +#endif +} + +typedef struct { + void *code; + int64_t arity; + int64_t received; + void *args[]; +} closure; + + +closure *alloc_closure(void *code, int64_t arity) { + size_t size_in_bytes = sizeof(closure) + arity * sizeof(void *); + + closure *c = (closure *)eml_alloc(size_in_bytes, TAG_CLOSURE); + + c->code = code; + c->arity = arity; + c->received = 0; + + memset(c->args, 0, sizeof(void *) * arity); + return c; +} + +static closure *copy_closure(const closure *src) { + size_t total_size = sizeof(closure) + src->arity * sizeof(void *); + + closure *dst = (closure *)eml_alloc(total_size, TAG_CLOSURE); + + memcpy(dst, src, total_size); + return dst; +} + + +static void *call_closure_full(closure *c, void **args) { + int64_t arity = c->arity; + int64_t args_in_stack = (arity > RISCV_REG_ARGS) ? (arity - RISCV_REG_ARGS) : 0; + size_t storage_for_stack_args = (size_t)args_in_stack * sizeof(void *); + void **stack_args = (args_in_stack > 0) ? args + RISCV_REG_ARGS : NULL; + void *result; + + asm volatile( + "mv t0, %[storage_for_stack_args]\n" + "sub sp, sp, t0\n" + + "beqz %[args_in_stack], .Lend_stack_push\n" + "mv t1, sp\n" + "mv t2, %[stack_args]\n" + "mv t3, %[args_in_stack]\n" + "li t4, 0\n" + ".Lloop_stack_push:\n" + "beq t4, t3, .Lend_stack_push\n" + "slli t5, t4, 3\n" + "add t6, t2, t5\n" + "ld t0, 0(t6)\n" + "sd t0, 0(t1)\n" + "addi t1, t1, 8\n" + "addi t4, t4, 1\n" + "j .Lloop_stack_push\n" + ".Lend_stack_push:\n" + + "mv a0, %[a0]\n" + "mv a1, %[a1]\n" + "mv a2, %[a2]\n" + "mv a3, %[a3]\n" + "mv a4, %[a4]\n" + "mv a5, %[a5]\n" + "mv a6, %[a6]\n" + "mv a7, %[a7]\n" + + "mv t6, %[fn]\n" + "jalr ra, t6, 0\n" + + "mv t0, %[storage_for_stack_args]\n" + "add sp, sp, t0\n" + "mv %[result], a0\n" + + : [result] "=r"(result) + : [fn] "r"(c->code), + [a0] "r"(args[0]), [a1] "r"(args[1]), + [a2] "r"(args[2]), [a3] "r"(args[3]), + [a4] "r"(args[4]), [a5] "r"(args[5]), + [a6] "r"(args[6]), [a7] "r"(args[7]), + [stack_args] "r"(stack_args), [args_in_stack] "r"(args_in_stack), + [storage_for_stack_args] "r"(storage_for_stack_args) + : "t0", "t1", "t2", "t3", "t4", "t5", "t6", + "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "memory"); + + return result; +} + +void *eml_applyN(closure *c, int64_t argc, void **argv) { + + int64_t all_receive_args = c->received + argc; + + if (all_receive_args == c->arity) { + int64_t total_count_args = c->arity; + void **args = (void **)eml_alloc(total_count_args * sizeof(void *), TAG_CLOSURE); + + for (int64_t i = 0; i < c->received; i++) { + args[i] = c->args[i]; + } + for (int64_t i = 0; i < argc; i++) { + args[c->received + i] = argv[i]; + } + return call_closure_full(c, args); + } + + closure *partial = copy_closure(c); + + for (int64_t i = 0; i < argc; i++) { + partial->args[partial->received++] = argv[i]; + } + + return partial; +} diff --git a/EML/tests/dune b/EML/tests/dune index 0572289f..fce0abe0 100644 --- a/EML/tests/dune +++ b/EML/tests/dune @@ -7,3 +7,10 @@ (inline_tests) (instrumentation (backend bisect_ppx))) + +(cram + (applies_to riscv) + (deps + (package EML) + (file ../lib/runtime/rv64_runtime.a) + (source_tree manytests))) diff --git a/EML/tests/riscv.t b/EML/tests/riscv.t new file mode 100644 index 00000000..d777534e --- /dev/null +++ b/EML/tests/riscv.t @@ -0,0 +1,22 @@ +Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev +SPDX-License-Identifier: LGPL-3.0-or-later + + $ EML -o fact.s < let rec fac x = if x = 0 then 1 else x * fac (x - 1) + > + > let main = print_int (fac 4) + + $ riscv64-linux-gnu-as -march=rv64gc fact.s -o fact.o + $ riscv64-linux-gnu-gcc fact.o ../lib/runtime/rv64_runtime.a -o fact.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64 ./fact.exe + 24 + + $ EML -o fibo.s < let rec fib x = if x <= 1 then x else fib (x - 1) + fib (x - 2) + > + > let main = print_int (fib 6) + + $ riscv64-linux-gnu-as -march=rv64gc fibo.s -o fibo.o + $ riscv64-linux-gnu-gcc fibo.o ../lib/runtime/rv64_runtime.a -o fibo.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64 ./fibo.exe + 8 diff --git a/EML/tests/riscv_tests.ml b/EML/tests/riscv_tests.ml index d60b0504..737f7626 100644 --- a/EML/tests/riscv_tests.ml +++ b/EML/tests/riscv_tests.ml @@ -536,3 +536,117 @@ let%expect_test "test1" = ret |}] ;; + + +let%expect_test "codegen closure fn with 10 arg" = + run + {| + let add a b c d e f g = a + b + c + d + e + f + g + + let main = + let temp1 = add 1 1 1 1 in + let temp2 = temp1 1 1 in + let temp3 = temp2 1 1 in + print_int temp3 + ;; + |}; + [%expect + {| + .section .text + .globl add + .type add, @function + add: + addi sp, sp, -56 + sd ra, 48(sp) + sd fp, 40(sp) + addi fp, sp, 40 + mv t0, a0 + mv t1, a1 + mv a7, a0 + add a0, t0, t1 + addi a0, a0, -1 + sd a0, -8(fp) + ld t0, -8(fp) + mv t1, a2 + add a0, t0, t1 + addi a0, a0, -1 + sd a0, -16(fp) + ld t0, -16(fp) + mv t1, a3 + add a0, t0, t1 + addi a0, a0, -1 + sd a0, -24(fp) + ld t0, -24(fp) + mv t1, a4 + add a0, t0, t1 + addi a0, a0, -1 + sd a0, -32(fp) + ld t0, -32(fp) + mv t1, a5 + add a0, t0, t1 + addi a0, a0, -1 + sd a0, -40(fp) + ld t0, -40(fp) + mv t1, a6 + add a0, t0, t1 + addi a0, a0, -1 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + ret + + .globl main + .type main, @function + main: + addi sp, sp, -40 + sd ra, 32(sp) + sd fp, 24(sp) + addi fp, sp, 24 + la a0, add + li a1, 7 + call alloc_closure + li a1, 4 + addi sp, sp, -32 + li t0, 3 + sd t0, 0(sp) + li t0, 3 + sd t0, 8(sp) + li t0, 3 + sd t0, 16(sp) + li t0, 3 + sd t0, 24(sp) + mv a2, sp + call eml_applyN + addi sp, sp, 32 + sd a0, -8(fp) + ld a0, -8(fp) + li a1, 2 + addi sp, sp, -16 + li t0, 3 + sd t0, 0(sp) + li t0, 3 + sd t0, 8(sp) + mv a2, sp + call eml_applyN + addi sp, sp, 16 + sd a0, -16(fp) + ld a0, -16(fp) + li a1, 2 + addi sp, sp, -16 + li t0, 3 + sd t0, 0(sp) + li t0, 3 + sd t0, 8(sp) + mv a2, sp + call eml_applyN + addi sp, sp, 16 + sd a0, -24(fp) + ld a0, -24(fp) + call print_int + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret + |}] +;; From 3c237009ef5473c45b8516bf451fd449ca0b7593 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Sat, 7 Mar 2026 18:56:56 +0300 Subject: [PATCH 30/74] update riscv Signed-off-by: Victoria Ostrovskaya --- EML/lib/backend/ricsv/analysis.ml | 104 +++++++--- EML/lib/backend/ricsv/architecture.ml | 59 +++--- EML/lib/backend/ricsv/auxillary.ml | 67 +++++-- EML/lib/backend/ricsv/config.ml | 18 -- EML/lib/backend/ricsv/generator.ml | 238 ++++++++++++++--------- EML/lib/backend/ricsv/generator_state.ml | 152 +++++++++------ EML/lib/backend/ricsv/runner.ml | 4 +- EML/lib/frontend/ast.ml | 1 + EML/lib/frontend/binutils.ml | 19 ++ 9 files changed, 404 insertions(+), 258 deletions(-) delete mode 100644 EML/lib/backend/ricsv/config.ml create mode 100644 EML/lib/frontend/binutils.ml diff --git a/EML/lib/backend/ricsv/analysis.ml b/EML/lib/backend/ricsv/analysis.ml index fdfb5dbc..07d3a279 100644 --- a/EML/lib/backend/ricsv/analysis.ml +++ b/EML/lib/backend/ricsv/analysis.ml @@ -2,20 +2,21 @@ (** SPDX-License-Identifier: LGPL-3.0-or-later *) -open Base open Frontend.Ast open Middleend.Anf type function_layout = { func_name : string + ; asm_name : string ; params : immediate list ; body : anf_expr ; slots_count : int } type analysis_result = - { arity_map : (string, int, String.comparator_witness) Map.t + { arity_map : (string, int, Base.String.comparator_witness) Base.Map.t ; functions : function_layout list + ; resolve : int -> string -> (string * int) option } let rec slots_in_imm = function @@ -31,15 +32,21 @@ and slots_in_cexpr = function acc + slots_in_imm e) | ComplexField (imm, _) -> slots_in_imm imm | ComplexList imm_list -> - List.fold_left imm_list ~init:0 ~f:(fun acc e -> acc + slots_in_imm e) + let n = List.length imm_list in + n + List.fold_left (fun acc e -> acc + slots_in_imm e) 0 imm_list | ComplexApp (first, second, rest) -> - List.fold_left (first :: second :: rest) ~init:0 ~f:(fun acc e -> - acc + slots_in_imm e) + (* +1 for curried-call intermediate; +1 per arg for spill_dangerous_args. + +8 for spill_caller_saved_vars_to_frame at start of every invocation (can spill a0-a7). + +N when nargs >= 2: margin so partial stays above argv (confirmed: overwrite → eml_applyN gets c=0x3). *) + let args = first :: second :: rest in + let nargs = List.length args in + let extra = if nargs >= 2 then 12 else 0 in + 1 + 8 + nargs + extra + List.fold_left (fun acc e -> acc + slots_in_imm e) 0 args | ComplexOption None -> 0 | ComplexOption (Some imm) -> slots_in_imm imm | ComplexLambda (_, body) -> slots_in_anf body | ComplexBranch (cond, then_e, else_e) -> - slots_in_imm cond + slots_in_anf then_e + slots_in_anf else_e + 1 + slots_in_imm cond + slots_in_anf then_e + slots_in_anf else_e and slots_in_anf = function | AnfExpr cexp -> slots_in_cexpr cexp @@ -49,9 +56,11 @@ and slots_in_anf = function let rec params_of_anf = function | AnfExpr (ComplexLambda (pats, body)) -> let imms = - List.filter_map pats ~f:(function - | PatVariable id -> Some (ImmediateVar id) - | _ -> None) + List.filter_map + (function + | PatVariable id -> Some (ImmediateVar id) + | _ -> None) + pats in let rest, inner = params_of_anf body in imms @ rest, inner @@ -59,25 +68,74 @@ let rec params_of_anf = function ;; let arity_map_of_program (program : anf_program) = - List.fold + List.fold_left + (fun map -> function + | AnfValue (_, (fid, arity, _), and_binds) -> + let map = Base.Map.set map ~key:fid ~data:arity in + List.fold_left + (fun acc (id, arity, _) -> Base.Map.set acc ~key:id ~data:arity) + map + and_binds + | _ -> map) + (Base.Map.empty (module Base.String)) program - ~init:(Map.empty (module String)) - ~f:(fun map -> function - | AnfValue (_, (fid, arity, _), and_binds) -> - let map = Map.set map ~key:fid ~data:arity in - List.fold and_binds ~init:map ~f:(fun acc (id, arity, _) -> - Map.set acc ~key:id ~data:arity) - | _ -> map) ;; let analyze (program : anf_program) = let arity_map = arity_map_of_program program in + let raw = + List.filter_map + (function + | AnfValue (_, (func_name, _arity, body), _) -> + let params, body = params_of_anf body in + Some (func_name, params, body, slots_in_anf body) + | AnfEval _ -> None) + program + in + let counts = ref (Base.Map.empty (module Base.String)) in + let mangle_reserved name = if String.equal name "_start" then "eml_start" else name in + let asm_name name = + let base = mangle_reserved name in + let n = Base.Map.find !counts name |> Option.value ~default:0 in + counts := Base.Map.set !counts ~key:name ~data:(n + 1); + if n = 0 then base else base ^ "_" ^ Int.to_string n + in let functions = - List.filter_map program ~f:(function - | AnfValue (_, (func_name, _arity, body), _) -> - let params, body = params_of_anf body in - Some { func_name; params; body; slots_count = slots_in_anf body } - | AnfEval _ -> None) + List.map + (fun (func_name, params, body, slots_count) -> + { func_name; asm_name = asm_name func_name; params; body; slots_count }) + raw + in + let has_main = List.exists (fun fn -> String.equal fn.func_name "main") functions in + let functions = + if has_main + then functions + else ( + let synthetic_main = + { func_name = "main" + ; asm_name = "main" + ; params = [] + ; body = AnfExpr (ComplexImmediate (ImmediateConst (ConstInt 0))) + ; slots_count = 0 + } + in + functions @ [ synthetic_main ]) + in + let arity_map = + if has_main then arity_map else Base.Map.set arity_map ~key:"main" ~data:0 + in + let resolver func_index var_name = + let rec find i = + if i < 0 + then None + else ( + match Base.List.nth functions i with + | None -> None + | Some fn when String.equal fn.func_name var_name -> + Some (fn.asm_name, List.length fn.params) + | Some _ -> find (i - 1)) + in + find (func_index - 1) in - { arity_map; functions } + { arity_map; functions; resolve = resolver } ;; diff --git a/EML/lib/backend/ricsv/architecture.ml b/EML/lib/backend/ricsv/architecture.ml index fe7ecf96..b10ec378 100644 --- a/EML/lib/backend/ricsv/architecture.ml +++ b/EML/lib/backend/ricsv/architecture.ml @@ -2,9 +2,7 @@ (** SPDX-License-Identifier: LGPL-3.0-or-later *) -(** RISC-V: ISA, platform config, and codegen API in one module. *) - -open Base +open Format module Riscv_backend = struct type reg = @@ -18,10 +16,6 @@ module Riscv_backend = struct type offset = reg * int - type location = - | Loc_reg of reg - | Loc_mem of offset - type instr = | Addi of reg * reg * int (* сложение с константой: rd = rs + imm *) | Ld of reg * offset (* загрузка 8 байт из памяти: rd = mem[base + offset] *) @@ -44,11 +38,10 @@ module Riscv_backend = struct | Xori of reg * reg * int (* xor регистра с константой: rd = rs ^ imm *) | Xor of reg * reg * reg (* xor двух регистров: rd = rs1 ^ rs2 *) | Mul of reg * reg * reg (* умножение: rd = rs1 * rs2 *) + | Div of reg * reg * reg (* целочисленное деление: rd = rs1 / rs2 *) | Srli of reg * reg * int (* логический сдвиг вправо на константу: rd = rs >>> imm *) - let pp_reg ppf = - let open Stdlib.Format in - function + let pp_reg ppf = function | Zero -> fprintf ppf "zero" | RA -> fprintf ppf "ra" | SP -> fprintf ppf "sp" @@ -58,17 +51,14 @@ module Riscv_backend = struct | S n -> fprintf ppf "s%d" n ;; - let pp_offset ppf offset = - Stdlib.Format.fprintf ppf "%d(%a)" (snd offset) pp_reg (fst offset) - ;; + let pp_offset ppf offset = fprintf ppf "%d(%a)" (snd offset) pp_reg (fst offset) - let pp_instr ppf = - let open Stdlib.Format in - function + let pp_instr ppf = function | Addi (rd, rs, imm) -> fprintf ppf "addi %a, %a, %d" pp_reg rd pp_reg rs imm | Add (rd, rs1, rs2) -> fprintf ppf "add %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 | Sub (rd, rs1, rs2) -> fprintf ppf "sub %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 | Mul (rd, rs1, rs2) -> fprintf ppf "mul %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Div (rd, rs1, rs2) -> fprintf ppf "div %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 | Srli (rd, rs1, imm) -> fprintf ppf "srli %a, %a, %d" pp_reg rd pp_reg rs1 imm | Xori (rd, rs1, imm) -> fprintf ppf "xori %a, %a, %d" pp_reg rd pp_reg rs1 imm | Xor (rd, rs1, rs2) -> fprintf ppf "xor %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 @@ -102,23 +92,7 @@ module Riscv_backend = struct let a7 = A 7 let t0 = T 0 let t1 = T 1 - - (* ----- Platform (RISC-V layout) ----- *) - let arg_regs = [ a0; a1; a2; a3; a4; a5; a6; a7 ] - let candidate_regs_for_spill = arg_regs - let arg_regs_count = 8 - let word_size = 8 - let frame_header_size = 2 * word_size - let saved_fp_offset = 0 - let saved_ra_offset = word_size let result_reg = a0 - - let is_caller_saved = function - | A _ | T _ -> true - | Zero | RA | SP | S _ -> false - ;; - - (* *) let addi rd rs imm = [ Addi (rd, rs, imm) ] let ld rd ofs = [ Ld (rd, ofs) ] let sd rs ofs = [ Sd (rs, ofs) ] @@ -138,8 +112,23 @@ module Riscv_backend = struct let xori rd rs imm = [ Xori (rd, rs, imm) ] let xor rd rs1 rs2 = [ Xor (rd, rs1, rs2) ] let mul rd rs1 rs2 = [ Mul (rd, rs1, rs2) ] + let div rd rs1 rs2 = [ Div (rd, rs1, rs2) ] let srli rd rs imm = [ Srli (rd, rs, imm) ] let add_tag_items dst delta = [ Addi (dst, dst, delta) ] + let arg_regs = [ a0; a1; a2; a3; a4; a5; a6; a7 ] + let candidate_regs_for_spill = arg_regs + let arg_regs_count = 8 + let word_size = 8 + + (* RISC-V ABI: stack must be 16-byte aligned at call boundaries *) + let stack_align = 16 + let frame_header_size = 2 * word_size + let saved_fp_offset = 0 + let saved_ra_offset = word_size + + type location = + | Loc_reg of reg + | Loc_mem of offset let prologue ~name ~stack_size = let ra_slot = sp, stack_size - saved_ra_offset in @@ -165,8 +154,8 @@ module Riscv_backend = struct let format_item ppf i = (match i with - | Label _ -> Stdlib.Format.fprintf ppf "%a" pp_instr i - | _ -> Stdlib.Format.fprintf ppf " %a" pp_instr i); - Stdlib.Format.fprintf ppf "\n" + | Label _ -> fprintf ppf "%a" pp_instr i + | _ -> fprintf ppf " %a" pp_instr i); + fprintf ppf "\n" ;; end diff --git a/EML/lib/backend/ricsv/auxillary.ml b/EML/lib/backend/ricsv/auxillary.ml index 90a9e966..889de3f1 100644 --- a/EML/lib/backend/ricsv/auxillary.ml +++ b/EML/lib/backend/ricsv/auxillary.ml @@ -2,33 +2,46 @@ (** SPDX-License-Identifier: LGPL-3.0-or-later *) -open Base -open Frontend.Ast -open Architecture.Riscv_backend open Middleend.Anf +open Architecture +open Riscv_backend +open Generator_state +open Frontend.Ast + +let is_caller_saved = function + | A _ | T _ -> true + | Zero | RA | SP | S _ -> false +;; + +let to_tagged_bool dst = add dst dst dst @ add_tag_items dst 1 let compare_ordering dst r1 r2 ~invert = let base = slt dst r1 r2 in - if invert then base @ xori dst dst 1 else base + let tagged = if invert then base @ xori dst dst 1 else base in + tagged @ to_tagged_bool dst ;; let compare_eq_ne dst r1 r2 ~is_eq = let base = xor dst r1 r2 in - if is_eq then base @ seqz dst dst else base @ snez dst dst + let tagged = if is_eq then base @ seqz dst dst else base @ snez dst dst in + tagged @ to_tagged_bool dst ;; -let bin_op dst op r1 r2 = +let bin_op dst op r1 r2 : (instr list, string) result = match op with - | "+" -> add dst r1 r2 @ add_tag_items dst (-1) - | "-" -> sub dst r1 r2 @ add_tag_items dst 1 - | "*" -> srli r1 r1 1 @ addi r2 r2 (-1) @ mul dst r1 r2 @ add_tag_items dst 1 - | "<" -> compare_ordering dst r1 r2 ~invert:false - | ">" -> compare_ordering dst r2 r1 ~invert:false - | "<=" -> compare_ordering dst r2 r1 ~invert:true - | ">=" -> compare_ordering dst r1 r2 ~invert:true - | "=" -> compare_eq_ne dst r1 r2 ~is_eq:true - | "<>" -> compare_eq_ne dst r1 r2 ~is_eq:false - | _ -> failwith ("unsupported binary operator: " ^ op) + | "+" -> Ok (add dst r1 r2 @ add_tag_items dst (-1)) + | "-" -> Ok (sub dst r1 r2 @ add_tag_items dst 1) + | "*" -> Ok (srli r1 r1 1 @ addi r2 r2 (-1) @ mul dst r1 r2 @ add_tag_items dst 1) + | "/" -> + Ok + (srli r1 r1 1 @ srli r2 r2 1 @ div dst r1 r2 @ add dst dst dst @ add_tag_items dst 1) + | "<" -> Ok (compare_ordering dst r1 r2 ~invert:false) + | ">" -> Ok (compare_ordering dst r2 r1 ~invert:false) + | "<=" -> Ok (compare_ordering dst r2 r1 ~invert:true) + | ">=" -> Ok (compare_ordering dst r1 r2 ~invert:true) + | "=" -> Ok (compare_eq_ne dst r1 r2 ~is_eq:true) + | "<>" -> Ok (compare_eq_ne dst r1 r2 ~is_eq:false) + | _ -> Error ("unsupported binary operator: " ^ op) ;; let bin_oper_to_string = function @@ -47,13 +60,27 @@ let bin_oper_to_string = function ;; let vars_in_caller_saved_regs env = - Map.to_alist env - |> List.filter_map ~f:(fun (name, loc) -> + Base.Map.to_alist env + |> List.filter_map (fun (name, loc) -> match loc with | Loc_reg r when is_caller_saved r -> Some (name, r) | _ -> None) ;; +let indices_of_args_to_spill state exps = + let is_rewrites_result_regs state = function + | ImmediateConst _ -> false + | ImmediateVar id -> Base.Map.mem state.arity_map id + in + List.rev + (snd + (List.fold_left + (fun (i, acc) arg -> + i + 1, if is_rewrites_result_regs state arg then i :: acc else acc) + (0, []) + exps)) +;; + type call_style = | Nullary of string | Curry_chain of @@ -79,8 +106,8 @@ let classify_call ~nargs ~callee_arity_opt ~fname ~args : call_style = Curry_chain { fname ; arity - ; first_args = List.take args arity - ; rest_args = List.drop args arity + ; first_args = Base.List.take args arity + ; rest_args = Base.List.drop args arity } | Some arity when nargs = arity -> Direct { fname; args } | _ -> Via_apply_nargs { fname; nargs; args } diff --git a/EML/lib/backend/ricsv/config.ml b/EML/lib/backend/ricsv/config.ml deleted file mode 100644 index 35187904..00000000 --- a/EML/lib/backend/ricsv/config.ml +++ /dev/null @@ -1,18 +0,0 @@ -(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) - -(** SPDX-License-Identifier: LGPL-3.0-or-later *) - -(** Primitives the generated code can call. *) - -type primitive = - { name : string - ; arity : int - } - -let primitive_arities : primitive list = - [ { name = "print_int"; arity = 1 } - ; { name = "print_endline"; arity = 1 } - ; { name = "alloc_closure"; arity = 2 } - ; { name = "eml_applyN"; arity = 3 } - ] -;; diff --git a/EML/lib/backend/ricsv/generator.ml b/EML/lib/backend/ricsv/generator.ml index 7e60f65f..912586d9 100644 --- a/EML/lib/backend/ricsv/generator.ml +++ b/EML/lib/backend/ricsv/generator.ml @@ -2,14 +2,14 @@ (** SPDX-License-Identifier: LGPL-3.0-or-later *) -open Base -open Stdlib.Format +open Format open Frontend.Ast -open Architecture.Riscv_backend +open Middleend.Anf +open Architecture open Analysis +open Riscv_backend open Generator_state open Auxillary -open Middleend.Anf let alloc_frame_slot = let modify_frame_offset f = @@ -37,12 +37,17 @@ let load_into_reg dst_reg loc = return () ;; -let caller_saved_locs env = - Map.to_alist env - |> List.filter_map ~f:(fun (name, loc) -> - match loc with - | Loc_reg r when is_caller_saved r -> Some (name, r) - | _ -> None) +(** Spill function parameters to the frame in param order (index 0 → first slot). + Ensures env maps each param name to a consistent slot so (self l) loads self, not l. *) +let spill_params_to_frame params_reg = + Base.List.foldi params_reg ~init:(return ()) ~f:(fun i acc p -> + let* () = acc in + match p with + | ImmediateVar name -> + let r = List.nth arg_regs i in + let* slot = store_reg_into_frame r in + modify_env (fun env -> Base.Map.set env ~key:name ~data:slot) + | _ -> return ()) ;; let spill_caller_saved_vars_to_frame = @@ -54,7 +59,7 @@ let spill_caller_saved_vars_to_frame = | [] -> return env | (name, r) :: rest -> let* new_loc = store_reg_into_frame r in - spill (Map.set env ~key:name ~data:new_loc) rest + spill (Base.Map.set env ~key:name ~data:new_loc) rest in let* new_env = spill env vars in set_env new_env @@ -62,12 +67,13 @@ let spill_caller_saved_vars_to_frame = let evacuate_reg dst = let is_reg_used env r = - Map.exists env ~f:(function + Base.Map.exists env ~f:(fun loc -> + match loc with | Loc_reg r' -> equal_reg r r' | Loc_mem _ -> false) in - let rewrite_loc_in_env env ~from_reg ~to_loc = - Map.map env ~f:(function + let rewrite_loc_in_env env from_reg to_loc = + Base.Map.map env ~f:(function | Loc_reg r when equal_reg r from_reg -> to_loc | loc -> loc) in @@ -75,70 +81,81 @@ let evacuate_reg dst = if not (is_reg_used env dst) then return () else ( - match List.find candidate_regs_for_spill ~f:(fun r -> not (is_reg_used env r)) with + match List.find_opt (fun r -> not (is_reg_used env r)) candidate_regs_for_spill with | Some new_reg -> let* () = append (mv new_reg dst) in - let new_env = rewrite_loc_in_env env ~from_reg:dst ~to_loc:(Loc_reg new_reg) in + let new_env = rewrite_loc_in_env env dst (Loc_reg new_reg) in set_env new_env | None -> let* new_loc = store_reg_into_frame dst in - let new_env = rewrite_loc_in_env env ~from_reg:dst ~to_loc:new_loc in + let new_env = rewrite_loc_in_env env dst new_loc in set_env new_env) ;; +let resolve_call_symbol name = + let* st = get in + match st.symbol_resolve st.current_func_index name with + | Some (asm, _) -> return asm + | None -> return name +;; + let gen_imm dst = function | ImmediateConst (ConstInt n) -> append (li dst (tag_int n)) | ImmediateConst (ConstBool b) -> append (li dst (if b then tag_int 1 else tag_int 0)) - | ImmediateConst (ConstChar c) -> append (li dst (tag_int (Char.to_int c))) + | ImmediateConst (ConstChar c) -> append (li dst (tag_int (Char.code c))) | ImmediateConst (ConstString _) -> fail "String constants not yet supported in codegen" | ImmediateVar name -> let* env = get_env in - (match Map.find env name with + (match Base.Map.find env name with | Some loc -> load_into_reg dst loc | None -> let* state = get in - (match Map.find state.arity_map name with - | Some 0 -> append (call name) - | Some arity -> - let* () = append (la result_reg name) in - let* () = append (li (List.nth_exn arg_regs 1) arity) in - append (call "alloc_closure") - | _ -> fail ("unbound variable: " ^ name))) + let sym, arity = + match state.symbol_resolve state.current_func_index name with + | Some (asm_name, a) -> asm_name, a + | None -> + (match Base.Map.find state.arity_map name with + | Some a -> name, a + | None -> name, -1) + in + if arity < 0 + then fail ("unbound variable: " ^ name) + else ( + match arity with + | 0 -> append (call sym) + | n -> + let* () = append (la result_reg sym) in + let* () = append (li (List.nth arg_regs 1) n) in + append (call "alloc_closure"))) ;; let copy_result_to dst = if equal_reg dst result_reg then return () else append (mv dst result_reg) ;; -let indices_of_args_to_spill state exps = - let is_rewrites_result_regs state = function - | ImmediateConst _ -> false - | ImmediateVar id -> Map.mem state.arity_map id - in - List.filter_mapi exps ~f:(fun i arg -> - if is_rewrites_result_regs state arg then Some i else None) -;; - let spill_dangerous_args state exps = let dangerous_idxs = indices_of_args_to_spill state exps in let spill_slots = List.length dangerous_idxs * word_size in let* () = if spill_slots > 0 then append (addi sp sp (-spill_slots)) else return () in - List.foldi exps ~init:(return Map.Poly.empty) ~f:(fun i acc arg -> - let* spilled = acc in - if List.mem dangerous_idxs i ~equal:Int.equal - then - let* () = gen_imm result_reg arg in - let* loc = store_reg_into_frame result_reg in - return (Map.set spilled ~key:i ~data:loc) - else return spilled) + Base.List.foldi + exps + ~init:(return (Base.Map.empty (module Base.Int))) + ~f:(fun i acc arg -> + let* spilled = acc in + if List.mem i dangerous_idxs + then + let* () = gen_imm result_reg arg in + let* loc = store_reg_into_frame result_reg in + return (Base.Map.add_exn spilled ~key:i ~data:loc) + else return spilled) ;; let load_exps_into_regs spilled_locs arg_regs exps = let n = min (List.length exps) (List.length arg_regs) in - List.foldi (List.take exps n) ~init:(return ()) ~f:(fun i acc arg -> + Base.List.foldi (Base.List.take exps n) ~init:(return ()) ~f:(fun i acc arg -> let* () = acc in - let reg = List.nth_exn arg_regs i in - match Map.find spilled_locs i with + let reg = List.nth arg_regs i in + match Base.Map.find spilled_locs i with | Some loc -> load_into_reg reg loc | None -> gen_imm reg arg) ;; @@ -151,7 +168,7 @@ let push_stack_args stack_args = let stack_bytes = n * word_size in let* () = append (addi sp sp (-stack_bytes)) in let* () = - List.foldi stack_args ~init:(return ()) ~f:(fun i acc arg -> + Base.List.foldi stack_args ~init:(return ()) ~f:(fun i acc arg -> let* () = acc in let offset = i * word_size in let* () = gen_imm t0 arg in @@ -162,22 +179,22 @@ let push_stack_args stack_args = let gen_call_with_regs dst regs args spilled symbol = let* () = load_exps_into_regs spilled regs args in - let stack_args = List.drop args (List.length regs) in + let stack_args = Base.List.drop args (List.length regs) in let* reserved = push_stack_args stack_args in let* () = append (call symbol) in let* () = copy_result_to dst in if reserved > 0 then append (addi sp sp reserved) else return () ;; -(* let foo = ... in - foo () *) let gen_nullary dst fname = - let* () = append (call fname) in + let* sym = resolve_call_symbol fname in + let* () = append (call sym) in copy_result_to dst ;; let gen_direct_call dst fname args spilled = - gen_call_with_regs dst arg_regs args spilled fname + let* sym = resolve_call_symbol fname in + gen_call_with_regs dst arg_regs args spilled sym ;; let gen_via_apply_nargs dst fname nargs args spilled = @@ -186,11 +203,11 @@ let gen_via_apply_nargs dst fname nargs args spilled = let* () = append (li a1 nargs) in let* () = append (addi sp sp (-argv_bytes)) in let* () = - List.foldi args ~init:(return ()) ~f:(fun i acc arg -> + Base.List.foldi args ~init:(return ()) ~f:(fun i acc arg -> let* () = acc in let offset = i * word_size in let src = - match Map.find spilled i with + match Base.Map.find spilled i with | Some loc -> load_into_reg t0 loc | None -> gen_imm t0 arg in @@ -208,7 +225,7 @@ let rec gen_invocation dst fname args = let* state = get in let* spilled = spill_dangerous_args state args in let nargs = List.length args in - let callee_arity_opt = Map.find state.arity_map fname in + let callee_arity_opt = Base.Map.find state.arity_map fname in let style = classify_call ~nargs ~callee_arity_opt ~fname ~args in match style with | Nullary name -> gen_nullary dst name @@ -219,21 +236,25 @@ let rec gen_invocation dst fname args = gen_via_apply_nargs dst fn n a spilled and gen_curried_call dst fname _arity first_args rest_args = - let fresh_partial_name = - let* id = fresh in - return (Printf.sprintf "part_%d" id) - in - let* part_name = fresh_partial_name in + let* part_name = fresh_partial in let* () = gen_cexpr dst - (ComplexApp (ImmediateVar fname, List.hd_exn first_args, List.tl_exn first_args)) + (ComplexApp (ImmediateVar fname, List.hd first_args, List.tl first_args)) in let* loc = store_reg_into_frame dst in - let* () = modify_env (Map.set ~key:part_name ~data:loc) in - gen_cexpr - dst - (ComplexApp (ImmediateVar part_name, List.hd_exn rest_args, List.tl_exn rest_args)) + let* () = modify_env (fun env -> Base.Map.set env ~key:part_name ~data:loc) in + (* Apply each rest_arg one at a time (eml_applyN expects one application per call) *) + let rec apply_rest = function + | [] -> return () + | [ arg ] -> gen_cexpr dst (ComplexApp (ImmediateVar part_name, arg, [])) + | arg :: rest -> + let* () = gen_cexpr dst (ComplexApp (ImmediateVar part_name, arg, [])) in + let* loc' = store_reg_into_frame dst in + let* () = modify_env (fun env -> Base.Map.set env ~key:part_name ~data:loc') in + apply_rest rest + in + apply_rest rest_args and gen_unit dst = append (li dst (tag_int 0)) @@ -250,16 +271,16 @@ and gen_binop dst op left right = let* () = gen_imm t0 left in let* () = gen_imm t1 right in let* () = evacuate_reg dst in - append (bin_op dst (bin_oper_to_string op) t0 t1) + match bin_op dst (bin_oper_to_string op) t0 t1 with + | Ok instrs -> append instrs + | Error msg -> fail msg and gen_branch dst cond then_e else_e = - let fresh_branch_labels = - let* id = fresh in - return (Printf.sprintf "else_%d" id, Printf.sprintf "end_%d" id) - in let* () = gen_imm t0 cond in - let* else_lbl, end_lbl = fresh_branch_labels in - let* () = append (beq t0 zero else_lbl) in + let* else_lbl, end_lbl = fresh_branch in + (* Branch to else when cond equals tagged false (1); not zero *) + let* () = append (li t1 (tag_int 0)) in + let* () = append (beq t0 t1 else_lbl) in let* st_before_then = get in let frame_before_then = st_before_then.frame_offset in let* () = gen_anf dst then_e in @@ -276,6 +297,18 @@ and gen_branch dst cond then_e else_e = let* () = gen_anf dst else_e in append (label end_lbl) +and gen_list dst = function + | [] -> append (li dst (tag_int 0)) + | hd :: tl -> + let* () = gen_list dst tl in + let* tail_loc = store_reg_into_frame dst in + let* () = gen_imm t0 hd in + let* () = spill_caller_saved_vars_to_frame in + let* () = append (li result_reg 2) in + let* () = load_into_reg (List.nth arg_regs 1) (Loc_reg t0) in + let* () = load_into_reg (List.nth arg_regs 2) tail_loc in + copy_result_to dst + and gen_app dst fname first rest = gen_invocation dst fname (first :: rest) and gen_cexpr dst = function @@ -293,23 +326,24 @@ and gen_cexpr dst = function and gen_anf dst = function | AnfExpr cexp -> gen_cexpr dst cexp | AnfLet (_, name, rhs, cont) -> + let* () = evacuate_reg result_reg in let* () = gen_cexpr result_reg rhs in let* loc = store_reg_into_frame result_reg in - let* () = modify_env (Map.set ~key:name ~data:loc) in + let* () = modify_env (fun env -> Base.Map.set env ~key:name ~data:loc) in gen_anf dst cont ;; let bind_param_to_reg env i = function | ImmediateVar name -> - let r = List.nth_exn arg_regs i in - return (Map.set env ~key:name ~data:(Loc_reg r)) + let r = List.nth arg_regs i in + return (Base.Map.set env ~key:name ~data:(Loc_reg r)) | _ -> fail "unsupported pattern" ;; let bind_param_to_stack env i = function | ImmediateVar name -> let off = (i + 2) * word_size in - return (Map.set env ~key:name ~data:(Loc_mem (fp, off))) + return (Base.Map.set env ~key:name ~data:(Loc_mem (fp, off))) | _ -> fail "unsupported pattern" ;; @@ -321,59 +355,69 @@ let flush_instr_buffer ppf = let clear_instr_buffer = modify (fun st -> { st with instr_buffer = [] }) in let* buf = get_instr_buffer in let* () = clear_instr_buffer in - let () = List.iter (List.rev buf) ~f:(fun item -> format_item ppf item) in + let () = List.iter (fun item -> format_item ppf item) (List.rev buf) in return () ;; -let gen_func func_name params body frame_sz ppf = - fprintf ppf "\n .globl %s\n .type %s, @function\n" func_name func_name; +let gen_func asm_name params body frame_sz ppf = + fprintf ppf "\n .globl %s\n .type %s, @function\n" asm_name asm_name; let args = List.length params in - let params_reg, params_stack = List.split_n params (min args arg_regs_count) in - let env0 = Map.empty (module String) in + let params_reg, params_stack = + ( Base.List.take params (min args arg_regs_count) + , Base.List.drop params (min args arg_regs_count) ) + in + let env0 = Base.Map.empty (module Base.String) in let* env = - List.foldi params_reg ~init:(return env0) ~f:(fun i acc p -> + Base.List.foldi params_reg ~init:(return env0) ~f:(fun i acc p -> let* e = acc in bind_param_to_reg e i p) in let* env = - List.foldi params_stack ~init:(return env) ~f:(fun i acc p -> + Base.List.foldi params_stack ~init:(return env) ~f:(fun i acc p -> let* e = acc in bind_param_to_stack e i p) in let* () = set_env env in - let* () = append (prologue ~name:func_name ~stack_size:frame_sz) in + let* () = append (prologue ~name:asm_name ~stack_size:frame_sz) in let* st = get in let* () = put { st with frame_offset = 0 } in + let* () = spill_params_to_frame params_reg in let* () = gen_anf result_reg body in - let* () = append (epilogue ~is_main:(String.equal func_name "main")) in + let* () = append (epilogue ~is_main:(String.equal asm_name "main")) in let* () = flush_instr_buffer ppf in return () ;; let gen_program ppf (analysis : analysis_result) = fprintf ppf ".section .text"; - let base = Config.primitive_arities in + let base = Frontend.Builtins.all_runtime_prims in let arity_map = - List.fold base ~init:analysis.arity_map ~f:(fun map prim -> - Map.set map ~key:prim.name ~data:prim.arity) + List.fold_left + (fun map { Frontend.Builtins.name; arity } -> + Base.Map.set map ~key:name ~data:arity) + analysis.arity_map + base in let init = { frame_offset = 0 - ; fresh_id = 0 + ; naming_state = Default_naming.init ; arity_map - ; env = Map.empty (module String) + ; env = Base.Map.empty (module Base.String) ; instr_buffer = [] + ; current_func_index = 0 + ; symbol_resolve = analysis.resolve } in let comp = - List.fold analysis.functions ~init:(return ()) ~f:(fun acc fn -> + Base.List.foldi analysis.functions ~init:(return ()) ~f:(fun i acc fn -> let frame_sz = (2 + fn.slots_count) * word_size in let* () = acc in - gen_func fn.func_name fn.params fn.body frame_sz ppf) + let* () = modify (fun st -> { st with current_func_index = i }) in + gen_func fn.asm_name fn.params fn.body frame_sz ppf) in match run comp init with - | Ok ((), _) -> pp_print_flush ppf () - | Error msg -> - Stdlib.Format.eprintf "Codegen error: %s\n%!" msg; - Stdlib.exit 1 -;; \ No newline at end of file + | Ok ((), _) -> + pp_print_flush ppf (); + Ok () + | Error msg -> Error msg +;; diff --git a/EML/lib/backend/ricsv/generator_state.ml b/EML/lib/backend/ricsv/generator_state.ml index 19b4f14f..93d65b6a 100644 --- a/EML/lib/backend/ricsv/generator_state.ml +++ b/EML/lib/backend/ricsv/generator_state.ml @@ -2,66 +2,92 @@ (** SPDX-License-Identifier: LGPL-3.0-or-later *) -open Base -open Frontend.Ast -open Architecture.Riscv_backend - -type env = (ident, location, String.comparator_witness) Map.t - -type state = - { frame_offset : int - ; fresh_id : int - ; arity_map : (ident, int, String.comparator_witness) Map.t - ; env : env - ; instr_buffer : instr list - } - -type 'a t = state -> ('a * state, string) Result.t - -let return x st = Ok (x, st) -let fail e = fun _ -> Error e - -let bind m f = - fun state -> - match m state with - | Ok (x, st') -> f x st' - | Error e -> Error e -;; - -let ( let* ) = bind -let get st = Ok (st, st) -let put st = fun _ -> Ok ((), st) - -let modify f = - let* st = get in - put (f st) -;; - -let modify_env f = modify (fun st -> { st with env = f st.env }) - -let get_env = - let* st = get in - return st.env -;; - -let set_env env = modify (fun st -> { st with env }) - -let fresh = - let modify_fresh_id f = modify (fun st -> { st with fresh_id = f st.fresh_id }) in - let* st = get in - let* () = modify_fresh_id Int.succ in - return st.fresh_id -;; - -let run m init = m init - -let append (items : instr list) = - let modify_instr_buffer f = - modify (fun st -> { st with instr_buffer = f st.instr_buffer }) - in - if List.is_empty items - then return () - else - modify_instr_buffer (fun l -> - List.fold_left items ~init:l ~f:(fun acc it -> it :: acc)) -;; +open Architecture +open Riscv_backend + +module type NAMING = sig + type t + + val init : t + val fresh_partial : t -> string * t + val fresh_branch : t -> (string * string) * t +end + +module Default_naming : NAMING = struct + type t = int + + let init = 0 + let fresh_partial n = "part_" ^ string_of_int n, n + 1 + let fresh_branch n = ("else_" ^ string_of_int n, "end_" ^ string_of_int n), n + 1 +end + +module Make (N : NAMING) = struct + type env = (string, location, Base.String.comparator_witness) Base.Map.t + + type state = + { frame_offset : int + ; naming_state : N.t + ; arity_map : (string, int, Base.String.comparator_witness) Base.Map.t + ; env : env + ; instr_buffer : instr list + ; current_func_index : int + ; symbol_resolve : int -> string -> (string * int) option + } + + type 'a t = state -> ('a * state, string) Result.t + + let return x st = Ok (x, st) + let fail e = fun _ -> Error e + + let bind m f = + fun state -> + match m state with + | Ok (x, st') -> f x st' + | Error e -> Error e + ;; + + let ( let* ) = bind + let get st = Ok (st, st) + let put st = fun _ -> Ok ((), st) + + let modify f = + let* st = get in + put (f st) + ;; + + let modify_env f = modify (fun st -> { st with env = f st.env }) + + let get_env = + let* st = get in + return st.env + ;; + + let set_env env = modify (fun st -> { st with env }) + + let fresh_partial = + let* st = get in + let name, next = N.fresh_partial st.naming_state in + let* () = put { st with naming_state = next } in + return name + ;; + + let fresh_branch = + let* st = get in + let pair, next = N.fresh_branch st.naming_state in + let* () = put { st with naming_state = next } in + return pair + ;; + + let run m init = m init + + let append (items : instr list) = + let modify_instr_buffer f = + modify (fun st -> { st with instr_buffer = f st.instr_buffer }) + in + if items = [] + then return () + else modify_instr_buffer (fun l -> List.fold_left (fun acc it -> it :: acc) l items) + ;; +end + +include Make (Default_naming) diff --git a/EML/lib/backend/ricsv/runner.ml b/EML/lib/backend/ricsv/runner.ml index 9b5907c0..752784b3 100644 --- a/EML/lib/backend/ricsv/runner.ml +++ b/EML/lib/backend/ricsv/runner.ml @@ -5,7 +5,7 @@ open Middleend.Anf open Analysis -let gen_program ppf (program : anf_program) = +let gen_program ~enable_gc ppf (program : anf_program) = let analysis = analyze program in - Generator.gen_program ppf analysis + Generator.gen_program ~enable_gc ppf analysis ;; diff --git a/EML/lib/frontend/ast.ml b/EML/lib/frontend/ast.ml index a7388e64..5806081a 100644 --- a/EML/lib/frontend/ast.ml +++ b/EML/lib/frontend/ast.ml @@ -89,6 +89,7 @@ type structure = type program = structure list [@@deriving show { with_path = false }] let bin_op_list = [ "*"; "/"; "+"; "-"; "^"; ">="; "<="; "<>"; "="; ">"; "<"; "&&"; "||" ] +let unary_op_list = [ "~-" ] let is_bin_op op = List.mem op bin_op_list let is_operator opr = List.exists (fun s -> String.equal s opr) bin_op_list let is_unary_minus op = op = "~-" diff --git a/EML/lib/frontend/binutils.ml b/EML/lib/frontend/binutils.ml new file mode 100644 index 00000000..69d44eb5 --- /dev/null +++ b/EML/lib/frontend/binutils.ml @@ -0,0 +1,19 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +(** Built-in names and arities. Single place to change stdlib/runtime for + closure conversion, backend, etc. *) + +type primitive = + { name : string + ; arity : int + } + +let all_runtime_prims : primitive list = + [ { name = "print_int"; arity = 1 }; { name = "print_endline"; arity = 1 } ] +;; + +let builtin_global_names = + List.map (fun p -> p.name) all_runtime_prims @ Ast.unary_op_list @ Ast.bin_op_list +;; From edf24c4297f27012a73c897d632d88931886362f Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Sat, 7 Mar 2026 19:05:25 +0300 Subject: [PATCH 31/74] feat entry point Signed-off-by: Victoria Ostrovskaya --- EML/bin/EML.ml | 108 ++++++++++++++++++++++++++++++++++ EML/bin/LL.ml | 94 ----------------------------- EML/bin/dune | 23 +------- EML/bin/fact | 4 -- EML/bin/main.ml | 53 ----------------- EML/bin/run.t | 0 EML/lib/frontend/binutils.mli | 11 ++++ EML/lib/frontend/runner.ml | 29 +++++++++ EML/lib/frontend/runner.mli | 18 ++++++ 9 files changed, 168 insertions(+), 172 deletions(-) create mode 100644 EML/bin/EML.ml delete mode 100644 EML/bin/LL.ml delete mode 100644 EML/bin/fact delete mode 100644 EML/bin/main.ml delete mode 100644 EML/bin/run.t create mode 100644 EML/lib/frontend/binutils.mli create mode 100644 EML/lib/frontend/runner.ml create mode 100644 EML/lib/frontend/runner.mli diff --git a/EML/bin/EML.ml b/EML/bin/EML.ml new file mode 100644 index 00000000..3c7bbff4 --- /dev/null +++ b/EML/bin/EML.ml @@ -0,0 +1,108 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Stdio +open EML_lib +open Frontend + +type backend = Ricsv + +type opts = + { input_file : string option + ; output_file : string option + } + +let default_opts = { input_file = None; output_file = None } + +type env = Inferencer.TypeEnv.t + +let report_parse_error oc s = + Out_channel.output_string oc (Format.asprintf "Parsing error: %s\n" s) +;; + +let report_infer_error oc e = + Out_channel.output_string + oc + (Format.asprintf "Inferencer error: %a\n" Inferencer.pp_error e) +;; + +let with_frontend text env oc f_success : (env, unit) Result.t = + match Frontend.Runner.run text env with + | Error (Frontend.Runner.Parse s) -> + report_parse_error oc s; + Error () + | Error (Frontend.Runner.Infer e) -> + report_infer_error oc e; + Error () + | Ok (ast, env', out_list) -> f_success ast env' out_list +;; + +let with_middleend ast _env' f : (env, unit) Result.t = + match Middleend.Anf.anf_program ast with + | Error _ -> Error () + | Ok anf_ast -> f anf_ast +;; + +let run_compile text env oc : (env, unit) Result.t = + with_frontend text env oc (fun ast env' _out_list -> + with_middleend ast env' (fun anf_ast -> + let ppf = Format.formatter_of_out_channel oc in + let res = Backend.Ricsv.Runner.gen_program ppf anf_ast in + match res with + | Ok () -> Ok env' + | Error msg -> + Format.eprintf "Codegen error: %s\n%!" msg; + Error ())) +;; + +(* ------------------------------------------------------------------------- *) +(* Compiler entry point *) +(* ------------------------------------------------------------------------- *) + +let compiler opts : (unit, unit) Result.t = + let run text env oc = run_compile text env oc in + let env0 = Inferencer.TypeEnv.env_with_print_funs_and_gc in + let with_output f = + match opts.output_file with + | Some path -> Out_channel.with_file path ~f + | None -> f Out_channel.stdout + in + let input = + match opts.input_file with + | Some path -> In_channel.read_all path |> String.trim + | None -> In_channel.input_all stdin |> String.trim + in + match with_output (fun oc -> run input env0 oc) with + | Ok _env -> Ok () + | Error () -> Error () +;; + +(* ------------------------------------------------------------------------- *) +(* CLI *) +(* ------------------------------------------------------------------------- *) + +let parse_args () : (opts, unit) Result.t = + let input_file = ref default_opts.input_file in + let output_file = ref default_opts.output_file in + let positional_seen = ref false in + let open Arg in + let spec = + [ "-fromfile", String (fun s -> input_file := Some s), " Read source from file" + ; "-o", String (fun s -> output_file := Some s), " Write output to file" + ] + in + parse spec (fun _ -> positional_seen := true) "Compiler for custom language"; + if !positional_seen then Error () else compiler +;; + +let () = + match parse_args () with + | Error () -> + Format.eprintf "Positional arguments are not supported\n"; + exit 1 + | Ok opts -> + (match compiler opts with + | Error () -> exit 1 + | Ok () -> ()) +;; diff --git a/EML/bin/LL.ml b/EML/bin/LL.ml deleted file mode 100644 index 53331175..00000000 --- a/EML/bin/LL.ml +++ /dev/null @@ -1,94 +0,0 @@ -[@@@ocaml.text "/*"] - -(** Copyright 2023-2024, Kakadu and contributors *) - -(** SPDX-License-Identifier: LGPL-3.0-or-later *) - -[@@@ocaml.text "/*"] - -open Llvm -open Printf - -module type S = sig - val context : Llvm.llcontext - val module_ : Llvm.llmodule - val builder : Llvm.llbuilder - val build_store : Llvm.llvalue -> Llvm.llvalue -> Llvm.llvalue - val build_call : lltype -> ?name:string -> llvalue -> llvalue list -> llvalue - val lookup_func_exn : string -> llvalue - val has_toplevel_func : string -> bool - val build_add : ?name:string -> llvalue -> llvalue -> llvalue - val build_sub : ?name:string -> llvalue -> llvalue -> llvalue - val build_mul : ?name:string -> llvalue -> llvalue -> llvalue - val build_sdiv : ?name:string -> llvalue -> llvalue -> llvalue [@@inline] - val build_icmp : ?name:string -> Icmp.t -> llvalue -> llvalue -> llvalue - - (** [set_metadata v kind fmt] sets metadata to value [v] of kind [k]. - Returns this value [v]. Useful for attaching debugging *) - val set_metadata - : llvalue - -> string - -> ('a, Format.formatter, unit, llvalue) format4 - -> 'a - - (* ?? *) - - val build_ptrtoint : ?name:string -> llvalue -> lltype -> llvalue - val build_inttoptr : ?name:string -> llvalue -> lltype -> llvalue - val build_pointercast : ?name:string -> llvalue -> lltype -> llvalue - - (** Just aliases *) - - val const_int : Llvm.lltype -> int -> Llvm.llvalue - val params : Llvm.llvalue -> Llvm.llvalue array - val pp_value : Format.formatter -> llvalue -> unit -end - -let make context builder module_ = - let module L : S = struct - let context = context - let builder = builder - let module_ = module_ - let build_store a b = Llvm.build_store a b builder - - let build_call typ ?(name = "") f args = - build_call typ f (Array.of_list args) name builder - ;; - - let has_toplevel_func fname = - match lookup_function fname module_ with - | Some _ -> true - | None -> false - ;; - - let lookup_func_exn fname = - match lookup_function fname module_ with - | Some f -> f - | None -> failwith (sprintf "Function '%s' not found" fname) - ;; - - let build_add ?(name = "") l r = build_add l r name builder - let build_sub ?(name = "") l r = build_sub l r name builder - let build_mul ?(name = "") l r = build_mul l r name builder - let build_sdiv ?(name = "") l r = build_sdiv l r name builder - let build_icmp ?(name = "") op l r = build_icmp op l r name builder - let build_ptrtoint ?(name = "") e typ = Llvm.build_ptrtoint e typ name builder - let build_inttoptr ?(name = "") e typ = Llvm.build_inttoptr e typ name builder - let build_pointercast ?(name = "") f typ = Llvm.build_pointercast f typ name builder - - let set_metadata v kind fmt = - Format.kasprintf - (fun s -> - Llvm.set_metadata v (Llvm.mdkind_id context kind) (Llvm.mdstring context s); - v) - fmt - ;; - - (* Aliases *) - let const_int = Llvm.const_int - let params = Llvm.params - let pp_value ppf x = Format.fprintf ppf "%s" (Llvm.string_of_llvalue x) - end - in - (module L : S) -;; diff --git a/EML/bin/dune b/EML/bin/dune index 57cdf378..1d0e46b7 100644 --- a/EML/bin/dune +++ b/EML/bin/dune @@ -1,22 +1,3 @@ -(library - (name LL) - (public_name EML.LL) - (modules LL) - (wrapped false) - (libraries - llvm - llvm.analysis - ; - )) - (executable - (public_name EML) - (name main) - (modules main) - (libraries LL EML_lib) - (instrumentation - (backend bisect_ppx))) - -(cram - (package EML) - (deps ./main.exe)) + (name EML) + (libraries stdio base EML.lib)) diff --git a/EML/bin/fact b/EML/bin/fact deleted file mode 100644 index dfe17f61..00000000 --- a/EML/bin/fact +++ /dev/null @@ -1,4 +0,0 @@ -let rec factorial n = - if n <= 1 then 1 else n * factorial (n - 1) -in -factorial 5 \ No newline at end of file diff --git a/EML/bin/main.ml b/EML/bin/main.ml deleted file mode 100644 index c52db13f..00000000 --- a/EML/bin/main.ml +++ /dev/null @@ -1,53 +0,0 @@ -(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) - -(** SPDX-License-Identifier: LGPL-3.0-or-later *) - -open EML_lib.Frontend.Parser -open EML_lib.Middleend.Anf -open EML_lib.Backend.Ricsv.Runner - -let compile_to_asm src out_ppf = - match parse src with - | Error e -> - Printf.eprintf "Parse error: %s\n%!" e; - exit 1 - | Ok ast -> - (match anf_program ast with - | Error e -> - Printf.eprintf "ANF error: %s\n%!" e; - exit 1 - | Ok anf -> gen_program out_ppf anf) -;; - -let () = - let args = Sys.argv in - let nargs = Array.length args in - (* Parse command line: [EML.exe] [-fromfile ] [-o ] *) - let src_file = ref None in - let out_file = ref None in - let i = ref 1 in - while !i < nargs do - (match args.(!i) with - | "-fromfile" -> - incr i; - if !i < nargs then src_file := Some args.(!i) - | "-o" -> - incr i; - if !i < nargs then out_file := Some args.(!i) - | _ -> ()); - incr i - done; - let src = - match !src_file with - | Some path -> In_channel.(with_open_text path input_all) - | None -> In_channel.input_all stdin - in - match !out_file with - | None -> compile_to_asm src Format.std_formatter - | Some path -> - let oc = open_out path in - let ppf = Format.formatter_of_out_channel oc in - compile_to_asm src ppf; - Format.pp_print_flush ppf (); - close_out oc -;; diff --git a/EML/bin/run.t b/EML/bin/run.t deleted file mode 100644 index e69de29b..00000000 diff --git a/EML/lib/frontend/binutils.mli b/EML/lib/frontend/binutils.mli new file mode 100644 index 00000000..80aee0a7 --- /dev/null +++ b/EML/lib/frontend/binutils.mli @@ -0,0 +1,11 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +type primitive = + { name : string + ; arity : int + } + +val all_runtime_prims : bool -> primitive list +val builtin_global_names : string list diff --git a/EML/lib/frontend/runner.ml b/EML/lib/frontend/runner.ml new file mode 100644 index 00000000..7e3ba283 --- /dev/null +++ b/EML/lib/frontend/runner.ml @@ -0,0 +1,29 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Ast +open Format +open Inferencer + +type error = + | Parse of string + | Infer of Inferencer.error + +let pp_error ppf = function + | Parse s -> fprintf ppf "Parse error: %s" s + | Infer e -> fprintf ppf "Inference error: %a" Inferencer.pp_error e +;; + +let parse (text : string) : (program, string) Result.t = Parser.parse text + +let run (text : string) (env : TypeEnv.t) + : (program * TypeEnv.t * (ident option * ty) list, error) Result.t + = + match Parser.parse text with + | Error s -> Error (Parse s) + | Ok ast -> + (match Inferencer.ResultMonad.run (infer_structure env ast) with + | Error e -> Error (Infer e) + | Ok (env', out_list) -> Ok (ast, env', out_list)) +;; diff --git a/EML/lib/frontend/runner.mli b/EML/lib/frontend/runner.mli new file mode 100644 index 00000000..42948926 --- /dev/null +++ b/EML/lib/frontend/runner.mli @@ -0,0 +1,18 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Ast +open Inferencer + +type error = + | Parse of string + | Infer of Inferencer.error + +val pp_error : Format.formatter -> error -> unit +val parse : string -> (program, string) Result.t + +val run + : string + -> TypeEnv.t + -> (program * TypeEnv.t * (ident option * ty) list, error) Result.t From e2e5cb3e04343f64e1f91087e554f865fbba1f10 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Sat, 7 Mar 2026 19:15:40 +0300 Subject: [PATCH 32/74] add ll/cc Signed-off-by: Victoria Ostrovskaya --- EML/lib/middleend/cc.ml | 330 ++++++++++++++++++++++++++++++++ EML/lib/middleend/ll.ml | 407 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 737 insertions(+) create mode 100644 EML/lib/middleend/cc.ml create mode 100644 EML/lib/middleend/ll.ml diff --git a/EML/lib/middleend/cc.ml b/EML/lib/middleend/cc.ml new file mode 100644 index 00000000..50dd770d --- /dev/null +++ b/EML/lib/middleend/cc.ml @@ -0,0 +1,330 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Format +open Frontend.Ast +open Frontend.Binutils +module VarSet = Set.Make (String) +module EnvMap = Map.Make (String) + +let union_map_list f list = + List.fold_left (fun acc x -> VarSet.union acc (f x)) VarSet.empty list +;; + +let var_set_of_list lst = List.fold_left (fun s x -> VarSet.add x s) VarSet.empty lst + +let vars_in_pattern p = + let rec walk = function + | PatAny -> VarSet.empty + | PatVariable x -> VarSet.singleton x + | PatConst _ -> VarSet.empty + | PatConstruct (_, None) -> VarSet.empty + | PatConstruct (_, Some q) -> walk q + | PatType (q, _) -> walk q + | PatUnit | PatList _ | PatOption _ -> VarSet.empty + in + walk p +;; + +let rec collect_free_vars = function + | ExpIdent v -> VarSet.singleton v + | ExpConst _ -> VarSet.empty + | ExpLet (flag, (pat, exp), binds, body) -> + let all_binds = (pat, exp) :: binds in + let bound_vars = union_map_list (fun (p, _) -> vars_in_pattern p) all_binds in + let free_vars_in_rhs = + match flag with + | Rec -> + union_map_list + (fun (_, e) -> VarSet.diff (collect_free_vars e) bound_vars) + all_binds + | NonRec -> union_map_list (fun (_, e) -> collect_free_vars e) all_binds + in + VarSet.union free_vars_in_rhs (VarSet.diff (collect_free_vars body) bound_vars) + | ExpLambda (pat, pats, exp) -> + let bound_vars = union_map_list (fun p -> vars_in_pattern p) (pat :: pats) in + VarSet.diff (collect_free_vars exp) bound_vars + | ExpApply (e1, e2) -> VarSet.union (collect_free_vars e1) (collect_free_vars e2) + | ExpFunction ((pat, exp), cases) -> + let one (p, e) = VarSet.diff (collect_free_vars e) (vars_in_pattern p) in + union_map_list one ((pat, exp) :: cases) + | ExpMatch (e, (pat, branch), cases) -> + let one (p, e) = VarSet.diff (collect_free_vars e) (vars_in_pattern p) in + let in_branches = union_map_list one ((pat, branch) :: cases) in + VarSet.union (collect_free_vars e) in_branches + | ExpBranch (cond, then_e, else_opt) -> + union_map_list + collect_free_vars + (cond + :: then_e + :: + (match else_opt with + | None -> [] + | Some e -> [ e ])) + | ExpConstruct (_, None) -> VarSet.empty + | ExpConstruct (_, Some e) -> collect_free_vars e + | ExpTypeAnnotation (e, _) -> collect_free_vars e + | ExpBinOper (_, e1, e2) -> VarSet.union (collect_free_vars e1) (collect_free_vars e2) + | ExpUnarOper (_, e) -> collect_free_vars e + | ExpList es -> union_map_list collect_free_vars es + | ExpOption e_opt -> + (match e_opt with + | None -> VarSet.empty + | Some e -> collect_free_vars e) +;; + +type context = + { globals : VarSet.t + ; env : VarSet.t EnvMap.t + } + +let with_globals ctx g = { ctx with globals = g } +let with_env ctx e = { ctx with env = e } + +type error = LambdaWithoutParameters + +let pp_error ppf = function + | LambdaWithoutParameters -> fprintf ppf "closure_conversion: lambda without parameters" +;; + +type 'a t = context -> ('a, error) Result.t + +let return x = fun _ -> Ok x +let fail e = fun _ -> Error e + +let bind m f = + fun ctx -> + match m ctx with + | Ok a -> f a ctx + | Error e -> Error e +;; + +let ask = fun ctx -> Ok ctx +let local f m = fun ctx -> m (f ctx) +let run ctx m = m ctx +let ( let* ) = bind + +let of_result = function + | Ok x -> return x + | Error e -> fail e +;; + +let extend_capture_env env pat captured_set = + let rec add_captures_for_pat acc = function + | PatAny | PatConst _ | PatConstruct (_, None) -> acc + | PatVariable name -> EnvMap.add name captured_set acc + | PatConstruct (_, Some p) | PatType (p, _) -> add_captures_for_pat acc p + | PatUnit | PatList _ | PatOption _ -> acc + in + add_captures_for_pat env pat +;; + +let rec build_closure ~apply param_list body_ast captured_ids = + let* body_ast' = convert_expr body_ast in + let make_lam first rest = ExpLambda (first, rest, body_ast') in + match param_list with + | [] -> fail LambdaWithoutParameters + | first :: rest_params -> + if VarSet.is_empty captured_ids + then return (make_lam first rest_params) + else ( + let captured_list = VarSet.elements captured_ids in + let all_params = + List.map (fun id -> PatVariable id) captured_list @ (first :: rest_params) + in + let lam = make_lam (List.hd all_params) (List.tl all_params) in + return + (if apply + then List.fold_left (fun t id -> ExpApply (t, ExpIdent id)) lam captured_list + else lam)) + +and convert_expr = function + | ExpIdent id -> + let* current_ctx = ask in + return + (try + let env_fvs = EnvMap.find id current_ctx.env in + List.fold_left + (fun t fv -> ExpApply (t, ExpIdent fv)) + (ExpIdent id) + (VarSet.elements env_fvs) + with + | Not_found -> ExpIdent id) + | ExpConst c -> return (ExpConst c) + | ExpLet (flag, (pat, exp), more, body) -> + let* (pat', exp'), rest_binds, body_ctx = convert_let_bindings flag (pat, exp) more in + let* body' = local (fun _ -> body_ctx) (convert_expr body) in + return (ExpLet (flag, (pat', exp'), rest_binds, body')) + | ExpLambda (pat, pats, body) as lam -> + let* current_ctx = ask in + let param_list = pat :: pats in + let captured = VarSet.diff (collect_free_vars lam) current_ctx.globals in + build_closure ~apply:true param_list body captured + | ExpApply (f, arg) -> + let* f' = convert_expr f in + let* arg' = convert_expr arg in + return (ExpApply (f', arg')) + | ExpFunction ((pat, exp), cases) -> + let* first_exp = convert_expr exp in + let* rest_cases = + List.fold_right + (fun (p, e) acc -> + let* e' = convert_expr e in + let* rest = acc in + return ((p, e') :: rest)) + cases + (return []) + in + return (ExpFunction ((pat, first_exp), rest_cases)) + | ExpMatch (e, (pat, branch), cases) -> + let* scrutinee' = convert_expr e in + let* branch' = convert_expr branch in + let* rest_cases = + List.fold_right + (fun (p, e) acc -> + let* e' = convert_expr e in + let* rest = acc in + return ((p, e') :: rest)) + cases + (return []) + in + return (ExpMatch (scrutinee', (pat, branch'), rest_cases)) + | ExpBranch (cond, then_e, else_opt) -> + let* cond' = convert_expr cond in + let* then_e' = convert_expr then_e in + let* else_e' = + match else_opt with + | None -> return None + | Some e -> + let* e' = convert_expr e in + return (Some e') + in + return (ExpBranch (cond', then_e', else_e')) + | ExpConstruct (_, None) as e -> return e + | ExpConstruct (tag, Some e) -> + let* e' = convert_expr e in + return (ExpConstruct (tag, Some e')) + | ExpTypeAnnotation (e, typ) -> + let* e' = convert_expr e in + return (ExpTypeAnnotation (e', typ)) + | ExpBinOper (op, e1, e2) -> + let* e1' = convert_expr e1 in + let* e2' = convert_expr e2 in + return (ExpBinOper (op, e1', e2')) + | ExpUnarOper (op, e) -> + let* e' = convert_expr e in + return (ExpUnarOper (op, e')) + | ExpList es -> + let* es' = + List.fold_right + (fun e acc -> + let* e' = convert_expr e in + let* acc' = acc in + return (e' :: acc')) + es + (return []) + in + return (ExpList es') + | ExpOption e_opt -> + (match e_opt with + | None -> return (ExpOption None) + | Some e -> + let* e' = convert_expr e in + return (ExpOption (Some e'))) + +and convert_let_bindings rec_flag (pat, exp) rest_binds = + let* current_ctx = ask in + let bind_group = (pat, exp) :: rest_binds in + let bound_ids = union_map_list (fun (p, _) -> vars_in_pattern p) bind_group in + match rec_flag with + | Rec -> + let globals' = VarSet.union current_ctx.globals bound_ids in + let group_captured = + union_map_list (fun (_, e) -> VarSet.diff (collect_free_vars e) globals') bind_group + in + let env' = + List.fold_left + (fun acc (p, _) -> extend_capture_env acc p group_captured) + current_ctx.env + bind_group + in + let rec_group_ctx = with_env (with_globals current_ctx globals') env' in + let rec loop acc = function + | [] -> return (List.rev acc) + | (p, e) :: rest -> + let fvs = VarSet.diff group_captured (vars_in_pattern p) in + let res = + match e with + | ExpLambda (lam_pat, lam_pats, body) -> + run rec_group_ctx (build_closure ~apply:false (lam_pat :: lam_pats) body fvs) + | _ -> run rec_group_ctx (convert_expr e) + in + let* e' = of_result res in + loop ((p, e') :: acc) rest + in + let* transformed_binds = loop [] bind_group in + return (List.hd transformed_binds, List.tl transformed_binds, rec_group_ctx) + | NonRec -> + let rec loop env_acc rev_binds = function + | [] -> + let transformed_binds = List.rev rev_binds in + return + ( List.hd transformed_binds + , List.tl transformed_binds + , with_env current_ctx env_acc ) + | (p, e) :: rest -> + let captured = VarSet.diff (collect_free_vars e) current_ctx.globals in + let ctx_with_env = { current_ctx with env = env_acc } in + let res = + match e with + | ExpLambda (lam_pat, lam_pats, body) -> + run + ctx_with_env + (build_closure ~apply:false (lam_pat :: lam_pats) body captured) + | _ -> run ctx_with_env (convert_expr e) + in + let* e' = of_result res in + let env_next = + match e with + | ExpLambda _ -> extend_capture_env env_acc p captured + | _ -> env_acc + in + loop env_next ((p, e') :: rev_binds) rest + in + loop current_ctx.env [] bind_group +;; + +let convert_item = function + | SEval expr -> + let* e' = convert_expr expr in + let* current_ctx = ask in + return (current_ctx.globals, SEval e') + | SValue (rec_flag, (pat, expr), and_binds) -> + let* (pat', expr'), rest_binds, after_binds_ctx = + convert_let_bindings rec_flag (pat, expr) and_binds + in + let bound_ids = + union_map_list (fun (p, _) -> vars_in_pattern p) ((pat, expr) :: and_binds) + in + return + ( VarSet.union after_binds_ctx.globals bound_ids + , SValue (rec_flag, (pat', expr'), rest_binds) ) +;; + +let builtin_globals = var_set_of_list builtin_global_names +let initial_context = { globals = builtin_globals; env = EnvMap.empty } + +let closure_conversion_result (program : Frontend.Ast.program) + : (structure list, error) Result.t + = + let rec convert_items rev_acc item_ctx = function + | [] -> Ok (List.rev rev_acc) + | item :: tail -> + (match run item_ctx (convert_item item) with + | Ok (globals', item') -> + convert_items (item' :: rev_acc) { globals = globals'; env = EnvMap.empty } tail + | Error e -> Error e) + in + convert_items [] initial_context program +;; diff --git a/EML/lib/middleend/ll.ml b/EML/lib/middleend/ll.ml new file mode 100644 index 00000000..0ceb8eeb --- /dev/null +++ b/EML/lib/middleend/ll.ml @@ -0,0 +1,407 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Frontend.Ast +module StringSet = Set.Make (String) +module Map = Map.Make (String) + +module type NAMING = sig + type t + + val fresh : t -> string * t + val init : t +end + +module Default_naming : NAMING = struct + type t = int + + let init = 0 + + let fresh n = + let s = "lifted_" ^ Int.to_string n in + s, n + 1 + ;; +end + +type lift_result = + { structures : structure list + ; expr : expr + } + +let names_in_pattern p = + let rec collect = function + | PatAny -> [] + | PatVariable s -> [ s ] + | PatConst _ -> [] + | PatConstruct (_, None) -> [] + | PatConstruct (_, Some q) -> collect q + | PatType (q, _) -> collect q + | PatUnit -> [] + | PatList ps -> List.concat (List.map collect ps) + | PatOption p_opt -> + (match p_opt with + | None -> [] + | Some x -> collect x) + (* | PatType _ -> ... *) + in + collect p +;; + +let rename_pattern env p = + let rec subst = function + | PatVariable s -> + let s' = + try Map.find s env with + | Not_found -> s + in + PatVariable s' + | PatConstruct (id, p_opt) -> PatConstruct (id, Option.map (fun x -> subst x) p_opt) + | PatType (p, t) -> PatType (subst p, t) + | PatList ps -> PatList (List.map subst ps) + | PatOption p_opt -> PatOption (Option.map (fun x -> subst x) p_opt) + | other -> other + in + subst p +;; + +let unique_names_in_bind_group binds = + let add_if_new (rev_list, set) id = + if StringSet.mem id set then rev_list, set else id :: rev_list, StringSet.add id set + in + let rev_list, _seen = + List.fold_left + (fun (rev_list, set) (p, _) -> + List.fold_left add_if_new (rev_list, set) (names_in_pattern p)) + ([], StringSet.empty) + binds + in + List.rev rev_list +;; + +type error = + | RecLetEmptyBinding + | SValueEmptyBinding + +let pp_error ppf = function + | RecLetEmptyBinding -> + Format.fprintf ppf "lambda_lifting: Rec let must have at least one binding" + | SValueEmptyBinding -> + Format.fprintf ppf "lambda_lifting: SValue must have at least one binding" +;; + +module Make (N : NAMING) = struct + type 'a t = N.t -> ('a * N.t, error) Result.t + + let return (x : 'a) : 'a t = fun st -> Ok (x, st) + let fail (e : error) : _ t = fun _ -> Error e + + let bind (m : 'a t) (f : 'a -> 'b t) : 'b t = + fun st -> + match m st with + | Ok (x, st') -> f x st' + | Error e -> Error e + ;; + + let ( let* ) = bind + + let take_names k : string list t = + fun st -> + let rec loop acc st' i = + if i <= 0 + then Ok (List.rev acc, st') + else ( + let name, st'' = N.fresh st' in + loop (name :: acc) st'' (i - 1)) + in + loop [] st k + ;; + + let map2 (m1 : 'a t) (m2 : 'b t) (f : 'a -> 'b -> 'c) : 'c t = + fun st -> + match m1 st with + | Error e -> Error e + | Ok (x1, st1) -> + (match m2 st1 with + | Error e -> Error e + | Ok (x2, st2) -> Ok (f x1 x2, st2)) + ;; + + let pair (m1 : lift_result t) (m2 : lift_result t) (f : expr -> expr -> expr) + : lift_result t + = + map2 m1 m2 (fun r1 r2 -> + { structures = r1.structures @ r2.structures; expr = f r1.expr r2.expr }) + ;; + + let triple + (m1 : lift_result t) + (m2 : lift_result t) + (m3 : lift_result t) + (f : expr -> expr -> expr -> expr) + : lift_result t + = + map2 + m1 + (map2 m2 m3 (fun r2 r3 -> r2, r3)) + (fun r1 (r2, r3) -> + { structures = r1.structures @ r2.structures @ r3.structures + ; expr = f r1.expr r2.expr r3.expr + }) + ;; + + let list (exprs : expr list) (m : expr -> lift_result t) + : (structure list * expr list) t + = + fun st -> + let rec loop rev_structs rev_exprs st' = function + | [] -> Ok ((List.concat (List.rev rev_structs), List.rev rev_exprs), st') + | e :: rest -> + (match m e st' with + | Error e_err -> Error e_err + | Ok (r, st'') -> + loop (r.structures :: rev_structs) (r.expr :: rev_exprs) st'' rest) + in + loop [] [] st exprs + ;; + + type context = + { renames : string Map.t + ; at_toplevel : bool + } + + let initial_renames = Map.empty + + let without_bindings renames names = + List.fold_left (fun m k -> Map.remove k m) renames names + ;; + + let inner (ctx : context) = { ctx with at_toplevel = false } + + let fold_binds (ctx : context) binds (f : context -> pattern -> expr -> lift_result t) + : (structure list * (pattern * expr) list) t + = + List.fold_left + (fun acc (p, e) -> + let* rev_structures, rev_binds = acc in + let* res = f ctx p e in + return (res.structures :: rev_structures, (p, res.expr) :: rev_binds)) + (return ([], [])) + binds + |> fun m -> + let* rev_structures, rev_binds = m in + return (List.concat (List.rev rev_structures), List.rev rev_binds) + ;; + + let rec lift_expr (ctx : context) (e : expr) : lift_result t = + match e with + | ExpIdent name -> + let name' = + try Map.find name ctx.renames with + | Not_found -> name + in + return { structures = []; expr = ExpIdent name' } + | (ExpConst _ | ExpConstruct (_, None)) as e -> return { structures = []; expr = e } + | ExpLet (NonRec, (pat, exp), more, body) -> + let* res_rhs = lift_expr (inner ctx) exp in + let* extra_structures, rest_binds = lift_binds (inner ctx) more in + let all_defs = + names_in_pattern pat + @ List.concat (List.map (fun (p, _) -> names_in_pattern p) more) + in + let body_ctx = + { (inner ctx) with renames = without_bindings ctx.renames all_defs } + in + let* res_body = lift_expr body_ctx body in + return + { structures = res_rhs.structures @ extra_structures @ res_body.structures + ; expr = ExpLet (NonRec, (pat, res_rhs.expr), rest_binds, res_body.expr) + } + | ExpLet (Rec, (pat, exp), more, body) -> + let rec_binds = (pat, exp) :: more in + let unique_ids = unique_names_in_bind_group rec_binds in + let* names = take_names (List.length unique_ids) in + let rec_ctx = + { (inner ctx) with + renames = + List.fold_left + (fun env (id, name) -> Map.add id name env) + ctx.renames + (List.combine unique_ids names) + } + in + let* inner_structures, lifted_binds = + List.fold_left + (fun acc (p, e) -> + let* structures_acc, binds_acc = acc in + let* res = lift_expr rec_ctx e in + return + ( structures_acc @ res.structures + , binds_acc @ [ rename_pattern rec_ctx.renames p, res.expr ] )) + (return ([], [])) + rec_binds + in + let* res_body = lift_expr rec_ctx body in + let* first_bind, rest_binds = + match lifted_binds with + | first :: rest -> return (first, rest) + | [] -> fail RecLetEmptyBinding + in + return + { structures = + inner_structures + @ [ SValue (Rec, first_bind, rest_binds) ] + @ res_body.structures + ; expr = res_body.expr + } + | ExpLambda (pat, pats, body) when ctx.at_toplevel -> + let* res = lift_expr (inner ctx) body in + return { structures = res.structures; expr = ExpLambda (pat, pats, res.expr) } + | ExpLambda (pat, pats, body) -> + let* names = take_names 1 in + let name = List.hd names in + let args = pat :: pats in + let bound = List.concat (List.map names_in_pattern args) in + let* res = + lift_expr { (inner ctx) with renames = without_bindings ctx.renames bound } body + in + let value_def = + SValue (NonRec, (PatVariable name, ExpLambda (pat, pats, res.expr)), []) + in + return { structures = res.structures @ [ value_def ]; expr = ExpIdent name } + | ExpApply (e1, e2) -> + pair + (lift_expr (inner ctx) e1) + (lift_expr (inner ctx) e2) + (fun e1' e2' -> ExpApply (e1', e2')) + | ExpFunction ((pat, exp), cases) when ctx.at_toplevel -> + let ctx_rhs = + { (inner ctx) with renames = without_bindings ctx.renames (names_in_pattern pat) } + in + let* res_rhs = lift_expr ctx_rhs exp in + let* case_structures, lifted_cases = + lift_binds_with_pattern_scope (inner ctx) cases + in + return + { structures = res_rhs.structures @ case_structures + ; expr = ExpFunction ((pat, res_rhs.expr), lifted_cases) + } + | ExpFunction ((pat1, exp1), cases) -> + let* names = take_names 1 in + let name = List.hd names in + let ctx_body = + { ctx with renames = without_bindings ctx.renames (names_in_pattern pat1) } + in + let* res_body = lift_expr ctx_body exp1 in + let* case_structures, lifted_cases = lift_binds_with_pattern_scope ctx cases in + let value_def = + SValue + ( NonRec + , (PatVariable name, ExpFunction ((pat1, res_body.expr), lifted_cases)) + , [] ) + in + return + { structures = res_body.structures @ case_structures @ [ value_def ] + ; expr = ExpIdent name + } + | ExpMatch (e, (pat, branch), cases) -> + let* res_scrut = lift_expr (inner ctx) e in + let ctx_branch = + { (inner ctx) with renames = without_bindings ctx.renames (names_in_pattern pat) } + in + let* res_branch = lift_expr ctx_branch branch in + let* case_structures, lifted_cases = lift_binds_with_pattern_scope ctx cases in + return + { structures = res_scrut.structures @ res_branch.structures @ case_structures + ; expr = ExpMatch (res_scrut.expr, (pat, res_branch.expr), lifted_cases) + } + | ExpBranch (e1, e2, e3_opt) -> + (match e3_opt with + | None -> + pair + (lift_expr (inner ctx) e1) + (lift_expr (inner ctx) e2) + (fun e1' e2' -> ExpBranch (e1', e2', None)) + | Some e3 -> + triple + (lift_expr (inner ctx) e1) + (lift_expr (inner ctx) e2) + (lift_expr (inner ctx) e3) + (fun e1' e2' e3' -> ExpBranch (e1', e2', Some e3'))) + | ExpConstruct (id, Some e) -> + let* res = lift_expr (inner ctx) e in + return { structures = res.structures; expr = ExpConstruct (id, Some res.expr) } + | ExpTypeAnnotation (e, typ) -> + let* res = lift_expr (inner ctx) e in + return { structures = res.structures; expr = ExpTypeAnnotation (res.expr, typ) } + | ExpBinOper (op, e1, e2) -> + pair + (lift_expr (inner ctx) e1) + (lift_expr (inner ctx) e2) + (fun e1' e2' -> ExpBinOper (op, e1', e2')) + | ExpUnarOper (op, e) -> + let* res = lift_expr (inner ctx) e in + return { structures = res.structures; expr = ExpUnarOper (op, res.expr) } + | ExpList es -> + let* elem_structures, lifted_elems = list es (lift_expr (inner ctx)) in + return { structures = elem_structures; expr = ExpList lifted_elems } + | ExpOption None -> return { structures = []; expr = ExpOption None } + | ExpOption (Some e) -> + let* res = lift_expr (inner ctx) e in + return { structures = res.structures; expr = ExpOption (Some res.expr) } + (* | ExpTuple (e1, e2, rest) -> *) + + and lift_binds (ctx : context) binds : (structure list * (pattern * expr) list) t = + fold_binds ctx binds (fun ctx _ e -> lift_expr ctx e) + + and lift_binds_with_pattern_scope (ctx : context) binds + : (structure list * (pattern * expr) list) t + = + fold_binds ctx binds (fun ctx p e -> + let ctx_binding = + { ctx with renames = without_bindings ctx.renames (names_in_pattern p) } + in + lift_expr ctx_binding e) + ;; + + let lift_structure : structure -> structure list t = function + | SEval e -> + let toplevel = { renames = initial_renames; at_toplevel = true } in + let* res = lift_expr toplevel e in + return (res.structures @ [ SEval res.expr ]) + | SValue (is_rec, bind, more) -> + let toplevel = { renames = initial_renames; at_toplevel = true } in + let* inner_structures, lifted_binds = lift_binds toplevel (bind :: more) in + (match lifted_binds with + | first :: rest -> return (inner_structures @ [ SValue (is_rec, first, rest) ]) + | [] -> fail SValueEmptyBinding) + ;; + + let run_program (program : program) (naming_init : N.t) + : (structure list * N.t, error) Result.t + = + let m = + List.fold_left + (fun acc item -> + let* rev_structure_lists = acc in + let* struct_structures = lift_structure item in + return (struct_structures :: rev_structure_lists)) + (return []) + program + in + match m naming_init with + | Ok (rev_structure_lists, st_final) -> + Ok (List.concat (List.rev rev_structure_lists), st_final) + | Error e -> Error e + ;; +end + +module Transform = Make (Default_naming) + +let lambda_lifting_result (program : Frontend.Ast.program) + : (structure list, error) Result.t + = + match Transform.run_program program Default_naming.init with + | Ok (lst, _) -> Ok lst + | Error e -> Error e +;; From 5be629bcb42f7f6fc0befa494ea54fec54cb09b3 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Sat, 7 Mar 2026 19:16:09 +0300 Subject: [PATCH 33/74] add runner for midleend Signed-off-by: Victoria Ostrovskaya --- EML/bin/EML.ml | 6 ++++-- EML/lib/middleend/runner.ml | 30 ++++++++++++++++++++++++++++++ EML/lib/middleend/runner.mli | 14 ++++++++++++++ 3 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 EML/lib/middleend/runner.ml create mode 100644 EML/lib/middleend/runner.mli diff --git a/EML/bin/EML.ml b/EML/bin/EML.ml index 3c7bbff4..40a928ee 100644 --- a/EML/bin/EML.ml +++ b/EML/bin/EML.ml @@ -39,8 +39,10 @@ let with_frontend text env oc f_success : (env, unit) Result.t = ;; let with_middleend ast _env' f : (env, unit) Result.t = - match Middleend.Anf.anf_program ast with - | Error _ -> Error () + match Middleend.Runner.run ast with + | Error e_mid -> + Format.eprintf "Middleend error: %a\n%!" Middleend.Runner.pp_error e_mid; + Error () | Ok anf_ast -> f anf_ast ;; diff --git a/EML/lib/middleend/runner.ml b/EML/lib/middleend/runner.ml new file mode 100644 index 00000000..6a0834a2 --- /dev/null +++ b/EML/lib/middleend/runner.ml @@ -0,0 +1,30 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Format +open Frontend.Ast +open Cc +open Ll +open Anf + +type error = + | Closure of Cc.error + | Lifting of Ll.error + | Anf of string + +let pp_error ppf = function + | Closure e -> fprintf ppf "closure conversion: %a" Cc.pp_error e + | Lifting e -> fprintf ppf "lambda lifting: %a" Ll.pp_error e + | Anf s -> fprintf ppf "ANF: %s" s +;; + +let run (program : program) : (anf_program, error) Result.t = + let ( >>= ) = Result.bind in + closure_conversion_result program + |> Result.map_error (fun e -> Closure e) + >>= fun cc_ast -> + lambda_lifting_result cc_ast + |> Result.map_error (fun e -> Lifting e) + >>= fun ll_ast -> anf_program ll_ast |> Result.map_error (fun e -> Anf e) +;; diff --git a/EML/lib/middleend/runner.mli b/EML/lib/middleend/runner.mli new file mode 100644 index 00000000..e0ce122d --- /dev/null +++ b/EML/lib/middleend/runner.mli @@ -0,0 +1,14 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Frontend.Ast +open Anf + +type error = + | Closure of Cc.error + | Lifting of Ll.error + | Anf of string + +val pp_error : Format.formatter -> error -> unit +val run : program -> (anf_program, error) Result.t From f72520c5834e4eb2a865884410bb56d64cfa2b52 Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Sun, 8 Mar 2026 00:10:11 +0300 Subject: [PATCH 34/74] add gc & some tests --- EML/bin/main.ml | 55 +++++ EML/lib/backend/ricsv/architecture.ml | 10 +- EML/lib/backend/ricsv/config.ml | 29 +++ EML/lib/backend/ricsv/generator.ml | 23 +- EML/lib/backend/ricsv/runner.mli | 6 +- EML/lib/runtime/runtime.c | 223 +++++++++++++++++- EML/many_tests/.ocamlformat | 1 - EML/many_tests/typed/001fac.ml | 1 - EML/many_tests/typed/dune | 2 - EML/many_tests/typed/typed.t | 2 - EML/tests/Makefile | 47 ++++ EML/tests/dune | 13 +- EML/tests/gc_tests.t | 120 ++++++++++ .../closure/01_add5_staged_partial_gc.ml | 14 ++ .../closure/02_affine_live_dead_gc.ml | 14 ++ .../closure/03_add10_staged_partial_gc.ml | 15 ++ EML/tests/many_tests | 1 + EML/tests/riscv.t | 36 ++- 18 files changed, 584 insertions(+), 28 deletions(-) create mode 100644 EML/bin/main.ml create mode 100644 EML/lib/backend/ricsv/config.ml delete mode 100644 EML/many_tests/.ocamlformat delete mode 120000 EML/many_tests/typed/001fac.ml delete mode 100644 EML/many_tests/typed/dune delete mode 100644 EML/many_tests/typed/typed.t create mode 100644 EML/tests/Makefile create mode 100644 EML/tests/gc_tests.t create mode 100644 EML/tests/gc_tests/closure/01_add5_staged_partial_gc.ml create mode 100644 EML/tests/gc_tests/closure/02_affine_live_dead_gc.ml create mode 100644 EML/tests/gc_tests/closure/03_add10_staged_partial_gc.ml create mode 120000 EML/tests/many_tests diff --git a/EML/bin/main.ml b/EML/bin/main.ml new file mode 100644 index 00000000..8800c642 --- /dev/null +++ b/EML/bin/main.ml @@ -0,0 +1,55 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open EML_lib.Frontend.Parser +open EML_lib.Middleend.Anf +open EML_lib.Backend.Ricsv.Runner + +let compile_to_asm ~enable_gc src out_ppf = + match parse src with + | Error e -> + Printf.eprintf "Parse error: %s\n%!" e; + exit 1 + | Ok ast -> + (match anf_program ast with + | Error e -> + Printf.eprintf "ANF error: %s\n%!" e; + exit 1 + | Ok anf -> gen_program ~enable_gc out_ppf anf) +;; + +let () = + let args = Sys.argv in + let nargs = Array.length args in + (* Parse command line: [EML.exe] [-fromfile ] [-o ] *) + let src_file = ref None in + let out_file = ref None in + let enable_gc = ref false in + let i = ref 1 in + while !i < nargs do + (match args.(!i) with + | "-fromfile" -> + incr i; + if !i < nargs then src_file := Some args.(!i) + | "-o" -> + incr i; + if !i < nargs then out_file := Some args.(!i) + | "-gc" -> enable_gc := true + | _ -> ()); + incr i + done; + let src = + match !src_file with + | Some path -> In_channel.(with_open_text path input_all) + | None -> In_channel.input_all stdin + in + match !out_file with + | None -> compile_to_asm ~enable_gc:!enable_gc src Format.std_formatter + | Some path -> + let oc = open_out path in + let ppf = Format.formatter_of_out_channel oc in + compile_to_asm ~enable_gc:!enable_gc src ppf; + Format.pp_print_flush ppf (); + close_out oc +;; diff --git a/EML/lib/backend/ricsv/architecture.ml b/EML/lib/backend/ricsv/architecture.ml index b10ec378..9067e2f1 100644 --- a/EML/lib/backend/ricsv/architecture.ml +++ b/EML/lib/backend/ricsv/architecture.ml @@ -130,7 +130,7 @@ module Riscv_backend = struct | Loc_reg of reg | Loc_mem of offset - let prologue ~name ~stack_size = + let prologue ~enable_gc ~name ~stack_size = let ra_slot = sp, stack_size - saved_ra_offset in let fp_slot = sp, stack_size - frame_header_size in let base = @@ -140,11 +140,15 @@ module Riscv_backend = struct @ sd fp fp_slot @ addi fp sp (stack_size - frame_header_size) in - base + if enable_gc && String.equal name "main" + then base @ call "init_gc" @ mv a0 fp @ call "set_ptr_stack" + else base ;; - let epilogue ~is_main = + let epilogue ~enable_gc ~is_main = let base = + (if enable_gc && is_main then call "destroy_gc" else []) + @ addi sp fp frame_header_size @ ld ra (fp, saved_ra_offset) @ ld fp (fp, saved_fp_offset) diff --git a/EML/lib/backend/ricsv/config.ml b/EML/lib/backend/ricsv/config.ml new file mode 100644 index 00000000..4c7fb888 --- /dev/null +++ b/EML/lib/backend/ricsv/config.ml @@ -0,0 +1,29 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +(** Primitives the generated code can call. *) + +type primitive = + { name : string + ; arity : int + } + +let primitive_arities ~enable_gc : primitive list = + let base = + [ { name = "print_int"; arity = 1 } + ; { name = "print_endline"; arity = 1 } + ; { name = "alloc_closure"; arity = 2 } + ; { name = "eml_applyN"; arity = 3 } + ] + in + if enable_gc + then + base + @ [ { name = "get_heap_start"; arity = 0 } + ; { name = "get_heap_final"; arity = 0 } + ; { name = "collect"; arity = 0 } + ; { name = "print_gc_status"; arity = 0 } + ] + else base +;; diff --git a/EML/lib/backend/ricsv/generator.ml b/EML/lib/backend/ricsv/generator.ml index 912586d9..7b5ea8c7 100644 --- a/EML/lib/backend/ricsv/generator.ml +++ b/EML/lib/backend/ricsv/generator.ml @@ -359,8 +359,13 @@ let flush_instr_buffer ppf = return () ;; +<<<<<<< HEAD let gen_func asm_name params body frame_sz ppf = fprintf ppf "\n .globl %s\n .type %s, @function\n" asm_name asm_name; +======= +let gen_func ~enable_gc func_name params body frame_sz ppf = + fprintf ppf "\n .globl %s\n .type %s, @function\n" func_name func_name; +>>>>>>> 698314c (add gc & some tests) let args = List.length params in let params_reg, params_stack = ( Base.List.take params (min args arg_regs_count) @@ -378,19 +383,31 @@ let gen_func asm_name params body frame_sz ppf = bind_param_to_stack e i p) in let* () = set_env env in +<<<<<<< HEAD let* () = append (prologue ~name:asm_name ~stack_size:frame_sz) in +======= + let* () = append (prologue ~enable_gc ~name:func_name ~stack_size:frame_sz) in +>>>>>>> 698314c (add gc & some tests) let* st = get in let* () = put { st with frame_offset = 0 } in let* () = spill_params_to_frame params_reg in let* () = gen_anf result_reg body in +<<<<<<< HEAD let* () = append (epilogue ~is_main:(String.equal asm_name "main")) in +======= + let* () = append (epilogue ~enable_gc ~is_main:(String.equal func_name "main")) in +>>>>>>> 698314c (add gc & some tests) let* () = flush_instr_buffer ppf in return () ;; -let gen_program ppf (analysis : analysis_result) = +let gen_program ?(enable_gc = false) ppf (analysis : analysis_result) = fprintf ppf ".section .text"; +<<<<<<< HEAD let base = Frontend.Builtins.all_runtime_prims in +======= + let base = Config.primitive_arities ~enable_gc in +>>>>>>> 698314c (add gc & some tests) let arity_map = List.fold_left (fun map { Frontend.Builtins.name; arity } -> @@ -412,8 +429,12 @@ let gen_program ppf (analysis : analysis_result) = Base.List.foldi analysis.functions ~init:(return ()) ~f:(fun i acc fn -> let frame_sz = (2 + fn.slots_count) * word_size in let* () = acc in +<<<<<<< HEAD let* () = modify (fun st -> { st with current_func_index = i }) in gen_func fn.asm_name fn.params fn.body frame_sz ppf) +======= + gen_func ~enable_gc fn.func_name fn.params fn.body frame_sz ppf) +>>>>>>> 698314c (add gc & some tests) in match run comp init with | Ok ((), _) -> diff --git a/EML/lib/backend/ricsv/runner.mli b/EML/lib/backend/ricsv/runner.mli index 9742e73c..f242edf7 100644 --- a/EML/lib/backend/ricsv/runner.mli +++ b/EML/lib/backend/ricsv/runner.mli @@ -2,4 +2,8 @@ (** SPDX-License-Identifier: LGPL-3.0-or-later *) -val gen_program : Format.formatter -> Middleend.Anf.anf_program -> unit +val gen_program + : ?enable_gc:bool + -> Format.formatter + -> Middleend.Anf.anf_program + -> unit diff --git a/EML/lib/runtime/runtime.c b/EML/lib/runtime/runtime.c index caad3b6a..536191d5 100644 --- a/EML/lib/runtime/runtime.c +++ b/EML/lib/runtime/runtime.c @@ -1,13 +1,10 @@ -#include -#include #include -#include #include +#include #include #include #include #include -#include #define TO_ML_INTEGER(n) ((uint64_t)((uint64_t)(n) >> 1)) @@ -15,17 +12,223 @@ void print_int(long n) { printf("%ld", TO_ML_INTEGER(n)); } #define TAG_CLOSURE 247 #define RISCV_REG_ARGS 8 +#define SIZE_HEAP 800 +#define HEADER_WORDS 1 +typedef struct { + uint64_t raw; +} box_header_t; + +static inline uint16_t header_tag(const box_header_t *h) { return (uint16_t)(h->raw & 0xFFFFu); } +static inline uint16_t header_size(const box_header_t *h) { return (uint16_t)((h->raw >> 16) & 0xFFFFu); } +static inline void set_header(box_header_t *h, uint16_t tag, uint16_t size) { + h->raw = ((uint64_t)size << 16) | (uint64_t)tag; +} + +static inline box_header_t *get_header(uint64_t *payload) { + return (box_header_t *)((uint64_t *)payload - 1); +} + +static inline uint64_t *get_payload(box_header_t *hdr) { return (uint64_t *)(hdr + 1); } + +#define IS_INT(v) ((v)&0x1) +#define IS_PTR(v) ((v) != 0 && !IS_INT(v)) + +typedef struct { + uint64_t *start[2]; + uint64_t *end[2]; + uint64_t *alloc_ptr; + int current_bank; + uint64_t allocations; + uint64_t collections; + uint64_t words_allocated_total; +} gc_state; + +static gc_state GC; +static uint64_t *PTR_STACK = NULL; +static bool gc_enabled = false; + +static inline int get_current_bank_idx() { return GC.current_bank; } +static inline int get_another_bank_idx() { return GC.current_bank ^ 1; } +static inline bool in_bank(uint64_t *ptr, int bank_idx) { + return (GC.start[bank_idx] <= ptr) && (ptr < GC.end[bank_idx]); +} + +static size_t scan_start_for_tag(uint16_t tag) { + if (tag == TAG_CLOSURE) { + return 3; + } + return 0; +} + +void init_gc(void) { + if (gc_enabled) { + return; + } + for (int i = 0; i < 2; ++i) { + GC.start[i] = (uint64_t *)malloc(SIZE_HEAP * sizeof(uint64_t)); + if (GC.start[i] == NULL) { + fprintf(stderr, "Failed to allocate GC bank\n"); + abort(); + } + GC.end[i] = GC.start[i] + SIZE_HEAP; + } + GC.current_bank = 0; + GC.alloc_ptr = GC.start[0]; + GC.allocations = 0; + GC.collections = 0; + GC.words_allocated_total = 0; + gc_enabled = true; +} + +void destroy_gc(void) { + if (!gc_enabled) { + return; + } + for (int i = 0; i < 2; ++i) { + free(GC.start[i]); + GC.start[i] = NULL; + GC.end[i] = NULL; + } + GC.alloc_ptr = NULL; + PTR_STACK = NULL; + gc_enabled = false; +} + +void set_ptr_stack(uint64_t *ptr) { PTR_STACK = ptr; } + +static void mark_and_copy(uint64_t *stack_slot); + +static uint64_t *forward_or_copy(uint64_t *old_payload, bool *did_copy) { + uint64_t possible_forward_ptr = *((uint64_t *)old_payload - 1); + if (in_bank((uint64_t *)possible_forward_ptr, get_current_bank_idx())) { + *did_copy = false; + return (uint64_t *)possible_forward_ptr; + } + + box_header_t *old_header = get_header(old_payload); + uint16_t payload_words = header_size(old_header); + uint16_t object_tag = header_tag(old_header); + size_t need_words = (size_t)payload_words + HEADER_WORDS; + + if (GC.alloc_ptr + need_words > GC.end[GC.current_bank]) { + fprintf(stderr, "Out of memory during GC copy\n"); + abort(); + } + + box_header_t *new_header = (box_header_t *)GC.alloc_ptr; + set_header(new_header, object_tag, payload_words); + uint64_t *new_payload = get_payload(new_header); + memcpy(new_payload, old_payload, (size_t)payload_words * sizeof(uint64_t)); + GC.alloc_ptr += need_words; + GC.words_allocated_total += need_words; + *((uint64_t *)old_payload - 1) = (uint64_t)new_payload; + *did_copy = true; + return new_payload; +} + +static void scan_object(uint64_t *obj) { + box_header_t *header = get_header(obj); + size_t start = scan_start_for_tag(header_tag(header)); + size_t payload_words = (size_t)header_size(header); + for (size_t i = start; i < payload_words; i++) { + mark_and_copy(obj + i); + } +} + +static void mark_and_copy(uint64_t *stack_slot) { + uint64_t raw_value = *stack_slot; + if (!IS_PTR(raw_value)) { + return; + } + + uint64_t *old_object_payload = (uint64_t *)raw_value; + int old_bank = get_another_bank_idx(); + if (!in_bank(old_object_payload, old_bank)) { + return; + } + + bool copied_now = false; + uint64_t *new_object_payload = forward_or_copy(old_object_payload, &copied_now); + *stack_slot = (uint64_t)new_object_payload; + + if (copied_now) { + scan_object(new_object_payload); + } +} + +void collect(void) { + uint64_t dummy; + uint64_t *current_stack_top = &dummy; + if (!PTR_STACK || current_stack_top > PTR_STACK) { + return; + } + + GC.current_bank ^= 1; + GC.alloc_ptr = GC.start[GC.current_bank]; + + for (uint64_t *stack_slot = current_stack_top; stack_slot <= PTR_STACK; stack_slot++) { + mark_and_copy(stack_slot); + } + + GC.collections++; +} + +uint64_t *gc_alloc(size_t words, uint64_t tag) { + size_t total_words = words + HEADER_WORDS; + + if (GC.alloc_ptr + total_words > GC.end[GC.current_bank]) { + collect(); + if (GC.alloc_ptr + total_words > GC.end[GC.current_bank]) { + fprintf(stderr, "Out of memory\n"); + abort(); + } + } + + box_header_t *h = (box_header_t *)GC.alloc_ptr; + set_header(h, (uint16_t)tag, (uint16_t)words); + uint64_t *obj = get_payload(h); + memset(obj, 0, words * sizeof(uint64_t)); + + GC.alloc_ptr += total_words; + GC.allocations++; + GC.words_allocated_total += total_words; + return obj; +} + +int64_t get_heap_start(void) { return 1; } +int64_t get_heap_final(void) { return (int64_t)((SIZE_HEAP << 1) | 1); } + + +void print_gc_status() { + int bank = GC.current_bank; + ptrdiff_t current_alloc = GC.alloc_ptr - GC.start[bank]; + ptrdiff_t free_space = GC.end[bank] - GC.alloc_ptr; + uint64_t total = GC.words_allocated_total; + uint64_t collections = GC.collections; + uint64_t allocations = GC.allocations; + + printf("=== GC Status ===\n"); + printf("Current allocated: %td\n", current_alloc); + printf("Total allocated: %" PRIu64 "\n", total); + printf("Free space: %td\n", free_space); + printf("Heap size: %d\n", SIZE_HEAP); + printf("Current bank index: %d\n", bank); + printf("GC collections: %" PRIu64 "\n", collections); + printf("GC allocations: %" PRIu64 "\n", allocations); + printf("=================\n"); + fflush(stdout); +} static void *eml_alloc(size_t size_in_bytes, uint64_t tag) { -#ifdef ENABLE_GC - uint64_t size_in_words = - ((uint64_t)size_in_bytes + sizeof(uint64_t) - 1) / sizeof(uint64_t); - return gc_alloc(size_in_words, tag); -#else + if (gc_enabled) { + uint64_t size_in_words = + ((uint64_t)size_in_bytes + sizeof(uint64_t) - 1) / sizeof(uint64_t); + return gc_alloc(size_in_words, tag); + } + (void)tag; return malloc(size_in_bytes); -#endif } typedef struct { diff --git a/EML/many_tests/.ocamlformat b/EML/many_tests/.ocamlformat deleted file mode 100644 index e3346c16..00000000 --- a/EML/many_tests/.ocamlformat +++ /dev/null @@ -1 +0,0 @@ -disable=true diff --git a/EML/many_tests/typed/001fac.ml b/EML/many_tests/typed/001fac.ml deleted file mode 120000 index 219cccf7..00000000 --- a/EML/many_tests/typed/001fac.ml +++ /dev/null @@ -1 +0,0 @@ -../../../manytests/typed/001fac.ml \ No newline at end of file diff --git a/EML/many_tests/typed/dune b/EML/many_tests/typed/dune deleted file mode 100644 index f66331bb..00000000 --- a/EML/many_tests/typed/dune +++ /dev/null @@ -1,2 +0,0 @@ -(cram - (deps ./001fac.ml)) diff --git a/EML/many_tests/typed/typed.t b/EML/many_tests/typed/typed.t deleted file mode 100644 index ab349b10..00000000 --- a/EML/many_tests/typed/typed.t +++ /dev/null @@ -1,2 +0,0 @@ - $ wc 001fac.ml - 6 30 105 001fac.ml diff --git a/EML/tests/Makefile b/EML/tests/Makefile new file mode 100644 index 00000000..c5eaefd0 --- /dev/null +++ b/EML/tests/Makefile @@ -0,0 +1,47 @@ +SHELL := /bin/bash + +ROOT := ../.. +EML_DIR := .. +RUNTIME_A := $(EML_DIR)/_build/default/lib/runtime/rv64_runtime.a + +ARGS := $(filter-out compile,$(MAKECMDGOALS)) +USE_GC := $(if $(filter 1 true yes on,$(GC)),1,0) +INPUT := $(firstword $(ARGS)) +EXTRA_GOALS := $(ARGS) + +.PHONY: compile $(EXTRA_GOALS) + +compile: + @set -euo pipefail; \ + FILE="$(INPUT)"; \ + if [[ -z "$$FILE" ]]; then \ + echo "Usage: make compile [GC=1] " >&2; \ + exit 1; \ + fi; \ + if [[ "$$FILE" == *.s ]]; then \ + ASM_FILE="$$FILE"; \ + (cd "$(EML_DIR)" && dune build lib/runtime/rv64_runtime.a); \ + else \ + if [[ -f "$$FILE" ]]; then SRC="$$FILE"; \ + else echo "Source file not found: $$FILE" >&2; exit 1; fi; \ + SRC="$$(realpath "$$SRC")"; \ + TMP_SRC_DIR="$$(mktemp -d)"; \ + trap 'rm -rf "$$TMP_SRC_DIR"' EXIT; \ + ASM_FILE="$$TMP_SRC_DIR/prog.s"; \ + (cd "$(EML_DIR)" && dune build lib/runtime/rv64_runtime.a && \ + if [[ "$(USE_GC)" == "1" ]]; then \ + dune exec -- EML -- -gc -fromfile "$$SRC" -o "$$ASM_FILE"; \ + else \ + dune exec -- EML -- -fromfile "$$SRC" -o "$$ASM_FILE"; \ + fi); \ + fi; \ + TMP_BIN_DIR="$$(mktemp -d)"; \ + trap 'rm -rf "$$TMP_BIN_DIR"' EXIT; \ + OBJ_FILE="$$TMP_BIN_DIR/prog.o"; \ + EXE_FILE="$$TMP_BIN_DIR/prog.exe"; \ + riscv64-linux-gnu-as -march=rv64gc "$$ASM_FILE" -o "$$OBJ_FILE"; \ + riscv64-linux-gnu-gcc "$$OBJ_FILE" "$(RUNTIME_A)" -o "$$EXE_FILE"; \ + qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64 "$$EXE_FILE" + +$(EXTRA_GOALS): + @: diff --git a/EML/tests/dune b/EML/tests/dune index fce0abe0..0179a688 100644 --- a/EML/tests/dune +++ b/EML/tests/dune @@ -12,5 +12,16 @@ (applies_to riscv) (deps (package EML) + (file Makefile) (file ../lib/runtime/rv64_runtime.a) - (source_tree manytests))) + (source_tree gc_tests) + (source_tree many_tests))) + +(cram + (applies_to gc_tests) + (deps + (package EML) + (file Makefile) + (file ../lib/runtime/rv64_runtime.a) + (source_tree gc_tests) + (source_tree many_tests))) diff --git a/EML/tests/gc_tests.t b/EML/tests/gc_tests.t new file mode 100644 index 00000000..3148404e --- /dev/null +++ b/EML/tests/gc_tests.t @@ -0,0 +1,120 @@ +Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev +SPDX-License-Identifier: LGPL-3.0-or-later + + + $ make compile GC=1 gc_tests/closure/01_add5_staged_partial_gc.ml + === GC Status === + Current allocated: 18 + Total allocated: 18 + Free space: 782 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 2 + ================= + === GC Status === + Current allocated: 27 + Total allocated: 27 + Free space: 773 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 3 + ================= + === GC Status === + Current allocated: 18 + Total allocated: 45 + Free space: 782 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 3 + ================= + === GC Status === + Current allocated: 27 + Total allocated: 81 + Free space: 773 + Heap size: 800 + Current bank index: 0 + GC collections: 2 + GC allocations: 4 + ================= + 15 + + $ make compile GC=1 gc_tests/closure/02_affine_live_dead_gc.ml + === GC Status === + Current allocated: 28 + Total allocated: 28 + Free space: 772 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 4 + ================= + === GC Status === + Current allocated: 14 + Total allocated: 42 + Free space: 786 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 4 + ================= + === GC Status === + Current allocated: 28 + Total allocated: 56 + Free space: 772 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 6 + ================= + === GC Status === + Current allocated: 21 + Total allocated: 77 + Free space: 779 + Heap size: 800 + Current bank index: 0 + GC collections: 2 + GC allocations: 6 + ================= + 17 + + $ make compile GC=1 gc_tests/closure/03_add10_staged_partial_gc.ml + === GC Status === + Current allocated: 28 + Total allocated: 28 + Free space: 772 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 2 + ================= + === GC Status === + Current allocated: 42 + Total allocated: 42 + Free space: 758 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 3 + ================= + === GC Status === + Current allocated: 28 + Total allocated: 70 + Free space: 772 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 3 + ================= + === GC Status === + Current allocated: 42 + Total allocated: 126 + Free space: 758 + Heap size: 800 + Current bank index: 0 + GC collections: 2 + GC allocations: 4 + ================= + 55 diff --git a/EML/tests/gc_tests/closure/01_add5_staged_partial_gc.ml b/EML/tests/gc_tests/closure/01_add5_staged_partial_gc.ml new file mode 100644 index 00000000..e4de1f35 --- /dev/null +++ b/EML/tests/gc_tests/closure/01_add5_staged_partial_gc.ml @@ -0,0 +1,14 @@ +let add5 a b c d e = a + b + c + d + e + +let main = + let p1 = add5 1 2 in + let _ = print_gc_status () in + let p2 = p1 3 in + let _ = print_gc_status () in + let _ = collect () in + let _ = print_gc_status () in + let p3 = p2 4 in + let _ = collect () in + let _ = print_gc_status () in + print_int (p3 5) +;; diff --git a/EML/tests/gc_tests/closure/02_affine_live_dead_gc.ml b/EML/tests/gc_tests/closure/02_affine_live_dead_gc.ml new file mode 100644 index 00000000..91ac6703 --- /dev/null +++ b/EML/tests/gc_tests/closure/02_affine_live_dead_gc.ml @@ -0,0 +1,14 @@ +let affine a b x = a * x + b + +let main = + let live = affine 2 7 in + let _dead = affine 100 1 in + let _ = print_gc_status () in + let _ = collect () in + let _ = print_gc_status () in + let _dead2 = affine 50 3 in + let _ = print_gc_status () in + let _ = collect () in + let _ = print_gc_status () in + print_int (live 5) +;; diff --git a/EML/tests/gc_tests/closure/03_add10_staged_partial_gc.ml b/EML/tests/gc_tests/closure/03_add10_staged_partial_gc.ml new file mode 100644 index 00000000..82a8f155 --- /dev/null +++ b/EML/tests/gc_tests/closure/03_add10_staged_partial_gc.ml @@ -0,0 +1,15 @@ +let add10 a b c d e f g h i j = a + b + c + d + e + f + g + h + i + j + +let main = + let c1 = add10 1 2 3 in + let _ = print_gc_status () in + let c2 = c1 4 5 in + let _ = print_gc_status () in + let _ = collect () in + let _ = print_gc_status () in + let c3 = c2 6 7 in + let _ = collect () in + let _ = print_gc_status () in + let c4 = c3 8 9 in + print_int (c4 10) +;; diff --git a/EML/tests/many_tests b/EML/tests/many_tests new file mode 120000 index 00000000..dbb09231 --- /dev/null +++ b/EML/tests/many_tests @@ -0,0 +1 @@ +/home/danil/comp25/manytests \ No newline at end of file diff --git a/EML/tests/riscv.t b/EML/tests/riscv.t index d777534e..913731cf 100644 --- a/EML/tests/riscv.t +++ b/EML/tests/riscv.t @@ -6,9 +6,7 @@ SPDX-License-Identifier: LGPL-3.0-or-later > > let main = print_int (fac 4) - $ riscv64-linux-gnu-as -march=rv64gc fact.s -o fact.o - $ riscv64-linux-gnu-gcc fact.o ../lib/runtime/rv64_runtime.a -o fact.exe - $ qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64 ./fact.exe + $ make compile fact.s 24 $ EML -o fibo.s < > let main = print_int (fib 6) - $ riscv64-linux-gnu-as -march=rv64gc fibo.s -o fibo.o - $ riscv64-linux-gnu-gcc fibo.o ../lib/runtime/rv64_runtime.a -o fibo.exe - $ qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64 ./fibo.exe + $ make compile fibo.s 8 + +====================== without gc ====================== + + $ make compile many_tests/typed/001fac.ml + 24 + + $ make compile many_tests/typed/003fib.ml + 33 + + $ make compile many_tests/typed/004manyargs.ml + 1111111111110100 + + $ make compile many_tests/typed/005fix.ml + 720 + + $ make compile many_tests/typed/006partial2.ml + 1237 + + + + $ make compile many_tests/typed/010fac_anf.ml + + $ make compile many_tests/typed/010faccps_ll.ml + 24 + + $ make compile many_tests/typed/010fibcps_ll.ml + 8 + From 42ee384811194e47d2ebaaa4b56be45b8be61fcf Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Sun, 8 Mar 2026 00:47:44 +0300 Subject: [PATCH 35/74] fix fails --- EML/bin/EML.ml | 21 +- EML/lib/backend/ricsv/analysis.ml | 10 +- EML/lib/backend/ricsv/architecture.ml | 24 +- EML/lib/backend/ricsv/generator.ml | 25 +- EML/lib/backend/ricsv/runner.ml | 2 +- EML/lib/backend/ricsv/runner.mli | 2 +- EML/lib/frontend/binutils.mli | 2 +- EML/lib/frontend/inferencer.ml | 19 ++ EML/lib/frontend/runner.ml | 2 +- EML/lib/middleend/cc.ml | 19 ++ EML/lib/middleend/ll.ml | 11 +- EML/tests/Makefile | 25 +- EML/tests/dune | 4 +- EML/tests/riscv.t | 17 -- EML/tests/riscv_tests.ml | 330 ++++++++++++++++---------- 15 files changed, 307 insertions(+), 206 deletions(-) diff --git a/EML/bin/EML.ml b/EML/bin/EML.ml index 40a928ee..5e8bfaf5 100644 --- a/EML/bin/EML.ml +++ b/EML/bin/EML.ml @@ -6,14 +6,13 @@ open Stdio open EML_lib open Frontend -type backend = Ricsv - type opts = { input_file : string option ; output_file : string option + ; enable_gc : bool } -let default_opts = { input_file = None; output_file = None } +let default_opts = { input_file = None; output_file = None; enable_gc = false } type env = Inferencer.TypeEnv.t @@ -46,11 +45,11 @@ let with_middleend ast _env' f : (env, unit) Result.t = | Ok anf_ast -> f anf_ast ;; -let run_compile text env oc : (env, unit) Result.t = +let run_compile ~enable_gc text env oc : (env, unit) Result.t = with_frontend text env oc (fun ast env' _out_list -> with_middleend ast env' (fun anf_ast -> let ppf = Format.formatter_of_out_channel oc in - let res = Backend.Ricsv.Runner.gen_program ppf anf_ast in + let res = Backend.Ricsv.Runner.gen_program ~enable_gc ppf anf_ast in match res with | Ok () -> Ok env' | Error msg -> @@ -63,8 +62,10 @@ let run_compile text env oc : (env, unit) Result.t = (* ------------------------------------------------------------------------- *) let compiler opts : (unit, unit) Result.t = - let run text env oc = run_compile text env oc in - let env0 = Inferencer.TypeEnv.env_with_print_funs_and_gc in + let run text env oc = run_compile ~enable_gc:opts.enable_gc text env oc in + let env0 = + if opts.enable_gc then Inferencer.TypeEnv.env_with_gc else Inferencer.TypeEnv.initial_env + in let with_output f = match opts.output_file with | Some path -> Out_channel.with_file path ~f @@ -87,15 +88,19 @@ let compiler opts : (unit, unit) Result.t = let parse_args () : (opts, unit) Result.t = let input_file = ref default_opts.input_file in let output_file = ref default_opts.output_file in + let enable_gc = ref default_opts.enable_gc in let positional_seen = ref false in let open Arg in let spec = [ "-fromfile", String (fun s -> input_file := Some s), " Read source from file" ; "-o", String (fun s -> output_file := Some s), " Write output to file" + ; "-gc", Set enable_gc, " Enable GC runtime support" ] in parse spec (fun _ -> positional_seen := true) "Compiler for custom language"; - if !positional_seen then Error () else compiler + if !positional_seen + then Error () + else Ok { input_file = !input_file; output_file = !output_file; enable_gc = !enable_gc } ;; let () = diff --git a/EML/lib/backend/ricsv/analysis.ml b/EML/lib/backend/ricsv/analysis.ml index 07d3a279..3b2674f9 100644 --- a/EML/lib/backend/ricsv/analysis.ml +++ b/EML/lib/backend/ricsv/analysis.ml @@ -28,12 +28,12 @@ and slots_in_cexpr = function | ComplexBinOper (_, left, right) -> slots_in_imm left + slots_in_imm right | ComplexUnarOper (_, imm) -> slots_in_imm imm | ComplexTuple (first, second, rest) -> - List.fold_left (first :: second :: rest) ~init:0 ~f:(fun acc e -> + Base.List.fold_left (first :: second :: rest) ~init:0 ~f:(fun acc e -> acc + slots_in_imm e) | ComplexField (imm, _) -> slots_in_imm imm | ComplexList imm_list -> let n = List.length imm_list in - n + List.fold_left (fun acc e -> acc + slots_in_imm e) 0 imm_list + n + Base.List.fold_left imm_list ~init:0 ~f:(fun acc e -> acc + slots_in_imm e) | ComplexApp (first, second, rest) -> (* +1 for curried-call intermediate; +1 per arg for spill_dangerous_args. +8 for spill_caller_saved_vars_to_frame at start of every invocation (can spill a0-a7). @@ -41,7 +41,11 @@ and slots_in_cexpr = function let args = first :: second :: rest in let nargs = List.length args in let extra = if nargs >= 2 then 12 else 0 in - 1 + 8 + nargs + extra + List.fold_left (fun acc e -> acc + slots_in_imm e) 0 args + 1 + + 8 + + nargs + + extra + + Base.List.fold_left args ~init:0 ~f:(fun acc e -> acc + slots_in_imm e) | ComplexOption None -> 0 | ComplexOption (Some imm) -> slots_in_imm imm | ComplexLambda (_, body) -> slots_in_anf body diff --git a/EML/lib/backend/ricsv/architecture.ml b/EML/lib/backend/ricsv/architecture.ml index 9067e2f1..9587ebf8 100644 --- a/EML/lib/backend/ricsv/architecture.ml +++ b/EML/lib/backend/ricsv/architecture.ml @@ -125,20 +125,34 @@ module Riscv_backend = struct let frame_header_size = 2 * word_size let saved_fp_offset = 0 let saved_ra_offset = word_size + let riscv_imm12_min = -2048 + let riscv_imm12_max = 2047 + + let fits_imm12 n = n >= riscv_imm12_min && n <= riscv_imm12_max type location = | Loc_reg of reg | Loc_mem of offset let prologue ~enable_gc ~name ~stack_size = - let ra_slot = sp, stack_size - saved_ra_offset in - let fp_slot = sp, stack_size - frame_header_size in + let ra_slot = fp, saved_ra_offset in + let fp_slot = fp, saved_fp_offset in + let dec_sp = + if fits_imm12 (-stack_size) + then addi sp sp (-stack_size) + else li t0 stack_size @ sub sp sp t0 + in + let set_fp = + let ofs = stack_size - frame_header_size in + if fits_imm12 ofs then addi fp sp ofs else li t0 ofs @ add fp sp t0 + in let base = label name - @ addi sp sp (-stack_size) + @ mv t1 fp + @ dec_sp + @ set_fp @ sd ra ra_slot - @ sd fp fp_slot - @ addi fp sp (stack_size - frame_header_size) + @ sd t1 fp_slot in if enable_gc && String.equal name "main" then base @ call "init_gc" @ mv a0 fp @ call "set_ptr_stack" diff --git a/EML/lib/backend/ricsv/generator.ml b/EML/lib/backend/ricsv/generator.ml index 7b5ea8c7..ce6dd5d9 100644 --- a/EML/lib/backend/ricsv/generator.ml +++ b/EML/lib/backend/ricsv/generator.ml @@ -359,13 +359,8 @@ let flush_instr_buffer ppf = return () ;; -<<<<<<< HEAD -let gen_func asm_name params body frame_sz ppf = - fprintf ppf "\n .globl %s\n .type %s, @function\n" asm_name asm_name; -======= let gen_func ~enable_gc func_name params body frame_sz ppf = fprintf ppf "\n .globl %s\n .type %s, @function\n" func_name func_name; ->>>>>>> 698314c (add gc & some tests) let args = List.length params in let params_reg, params_stack = ( Base.List.take params (min args arg_regs_count) @@ -383,34 +378,22 @@ let gen_func ~enable_gc func_name params body frame_sz ppf = bind_param_to_stack e i p) in let* () = set_env env in -<<<<<<< HEAD - let* () = append (prologue ~name:asm_name ~stack_size:frame_sz) in -======= let* () = append (prologue ~enable_gc ~name:func_name ~stack_size:frame_sz) in ->>>>>>> 698314c (add gc & some tests) let* st = get in let* () = put { st with frame_offset = 0 } in let* () = spill_params_to_frame params_reg in let* () = gen_anf result_reg body in -<<<<<<< HEAD - let* () = append (epilogue ~is_main:(String.equal asm_name "main")) in -======= let* () = append (epilogue ~enable_gc ~is_main:(String.equal func_name "main")) in ->>>>>>> 698314c (add gc & some tests) let* () = flush_instr_buffer ppf in return () ;; let gen_program ?(enable_gc = false) ppf (analysis : analysis_result) = fprintf ppf ".section .text"; -<<<<<<< HEAD - let base = Frontend.Builtins.all_runtime_prims in -======= let base = Config.primitive_arities ~enable_gc in ->>>>>>> 698314c (add gc & some tests) let arity_map = List.fold_left - (fun map { Frontend.Builtins.name; arity } -> + (fun map { Config.name; arity } -> Base.Map.set map ~key:name ~data:arity) analysis.arity_map base @@ -429,12 +412,8 @@ let gen_program ?(enable_gc = false) ppf (analysis : analysis_result) = Base.List.foldi analysis.functions ~init:(return ()) ~f:(fun i acc fn -> let frame_sz = (2 + fn.slots_count) * word_size in let* () = acc in -<<<<<<< HEAD let* () = modify (fun st -> { st with current_func_index = i }) in - gen_func fn.asm_name fn.params fn.body frame_sz ppf) -======= - gen_func ~enable_gc fn.func_name fn.params fn.body frame_sz ppf) ->>>>>>> 698314c (add gc & some tests) + gen_func ~enable_gc fn.asm_name fn.params fn.body frame_sz ppf) in match run comp init with | Ok ((), _) -> diff --git a/EML/lib/backend/ricsv/runner.ml b/EML/lib/backend/ricsv/runner.ml index 752784b3..8bd38e7f 100644 --- a/EML/lib/backend/ricsv/runner.ml +++ b/EML/lib/backend/ricsv/runner.ml @@ -5,7 +5,7 @@ open Middleend.Anf open Analysis -let gen_program ~enable_gc ppf (program : anf_program) = +let gen_program ?(enable_gc = false) ppf (program : anf_program) = let analysis = analyze program in Generator.gen_program ~enable_gc ppf analysis ;; diff --git a/EML/lib/backend/ricsv/runner.mli b/EML/lib/backend/ricsv/runner.mli index f242edf7..e5df8f59 100644 --- a/EML/lib/backend/ricsv/runner.mli +++ b/EML/lib/backend/ricsv/runner.mli @@ -6,4 +6,4 @@ val gen_program : ?enable_gc:bool -> Format.formatter -> Middleend.Anf.anf_program - -> unit + -> (unit, string) result diff --git a/EML/lib/frontend/binutils.mli b/EML/lib/frontend/binutils.mli index 80aee0a7..62ba547c 100644 --- a/EML/lib/frontend/binutils.mli +++ b/EML/lib/frontend/binutils.mli @@ -7,5 +7,5 @@ type primitive = ; arity : int } -val all_runtime_prims : bool -> primitive list +val all_runtime_prims : primitive list val builtin_global_names : string list diff --git a/EML/lib/frontend/inferencer.ml b/EML/lib/frontend/inferencer.ml index b7266f2c..e6d70458 100644 --- a/EML/lib/frontend/inferencer.ml +++ b/EML/lib/frontend/inferencer.ml @@ -246,6 +246,23 @@ module TypeEnv = struct ~key:"print_bool" ~data:(Scheme.Scheme (VarSet.empty, TyArrow (TyPrim "bool", TyPrim "unit"))) ;; + + let env_with_gc = + let open Base.Map in + initial_env + |> set + ~key:"collect" + ~data:(Scheme.Scheme (VarSet.empty, TyArrow (TyPrim "unit", TyPrim "unit"))) + |> set + ~key:"print_gc_status" + ~data:(Scheme.Scheme (VarSet.empty, TyArrow (TyPrim "unit", TyPrim "unit"))) + |> set + ~key:"get_heap_start" + ~data:(Scheme.Scheme (VarSet.empty, TyArrow (TyPrim "unit", TyPrim "int"))) + |> set + ~key:"get_heap_final" + ~data:(Scheme.Scheme (VarSet.empty, TyArrow (TyPrim "unit", TyPrim "int"))) + ;; end open ResultMonad @@ -343,6 +360,7 @@ let rec infer_pattern env = function | PatUnit -> return (Substitution.empty, TyPrim "unit", env) | PatConstruct (name, opt) -> (match name, opt with + | "()", None -> return (Substitution.empty, TyPrim "unit", env) | "None", None -> let* fresh = fresh_var in return (Substitution.empty, TyOption fresh, env) @@ -626,6 +644,7 @@ let rec infer_expr env = function | None -> fail (RHS "Empty match")) | ExpConstruct (name, opt_expr) -> (match name, opt_expr with + | "()", None -> return (Substitution.empty, TyPrim "unit") | "None", None -> let* tv = fresh_var in return (Substitution.empty, TyOption tv) diff --git a/EML/lib/frontend/runner.ml b/EML/lib/frontend/runner.ml index 7e3ba283..fdda2931 100644 --- a/EML/lib/frontend/runner.ml +++ b/EML/lib/frontend/runner.ml @@ -25,5 +25,5 @@ let run (text : string) (env : TypeEnv.t) | Ok ast -> (match Inferencer.ResultMonad.run (infer_structure env ast) with | Error e -> Error (Infer e) - | Ok (env', out_list) -> Ok (ast, env', out_list)) + | Ok (_subst, env') -> Ok (ast, env', [])) ;; diff --git a/EML/lib/middleend/cc.ml b/EML/lib/middleend/cc.ml index 50dd770d..3fd9ed79 100644 --- a/EML/lib/middleend/cc.ml +++ b/EML/lib/middleend/cc.ml @@ -22,6 +22,8 @@ let vars_in_pattern p = | PatConstruct (_, None) -> VarSet.empty | PatConstruct (_, Some q) -> walk q | PatType (q, _) -> walk q + | PatTuple (p1, p2, rest) -> + union_map_list walk (p1 :: p2 :: rest) | PatUnit | PatList _ | PatOption _ -> VarSet.empty in walk p @@ -67,6 +69,8 @@ let rec collect_free_vars = function | ExpTypeAnnotation (e, _) -> collect_free_vars e | ExpBinOper (_, e1, e2) -> VarSet.union (collect_free_vars e1) (collect_free_vars e2) | ExpUnarOper (_, e) -> collect_free_vars e + | ExpTuple (e1, e2, rest) -> + union_map_list collect_free_vars (e1 :: e2 :: rest) | ExpList es -> union_map_list collect_free_vars es | ExpOption e_opt -> (match e_opt with @@ -115,6 +119,8 @@ let extend_capture_env env pat captured_set = | PatAny | PatConst _ | PatConstruct (_, None) -> acc | PatVariable name -> EnvMap.add name captured_set acc | PatConstruct (_, Some p) | PatType (p, _) -> add_captures_for_pat acc p + | PatTuple (p1, p2, rest) -> + List.fold_left add_captures_for_pat acc (p1 :: p2 :: rest) | PatUnit | PatList _ | PatOption _ -> acc in add_captures_for_pat env pat @@ -226,6 +232,19 @@ and convert_expr = function (return []) in return (ExpList es') + | ExpTuple (e1, e2, rest) -> + let* e1' = convert_expr e1 in + let* e2' = convert_expr e2 in + let* rest' = + List.fold_right + (fun e acc -> + let* e' = convert_expr e in + let* acc' = acc in + return (e' :: acc')) + rest + (return []) + in + return (ExpTuple (e1', e2', rest')) | ExpOption e_opt -> (match e_opt with | None -> return (ExpOption None) diff --git a/EML/lib/middleend/ll.ml b/EML/lib/middleend/ll.ml index 0ceb8eeb..5595250f 100644 --- a/EML/lib/middleend/ll.ml +++ b/EML/lib/middleend/ll.ml @@ -37,6 +37,7 @@ let names_in_pattern p = | PatConstruct (_, None) -> [] | PatConstruct (_, Some q) -> collect q | PatType (q, _) -> collect q + | PatTuple (p1, p2, rest) -> List.concat (List.map collect (p1 :: p2 :: rest)) | PatUnit -> [] | PatList ps -> List.concat (List.map collect ps) | PatOption p_opt -> @@ -342,6 +343,14 @@ module Make (N : NAMING) = struct | ExpUnarOper (op, e) -> let* res = lift_expr (inner ctx) e in return { structures = res.structures; expr = ExpUnarOper (op, res.expr) } + | ExpTuple (e1, e2, rest) -> + let* first = lift_expr (inner ctx) e1 in + let* second = lift_expr (inner ctx) e2 in + let* rest_structures, rest_exprs = list rest (lift_expr (inner ctx)) in + return + { structures = first.structures @ second.structures @ rest_structures + ; expr = ExpTuple (first.expr, second.expr, rest_exprs) + } | ExpList es -> let* elem_structures, lifted_elems = list es (lift_expr (inner ctx)) in return { structures = elem_structures; expr = ExpList lifted_elems } @@ -349,8 +358,6 @@ module Make (N : NAMING) = struct | ExpOption (Some e) -> let* res = lift_expr (inner ctx) e in return { structures = res.structures; expr = ExpOption (Some res.expr) } - (* | ExpTuple (e1, e2, rest) -> *) - and lift_binds (ctx : context) binds : (structure list * (pattern * expr) list) t = fold_binds ctx binds (fun ctx _ e -> lift_expr ctx e) diff --git a/EML/tests/Makefile b/EML/tests/Makefile index c5eaefd0..511960a4 100644 --- a/EML/tests/Makefile +++ b/EML/tests/Makefile @@ -1,8 +1,8 @@ SHELL := /bin/bash -ROOT := ../.. -EML_DIR := .. -RUNTIME_A := $(EML_DIR)/_build/default/lib/runtime/rv64_runtime.a +RUNTIME_A_SANDBOX := ../lib/runtime/rv64_runtime.a +RUNTIME_A_DEFAULT := ../_build/default/lib/runtime/rv64_runtime.a +EML_BIN ?= /home/danil/comp25/EML/_build/default/bin/EML.exe ARGS := $(filter-out compile,$(MAKECMDGOALS)) USE_GC := $(if $(filter 1 true yes on,$(GC)),1,0) @@ -20,7 +20,6 @@ compile: fi; \ if [[ "$$FILE" == *.s ]]; then \ ASM_FILE="$$FILE"; \ - (cd "$(EML_DIR)" && dune build lib/runtime/rv64_runtime.a); \ else \ if [[ -f "$$FILE" ]]; then SRC="$$FILE"; \ else echo "Source file not found: $$FILE" >&2; exit 1; fi; \ @@ -28,19 +27,23 @@ compile: TMP_SRC_DIR="$$(mktemp -d)"; \ trap 'rm -rf "$$TMP_SRC_DIR"' EXIT; \ ASM_FILE="$$TMP_SRC_DIR/prog.s"; \ - (cd "$(EML_DIR)" && dune build lib/runtime/rv64_runtime.a && \ - if [[ "$(USE_GC)" == "1" ]]; then \ - dune exec -- EML -- -gc -fromfile "$$SRC" -o "$$ASM_FILE"; \ - else \ - dune exec -- EML -- -fromfile "$$SRC" -o "$$ASM_FILE"; \ - fi); \ + if [[ "$(USE_GC)" == "1" ]]; then \ + "$(EML_BIN)" -gc -fromfile "$$SRC" -o "$$ASM_FILE"; \ + else \ + "$(EML_BIN)" -fromfile "$$SRC" -o "$$ASM_FILE"; \ + fi; \ + fi; \ + if [[ -f "$(RUNTIME_A_SANDBOX)" ]]; then \ + RUNTIME_A="$(RUNTIME_A_SANDBOX)"; \ + else \ + RUNTIME_A="$(RUNTIME_A_DEFAULT)"; \ fi; \ TMP_BIN_DIR="$$(mktemp -d)"; \ trap 'rm -rf "$$TMP_BIN_DIR"' EXIT; \ OBJ_FILE="$$TMP_BIN_DIR/prog.o"; \ EXE_FILE="$$TMP_BIN_DIR/prog.exe"; \ riscv64-linux-gnu-as -march=rv64gc "$$ASM_FILE" -o "$$OBJ_FILE"; \ - riscv64-linux-gnu-gcc "$$OBJ_FILE" "$(RUNTIME_A)" -o "$$EXE_FILE"; \ + riscv64-linux-gnu-gcc "$$OBJ_FILE" "$$RUNTIME_A" -o "$$EXE_FILE"; \ qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64 "$$EXE_FILE" $(EXTRA_GOALS): diff --git a/EML/tests/dune b/EML/tests/dune index 0179a688..98401366 100644 --- a/EML/tests/dune +++ b/EML/tests/dune @@ -11,7 +11,7 @@ (cram (applies_to riscv) (deps - (package EML) + (file ../bin/EML.exe) (file Makefile) (file ../lib/runtime/rv64_runtime.a) (source_tree gc_tests) @@ -20,7 +20,7 @@ (cram (applies_to gc_tests) (deps - (package EML) + (file ../bin/EML.exe) (file Makefile) (file ../lib/runtime/rv64_runtime.a) (source_tree gc_tests) diff --git a/EML/tests/riscv.t b/EML/tests/riscv.t index 913731cf..480a9044 100644 --- a/EML/tests/riscv.t +++ b/EML/tests/riscv.t @@ -1,23 +1,6 @@ Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev SPDX-License-Identifier: LGPL-3.0-or-later - $ EML -o fact.s < let rec fac x = if x = 0 then 1 else x * fac (x - 1) - > - > let main = print_int (fac 4) - - $ make compile fact.s - 24 - - $ EML -o fibo.s < let rec fib x = if x <= 1 then x else fib (x - 1) + fib (x - 2) - > - > let main = print_int (fib 6) - - $ make compile fibo.s - 8 - -====================== without gc ====================== $ make compile many_tests/typed/001fac.ml 24 diff --git a/EML/tests/riscv_tests.ml b/EML/tests/riscv_tests.ml index 737f7626..ba67ec93 100644 --- a/EML/tests/riscv_tests.ml +++ b/EML/tests/riscv_tests.ml @@ -17,9 +17,11 @@ let compile src : string = | Ok anf -> let buf = Buffer.create 1024 in let ppf = Format.formatter_of_buffer buf in - Backend.Ricsv.Runner.gen_program ppf anf; - Format.pp_print_flush ppf (); - Buffer.contents buf) + (match Backend.Ricsv.Runner.gen_program ppf anf with + | Ok () -> + Format.pp_print_flush ppf (); + Buffer.contents buf + | Error e -> "Codegen error: " ^ e)) ;; let run src = Format.printf "%s" (compile src) @@ -32,10 +34,11 @@ let%expect_test "unary_minus" = .globl x .type x, @function x: + mv t1, fp addi sp, sp, -16 - sd ra, 8(sp) - sd fp, 0(sp) addi fp, sp, 0 + sd ra, 8(fp) + sd t1, 0(fp) li t0, 11 li a0, 1 sub a0, a0, t0 @@ -43,6 +46,21 @@ x: ld ra, 8(fp) ld fp, 0(fp) ret + + .globl main + .type main, @function +main: + mv t1, fp + addi sp, sp, -16 + addi fp, sp, 0 + sd ra, 8(fp) + sd t1, 0(fp) + li a0, 1 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret |}] ;; @@ -54,16 +72,32 @@ let%expect_test "unary_not" = .globl x .type x, @function x: + mv t1, fp addi sp, sp, -16 - sd ra, 8(sp) - sd fp, 0(sp) addi fp, sp, 0 + sd ra, 8(fp) + sd t1, 0(fp) li t0, 3 xori a0, t0, 3 addi sp, fp, 16 ld ra, 8(fp) ld fp, 0(fp) ret + + .globl main + .type main, @function +main: + mv t1, fp + addi sp, sp, -16 + addi fp, sp, 0 + sd ra, 8(fp) + sd t1, 0(fp) + li a0, 1 + addi sp, fp, 16 + ld ra, 8(fp) + ld fp, 0(fp) + li a0, 0 + ret |}] ;; @@ -75,10 +109,11 @@ let%expect_test "unit_main" = .globl main .type main, @function main: + mv t1, fp addi sp, sp, -16 - sd ra, 8(sp) - sd fp, 0(sp) addi fp, sp, 0 + sd ra, 8(fp) + sd t1, 0(fp) li a0, 1 addi sp, fp, 16 ld ra, 8(fp) @@ -96,10 +131,11 @@ let%expect_test "mul_only" = .globl main .type main, @function main: + mv t1, fp addi sp, sp, -16 - sd ra, 8(sp) - sd fp, 0(sp) addi fp, sp, 0 + sd ra, 8(fp) + sd t1, 0(fp) li t0, 15 li t1, 17 srli t0, t0, 1 @@ -126,13 +162,14 @@ let%expect_test "double_fn" = .globl double .type double, @function double: + mv t1, fp addi sp, sp, -16 - sd ra, 8(sp) - sd fp, 0(sp) addi fp, sp, 0 - mv t0, a0 - mv t1, a0 - mv a1, a0 + sd ra, 8(fp) + sd t1, 0(fp) + sd a0, -8(fp) + ld t0, -8(fp) + ld t1, -8(fp) add a0, t0, t1 addi a0, a0, -1 addi sp, fp, 16 @@ -143,10 +180,11 @@ double: .globl main .type main, @function main: - addi sp, sp, -16 - sd ra, 8(sp) - sd fp, 0(sp) - addi fp, sp, 0 + mv t1, fp + addi sp, sp, -200 + addi fp, sp, 184 + sd ra, 8(fp) + sd t1, 0(fp) li a0, 43 call double addi sp, fp, 16 @@ -169,23 +207,27 @@ let%expect_test "abs_fn" = .globl abs .type abs, @function abs: - addi sp, sp, -24 - sd ra, 16(sp) - sd fp, 8(sp) - addi fp, sp, 8 - mv t0, a0 + mv t1, fp + addi sp, sp, -32 + addi fp, sp, 16 + sd ra, 8(fp) + sd t1, 0(fp) + sd a0, -8(fp) + ld t0, -8(fp) li t1, 1 - mv a1, a0 slt a0, t0, t1 - sd a0, -8(fp) + add a0, a0, a0 + addi a0, a0, 1 + sd a0, -16(fp) + ld t0, -16(fp) + li t1, 1 + beq t0, t1, else_0 ld t0, -8(fp) - beq t0, zero, else_0 - mv t0, a1 li a0, 1 sub a0, a0, t0 j end_0 else_0: - mv a0, a1 + ld a0, -8(fp) end_0: addi sp, fp, 16 ld ra, 8(fp) @@ -195,10 +237,11 @@ end_0: .globl main .type main, @function main: - addi sp, sp, -16 - sd ra, 8(sp) - sd fp, 0(sp) - addi fp, sp, 0 + mv t1, fp + addi sp, sp, -200 + addi fp, sp, 184 + sd ra, 8(fp) + sd t1, 0(fp) li a0, 15 call abs addi sp, fp, 16 @@ -222,13 +265,14 @@ let%expect_test "nested_calls" = .globl sq .type sq, @function sq: + mv t1, fp addi sp, sp, -16 - sd ra, 8(sp) - sd fp, 0(sp) addi fp, sp, 0 - mv t0, a0 - mv t1, a0 - mv a1, a0 + sd ra, 8(fp) + sd t1, 0(fp) + sd a0, -8(fp) + ld t0, -8(fp) + ld t1, -8(fp) srli t0, t0, 1 addi t1, t1, -1 mul a0, t0, t1 @@ -241,11 +285,11 @@ sq: .globl sum_of_squares .type sum_of_squares, @function sum_of_squares: - addi sp, sp, -32 - sd ra, 24(sp) - sd fp, 16(sp) - addi fp, sp, 16 - addi sp, sp, -16 + mv t1, fp + addi sp, sp, -400 + addi fp, sp, 384 + sd ra, 8(fp) + sd t1, 0(fp) sd a0, -8(fp) sd a1, -16(fp) ld a0, -8(fp) @@ -266,10 +310,11 @@ sum_of_squares: .globl main .type main, @function main: - addi sp, sp, -16 - sd ra, 8(sp) - sd fp, 0(sp) - addi fp, sp, 0 + mv t1, fp + addi sp, sp, -208 + addi fp, sp, 192 + sd ra, 8(fp) + sd t1, 0(fp) li a0, 7 li a1, 9 call sum_of_squares @@ -293,31 +338,33 @@ let%expect_test "fibonacci" = .globl fib .type fib, @function fib: - addi sp, sp, -56 - sd ra, 48(sp) - sd fp, 40(sp) - addi fp, sp, 40 - mv t0, a0 - li t1, 5 - mv a1, a0 - slt a0, t0, t1 + mv t1, fp + addi sp, sp, -432 + addi fp, sp, 416 + sd ra, 8(fp) + sd t1, 0(fp) sd a0, -8(fp) ld t0, -8(fp) - beq t0, zero, else_0 + li t1, 5 + slt a0, t0, t1 + add a0, a0, a0 + addi a0, a0, 1 + sd a0, -16(fp) + ld t0, -16(fp) + li t1, 1 + beq t0, t1, else_0 li a0, 3 j end_0 else_0: - mv t0, a1 + ld t0, -8(fp) li t1, 3 sub a0, t0, t1 addi a0, a0, 1 - sd a0, -16(fp) - addi sp, sp, -8 - sd a1, -24(fp) - ld a0, -16(fp) + sd a0, -24(fp) + ld a0, -24(fp) call fib sd a0, -32(fp) - ld t0, -24(fp) + ld t0, -8(fp) li t1, 5 sub a0, t0, t1 addi a0, a0, 1 @@ -338,10 +385,11 @@ end_0: .globl main .type main, @function main: - addi sp, sp, -16 - sd ra, 8(sp) - sd fp, 0(sp) - addi fp, sp, 0 + mv t1, fp + addi sp, sp, -200 + addi fp, sp, 184 + sd ra, 8(fp) + sd t1, 0(fp) li a0, 13 call fib addi sp, fp, 16 @@ -364,14 +412,17 @@ let%expect_test "is_positive" = .globl is_positive .type is_positive, @function is_positive: + mv t1, fp addi sp, sp, -16 - sd ra, 8(sp) - sd fp, 0(sp) addi fp, sp, 0 - mv t0, a0 + sd ra, 8(fp) + sd t1, 0(fp) + sd a0, -8(fp) + ld t0, -8(fp) li t1, 1 - mv a1, a0 slt a0, t1, t0 + add a0, a0, a0 + addi a0, a0, 1 addi sp, fp, 16 ld ra, 8(fp) ld fp, 0(fp) @@ -380,10 +431,11 @@ is_positive: .globl main .type main, @function main: - addi sp, sp, -16 - sd ra, 8(sp) - sd fp, 0(sp) - addi fp, sp, 0 + mv t1, fp + addi sp, sp, -200 + addi fp, sp, 184 + sd ra, 8(fp) + sd t1, 0(fp) li a0, 85 call is_positive addi sp, fp, 16 @@ -406,20 +458,23 @@ let%expect_test "mul3" = .globl mul3 .type mul3, @function mul3: + mv t1, fp addi sp, sp, -24 - sd ra, 16(sp) - sd fp, 8(sp) addi fp, sp, 8 - mv t0, a0 - mv t1, a1 - mv a3, a0 + sd ra, 8(fp) + sd t1, 0(fp) + sd a0, -8(fp) + sd a1, -16(fp) + sd a2, -24(fp) + ld t0, -8(fp) + ld t1, -16(fp) srli t0, t0, 1 addi t1, t1, -1 mul a0, t0, t1 addi a0, a0, 1 - sd a0, -8(fp) - ld t0, -8(fp) - mv t1, a2 + sd a0, -32(fp) + ld t0, -32(fp) + ld t1, -24(fp) srli t0, t0, 1 addi t1, t1, -1 mul a0, t0, t1 @@ -432,10 +487,11 @@ mul3: .globl main .type main, @function main: - addi sp, sp, -16 - sd ra, 8(sp) - sd fp, 0(sp) - addi fp, sp, 0 + mv t1, fp + addi sp, sp, -216 + addi fp, sp, 200 + sd ra, 8(fp) + sd t1, 0(fp) li a0, 5 li a1, 7 li a2, 9 @@ -465,26 +521,26 @@ let%expect_test "test1" = .globl large .type large, @function large: - addi sp, sp, -24 - sd ra, 16(sp) - sd fp, 8(sp) - addi fp, sp, 8 + mv t1, fp + addi sp, sp, -400 + addi fp, sp, 384 + sd ra, 8(fp) + sd t1, 0(fp) + sd a0, -8(fp) li t0, 1 - mv t1, a0 - mv a1, a0 + ld t1, -8(fp) xor a0, t0, t1 snez a0, a0 - sd a0, -8(fp) - ld t0, -8(fp) - beq t0, zero, else_0 - addi sp, sp, -8 - sd a1, -16(fp) + add a0, a0, a0 + addi a0, a0, 1 + sd a0, -16(fp) + ld t0, -16(fp) + li t1, 1 + beq t0, t1, else_0 li a0, 1 call print_int j end_0 else_0: - addi sp, sp, -8 - sd a1, -16(fp) li a0, 3 call print_int end_0: @@ -496,12 +552,14 @@ let%expect_test "test1" = .globl main .type main, @function main: - addi sp, sp, -48 - sd ra, 40(sp) - sd fp, 32(sp) - addi fp, sp, 32 + mv t1, fp + addi sp, sp, -440 + addi fp, sp, 424 + sd ra, 8(fp) + sd t1, 0(fp) li t0, 1 - beq t0, zero, else_1 + li t1, 1 + beq t0, t1, else_1 li a0, 1 j end_1 else_1: @@ -512,7 +570,8 @@ let%expect_test "test1" = end_1: sd a0, -16(fp) ld t0, -16(fp) - beq t0, zero, else_2 + li t1, 1 + beq t0, t1, else_2 li a0, 1 j end_2 else_2: @@ -520,7 +579,8 @@ let%expect_test "test1" = end_2: sd a0, -24(fp) ld t0, -24(fp) - beq t0, zero, else_3 + li t1, 1 + beq t0, t1, else_3 li a0, 1 j end_3 else_3: @@ -556,38 +616,45 @@ let%expect_test "codegen closure fn with 10 arg" = .globl add .type add, @function add: + mv t1, fp addi sp, sp, -56 - sd ra, 48(sp) - sd fp, 40(sp) addi fp, sp, 40 - mv t0, a0 - mv t1, a1 - mv a7, a0 - add a0, t0, t1 - addi a0, a0, -1 + sd ra, 8(fp) + sd t1, 0(fp) sd a0, -8(fp) + sd a1, -16(fp) + sd a2, -24(fp) + sd a3, -32(fp) + sd a4, -40(fp) + sd a5, -48(fp) + sd a6, -56(fp) ld t0, -8(fp) - mv t1, a2 + ld t1, -16(fp) add a0, t0, t1 addi a0, a0, -1 - sd a0, -16(fp) - ld t0, -16(fp) - mv t1, a3 + sd a0, -64(fp) + ld t0, -64(fp) + ld t1, -24(fp) add a0, t0, t1 addi a0, a0, -1 - sd a0, -24(fp) - ld t0, -24(fp) - mv t1, a4 + sd a0, -72(fp) + ld t0, -72(fp) + ld t1, -32(fp) + add a0, t0, t1 + addi a0, a0, -1 + sd a0, -80(fp) + ld t0, -80(fp) + ld t1, -40(fp) add a0, t0, t1 addi a0, a0, -1 - sd a0, -32(fp) - ld t0, -32(fp) - mv t1, a5 + sd a0, -88(fp) + ld t0, -88(fp) + ld t1, -48(fp) add a0, t0, t1 addi a0, a0, -1 - sd a0, -40(fp) - ld t0, -40(fp) - mv t1, a6 + sd a0, -96(fp) + ld t0, -96(fp) + ld t1, -56(fp) add a0, t0, t1 addi a0, a0, -1 addi sp, fp, 16 @@ -598,10 +665,11 @@ let%expect_test "codegen closure fn with 10 arg" = .globl main .type main, @function main: - addi sp, sp, -40 - sd ra, 32(sp) - sd fp, 24(sp) - addi fp, sp, 24 + mv t1, fp + addi sp, sp, -816 + addi fp, sp, 800 + sd ra, 8(fp) + sd t1, 0(fp) la a0, add li a1, 7 call alloc_closure From 46e7405bdba97d27d47bdfd04383c86420db49ba Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Sun, 8 Mar 2026 01:32:10 +0300 Subject: [PATCH 36/74] add tuples --- EML/lib/backend/ricsv/config.ml | 2 + EML/lib/backend/ricsv/generator.ml | 50 +++++++++++++++++++++++- EML/lib/runtime/runtime.c | 21 ++++++++++ EML/tests/dune | 9 +++++ EML/tests/tuple_tests.t | 62 ++++++++++++++++++++++++++++++ EML/tests/tuple_tests/01adder.ml | 10 +++++ EML/tests/tuple_tests/02nested.ml | 10 +++++ EML/tests/tuple_tests/03args.ml | 11 ++++++ 8 files changed, 173 insertions(+), 2 deletions(-) create mode 100644 EML/tests/tuple_tests.t create mode 100644 EML/tests/tuple_tests/01adder.ml create mode 100644 EML/tests/tuple_tests/02nested.ml create mode 100644 EML/tests/tuple_tests/03args.ml diff --git a/EML/lib/backend/ricsv/config.ml b/EML/lib/backend/ricsv/config.ml index 4c7fb888..9ae4229d 100644 --- a/EML/lib/backend/ricsv/config.ml +++ b/EML/lib/backend/ricsv/config.ml @@ -13,6 +13,8 @@ let primitive_arities ~enable_gc : primitive list = let base = [ { name = "print_int"; arity = 1 } ; { name = "print_endline"; arity = 1 } + ; { name = "create_tuple"; arity = 2 } + ; { name = "field"; arity = 2 } ; { name = "alloc_closure"; arity = 2 } ; { name = "eml_applyN"; arity = 3 } ] diff --git a/EML/lib/backend/ricsv/generator.ml b/EML/lib/backend/ricsv/generator.ml index ce6dd5d9..7f0598f7 100644 --- a/EML/lib/backend/ricsv/generator.ml +++ b/EML/lib/backend/ricsv/generator.ml @@ -297,6 +297,23 @@ and gen_branch dst cond then_e else_e = let* () = gen_anf dst else_e in append (label end_lbl) +and spill_tuple_var_if_in_reg = function + | ImmediateVar name -> + let* env = get_env in + (match Base.Map.find env name with + | Some (Loc_reg _) -> + let* loc = store_reg_into_frame result_reg in + modify_env (fun env -> Base.Map.set env ~key:name ~data:loc) + | _ -> return ()) + | _ -> return () + +and gen_field dst tuple_imm idx = + let* () = gen_imm result_reg tuple_imm in + let* () = spill_tuple_var_if_in_reg tuple_imm in + let* () = append (li (List.nth arg_regs 1) (tag_int idx)) in + let* () = append (call "field") in + copy_result_to dst + and gen_list dst = function | [] -> append (li dst (tag_int 0)) | hd :: tl -> @@ -307,6 +324,33 @@ and gen_list dst = function let* () = append (li result_reg 2) in let* () = load_into_reg (List.nth arg_regs 1) (Loc_reg t0) in let* () = load_into_reg (List.nth arg_regs 2) tail_loc in + let* () = append (call "create_tuple") in + copy_result_to dst + +and gen_tuple dst e1 e2 rest = + let elts = e1 :: e2 :: rest in + let argc = List.length elts in + let* () = spill_caller_saved_vars_to_frame in + let* state = get in + let* spilled = spill_dangerous_args state elts in + let array_bytes = argc * word_size in + let* () = append (addi sp sp (-array_bytes)) in + let* () = + Base.List.foldi elts ~init:(return ()) ~f:(fun i acc elt -> + let* () = acc in + let offset = i * word_size in + let src = + match Base.Map.find spilled i with + | Some loc -> load_into_reg t0 loc + | None -> gen_imm t0 elt + in + let* () = src in + append (sd t0 (sp, offset))) + in + let* () = append (li result_reg argc) in + let* () = append (addi (List.nth arg_regs 1) sp 0) in + let* () = append (call "create_tuple") in + let* () = append (addi sp sp array_bytes) in copy_result_to dst and gen_app dst fname first rest = gen_invocation dst fname (first :: rest) @@ -318,10 +362,12 @@ and gen_cexpr dst = function | ComplexUnarOper (Not, op) -> gen_not dst op | ComplexBinOper (op, left, right) -> gen_binop dst op left right | ComplexBranch (cond, then_e, else_e) -> gen_branch dst cond then_e else_e + | ComplexField (tuple_imm, idx) -> gen_field dst tuple_imm idx + | ComplexTuple (e1, e2, rest) -> gen_tuple dst e1 e2 rest | ComplexApp (ImmediateVar name, first, rest) -> gen_app dst name first rest | ComplexApp (_, _, _) -> fail "ComplexApp: function must be a variable" - | ComplexLambda _ | ComplexList _ | ComplexOption _ | ComplexField _ | ComplexTuple _ -> - fail "gen_cexpr: Lambda/List/Option/Tuple not implemented" + | ComplexLambda _ | ComplexOption _ -> fail "gen_cexpr: Lambda/Option not implemented" + | ComplexList imm_list -> gen_list dst imm_list and gen_anf dst = function | AnfExpr cexp -> gen_cexpr dst cexp diff --git a/EML/lib/runtime/runtime.c b/EML/lib/runtime/runtime.c index 536191d5..6aca48fb 100644 --- a/EML/lib/runtime/runtime.c +++ b/EML/lib/runtime/runtime.c @@ -10,6 +10,7 @@ void print_int(long n) { printf("%ld", TO_ML_INTEGER(n)); } +#define TAG_TUPLE 246 #define TAG_CLOSURE 247 #define RISCV_REG_ARGS 8 #define SIZE_HEAP 800 @@ -55,6 +56,9 @@ static inline bool in_bank(uint64_t *ptr, int bank_idx) { } static size_t scan_start_for_tag(uint16_t tag) { + if (tag == TAG_TUPLE) { + return 1; + } if (tag == TAG_CLOSURE) { return 3; } @@ -344,3 +348,20 @@ void *eml_applyN(closure *c, int64_t argc, void **argv) { return partial; } + +typedef struct { + int64_t arity; + void *args[]; +} tuple; + +tuple *create_tuple(int64_t argc, void **args) { + size_t words = 1 + (size_t)argc; + tuple *t = (tuple *)eml_alloc(words * sizeof(uint64_t), TAG_TUPLE); + t->arity = argc; + for (size_t i = 0; i < (size_t)argc; i++) { + t->args[i] = args[i]; + } + return t; +} + +void *field(tuple *t, long n) { return t->args[n >> 1]; } diff --git a/EML/tests/dune b/EML/tests/dune index 98401366..53dc8aff 100644 --- a/EML/tests/dune +++ b/EML/tests/dune @@ -25,3 +25,12 @@ (file ../lib/runtime/rv64_runtime.a) (source_tree gc_tests) (source_tree many_tests))) + +(cram + (applies_to tuple_tests) + (deps + (file ../bin/EML.exe) + (file Makefile) + (file ../lib/runtime/rv64_runtime.a) + (source_tree tuple_tests) + (source_tree many_tests))) diff --git a/EML/tests/tuple_tests.t b/EML/tests/tuple_tests.t new file mode 100644 index 00000000..40382666 --- /dev/null +++ b/EML/tests/tuple_tests.t @@ -0,0 +1,62 @@ +Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev +SPDX-License-Identifier: LGPL-3.0-or-later + + $ make compile GC=1 tuple_tests/01adder.ml + === GC Status === + Current allocated: 13 + Total allocated: 13 + Free space: 787 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 3 + ================= + 42=== GC Status === + Current allocated: 13 + Total allocated: 28 + Free space: 787 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 4 + ================= + + $ make compile GC=1 tuple_tests/02nested.ml + === GC Status === + Current allocated: 23 + Total allocated: 23 + Free space: 777 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 4 + ================= + 90=== GC Status === + Current allocated: 23 + Total allocated: 48 + Free space: 777 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 5 + ================= + + $ make compile GC=1 tuple_tests/03args.ml + === GC Status === + Current allocated: 28 + Total allocated: 28 + Free space: 772 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 4 + ================= + 1053=== GC Status === + Current allocated: 28 + Total allocated: 58 + Free space: 772 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 5 + ================= diff --git a/EML/tests/tuple_tests/01adder.ml b/EML/tests/tuple_tests/01adder.ml new file mode 100644 index 00000000..dd9ad6c3 --- /dev/null +++ b/EML/tests/tuple_tests/01adder.ml @@ -0,0 +1,10 @@ +let inc x = x + 1 + +let main = + let t = (41, (2, inc)) in + let n, (_, f) = t in + let _ = print_gc_status () in + let _ = print_int (f n) in + let _ = collect () in + print_gc_status () +;; diff --git a/EML/tests/tuple_tests/02nested.ml b/EML/tests/tuple_tests/02nested.ml new file mode 100644 index 00000000..14d8744b --- /dev/null +++ b/EML/tests/tuple_tests/02nested.ml @@ -0,0 +1,10 @@ +let mul2 x = x * 2 + +let main = + let t = (1, 2, 3, 4, (5, (6, mul2)), 7, 8, 9) in + let a, b, c, d, (e, (f, g)), h, i, j = t in + let _ = print_gc_status () in + let _ = print_int (g (a + b + c + d + e + f + h + i + j)) in + let _ = collect () in + print_gc_status () +;; diff --git a/EML/tests/tuple_tests/03args.ml b/EML/tests/tuple_tests/03args.ml new file mode 100644 index 00000000..ce101ede --- /dev/null +++ b/EML/tests/tuple_tests/03args.ml @@ -0,0 +1,11 @@ +let add3 x = x + 3 + +let main = + let pack = (10, 20, 30, 40, 50, (60, (70, add3)), 80, 90, 100, 110, 120, 130, 140) in + let a, b, c, d, e, (f, (g, h)), i, j, k, l, m, n, o = pack in + let base = a + b + c + d + e + f + g + i + j + k + l + m + n + o in + let _ = print_gc_status () in + let _ = print_int (h base) in + let _ = collect () in + print_gc_status () +;; From d08b2e73de9d669f57474f85674066ad10388bc4 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Sun, 8 Mar 2026 12:19:53 +0300 Subject: [PATCH 37/74] fix tests + dune fmt Signed-off-by: Victoria Ostrovskaya --- EML/bin/EML.ml | 4 +- EML/bin/main.ml | 55 ------------------- EML/lib/backend/ricsv/architecture.ml | 13 +---- EML/lib/backend/ricsv/config.ml | 31 ----------- EML/lib/backend/ricsv/generator.ml | 6 +- EML/lib/frontend/binutils.ml | 28 +++++++--- EML/lib/frontend/binutils.mli | 4 +- EML/lib/middleend/cc.ml | 12 ++-- EML/lib/middleend/ll.ml | 1 + EML/lib/runtime/dune | 10 +++- EML/tests/Makefile | 3 +- .../closure/02_affine_live_dead_gc.ml | 2 +- EML/tests/riscv_tests.ml | 1 - EML/tests/tuple_tests/01adder.ml | 2 +- EML/tests/tuple_tests/02nested.ml | 2 +- EML/tests/tuple_tests/03args.ml | 2 +- 16 files changed, 51 insertions(+), 125 deletions(-) delete mode 100644 EML/bin/main.ml delete mode 100644 EML/lib/backend/ricsv/config.ml diff --git a/EML/bin/EML.ml b/EML/bin/EML.ml index 5e8bfaf5..0d4e2a8b 100644 --- a/EML/bin/EML.ml +++ b/EML/bin/EML.ml @@ -64,7 +64,9 @@ let run_compile ~enable_gc text env oc : (env, unit) Result.t = let compiler opts : (unit, unit) Result.t = let run text env oc = run_compile ~enable_gc:opts.enable_gc text env oc in let env0 = - if opts.enable_gc then Inferencer.TypeEnv.env_with_gc else Inferencer.TypeEnv.initial_env + if opts.enable_gc + then Inferencer.TypeEnv.env_with_gc + else Inferencer.TypeEnv.initial_env in let with_output f = match opts.output_file with diff --git a/EML/bin/main.ml b/EML/bin/main.ml deleted file mode 100644 index 8800c642..00000000 --- a/EML/bin/main.ml +++ /dev/null @@ -1,55 +0,0 @@ -(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) - -(** SPDX-License-Identifier: LGPL-3.0-or-later *) - -open EML_lib.Frontend.Parser -open EML_lib.Middleend.Anf -open EML_lib.Backend.Ricsv.Runner - -let compile_to_asm ~enable_gc src out_ppf = - match parse src with - | Error e -> - Printf.eprintf "Parse error: %s\n%!" e; - exit 1 - | Ok ast -> - (match anf_program ast with - | Error e -> - Printf.eprintf "ANF error: %s\n%!" e; - exit 1 - | Ok anf -> gen_program ~enable_gc out_ppf anf) -;; - -let () = - let args = Sys.argv in - let nargs = Array.length args in - (* Parse command line: [EML.exe] [-fromfile ] [-o ] *) - let src_file = ref None in - let out_file = ref None in - let enable_gc = ref false in - let i = ref 1 in - while !i < nargs do - (match args.(!i) with - | "-fromfile" -> - incr i; - if !i < nargs then src_file := Some args.(!i) - | "-o" -> - incr i; - if !i < nargs then out_file := Some args.(!i) - | "-gc" -> enable_gc := true - | _ -> ()); - incr i - done; - let src = - match !src_file with - | Some path -> In_channel.(with_open_text path input_all) - | None -> In_channel.input_all stdin - in - match !out_file with - | None -> compile_to_asm ~enable_gc:!enable_gc src Format.std_formatter - | Some path -> - let oc = open_out path in - let ppf = Format.formatter_of_out_channel oc in - compile_to_asm ~enable_gc:!enable_gc src ppf; - Format.pp_print_flush ppf (); - close_out oc -;; diff --git a/EML/lib/backend/ricsv/architecture.ml b/EML/lib/backend/ricsv/architecture.ml index 9587ebf8..35fb62c5 100644 --- a/EML/lib/backend/ricsv/architecture.ml +++ b/EML/lib/backend/ricsv/architecture.ml @@ -127,7 +127,6 @@ module Riscv_backend = struct let saved_ra_offset = word_size let riscv_imm12_min = -2048 let riscv_imm12_max = 2047 - let fits_imm12 n = n >= riscv_imm12_min && n <= riscv_imm12_max type location = @@ -146,14 +145,7 @@ module Riscv_backend = struct let ofs = stack_size - frame_header_size in if fits_imm12 ofs then addi fp sp ofs else li t0 ofs @ add fp sp t0 in - let base = - label name - @ mv t1 fp - @ dec_sp - @ set_fp - @ sd ra ra_slot - @ sd t1 fp_slot - in + let base = label name @ mv t1 fp @ dec_sp @ set_fp @ sd ra ra_slot @ sd t1 fp_slot in if enable_gc && String.equal name "main" then base @ call "init_gc" @ mv a0 fp @ call "set_ptr_stack" else base @@ -162,8 +154,7 @@ module Riscv_backend = struct let epilogue ~enable_gc ~is_main = let base = (if enable_gc && is_main then call "destroy_gc" else []) - @ - addi sp fp frame_header_size + @ addi sp fp frame_header_size @ ld ra (fp, saved_ra_offset) @ ld fp (fp, saved_fp_offset) in diff --git a/EML/lib/backend/ricsv/config.ml b/EML/lib/backend/ricsv/config.ml deleted file mode 100644 index 9ae4229d..00000000 --- a/EML/lib/backend/ricsv/config.ml +++ /dev/null @@ -1,31 +0,0 @@ -(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) - -(** SPDX-License-Identifier: LGPL-3.0-or-later *) - -(** Primitives the generated code can call. *) - -type primitive = - { name : string - ; arity : int - } - -let primitive_arities ~enable_gc : primitive list = - let base = - [ { name = "print_int"; arity = 1 } - ; { name = "print_endline"; arity = 1 } - ; { name = "create_tuple"; arity = 2 } - ; { name = "field"; arity = 2 } - ; { name = "alloc_closure"; arity = 2 } - ; { name = "eml_applyN"; arity = 3 } - ] - in - if enable_gc - then - base - @ [ { name = "get_heap_start"; arity = 0 } - ; { name = "get_heap_final"; arity = 0 } - ; { name = "collect"; arity = 0 } - ; { name = "print_gc_status"; arity = 0 } - ] - else base -;; diff --git a/EML/lib/backend/ricsv/generator.ml b/EML/lib/backend/ricsv/generator.ml index 7f0598f7..3d47976f 100644 --- a/EML/lib/backend/ricsv/generator.ml +++ b/EML/lib/backend/ricsv/generator.ml @@ -351,7 +351,7 @@ and gen_tuple dst e1 e2 rest = let* () = append (addi (List.nth arg_regs 1) sp 0) in let* () = append (call "create_tuple") in let* () = append (addi sp sp array_bytes) in - copy_result_to dst + copy_result_to dst and gen_app dst fname first rest = gen_invocation dst fname (first :: rest) @@ -436,10 +436,10 @@ let gen_func ~enable_gc func_name params body frame_sz ppf = let gen_program ?(enable_gc = false) ppf (analysis : analysis_result) = fprintf ppf ".section .text"; - let base = Config.primitive_arities ~enable_gc in + let base = Frontend.Binutils.primitive_arities ~enable_gc in let arity_map = List.fold_left - (fun map { Config.name; arity } -> + (fun map { Frontend.Binutils.name; arity } -> Base.Map.set map ~key:name ~data:arity) analysis.arity_map base diff --git a/EML/lib/frontend/binutils.ml b/EML/lib/frontend/binutils.ml index 69d44eb5..712d5ffd 100644 --- a/EML/lib/frontend/binutils.ml +++ b/EML/lib/frontend/binutils.ml @@ -2,18 +2,32 @@ (** SPDX-License-Identifier: LGPL-3.0-or-later *) -(** Built-in names and arities. Single place to change stdlib/runtime for - closure conversion, backend, etc. *) +(** Primitives the generated code can call. *) type primitive = { name : string ; arity : int } -let all_runtime_prims : primitive list = - [ { name = "print_int"; arity = 1 }; { name = "print_endline"; arity = 1 } ] +let primitive_arities ~enable_gc : primitive list = + let base = + [ { name = "print_int"; arity = 1 } + ; { name = "print_endline"; arity = 1 } + ; { name = "create_tuple"; arity = 2 } + ; { name = "field"; arity = 2 } + ; { name = "alloc_closure"; arity = 2 } + ; { name = "eml_applyN"; arity = 3 } + ] + in + if enable_gc + then + base + @ [ { name = "get_heap_start"; arity = 0 } + ; { name = "get_heap_final"; arity = 0 } + ; { name = "collect"; arity = 0 } + ; { name = "print_gc_status"; arity = 0 } + ] + else base ;; -let builtin_global_names = - List.map (fun p -> p.name) all_runtime_prims @ Ast.unary_op_list @ Ast.bin_op_list -;; +let primitive_names ~enable_gc = List.map (fun p -> p.name) (primitive_arities ~enable_gc) diff --git a/EML/lib/frontend/binutils.mli b/EML/lib/frontend/binutils.mli index 62ba547c..9ce10f2e 100644 --- a/EML/lib/frontend/binutils.mli +++ b/EML/lib/frontend/binutils.mli @@ -7,5 +7,5 @@ type primitive = ; arity : int } -val all_runtime_prims : primitive list -val builtin_global_names : string list +val primitive_arities : enable_gc:bool -> primitive list +val primitive_names : enable_gc:bool -> string list diff --git a/EML/lib/middleend/cc.ml b/EML/lib/middleend/cc.ml index 3fd9ed79..100eb96e 100644 --- a/EML/lib/middleend/cc.ml +++ b/EML/lib/middleend/cc.ml @@ -4,7 +4,6 @@ open Format open Frontend.Ast -open Frontend.Binutils module VarSet = Set.Make (String) module EnvMap = Map.Make (String) @@ -22,8 +21,7 @@ let vars_in_pattern p = | PatConstruct (_, None) -> VarSet.empty | PatConstruct (_, Some q) -> walk q | PatType (q, _) -> walk q - | PatTuple (p1, p2, rest) -> - union_map_list walk (p1 :: p2 :: rest) + | PatTuple (p1, p2, rest) -> union_map_list walk (p1 :: p2 :: rest) | PatUnit | PatList _ | PatOption _ -> VarSet.empty in walk p @@ -69,8 +67,7 @@ let rec collect_free_vars = function | ExpTypeAnnotation (e, _) -> collect_free_vars e | ExpBinOper (_, e1, e2) -> VarSet.union (collect_free_vars e1) (collect_free_vars e2) | ExpUnarOper (_, e) -> collect_free_vars e - | ExpTuple (e1, e2, rest) -> - union_map_list collect_free_vars (e1 :: e2 :: rest) + | ExpTuple (e1, e2, rest) -> union_map_list collect_free_vars (e1 :: e2 :: rest) | ExpList es -> union_map_list collect_free_vars es | ExpOption e_opt -> (match e_opt with @@ -119,8 +116,7 @@ let extend_capture_env env pat captured_set = | PatAny | PatConst _ | PatConstruct (_, None) -> acc | PatVariable name -> EnvMap.add name captured_set acc | PatConstruct (_, Some p) | PatType (p, _) -> add_captures_for_pat acc p - | PatTuple (p1, p2, rest) -> - List.fold_left add_captures_for_pat acc (p1 :: p2 :: rest) + | PatTuple (p1, p2, rest) -> List.fold_left add_captures_for_pat acc (p1 :: p2 :: rest) | PatUnit | PatList _ | PatOption _ -> acc in add_captures_for_pat env pat @@ -331,7 +327,7 @@ let convert_item = function , SValue (rec_flag, (pat', expr'), rest_binds) ) ;; -let builtin_globals = var_set_of_list builtin_global_names +let builtin_globals = var_set_of_list (Frontend.Binutils.primitive_names ~enable_gc:true) let initial_context = { globals = builtin_globals; env = EnvMap.empty } let closure_conversion_result (program : Frontend.Ast.program) diff --git a/EML/lib/middleend/ll.ml b/EML/lib/middleend/ll.ml index 5595250f..602b9596 100644 --- a/EML/lib/middleend/ll.ml +++ b/EML/lib/middleend/ll.ml @@ -358,6 +358,7 @@ module Make (N : NAMING) = struct | ExpOption (Some e) -> let* res = lift_expr (inner ctx) e in return { structures = res.structures; expr = ExpOption (Some res.expr) } + and lift_binds (ctx : context) binds : (structure list * (pattern * expr) list) t = fold_binds ctx binds (fun ctx _ e -> lift_expr ctx e) diff --git a/EML/lib/runtime/dune b/EML/lib/runtime/dune index aba9047d..560b79ce 100644 --- a/EML/lib/runtime/dune +++ b/EML/lib/runtime/dune @@ -3,5 +3,13 @@ (deps runtime.c) (action (progn - (run riscv64-linux-gnu-gcc -march=rv64gc -mabi=lp64d -O2 -c %{dep:runtime.c} -o rv64_runtime.o) + (run + riscv64-linux-gnu-gcc + -march=rv64gc + -mabi=lp64d + -O2 + -c + %{dep:runtime.c} + -o + rv64_runtime.o) (run riscv64-linux-gnu-ar rcs %{targets} rv64_runtime.o)))) diff --git a/EML/tests/Makefile b/EML/tests/Makefile index 511960a4..035c7d65 100644 --- a/EML/tests/Makefile +++ b/EML/tests/Makefile @@ -2,7 +2,8 @@ SHELL := /bin/bash RUNTIME_A_SANDBOX := ../lib/runtime/rv64_runtime.a RUNTIME_A_DEFAULT := ../_build/default/lib/runtime/rv64_runtime.a -EML_BIN ?= /home/danil/comp25/EML/_build/default/bin/EML.exe +EML_ROOT ?= $(CURDIR)/.. +EML_BIN ?= $(if $(findstring _build,$(EML_ROOT)),$(EML_ROOT)/bin/EML.exe,$(EML_ROOT)/_build/default/bin/EML.exe) ARGS := $(filter-out compile,$(MAKECMDGOALS)) USE_GC := $(if $(filter 1 true yes on,$(GC)),1,0) diff --git a/EML/tests/gc_tests/closure/02_affine_live_dead_gc.ml b/EML/tests/gc_tests/closure/02_affine_live_dead_gc.ml index 91ac6703..67ac8224 100644 --- a/EML/tests/gc_tests/closure/02_affine_live_dead_gc.ml +++ b/EML/tests/gc_tests/closure/02_affine_live_dead_gc.ml @@ -1,4 +1,4 @@ -let affine a b x = a * x + b +let affine a b x = (a * x) + b let main = let live = affine 2 7 in diff --git a/EML/tests/riscv_tests.ml b/EML/tests/riscv_tests.ml index ba67ec93..83b585f6 100644 --- a/EML/tests/riscv_tests.ml +++ b/EML/tests/riscv_tests.ml @@ -597,7 +597,6 @@ let%expect_test "test1" = |}] ;; - let%expect_test "codegen closure fn with 10 arg" = run {| diff --git a/EML/tests/tuple_tests/01adder.ml b/EML/tests/tuple_tests/01adder.ml index dd9ad6c3..d0d5a21e 100644 --- a/EML/tests/tuple_tests/01adder.ml +++ b/EML/tests/tuple_tests/01adder.ml @@ -1,7 +1,7 @@ let inc x = x + 1 let main = - let t = (41, (2, inc)) in + let t = 41, (2, inc) in let n, (_, f) = t in let _ = print_gc_status () in let _ = print_int (f n) in diff --git a/EML/tests/tuple_tests/02nested.ml b/EML/tests/tuple_tests/02nested.ml index 14d8744b..b1f53ddd 100644 --- a/EML/tests/tuple_tests/02nested.ml +++ b/EML/tests/tuple_tests/02nested.ml @@ -1,7 +1,7 @@ let mul2 x = x * 2 let main = - let t = (1, 2, 3, 4, (5, (6, mul2)), 7, 8, 9) in + let t = 1, 2, 3, 4, (5, (6, mul2)), 7, 8, 9 in let a, b, c, d, (e, (f, g)), h, i, j = t in let _ = print_gc_status () in let _ = print_int (g (a + b + c + d + e + f + h + i + j)) in diff --git a/EML/tests/tuple_tests/03args.ml b/EML/tests/tuple_tests/03args.ml index ce101ede..7d83da83 100644 --- a/EML/tests/tuple_tests/03args.ml +++ b/EML/tests/tuple_tests/03args.ml @@ -1,7 +1,7 @@ let add3 x = x + 3 let main = - let pack = (10, 20, 30, 40, 50, (60, (70, add3)), 80, 90, 100, 110, 120, 130, 140) in + let pack = 10, 20, 30, 40, 50, (60, (70, add3)), 80, 90, 100, 110, 120, 130, 140 in let a, b, c, d, e, (f, (g, h)), i, j, k, l, m, n, o = pack in let base = a + b + c + d + e + f + g + i + j + k + l + m + n + o in let _ = print_gc_status () in From 220ce5dab7a6d06784b66cabae06694f28d6fe7b Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Sun, 8 Mar 2026 13:16:11 +0300 Subject: [PATCH 38/74] add llvm Signed-off-by: Victoria Ostrovskaya --- EML/lib/backend/llvm/analysis.ml | 142 +++++ EML/lib/backend/llvm/architecture.ml | 101 +++ EML/lib/backend/llvm/generator.ml | 816 ++++++++++++++++++++++++ EML/lib/backend/llvm/generator_state.ml | 125 ++++ EML/lib/backend/llvm/runner.ml | 19 + EML/lib/backend/llvm/runner.mli | 9 + EML/lib/frontend/binutils.ml | 33 - EML/lib/frontend/binutils.mli | 11 - EML/lib/runtime/primitives.ml | 43 ++ 9 files changed, 1255 insertions(+), 44 deletions(-) create mode 100644 EML/lib/backend/llvm/analysis.ml create mode 100644 EML/lib/backend/llvm/architecture.ml create mode 100644 EML/lib/backend/llvm/generator.ml create mode 100644 EML/lib/backend/llvm/generator_state.ml create mode 100644 EML/lib/backend/llvm/runner.ml create mode 100644 EML/lib/backend/llvm/runner.mli delete mode 100644 EML/lib/frontend/binutils.ml delete mode 100644 EML/lib/frontend/binutils.mli create mode 100644 EML/lib/runtime/primitives.ml diff --git a/EML/lib/backend/llvm/analysis.ml b/EML/lib/backend/llvm/analysis.ml new file mode 100644 index 00000000..b15a428b --- /dev/null +++ b/EML/lib/backend/llvm/analysis.ml @@ -0,0 +1,142 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Frontend.Ast +open Middleend.Anf + +type function_layout = + { func_name : string + ; asm_name : string + ; params : immediate list + ; body : anf_expr + ; slots_count : int + } + +type analysis_result = + { arity_map : (string, int, Base.String.comparator_witness) Base.Map.t + ; functions : function_layout list + ; resolve : int -> string -> (string * int) option + } + +let rec slots_in_imm = function + | ImmediateVar _ | ImmediateConst _ -> 0 + +and slots_in_cexpr = function + | ComplexImmediate imm -> slots_in_imm imm + | ComplexUnit -> 0 + | ComplexBinOper (_, left, right) -> slots_in_imm left + slots_in_imm right + | ComplexUnarOper (_, imm) -> slots_in_imm imm + | ComplexTuple (first, second, rest) -> + List.fold_left + (fun slot_count expr -> slot_count + slots_in_imm expr) + 0 + (first :: second :: rest) + | ComplexField (imm, _) -> slots_in_imm imm + | ComplexList imm_list -> + List.fold_left (fun slot_count expr -> slot_count + slots_in_imm expr) 0 imm_list + | ComplexApp (_, second, rest) -> + 1 + + List.fold_left + (fun slot_count expr -> slot_count + slots_in_imm expr) + 0 + (second :: rest) + | ComplexOption None -> 0 + | ComplexOption (Some imm) -> slots_in_imm imm + | ComplexLambda (_, body) -> slots_in_anf body + | ComplexBranch (cond, then_expr, else_expr) -> + slots_in_imm cond + slots_in_anf then_expr + slots_in_anf else_expr + +and slots_in_anf = function + | AnfExpr cexp -> slots_in_cexpr cexp + | AnfLet (_, _, cexp, cont) -> 1 + slots_in_cexpr cexp + slots_in_anf cont +;; + +let rec params_of_anf = function + | AnfExpr (ComplexLambda (pats, body)) -> + let imms = + List.filter_map + (function + | PatVariable id -> Some (ImmediateVar id) + | _ -> None) + pats + in + let rest, inner = params_of_anf body in + imms @ rest, inner + | other -> [], other +;; + +let arity_map_of_program (program : anf_program) = + List.fold_left + (fun map -> function + | AnfValue (_, (fid, arity, _), and_binds) -> + let map = Base.Map.set map ~key:fid ~data:arity in + List.fold_left + (fun acc (id, arity, _) -> Base.Map.set acc ~key:id ~data:arity) + map + and_binds + | _ -> map) + (Base.Map.empty (module Base.String)) + program +;; + +let analyze (program : anf_program) = + let arity_map = arity_map_of_program program in + let raw = + List.filter_map + (function + | AnfValue (_, (func_name, arity, body), _) -> + let params, body = params_of_anf body in + Some (func_name, arity, params, body, slots_in_anf body) + | AnfEval _ -> None) + program + in + let counts = ref (Base.Map.empty (module Base.String)) in + let mangle_reserved name = if String.equal name "_start" then "eml_start" else name in + let asm_name name = + let base = mangle_reserved name in + let n = Base.Map.find !counts name |> Option.value ~default:0 in + counts := Base.Map.set !counts ~key:name ~data:(n + 1); + if n = 0 then base else base ^ "_" ^ Int.to_string n + in + let functions = + List.map + (fun (func_name, _arity, params, body, slots_count) -> + { func_name; asm_name = asm_name func_name; params; body; slots_count }) + raw + in + let has_main = + List.exists (fun func_layout -> String.equal func_layout.func_name "main") functions + in + let functions = + if has_main + then functions + else ( + let synthetic_main = + { func_name = "main" + ; asm_name = "main" + ; params = [] + ; body = AnfExpr (ComplexImmediate (ImmediateConst (ConstInt 0))) + ; slots_count = 0 + } + in + functions @ [ synthetic_main ]) + in + let arity_map = + if has_main then arity_map else Base.Map.set arity_map ~key:"main" ~data:0 + in + let resolver func_index var_name = + let rec find i = + if i < 0 + then None + else ( + match Base.List.nth functions i with + | None -> None + | Some func_layout when String.equal func_layout.func_name var_name -> + Some (func_layout.asm_name, List.length func_layout.params) + | Some _ -> find (i - 1)) + in + find (func_index - 1) + in + { arity_map; functions; resolve = resolver } +;; diff --git a/EML/lib/backend/llvm/architecture.ml b/EML/lib/backend/llvm/architecture.ml new file mode 100644 index 00000000..aab42361 --- /dev/null +++ b/EML/lib/backend/llvm/architecture.ml @@ -0,0 +1,101 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Llvm + +module Llvm_backend = struct + type instr = + | Add of llvalue * llvalue * string + | Sub of llvalue * llvalue * string + | Mul of llvalue * llvalue * string + | Sdiv of llvalue * llvalue * string + | Neg of llvalue * string + | Icmp of Icmp.t * llvalue * llvalue * string + | And of llvalue * llvalue * string + | Or of llvalue * llvalue * string + | Not of llvalue * string + | Load of lltype * llvalue * string + | Store of llvalue * llvalue + | Alloca of lltype * string + | Call of lltype * llvalue * llvalue array * string + | Ret of llvalue option + | Br of llbasicblock + | CondBr of llvalue * llbasicblock * llbasicblock + | Phi of (llvalue * llbasicblock) list * string + | Bitcast of llvalue * lltype * string + | PtrToInt of llvalue * lltype * string + | IntToPtr of llvalue * lltype * string + + let emit builder = function + | Add (left, right, name) -> Some (build_add left right name builder) + | Sub (left, right, name) -> Some (build_sub left right name builder) + | Mul (left, right, name) -> Some (build_mul left right name builder) + | Sdiv (left, right, name) -> Some (build_sdiv left right name builder) + | Neg (operand, name) -> Some (build_neg operand name builder) + | Icmp (cond, left, right, name) -> Some (build_icmp cond left right name builder) + | And (left, right, name) -> Some (build_and left right name builder) + | Or (left, right, name) -> Some (build_or left right name builder) + | Not (operand, name) -> Some (build_not operand name builder) + | Load (load_ty, ptr_value, name) -> Some (build_load load_ty ptr_value name builder) + | Store (value, ptr_value) -> + ignore (build_store value ptr_value builder); + None + | Alloca (alloca_ty, name) -> Some (build_alloca alloca_ty name builder) + | Call (ft, callee, args, name) -> Some (build_call ft callee args name builder) + | Ret None -> + ignore (build_ret_void builder); + None + | Ret (Some ret_value) -> + ignore (build_ret ret_value builder); + None + | Br block -> + ignore (build_br block builder); + None + | CondBr (cond, then_bb, else_bb) -> + ignore (build_cond_br cond then_bb else_bb builder); + None + | Phi (incoming, name) -> Some (build_phi incoming name builder) + | Bitcast (operand, dest_ty, name) -> + Some (build_bitcast operand dest_ty name builder) + | PtrToInt (operand, dest_ty, name) -> + Some (build_ptrtoint operand dest_ty name builder) + | IntToPtr (operand, dest_ty, name) -> + Some (build_inttoptr operand dest_ty name builder) + ;; + + let add builder left right name = emit builder (Add (left, right, name)) + let sub builder left right name = emit builder (Sub (left, right, name)) + let mul builder left right name = emit builder (Mul (left, right, name)) + let sdiv builder left right name = emit builder (Sdiv (left, right, name)) + let neg builder operand name = emit builder (Neg (operand, name)) + let icmp builder cond left right name = emit builder (Icmp (cond, left, right, name)) + let and_ builder left right name = emit builder (And (left, right, name)) + let or_ builder left right name = emit builder (Or (left, right, name)) + let not builder operand name = emit builder (Not (operand, name)) + let load builder load_ty ptr_value name = emit builder (Load (load_ty, ptr_value, name)) + let alloca builder alloca_ty name = emit builder (Alloca (alloca_ty, name)) + let call builder ft callee args name = emit builder (Call (ft, callee, args, name)) + let phi builder incoming name = emit builder (Phi (incoming, name)) + + let bitcast builder operand dest_ty name = + emit builder (Bitcast (operand, dest_ty, name)) + ;; + + let ptrtoint builder operand dest_ty name = + emit builder (PtrToInt (operand, dest_ty, name)) + ;; + + let inttoptr builder operand dest_ty name = + emit builder (IntToPtr (operand, dest_ty, name)) + ;; + + let store builder value ptr_value = ignore (emit builder (Store (value, ptr_value))) + let ret_void builder = ignore (emit builder (Ret None)) + let ret builder ret_value = ignore (emit builder (Ret (Some ret_value))) + let br builder block = ignore (emit builder (Br block)) + + let cond_br builder cond then_bb else_bb = + ignore (emit builder (CondBr (cond, then_bb, else_bb))) + ;; +end diff --git a/EML/lib/backend/llvm/generator.ml b/EML/lib/backend/llvm/generator.ml new file mode 100644 index 00000000..15c86760 --- /dev/null +++ b/EML/lib/backend/llvm/generator.ml @@ -0,0 +1,816 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Llvm +open Runtime.Primitives +open Architecture +open Llvm_backend +open Analysis +open Frontend.Ast +open Middleend.Anf +open Generator_state + +let tag_int n = 1 + (n lsl 1) +let tag_bool b = if b then 4 else 2 +let tag_char c = tag_int (Char.code c) +let context = global_context () +let my_module = ref (create_module context "EML") +let current_module () = !my_module +let builder = builder context +let int_t = i64_type context +let i32_t = i32_type context +let void_t = void_type context +let ptr_t = pointer_type context + +let lltype_of_arg = function + | Ptr -> ptr_t + | Int -> int_t + | I32 -> i32_t +;; + +let lltype_of_ret = function + | RPtr -> ptr_t + | RInt -> int_t + | RVoid -> void_t +;; + +let predefined_funcs = + List.map + (fun { name; ret; args } -> + let ret_t = lltype_of_ret ret in + let arg_t = Array.of_list (List.map lltype_of_arg args) in + name, function_type ret_t arg_t) + predefined_runtime_funcs +;; + +let variable_value_table : (string, llvalue) Hashtbl.t = + Hashtbl.create (List.length predefined_funcs * 4) +;; + +let variable_type_table : (string, lltype) Hashtbl.t = + Hashtbl.create (List.length predefined_funcs * 4) +;; + +let reset_for_new_program () = + my_module := create_module context "EML"; + Hashtbl.clear variable_value_table; + Hashtbl.clear variable_type_table +;; + +let predefined_init () = + let module_ = current_module () in + List.iter + (fun (str, t) -> + let func = declare_function str t module_ in + Hashtbl.add variable_type_table str t; + Hashtbl.add variable_value_table str func) + predefined_funcs +;; + +let snapshot_envs () = + let value_env = + Hashtbl.fold + (fun key value acc -> Base.Map.set acc ~key ~data:value) + variable_value_table + (Base.Map.empty (module Base.String)) + in + let type_env = + Hashtbl.fold + (fun key value acc -> Base.Map.set acc ~key ~data:value) + variable_type_table + (Base.Map.empty (module Base.String)) + in + value_env, type_env +;; + +let emit_value builder instr = + match emit builder instr with + | Some v -> Ok v + | None -> Error "emit_value: expected value" +;; + +let emit_void builder instr : (unit, string) Result.t = + match emit builder instr with + | _ -> Ok () +;; + +let emit_void_st builder instr = + match emit_void builder instr with + | Ok () -> return () + | Error e -> fail e +;; + +let with_optional_value opt = + match opt with + | Some v -> return v + | None -> fail "Llvm_backend: expected value" +;; + +let lookup_func name = + let* value_opt = find_value_opt name in + match value_opt with + | Some func -> return func + | None -> + (match lookup_function name (current_module ()) with + | Some func -> return func + | None -> fail ("Couldn't find value for key: " ^ name)) +;; + +let lookup_type name = + let* type_opt = find_type_opt name in + match type_opt with + | Some ty -> return ty + | None -> fail ("Couldn't find type for key: " ^ name) +;; + +let lookup_func_type name = + let* func_value = lookup_func name in + let* func_type = lookup_type name in + return (func_value, func_type) +;; + +let gen_simple_type name args = + let* func_value, func_type = lookup_func_type name in + let* res = + with_optional_value (call builder func_type func_value args ("boxed_" ^ name)) + in + return res +;; + +let imm_unit = + let* v = + with_optional_value (inttoptr builder (const_int int_t (tag_int 0)) ptr_t "unit") + in + return v +;; + +let imm_tagged_int i = + let* v = + with_optional_value + (inttoptr builder (const_int int_t (tag_int i)) ptr_t "tagged_int") + in + return v +;; + +let imm_tagged_bool b = + let* v = + with_optional_value + (inttoptr builder (const_int int_t (tag_bool b)) ptr_t "tagged_bool") + in + return v +;; + +let imm_tagged_char c = + let* v = + with_optional_value + (inttoptr builder (const_int int_t (tag_char c)) ptr_t "tagged_char") + in + return v +;; + +let untag_int_val tagged_val = + let* raw = with_optional_value (ptrtoint builder tagged_val int_t "raw_int") in + let* minus1 = with_optional_value (sub builder raw (const_int int_t 1) "minus1") in + let* result = + with_optional_value (sdiv builder minus1 (const_int int_t 2) "untagged") + in + return result +;; + +let tag_int_result i = + let* twice = with_optional_value (mul builder i (const_int int_t 2) "twice") in + let* tagged = with_optional_value (add builder twice (const_int int_t 1) "tagged") in + let* v = with_optional_value (inttoptr builder tagged ptr_t "result_int") in + return v +;; + +let untag_bool_val tagged_val = + let* raw = with_optional_value (ptrtoint builder tagged_val int_t "raw_bool") in + let* result = + with_optional_value (icmp builder Icmp.Eq raw (const_int int_t 4) "is_true") + in + return result +;; + +let tag_bool_result cond_value = + (* false=2, true=4 via select (no zext) *) + let tagged_i64 = + build_select cond_value (const_int int_t 4) (const_int int_t 2) "tagged_bool" builder + in + let* v = with_optional_value (inttoptr builder tagged_i64 ptr_t "result_bool") in + return v +;; + +let rec gen_imm = function + | ImmediateConst (ConstInt i) -> imm_tagged_int i + | ImmediateConst (ConstBool b) -> imm_tagged_bool b + | ImmediateConst (ConstChar c) -> imm_tagged_char c + | ImmediateConst (ConstString _s) -> imm_unit + | ImmediateVar id -> + let* value = + let* gc_allocas = get_gc_allocas in + match gc_allocas with + | Some allocas -> + (match Generator_state.map_find_opt allocas id with + | Some alloca -> + let* v = with_optional_value (load builder ptr_t alloca id) in + return v + | None -> + let* value_opt = find_value_opt id in + (match value_opt with + | Some v -> return v + | None -> + let* resolved_value = resolved_find_value_opt id in + (match resolved_value with + | None -> fail ("Unbound variable: " ^ id) + | Some v -> return v))) + | None -> + let* value_opt = find_value_opt id in + (match value_opt with + | Some v -> return v + | None -> + let* resolved_value = resolved_find_value_opt id in + (match resolved_value with + | None -> fail ("Unbound variable: " ^ id) + | Some v -> return v)) + in + (match classify_value value with + | ValueKind.Function when Array.length (params value) = 0 -> + let* type_opt = resolved_find_type_opt id in + let* func_type = + match type_opt with + | Some ty -> return ty + | None -> fail ("Missing type for: " ^ id) + in + let* v = with_optional_value (call builder func_type value [||] "call_0") in + return v + | _ -> + let* arity_opt = get_resolved_arity id in + maybe_closure value arity_opt) + +and get_resolved_arity name = + let* state = get in + return + (match state.resolve with + | Some resolver -> + (match resolver state.current_func_index name with + | Some (_, arity) -> Some arity + | None -> None) + | None -> None) + +and maybe_closure value arity_opt = + match classify_value value with + | ValueKind.Function -> + let arity = Option.value arity_opt ~default:(Array.length (params value)) in + let* function_ptr = + with_optional_value (bitcast builder value ptr_t "func_ptr_cast") + in + gen_simple_type "alloc_closure" [| function_ptr; const_int int_t arity |] + | _ -> return value +;; + +let gen_binop_native op left_v right_v = + match op with + | Plus -> + let* l = untag_int_val left_v in + let* r = untag_int_val right_v in + let* v = with_optional_value (add builder l r "add") in + tag_int_result v + | Minus -> + let* l = untag_int_val left_v in + let* r = untag_int_val right_v in + let* v = with_optional_value (sub builder l r "sub") in + tag_int_result v + | Multiply -> + let* l = untag_int_val left_v in + let* r = untag_int_val right_v in + let* v = with_optional_value (mul builder l r "mul") in + tag_int_result v + | Division -> + let* l = untag_int_val left_v in + let* r = untag_int_val right_v in + let* v = with_optional_value (sdiv builder l r "sdiv") in + tag_int_result v + | GretestEqual -> + let* l = untag_int_val left_v in + let* r = untag_int_val right_v in + let* v = with_optional_value (icmp builder Icmp.Sge l r "icmp_sge") in + tag_bool_result v + | LowestEqual -> + let* l = untag_int_val left_v in + let* r = untag_int_val right_v in + let* v = with_optional_value (icmp builder Icmp.Sle l r "icmp_sle") in + tag_bool_result v + | GreaterThan -> + let* l = untag_int_val left_v in + let* r = untag_int_val right_v in + let* v = with_optional_value (icmp builder Icmp.Sgt l r "icmp_sgt") in + tag_bool_result v + | LowerThan -> + let* l = untag_int_val left_v in + let* r = untag_int_val right_v in + let* v = with_optional_value (icmp builder Icmp.Slt l r "icmp_slt") in + tag_bool_result v + | Equal -> + let* l = untag_int_val left_v in + let* r = untag_int_val right_v in + let* v = with_optional_value (icmp builder Icmp.Eq l r "icmp_eq") in + tag_bool_result v + | NotEqual -> + let* l = untag_int_val left_v in + let* r = untag_int_val right_v in + let* v = with_optional_value (icmp builder Icmp.Ne l r "icmp_ne") in + tag_bool_result v + | And -> + let* l = untag_bool_val left_v in + let* r = untag_bool_val right_v in + let* v = with_optional_value (and_ builder l r "and") in + tag_bool_result v + | Or -> + let* l = untag_bool_val left_v in + let* r = untag_bool_val right_v in + let* v = with_optional_value (or_ builder l r "or") in + tag_bool_result v +;; + +let gen_unop_native op tagged_val = + match op with + | Negative -> + let* int_val = untag_int_val tagged_val in + let* result = with_optional_value (neg builder int_val "neg") in + tag_int_result result + | Not -> + let* bool_val = untag_bool_val tagged_val in + let* result = with_optional_value (not builder bool_val "not") in + tag_bool_result result +;; + +let rec gen_cexpr = function + | ComplexImmediate imm -> gen_imm imm + | ComplexUnit -> imm_unit + | ComplexBinOper (op, left, right) -> + let* left_v = gen_imm left in + let* right_v = gen_imm right in + gen_binop_native op left_v right_v + | ComplexUnarOper (op, imm) -> + let* v = gen_imm imm in + gen_unop_native op v + | ComplexTuple (e1, e2, rest) -> + let* args = + List.fold_left + (fun acc imm -> + let* vs = acc in + let* v = gen_imm imm in + return (vs @ [ v ])) + (return []) + (e1 :: e2 :: rest) + in + let len = List.length args in + let arr_len = if len = 0 then 1 else len in + let arr_ty = Llvm.array_type ptr_t arr_len in + let* alloca_arr = + with_optional_value (Some (Llvm.build_alloca arr_ty "tuple_args" builder)) + in + let* () = + Base.List.foldi args ~init:(return ()) ~f:(fun i acc v -> + let* () = acc in + let* elem_ptr = + with_optional_value + (Some + (Llvm.build_gep + arr_ty + alloca_arr + [| Llvm.const_int i32_t 0; Llvm.const_int i32_t i |] + "tuple_elem" + builder)) + in + let () = Llvm_backend.store builder v elem_ptr in + return ()) + in + let* args_ptr = + with_optional_value + (Some + (Llvm.build_gep + arr_ty + alloca_arr + [| Llvm.const_int i32_t 0; Llvm.const_int i32_t 0 |] + "tuple_args_ptr" + builder)) + in + let* create_tuple_func, create_tuple_type = lookup_func_type "create_tuple" in + with_optional_value + (call + builder + create_tuple_type + create_tuple_func + [| const_int int_t len; args_ptr |] + "boxed_create_tuple") + | ComplexField (tuple_imm, idx) -> + let* tuple_val = gen_imm tuple_imm in + let* field_func, field_type = lookup_func_type "field" in + let* v = + with_optional_value + (call + builder + field_type + field_func + [| tuple_val; const_int int_t (tag_int idx) |] + "field") + in + return v + | ComplexApp (ImmediateVar fname, first, rest) -> + let args_list = first :: rest in + if fname = "print_int" && List.length args_list = 1 + then + let* arg = gen_imm first in + let* tagged_i64 = + with_optional_value (ptrtoint builder arg int_t "print_int_arg") + in + let* print_int_func, print_int_type = lookup_func_type "print_int" in + let* () = + emit_void_st builder (Call (print_int_type, print_int_func, [| tagged_i64 |], "")) + in + imm_unit + else + let* callee_value, callee_from_alloca = + let* gc_allocas = get_gc_allocas in + match gc_allocas with + | Some allocas -> + (match Generator_state.map_find_opt allocas fname with + | Some alloca -> + let* v = with_optional_value (load builder ptr_t alloca fname) in + return (v, true) + | None -> + let* value_opt = find_value_opt fname in + (match value_opt with + | Some v -> return (v, false) + | None -> + let* resolved_value = resolved_find_value_opt fname in + (match resolved_value with + | None -> fail ("Unbound function: " ^ fname) + | Some v -> return (v, false)))) + | None -> + let* value_opt = find_value_opt fname in + (match value_opt with + | Some v -> return (v, false) + | None -> + let* resolved_value = resolved_find_value_opt fname in + (match resolved_value with + | None -> fail ("Unbound function: " ^ fname) + | Some v -> return (v, false))) + in + let* args = + List.fold_left + (fun acc imm -> + let* vs = acc in + let* v = gen_imm imm in + return (vs @ [ v ])) + (return []) + args_list + in + let args_values = Array.of_list args in + let num_args = Array.length args_values in + let is_direct_func = + match classify_value callee_value with + | ValueKind.Function -> Array.length (params callee_value) = num_args + | _ -> false + in + let is_zero_arg_with_unit = + match classify_value callee_value with + | ValueKind.Function -> Array.length (params callee_value) = 0 && num_args = 1 + | _ -> false + in + let use_direct = + callee_from_alloca = false + && (is_direct_func || is_zero_arg_with_unit) + && num_args <= 6 + in + if use_direct + then + let* type_opt = resolved_find_type_opt fname in + let* func_type = + match type_opt with + | Some ty -> return ty + | None -> fail ("Missing type for: " ^ fname) + in + let args_for_call = if is_zero_arg_with_unit then [||] else args_values in + with_optional_value + (call builder func_type callee_value args_for_call ("direct_" ^ fname)) + else + let* arity_opt = get_resolved_arity fname in + let* closure_value = maybe_closure callee_value arity_opt in + let* eml_applyN_func, eml_applyN_type = lookup_func_type "eml_applyN" in + let current_func = block_parent (insertion_block builder) in + if num_args = 0 + then ( + let arr_ty = Llvm.array_type ptr_t 1 in + let* alloca_arr = + with_optional_value (Some (Llvm.build_alloca arr_ty "apply_args" builder)) + in + let* args_ptr = + with_optional_value + (Some + (Llvm.build_gep + arr_ty + alloca_arr + [| Llvm.const_int i32_t 0; Llvm.const_int i32_t 0 |] + "apply_args_ptr" + builder)) + in + with_optional_value + (call + builder + eml_applyN_type + eml_applyN_func + [| closure_value; const_int int_t 0; args_ptr |] + "boxed_eml_applyN")) + else + let* _then_name, _else_name, merge_name = fresh_blocks in + let merge_block = append_block context merge_name current_func in + let blocks = + Array.init num_args (fun idx -> + append_block context ("apply_step_" ^ Int.to_string idx) current_func) + in + let* () = emit_void_st builder (Br blocks.(0)) in + let result_vals = Array.make num_args (Llvm.const_null ptr_t) in + let rec loop step_index = + if step_index >= num_args + then return () + else ( + let () = position_at_end blocks.(step_index) builder in + let* current_closure = + if step_index = 0 + then return closure_value + else + with_optional_value + (Llvm_backend.phi + builder + [ result_vals.(step_index - 1), blocks.(step_index - 1) ] + ("cur_" ^ Int.to_string step_index)) + in + let one_ty = Llvm.array_type ptr_t 1 in + let alloca_one = Llvm.build_alloca one_ty "apply_one" builder in + let elem_ptr = + Llvm.build_gep + one_ty + alloca_one + [| Llvm.const_int i32_t 0; Llvm.const_int i32_t 0 |] + "one_elem" + builder + in + Llvm_backend.store builder args_values.(step_index) elem_ptr; + let* step_result = + with_optional_value + (Llvm_backend.call + builder + eml_applyN_type + eml_applyN_func + [| current_closure; const_int int_t 1; elem_ptr |] + ("apply_step_" ^ Int.to_string step_index)) + in + result_vals.(step_index) <- step_result; + let* () = + if step_index < num_args - 1 + then emit_void_st builder (Br blocks.(step_index + 1)) + else emit_void_st builder (Br merge_block) + in + loop (step_index + 1)) + in + let* () = loop 0 in + position_at_end merge_block builder; + let* final_val = + with_optional_value + (Llvm_backend.phi + builder + [ result_vals.(num_args - 1), blocks.(num_args - 1) ] + "apply_result") + in + return final_val + | ComplexApp (_, _, _) -> + fail "LLVM codegen: ComplexApp with non-variable function not supported" + | ComplexBranch (cond_imm, then_e, else_e) -> + let* cond_val = gen_imm cond_imm in + let* bool_val = untag_bool_val cond_val in + let current_func = block_parent (insertion_block builder) in + let* then_name, else_name, merge_name = fresh_blocks in + let then_block = append_block context then_name current_func in + let else_block = append_block context else_name current_func in + let merge_block = append_block context merge_name current_func in + let* () = emit_void_st builder (CondBr (bool_val, then_block, else_block)) in + position_at_end then_block builder; + let* then_val = gen_anf then_e in + let* () = emit_void_st builder (Br merge_block) in + let then_bb = insertion_block builder in + position_at_end else_block builder; + let* else_val = gen_anf else_e in + let* () = emit_void_st builder (Br merge_block) in + let else_bb = insertion_block builder in + position_at_end merge_block builder; + let* v = + with_optional_value + (phi builder [ then_val, then_bb; else_val, else_bb ] "ite_result") + in + return v + | ComplexList _ | ComplexOption _ | ComplexLambda _ -> + fail "LLVM codegen: List/Option/Lambda not yet implemented" + +and gen_anf = function + | AnfExpr cexp -> gen_cexpr cexp + | AnfLet (_, name, cexp, cont) -> + let* value = gen_cexpr cexp in + let* () = + let* gc_allocas = get_gc_allocas in + match gc_allocas with + | Some allocas -> + let* alloca = + match Generator_state.map_find_opt allocas name with + | Some a -> return a + | None -> + let* entry_opt = get_gc_entry_block in + let* entry_block = + match entry_opt with + | Some blk -> return blk + | None -> fail "gen_anf: gc_entry_block not set" + in + let current_block = insertion_block builder in + let* alloca_ptr = + if current_block = entry_block + then with_optional_value (Llvm_backend.alloca builder ptr_t name) + else ( + let () = + match Llvm.instr_begin entry_block with + | Llvm.Before first -> position_before first builder + | Llvm.At_end _ -> position_at_end entry_block builder + in + let* alloca_in_entry = + with_optional_value (Llvm_backend.alloca builder ptr_t name) + in + let () = position_at_end current_block builder in + return alloca_in_entry) + in + let* () = + set_gc_allocas (Some (Base.Map.set allocas ~key:name ~data:alloca_ptr)) + in + return alloca_ptr + in + let* () = set_gc_allocas (Some (Base.Map.set allocas ~key:name ~data:alloca)) in + emit_void_st builder (Store (value, alloca)) + | None -> set_value name value + in + gen_anf cont +;; + +let declare_function (func_layout : function_layout) state = + let arg_types = Array.make (List.length func_layout.params) ptr_t in + let func_type = function_type ptr_t arg_types in + let llvm_name = + if func_layout.func_name = "main" then "eml_main" else func_layout.asm_name + in + let func = declare_function llvm_name func_type (current_module ()) in + let key = if func_layout.func_name = "main" then "main" else func_layout.asm_name in + { state with + value_env = Base.Map.set state.value_env ~key ~data:func + ; type_env = Base.Map.set state.type_env ~key ~data:func_type + } +;; + +let emit_gc_prologue = + let* init_gc_func, init_gc_type = lookup_func_type "init_gc" in + let* set_ptr_func, set_ptr_type = lookup_func_type "set_ptr_stack" in + let* frameaddr_func, frameaddr_type = lookup_func_type "llvm.frameaddress.p0" in + let* () = emit_void_st builder (Call (init_gc_type, init_gc_func, [||], "")) in + let* frame_ptr = + with_optional_value + (call builder frameaddr_type frameaddr_func [| const_int i32_t 0 |] "frame") + in + emit_void_st builder (Call (set_ptr_type, set_ptr_func, [| frame_ptr |], "")) +;; + +let gen_function + (func_layout : function_layout) + ~enable_gc + ~is_entry + ~func_index + initial_state + = + let comp = + let* state = get in + let* () = put { state with current_func_index = func_index } in + let* func, _ = + lookup_func_type + (if func_layout.func_name = "main" then "main" else func_layout.asm_name) + in + let entry_block = append_block context "entry" func in + position_at_end entry_block builder; + let* () = if enable_gc && is_entry then emit_gc_prologue else return () in + let* () = + if enable_gc + then set_gc_allocas (Some (Base.Map.empty (module Base.String))) + else return () + in + let* () = if enable_gc then set_gc_entry_block (Some entry_block) else return () in + let* () = set_value func_layout.asm_name func in + let* state_before_params = get in + let func_params = params func in + let* () = + Base.List.foldi func_layout.params ~init:(return ()) ~f:(fun param_index acc arg -> + let* () = acc in + match arg with + | ImmediateVar name -> + let* param_value = + if param_index >= 0 && param_index < Array.length func_params + then return (Array.get func_params param_index) + else fail "gen_function: param index out of bounds" + in + set_value_name name param_value; + (match enable_gc with + | true -> + let* gc_allocas = get_gc_allocas in + let* allocas_map = + match gc_allocas with + | Some m -> return m + | None -> fail "gen_function: enable_gc but gc_allocas not set" + in + let* alloca_ptr = with_optional_value (alloca builder ptr_t name) in + store builder param_value alloca_ptr; + set_gc_allocas (Some (Base.Map.set allocas_map ~key:name ~data:alloca_ptr)) + | false -> set_value name param_value) + | ImmediateConst _ -> return ()) + in + let* body_value = gen_anf func_layout.body in + let* () = emit_void_st builder (Ret (Some body_value)) in + let* () = if enable_gc then set_gc_allocas None else return () in + let* () = if enable_gc then set_gc_entry_block None else return () in + let* state = get in + let value_env = + let without_params = + List.fold_left + (fun env -> function + | ImmediateVar name -> Base.Map.remove env name + | _ -> env) + state.value_env + func_layout.params + in + List.fold_left + (fun env -> function + | ImmediateVar name -> + (match Generator_state.map_find_opt state_before_params.value_env name with + | Some v -> Base.Map.set env ~key:name ~data:v + | None -> env) + | _ -> env) + without_params + func_layout.params + in + put + { state with + value_env = Base.Map.set value_env ~key:func_layout.func_name ~data:func + } + in + run comp initial_state +;; + +let gen_program ~output_file ~enable_gc (program : anf_program) = + reset_for_new_program (); + predefined_init (); + let value_env, type_env = snapshot_envs () in + let { functions; resolve; _ } = analyze program in + let initial_state : Generator_state.state = + { value_env + ; type_env + ; gc_allocas = None + ; gc_entry_block = None + ; naming_state = Default_naming.init + ; resolve = Some resolve + ; current_func_index = 0 + } + in + let entry_name = + match List.find_opt (fun func -> func.func_name = "main") functions with + | Some _ -> "main" + | None -> + (match List.rev functions with + | [] -> "" + | last :: _ -> last.func_name) + in + let state_after_declares = + List.fold_left (fun state func -> declare_function func state) initial_state functions + in + match + Base.List.foldi + functions + ~init:(Ok state_after_declares) + ~f:(fun idx acc func_layout -> + match acc with + | Error _ -> acc + | Ok state -> + let is_entry = func_layout.func_name = entry_name in + (match gen_function func_layout ~enable_gc ~is_entry ~func_index:idx state with + | Ok ((), state') -> Ok state' + | Error err -> Error err)) + with + | Error err -> Error err + | Ok _ -> + print_module output_file (current_module ()); + Ok () +;; diff --git a/EML/lib/backend/llvm/generator_state.ml b/EML/lib/backend/llvm/generator_state.ml new file mode 100644 index 00000000..faa842c4 --- /dev/null +++ b/EML/lib/backend/llvm/generator_state.ml @@ -0,0 +1,125 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Llvm + +module type NAMING = sig + type t + + val init : t + val fresh_blocks : t -> (string * string * string) * t +end + +module Default_naming : NAMING = struct + type t = int + + let init = 0 + + let fresh_blocks n = + let then_name = "then_" ^ Int.to_string n in + let else_name = "else_" ^ Int.to_string n in + let merge_name = "merge_" ^ Int.to_string n in + (then_name, else_name, merge_name), n + 1 + ;; +end + +module Make (N : NAMING) = struct + type state = + { value_env : (string, llvalue, Base.String.comparator_witness) Base.Map.t + ; type_env : (string, lltype, Base.String.comparator_witness) Base.Map.t + ; gc_allocas : (string, llvalue, Base.String.comparator_witness) Base.Map.t option + ; gc_entry_block : llbasicblock option + ; naming_state : N.t + ; resolve : (int -> string -> (string * int) option) option + ; current_func_index : int + } + + type 'a t = state -> ('a * state, string) Result.t + + let return x state = Ok (x, state) + + let bind m f state = + match m state with + | Ok (x, state') -> f x state' + | Error err -> Error err + ;; + + let ( let* ) = bind + let get state = Ok (state, state) + let put state _ = Ok ((), state) + + let modify f state = + match get state with + | Ok (current_state, _) -> put (f current_state) state + | Error err -> Error err + ;; + + let fail err = fun _ -> Error err + + let map_find_opt (map : (string, 'a, _) Base.Map.t) key : 'a option = + Base.Map.fold map ~init:None ~f:(fun ~key:map_key ~data:map_value acc -> + match acc with + | Some _ -> acc + | None -> if String.equal map_key key then Some map_value else None) + ;; + + let find_value_opt name state = Ok (map_find_opt state.value_env name, state) + let find_type_opt name state = Ok (map_find_opt state.type_env name, state) + + let resolve_key state name = + match state.resolve with + | None -> name + | Some resolver -> + (match resolver state.current_func_index name with + | Some (asm_name, _) -> asm_name + | None -> name) + ;; + + let resolved_find_value_opt name state = + let resolved_key = resolve_key state name in + Ok (map_find_opt state.value_env resolved_key, state) + ;; + + let resolved_find_type_opt name state = + let resolved_key = resolve_key state name in + Ok (map_find_opt state.type_env resolved_key, state) + ;; + + let set_value name value = + modify (fun state -> + { state with value_env = Base.Map.set state.value_env ~key:name ~data:value }) + ;; + + let set_type name lltype = + modify (fun state -> + { state with type_env = Base.Map.set state.type_env ~key:name ~data:lltype }) + ;; + + let remove_value name = + modify (fun state -> { state with value_env = Base.Map.remove state.value_env name }) + ;; + + let get_gc_allocas state = Ok (state.gc_allocas, state) + + let set_gc_allocas allocas_map = + modify (fun state -> { state with gc_allocas = allocas_map }) + ;; + + let get_gc_entry_block state = Ok (state.gc_entry_block, state) + + let set_gc_entry_block block = + modify (fun state -> { state with gc_entry_block = block }) + ;; + + let fresh_blocks = + let* state = get in + let triple, next = N.fresh_blocks state.naming_state in + let* () = put { state with naming_state = next } in + return triple + ;; + + let run m init = m init +end + +include Make (Default_naming) diff --git a/EML/lib/backend/llvm/runner.ml b/EML/lib/backend/llvm/runner.ml new file mode 100644 index 00000000..4ad9e879 --- /dev/null +++ b/EML/lib/backend/llvm/runner.ml @@ -0,0 +1,19 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Middleend.Anf + +let gen_program ~enable_gc ppf (program : anf_program) : (unit, string) Result.t = + let temp_ll_path = Filename.temp_file "eml_llvm" ".ll" in + match Generator.gen_program ~output_file:temp_ll_path ~enable_gc program with + | Error err -> + (try Sys.remove temp_ll_path with + | _ -> ()); + Error err + | Ok () -> + let content = In_channel.with_open_text temp_ll_path In_channel.input_all in + Sys.remove temp_ll_path; + Format.fprintf ppf "%s" content; + Ok () +;; diff --git a/EML/lib/backend/llvm/runner.mli b/EML/lib/backend/llvm/runner.mli new file mode 100644 index 00000000..2f4ff9fa --- /dev/null +++ b/EML/lib/backend/llvm/runner.mli @@ -0,0 +1,9 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val gen_program + : enable_gc:bool + -> Format.formatter + -> Middleend.Anf.anf_program + -> (unit, string) Result.t diff --git a/EML/lib/frontend/binutils.ml b/EML/lib/frontend/binutils.ml deleted file mode 100644 index 712d5ffd..00000000 --- a/EML/lib/frontend/binutils.ml +++ /dev/null @@ -1,33 +0,0 @@ -(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) - -(** SPDX-License-Identifier: LGPL-3.0-or-later *) - -(** Primitives the generated code can call. *) - -type primitive = - { name : string - ; arity : int - } - -let primitive_arities ~enable_gc : primitive list = - let base = - [ { name = "print_int"; arity = 1 } - ; { name = "print_endline"; arity = 1 } - ; { name = "create_tuple"; arity = 2 } - ; { name = "field"; arity = 2 } - ; { name = "alloc_closure"; arity = 2 } - ; { name = "eml_applyN"; arity = 3 } - ] - in - if enable_gc - then - base - @ [ { name = "get_heap_start"; arity = 0 } - ; { name = "get_heap_final"; arity = 0 } - ; { name = "collect"; arity = 0 } - ; { name = "print_gc_status"; arity = 0 } - ] - else base -;; - -let primitive_names ~enable_gc = List.map (fun p -> p.name) (primitive_arities ~enable_gc) diff --git a/EML/lib/frontend/binutils.mli b/EML/lib/frontend/binutils.mli deleted file mode 100644 index 9ce10f2e..00000000 --- a/EML/lib/frontend/binutils.mli +++ /dev/null @@ -1,11 +0,0 @@ -(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) - -(** SPDX-License-Identifier: LGPL-3.0-or-later *) - -type primitive = - { name : string - ; arity : int - } - -val primitive_arities : enable_gc:bool -> primitive list -val primitive_names : enable_gc:bool -> string list diff --git a/EML/lib/runtime/primitives.ml b/EML/lib/runtime/primitives.ml new file mode 100644 index 00000000..2b75f442 --- /dev/null +++ b/EML/lib/runtime/primitives.ml @@ -0,0 +1,43 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +let predefined_runtime_op_names = Frontend.Ast.unary_op_list + +type llvm_arg = + | Ptr + | Int + | I32 + +type llvm_ret = + | RPtr + | RInt + | RVoid + +type runtime_func_sig = + { name : string + ; ret : llvm_ret + ; args : llvm_arg list + } + +let predefined_runtime_funcs : runtime_func_sig list = + [ { name = "eml_applyN"; ret = RPtr; args = [ Ptr; Int; Ptr ] } + ; { name = "create_tuple"; ret = RPtr; args = [ Int; Ptr ] } + ; { name = "alloc_closure"; ret = RPtr; args = [ Ptr; Int ] } + ; { name = "field"; ret = RPtr; args = [ Ptr; Int ] } + ; { name = "llvm_call_indirect"; ret = RPtr; args = [ Ptr; Ptr; Int ] } + ; { name = "print_int"; ret = RVoid; args = [ Int ] } + ; { name = "init_gc"; ret = RVoid; args = [] } + ; { name = "destroy_gc"; ret = RVoid; args = [] } + ; { name = "set_ptr_stack"; ret = RVoid; args = [ Ptr ] } + ; { name = "get_heap_start"; ret = RInt; args = [] } + ; { name = "get_heap_final"; ret = RInt; args = [] } + ; { name = "collect"; ret = RPtr; args = [] } + ; { name = "print_gc_status"; ret = RPtr; args = [] } + ; { name = "llvm.frameaddress.p0"; ret = RPtr; args = [ I32 ] } + ] +;; + +let runtime_primitive_arities : (string * int) list = + List.map (fun { name; args } -> name, List.length args) predefined_runtime_funcs +;; From a972fc1b83070d31669890c0a50ade293bfff02f Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Sun, 8 Mar 2026 13:16:41 +0300 Subject: [PATCH 39/74] delete dead code Signed-off-by: Victoria Ostrovskaya --- EML/test_qc/dune | 15 ------- EML/test_qc/test_qc.ml | 98 ----------------------------------------- EML/test_qc/test_qc.mli | 7 --- 3 files changed, 120 deletions(-) delete mode 100644 EML/test_qc/dune delete mode 100644 EML/test_qc/test_qc.ml delete mode 100644 EML/test_qc/test_qc.mli diff --git a/EML/test_qc/dune b/EML/test_qc/dune deleted file mode 100644 index 311ba09f..00000000 --- a/EML/test_qc/dune +++ /dev/null @@ -1,15 +0,0 @@ -(executable - (name test_qc) - (enabled_if false) - (modules test_qc) - (libraries qcheck angstrom) - (preprocess - (pps ppx_deriving_qcheck ppx_deriving.show))) - -(rule - (alias runtest) - (enabled_if false) - (deps - (:< test_qc.exe)) - (action - (run %{<}))) diff --git a/EML/test_qc/test_qc.ml b/EML/test_qc/test_qc.ml deleted file mode 100644 index fa125b7f..00000000 --- a/EML/test_qc/test_qc.ml +++ /dev/null @@ -1,98 +0,0 @@ -[@@@ocaml.text "/*"] - -(** Copyright 2023-2024, Kakadu and contributors *) - -(** SPDX-License-Identifier: LGPL-3.0-or-later *) - -[@@@ocaml.text "/*"] - -(* run this test via `dune test --force` *) - -module AST = struct - type t = - | Const of (int[@gen QCheck.Gen.return 1]) - | Add of t * t - [@@deriving qcheck, show { with_path = false }] -end - -module PP = struct - let rec pp ppf = function - | AST.Const n -> Format.fprintf ppf "%d" n - (* | Add (l, r) -> Format.fprintf ppf "%a+%a" pp l pp r *) - | Add (l, r) -> Format.fprintf ppf "(%a+%a)" pp l pp r - ;; -end - -module Parser = struct - open Angstrom - - let prio expr table = - let len = Array.length table in - let rec helper level = - if level >= len - then expr - else ( - let xs = table.(level) in - return (List.fold_left (fun acc (op, r) -> op acc r)) - <*> helper (level + 1) - <*> many - (choice - (List.map - (fun (op, f) -> op *> helper (level + 1) >>= fun r -> return (f, r)) - xs))) - in - helper 0 - ;; - - let expr_small = - let code0 = Char.code '0' in - Angstrom.satisfy (function - | '0' .. '9' -> true - | _ -> false) - >>| fun c -> AST.Const (Char.code c - code0) - ;; - - let expr = - fix (fun self -> - let add a b = AST.Add (a, b) in - prio (expr_small <|> (char '(' *> self <* char ')')) [| [ char '+', add ] |]) - ;; -end - -let rec shrink_expr = - let open QCheck.Iter in - (* fun _ -> empty *) - function - | AST.Const _ -> empty - | Add (l, r) -> - of_list [ l; r ] - <+> (shrink_expr l >>= fun l -> return (AST.Add (l, r))) - <+> (shrink_expr r >>= fun r -> return (AST.Add (l, r))) -;; - -let arbitrary_expr = - (* let open QCheck.Iter in *) - QCheck.make AST.gen ~print:(Format.asprintf "%a" PP.pp) ~shrink:shrink_expr -;; - -let _ = - QCheck_runner.run_tests - [ QCheck.( - Test.make arbitrary_expr (fun l -> - match - Angstrom.parse_string - ~consume:Angstrom.Consume.All - Parser.expr - (Format.asprintf "%a" PP.pp l) - with - | Result.Ok after when after = l -> true - | Result.Ok after -> - Format.printf "before : %a\n%!" AST.pp l; - (* Format.printf " : `%a`\n%!" PP.pp l; *) - Format.printf "`%a`\n%!" AST.pp after; - false - | Result.Error _ -> - (* Format.printf "failed on : %a\n%!" Lam.pp l; *) - false)) - ] -;; diff --git a/EML/test_qc/test_qc.mli b/EML/test_qc/test_qc.mli deleted file mode 100644 index a65c69d1..00000000 --- a/EML/test_qc/test_qc.mli +++ /dev/null @@ -1,7 +0,0 @@ -[@@@ocaml.text "/*"] - -(** Copyright 2023-2024, Kakadu and contributors *) - -(** SPDX-License-Identifier: LGPL-3.0-or-later *) - -[@@@ocaml.text "/*"] From 83d6d9d2e15a8852977e94ce82264f2f0c3c4d3d Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Sun, 8 Mar 2026 13:21:00 +0300 Subject: [PATCH 40/74] refactor riscv, cc, ll Signed-off-by: Victoria Ostrovskaya --- EML/lib/backend/ricsv/analysis.ml | 121 +++++++++++++++++++++++--- EML/lib/backend/ricsv/architecture.ml | 22 ++--- EML/lib/backend/ricsv/auxillary.ml | 46 ++++++---- EML/lib/backend/ricsv/generator.ml | 47 +++++----- EML/lib/dune | 2 +- EML/lib/middleend/cc.ml | 41 +++++---- 6 files changed, 195 insertions(+), 84 deletions(-) diff --git a/EML/lib/backend/ricsv/analysis.ml b/EML/lib/backend/ricsv/analysis.ml index 3b2674f9..8f108b29 100644 --- a/EML/lib/backend/ricsv/analysis.ml +++ b/EML/lib/backend/ricsv/analysis.ml @@ -5,12 +5,16 @@ open Frontend.Ast open Middleend.Anf +let word_size = 8 + type function_layout = { func_name : string ; asm_name : string ; params : immediate list ; body : anf_expr ; slots_count : int + ; max_stack_args : int + ; max_create_tuple_array_bytes : int } type analysis_result = @@ -19,6 +23,8 @@ type analysis_result = ; resolve : int -> string -> (string * int) option } +let arg_regs_count = 8 + let rec slots_in_imm = function | ImmediateVar _ | ImmediateConst _ -> 0 @@ -28,12 +34,12 @@ and slots_in_cexpr = function | ComplexBinOper (_, left, right) -> slots_in_imm left + slots_in_imm right | ComplexUnarOper (_, imm) -> slots_in_imm imm | ComplexTuple (first, second, rest) -> - Base.List.fold_left (first :: second :: rest) ~init:0 ~f:(fun acc e -> - acc + slots_in_imm e) + let elts = first :: second :: rest in + List.length elts + List.fold_left (fun acc e -> acc + slots_in_imm e) 0 elts | ComplexField (imm, _) -> slots_in_imm imm | ComplexList imm_list -> let n = List.length imm_list in - n + Base.List.fold_left imm_list ~init:0 ~f:(fun acc e -> acc + slots_in_imm e) + n + List.fold_left (fun acc e -> acc + slots_in_imm e) 0 imm_list | ComplexApp (first, second, rest) -> (* +1 for curried-call intermediate; +1 per arg for spill_dangerous_args. +8 for spill_caller_saved_vars_to_frame at start of every invocation (can spill a0-a7). @@ -41,11 +47,7 @@ and slots_in_cexpr = function let args = first :: second :: rest in let nargs = List.length args in let extra = if nargs >= 2 then 12 else 0 in - 1 - + 8 - + nargs - + extra - + Base.List.fold_left args ~init:0 ~f:(fun acc e -> acc + slots_in_imm e) + 1 + 8 + nargs + extra + List.fold_left (fun acc e -> acc + slots_in_imm e) 0 args | ComplexOption None -> 0 | ComplexOption (Some imm) -> slots_in_imm imm | ComplexLambda (_, body) -> slots_in_anf body @@ -57,6 +59,79 @@ and slots_in_anf = function | AnfLet (_, _, cexp, cont) -> 1 + slots_in_cexpr cexp + slots_in_anf cont ;; +let rec max_stack_args_cexpr = function + | ComplexImmediate _ | ComplexUnit -> 0 + | ComplexBinOper (_, left, right) -> + max (max_stack_args_imm left) (max_stack_args_imm right) + | ComplexUnarOper (_, imm) -> max_stack_args_imm imm + | ComplexTuple (first, second, rest) -> + List.fold_left + (fun acc e -> max acc (max_stack_args_imm e)) + 0 + (first :: second :: rest) + | ComplexField (imm, _) -> max_stack_args_imm imm + | ComplexList imm_list -> + List.fold_left (fun acc e -> max acc (max_stack_args_imm e)) 0 imm_list + | ComplexApp (_first, second, rest) -> + let nargs = 1 + List.length rest in + (* Reserve enough for largest call: eml_applyN needs nargs words; direct needs max(0, nargs-8). *) + let need = nargs in + let in_args = + List.fold_left (fun acc e -> max acc (max_stack_args_imm e)) 0 (second :: rest) + in + max need in_args + | ComplexOption None -> 0 + | ComplexOption (Some imm) -> max_stack_args_imm imm + | ComplexLambda (_, body) -> max_stack_args_anf body + | ComplexBranch (cond, then_e, else_e) -> + max + (max_stack_args_imm cond) + (max (max_stack_args_anf then_e) (max_stack_args_anf else_e)) + +and max_stack_args_imm = function + | ImmediateVar _ | ImmediateConst _ -> 0 + +and max_stack_args_anf = function + | AnfExpr cexp -> max_stack_args_cexpr cexp + | AnfLet (_, _, cexp, cont) -> max (max_stack_args_cexpr cexp) (max_stack_args_anf cont) +;; + +let rec max_create_tuple_array_cexpr = function + | ComplexImmediate _ | ComplexUnit -> 0 + | ComplexBinOper (_, left, right) -> + max (max_create_tuple_array_imm left) (max_create_tuple_array_imm right) + | ComplexUnarOper (_, imm) -> max_create_tuple_array_imm imm + | ComplexTuple (first, second, rest) -> + let elts = first :: second :: rest in + let here = List.length elts * word_size in + List.fold_left (fun acc e -> max acc (max_create_tuple_array_imm e)) here elts + | ComplexField (imm, _) -> max_create_tuple_array_imm imm + | ComplexList imm_list -> + (* Each cons adds 16 bytes; they accumulate along the list build *) + let per_cons = 2 * word_size in + let from_elts = + List.fold_left (fun acc e -> acc + max_create_tuple_array_imm e) 0 imm_list + in + (per_cons * List.length imm_list) + from_elts + | ComplexApp (_f, second, rest) -> + List.fold_left (fun acc e -> max acc (max_create_tuple_array_imm e)) 0 (second :: rest) + | ComplexOption None -> 0 + | ComplexOption (Some imm) -> max_create_tuple_array_imm imm + | ComplexLambda (_, body) -> max_create_tuple_array_anf body + | ComplexBranch (cond, then_e, else_e) -> + max + (max_create_tuple_array_imm cond) + (max (max_create_tuple_array_anf then_e) (max_create_tuple_array_anf else_e)) + +and max_create_tuple_array_imm = function + | ImmediateVar _ | ImmediateConst _ -> 0 + +and max_create_tuple_array_anf = function + | AnfExpr cexp -> max_create_tuple_array_cexpr cexp + | AnfLet (_, _, cexp, cont) -> + max (max_create_tuple_array_cexpr cexp) (max_create_tuple_array_anf cont) +;; + let rec params_of_anf = function | AnfExpr (ComplexLambda (pats, body)) -> let imms = @@ -90,9 +165,16 @@ let analyze (program : anf_program) = let raw = List.filter_map (function - | AnfValue (_, (func_name, _arity, body), _) -> + | AnfValue (_, (func_name, arity, body), _) -> let params, body = params_of_anf body in - Some (func_name, params, body, slots_in_anf body) + Some + ( func_name + , arity + , params + , body + , slots_in_anf body + , max_stack_args_anf body + , max_create_tuple_array_anf body ) | AnfEval _ -> None) program in @@ -106,8 +188,21 @@ let analyze (program : anf_program) = in let functions = List.map - (fun (func_name, params, body, slots_count) -> - { func_name; asm_name = asm_name func_name; params; body; slots_count }) + (fun ( func_name + , _arity + , params + , body + , slots_count + , max_stack_args + , max_create_tuple_array_bytes ) -> + { func_name + ; asm_name = asm_name func_name + ; params + ; body + ; slots_count + ; max_stack_args + ; max_create_tuple_array_bytes + }) raw in let has_main = List.exists (fun fn -> String.equal fn.func_name "main") functions in @@ -121,6 +216,8 @@ let analyze (program : anf_program) = ; params = [] ; body = AnfExpr (ComplexImmediate (ImmediateConst (ConstInt 0))) ; slots_count = 0 + ; max_stack_args = 0 + ; max_create_tuple_array_bytes = 0 } in functions @ [ synthetic_main ]) diff --git a/EML/lib/backend/ricsv/architecture.ml b/EML/lib/backend/ricsv/architecture.ml index 35fb62c5..3eee95df 100644 --- a/EML/lib/backend/ricsv/architecture.ml +++ b/EML/lib/backend/ricsv/architecture.ml @@ -125,27 +125,21 @@ module Riscv_backend = struct let frame_header_size = 2 * word_size let saved_fp_offset = 0 let saved_ra_offset = word_size - let riscv_imm12_min = -2048 - let riscv_imm12_max = 2047 - let fits_imm12 n = n >= riscv_imm12_min && n <= riscv_imm12_max type location = | Loc_reg of reg | Loc_mem of offset let prologue ~enable_gc ~name ~stack_size = - let ra_slot = fp, saved_ra_offset in - let fp_slot = fp, saved_fp_offset in - let dec_sp = - if fits_imm12 (-stack_size) - then addi sp sp (-stack_size) - else li t0 stack_size @ sub sp sp t0 - in - let set_fp = - let ofs = stack_size - frame_header_size in - if fits_imm12 ofs then addi fp sp ofs else li t0 ofs @ add fp sp t0 + let ra_slot = sp, stack_size - saved_ra_offset in + let fp_slot = sp, stack_size - frame_header_size in + let base = + label name + @ addi sp sp (-stack_size) + @ sd ra ra_slot + @ sd fp fp_slot + @ addi fp sp (stack_size - frame_header_size) in - let base = label name @ mv t1 fp @ dec_sp @ set_fp @ sd ra ra_slot @ sd t1 fp_slot in if enable_gc && String.equal name "main" then base @ call "init_gc" @ mv a0 fp @ call "set_ptr_stack" else base diff --git a/EML/lib/backend/ricsv/auxillary.ml b/EML/lib/backend/ricsv/auxillary.ml index 889de3f1..e091400d 100644 --- a/EML/lib/backend/ricsv/auxillary.ml +++ b/EML/lib/backend/ricsv/auxillary.ml @@ -15,32 +15,41 @@ let is_caller_saved = function let to_tagged_bool dst = add dst dst dst @ add_tag_items dst 1 -let compare_ordering dst r1 r2 ~invert = - let base = slt dst r1 r2 in +let compare_ordering dst left_reg right_reg ~invert = + let base = slt dst left_reg right_reg in let tagged = if invert then base @ xori dst dst 1 else base in tagged @ to_tagged_bool dst ;; -let compare_eq_ne dst r1 r2 ~is_eq = - let base = xor dst r1 r2 in +let compare_eq_ne dst left_reg right_reg ~is_eq = + let base = xor dst left_reg right_reg in let tagged = if is_eq then base @ seqz dst dst else base @ snez dst dst in tagged @ to_tagged_bool dst ;; -let bin_op dst op r1 r2 : (instr list, string) result = +let bin_op dst op left_reg right_reg : (instr list, string) result = match op with - | "+" -> Ok (add dst r1 r2 @ add_tag_items dst (-1)) - | "-" -> Ok (sub dst r1 r2 @ add_tag_items dst 1) - | "*" -> Ok (srli r1 r1 1 @ addi r2 r2 (-1) @ mul dst r1 r2 @ add_tag_items dst 1) + | "+" -> Ok (add dst left_reg right_reg @ add_tag_items dst (-1)) + | "-" -> Ok (sub dst left_reg right_reg @ add_tag_items dst 1) + | "*" -> + Ok + (srli left_reg left_reg 1 + @ addi right_reg right_reg (-1) + @ mul dst left_reg right_reg + @ add_tag_items dst 1) | "/" -> Ok - (srli r1 r1 1 @ srli r2 r2 1 @ div dst r1 r2 @ add dst dst dst @ add_tag_items dst 1) - | "<" -> Ok (compare_ordering dst r1 r2 ~invert:false) - | ">" -> Ok (compare_ordering dst r2 r1 ~invert:false) - | "<=" -> Ok (compare_ordering dst r2 r1 ~invert:true) - | ">=" -> Ok (compare_ordering dst r1 r2 ~invert:true) - | "=" -> Ok (compare_eq_ne dst r1 r2 ~is_eq:true) - | "<>" -> Ok (compare_eq_ne dst r1 r2 ~is_eq:false) + (srli left_reg left_reg 1 + @ srli right_reg right_reg 1 + @ div dst left_reg right_reg + @ add dst dst dst + @ add_tag_items dst 1) + | "<" -> Ok (compare_ordering dst left_reg right_reg ~invert:false) + | ">" -> Ok (compare_ordering dst right_reg left_reg ~invert:false) + | "<=" -> Ok (compare_ordering dst right_reg left_reg ~invert:true) + | ">=" -> Ok (compare_ordering dst left_reg right_reg ~invert:true) + | "=" -> Ok (compare_eq_ne dst left_reg right_reg ~is_eq:true) + | "<>" -> Ok (compare_eq_ne dst left_reg right_reg ~is_eq:false) | _ -> Error ("unsupported binary operator: " ^ op) ;; @@ -75,8 +84,11 @@ let indices_of_args_to_spill state exps = List.rev (snd (List.fold_left - (fun (i, acc) arg -> - i + 1, if is_rewrites_result_regs state arg then i :: acc else acc) + (fun (index, dangerous_indices) arg -> + ( index + 1 + , if is_rewrites_result_regs state arg + then index :: dangerous_indices + else dangerous_indices )) (0, []) exps)) ;; diff --git a/EML/lib/backend/ricsv/generator.ml b/EML/lib/backend/ricsv/generator.ml index 3d47976f..2a24040e 100644 --- a/EML/lib/backend/ricsv/generator.ml +++ b/EML/lib/backend/ricsv/generator.ml @@ -13,11 +13,11 @@ open Auxillary let alloc_frame_slot = let modify_frame_offset f = - modify (fun st -> { st with frame_offset = f st.frame_offset }) + modify (fun state -> { state with frame_offset = f state.frame_offset }) in - let* () = modify_frame_offset (fun n -> n + word_size) in - let* st = get in - return (fp, -st.frame_offset) + let* () = modify_frame_offset (fun offset -> offset + word_size) in + let* state = get in + return (fp, -state.frame_offset) ;; let store_reg_into_frame reg = @@ -40,12 +40,12 @@ let load_into_reg dst_reg loc = (** Spill function parameters to the frame in param order (index 0 → first slot). Ensures env maps each param name to a consistent slot so (self l) loads self, not l. *) let spill_params_to_frame params_reg = - Base.List.foldi params_reg ~init:(return ()) ~f:(fun i acc p -> + Base.List.foldi params_reg ~init:(return ()) ~f:(fun index acc param -> let* () = acc in - match p with + match param with | ImmediateVar name -> - let r = List.nth arg_regs i in - let* slot = store_reg_into_frame r in + let reg = List.nth arg_regs index in + let* slot = store_reg_into_frame reg in modify_env (fun env -> Base.Map.set env ~key:name ~data:slot) | _ -> return ()) ;; @@ -93,9 +93,9 @@ let evacuate_reg dst = ;; let resolve_call_symbol name = - let* st = get in - match st.symbol_resolve st.current_func_index name with - | Some (asm, _) -> return asm + let* state = get in + match state.symbol_resolve state.current_func_index name with + | Some (asm_name, _) -> return asm_name | None -> return name ;; @@ -112,10 +112,10 @@ let gen_imm dst = function let* state = get in let sym, arity = match state.symbol_resolve state.current_func_index name with - | Some (asm_name, a) -> asm_name, a + | Some (asm_name, arity_val) -> asm_name, arity_val | None -> (match Base.Map.find state.arity_map name with - | Some a -> name, a + | Some arity_val -> name, arity_val | None -> name, -1) in if arity < 0 @@ -123,9 +123,9 @@ let gen_imm dst = function else ( match arity with | 0 -> append (call sym) - | n -> + | nargs -> let* () = append (la result_reg sym) in - let* () = append (li (List.nth arg_regs 1) n) in + let* () = append (li (List.nth arg_regs 1) nargs) in append (call "alloc_closure"))) ;; @@ -186,6 +186,8 @@ let gen_call_with_regs dst regs args spilled symbol = if reserved > 0 then append (addi sp sp reserved) else return () ;; +(* let foo = ... in + foo () *) let gen_nullary dst fname = let* sym = resolve_call_symbol fname in let* () = append (call sym) in @@ -405,8 +407,8 @@ let flush_instr_buffer ppf = return () ;; -let gen_func ~enable_gc func_name params body frame_sz ppf = - fprintf ppf "\n .globl %s\n .type %s, @function\n" func_name func_name; +let gen_func ~enable_gc asm_name params body frame_sz ppf = + fprintf ppf "\n .globl %s\n .type %s, @function\n" asm_name asm_name; let args = List.length params in let params_reg, params_stack = ( Base.List.take params (min args arg_regs_count) @@ -424,23 +426,22 @@ let gen_func ~enable_gc func_name params body frame_sz ppf = bind_param_to_stack e i p) in let* () = set_env env in - let* () = append (prologue ~enable_gc ~name:func_name ~stack_size:frame_sz) in + let* () = append (prologue ~enable_gc ~name:asm_name ~stack_size:frame_sz) in let* st = get in let* () = put { st with frame_offset = 0 } in let* () = spill_params_to_frame params_reg in let* () = gen_anf result_reg body in - let* () = append (epilogue ~enable_gc ~is_main:(String.equal func_name "main")) in + let* () = append (epilogue ~enable_gc ~is_main:(String.equal asm_name "main")) in let* () = flush_instr_buffer ppf in return () ;; -let gen_program ?(enable_gc = false) ppf (analysis : analysis_result) = +let gen_program ~enable_gc ppf (analysis : analysis_result) = fprintf ppf ".section .text"; - let base = Frontend.Binutils.primitive_arities ~enable_gc in + let base = Runtime.Primitives.runtime_primitive_arities in let arity_map = List.fold_left - (fun map { Frontend.Binutils.name; arity } -> - Base.Map.set map ~key:name ~data:arity) + (fun map (name, arity) -> Base.Map.set map ~key:name ~data:arity) analysis.arity_map base in diff --git a/EML/lib/dune b/EML/lib/dune index fb8425e4..bc13309d 100644 --- a/EML/lib/dune +++ b/EML/lib/dune @@ -4,7 +4,7 @@ (name EML_lib) (public_name EML.lib) (modules :standard) - (libraries base angstrom) + (libraries base angstrom llvm) (preprocess (pps ppx_deriving.show ppx_deriving.eq)) (instrumentation diff --git a/EML/lib/middleend/cc.ml b/EML/lib/middleend/cc.ml index 100eb96e..40e9ffe2 100644 --- a/EML/lib/middleend/cc.ml +++ b/EML/lib/middleend/cc.ml @@ -4,6 +4,7 @@ open Format open Frontend.Ast +open Runtime.Primitives module VarSet = Set.Make (String) module EnvMap = Map.Make (String) @@ -18,10 +19,10 @@ let vars_in_pattern p = | PatAny -> VarSet.empty | PatVariable x -> VarSet.singleton x | PatConst _ -> VarSet.empty + | PatTuple (p1, p2, rest) -> union_map_list walk (p1 :: p2 :: rest) | PatConstruct (_, None) -> VarSet.empty | PatConstruct (_, Some q) -> walk q | PatType (q, _) -> walk q - | PatTuple (p1, p2, rest) -> union_map_list walk (p1 :: p2 :: rest) | PatUnit | PatList _ | PatOption _ -> VarSet.empty in walk p @@ -62,12 +63,12 @@ let rec collect_free_vars = function (match else_opt with | None -> [] | Some e -> [ e ])) + | ExpTuple (e1, e2, rest) -> union_map_list collect_free_vars (e1 :: e2 :: rest) | ExpConstruct (_, None) -> VarSet.empty | ExpConstruct (_, Some e) -> collect_free_vars e | ExpTypeAnnotation (e, _) -> collect_free_vars e | ExpBinOper (_, e1, e2) -> VarSet.union (collect_free_vars e1) (collect_free_vars e2) | ExpUnarOper (_, e) -> collect_free_vars e - | ExpTuple (e1, e2, rest) -> union_map_list collect_free_vars (e1 :: e2 :: rest) | ExpList es -> union_map_list collect_free_vars es | ExpOption e_opt -> (match e_opt with @@ -115,8 +116,11 @@ let extend_capture_env env pat captured_set = let rec add_captures_for_pat acc = function | PatAny | PatConst _ | PatConstruct (_, None) -> acc | PatVariable name -> EnvMap.add name captured_set acc + | PatTuple (p1, p2, rest) -> + let acc = add_captures_for_pat acc p1 in + let acc = add_captures_for_pat acc p2 in + List.fold_left add_captures_for_pat acc rest | PatConstruct (_, Some p) | PatType (p, _) -> add_captures_for_pat acc p - | PatTuple (p1, p2, rest) -> List.fold_left add_captures_for_pat acc (p1 :: p2 :: rest) | PatUnit | PatList _ | PatOption _ -> acc in add_captures_for_pat env pat @@ -203,6 +207,19 @@ and convert_expr = function return (Some e') in return (ExpBranch (cond', then_e', else_e')) + | ExpTuple (e1, e2, rest) -> + let* e1' = convert_expr e1 in + let* e2' = convert_expr e2 in + let* rest' = + List.fold_right + (fun e acc -> + let* e' = convert_expr e in + let* rest_acc = acc in + return (e' :: rest_acc)) + rest + (return []) + in + return (ExpTuple (e1', e2', rest')) | ExpConstruct (_, None) as e -> return e | ExpConstruct (tag, Some e) -> let* e' = convert_expr e in @@ -228,19 +245,6 @@ and convert_expr = function (return []) in return (ExpList es') - | ExpTuple (e1, e2, rest) -> - let* e1' = convert_expr e1 in - let* e2' = convert_expr e2 in - let* rest' = - List.fold_right - (fun e acc -> - let* e' = convert_expr e in - let* acc' = acc in - return (e' :: acc')) - rest - (return []) - in - return (ExpTuple (e1', e2', rest')) | ExpOption e_opt -> (match e_opt with | None -> return (ExpOption None) @@ -327,7 +331,10 @@ let convert_item = function , SValue (rec_flag, (pat', expr'), rest_binds) ) ;; -let builtin_globals = var_set_of_list (Frontend.Binutils.primitive_names ~enable_gc:true) +let builtin_globals = + var_set_of_list (List.map (fun f -> f.name) predefined_runtime_funcs) +;; + let initial_context = { globals = builtin_globals; env = EnvMap.empty } let closure_conversion_result (program : Frontend.Ast.program) From 8db8aafb64d9270876fd0686481a8857ba2d92e0 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Sun, 8 Mar 2026 13:33:22 +0300 Subject: [PATCH 41/74] add llvm runtime Signed-off-by: Victoria Ostrovskaya --- EML/lib/runtime/llvm_runtime.c | 339 ++++++++++++++++++ .../runtime/{runtime.c => riscv_runtime.c} | 0 2 files changed, 339 insertions(+) create mode 100644 EML/lib/runtime/llvm_runtime.c rename EML/lib/runtime/{runtime.c => riscv_runtime.c} (100%) diff --git a/EML/lib/runtime/llvm_runtime.c b/EML/lib/runtime/llvm_runtime.c new file mode 100644 index 00000000..7aafdbf1 --- /dev/null +++ b/EML/lib/runtime/llvm_runtime.c @@ -0,0 +1,339 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +typedef void *eml_value; + +static int64_t tag_int_val(int64_t n) { return (n << 1) | 1; } + +#if defined(ENABLE_GC) +#define SIZE_HEAP_DEFAULT 800 +#define HEADER_WORDS 1 +static size_t size_heap; +#define MAX_STACK_SCAN_SLOTS (128 * 1024) + +typedef enum { + TAG_TUPLE = 0, + TAG_CLOSURE = 1, + TAG_LAST +} gc_tag; + +static const size_t TAG_SCAN_START[] = { + [TAG_TUPLE] = 1, + [TAG_CLOSURE] = 3, +}; + +#define IS_INT(v) ((v) & 0x1) +#define IS_PTR(v) ((v) != 0 && !IS_INT(v)) + +typedef struct { + uint8_t tag; + uint8_t _pad1; + uint16_t size; + uint32_t _pad2; +} box_header_t; + +static inline box_header_t *get_header(uint64_t *payload) { + return (box_header_t *)((uint64_t *)payload - 1); +} +static inline uint64_t *get_payload(box_header_t *hdr) { + return (uint64_t *)(hdr + 1); +} + +typedef struct { + uint64_t *start[2]; + uint64_t *end[2]; + uint64_t *alloc_ptr; + int current_bank; + uint64_t allocations; + uint64_t collections; + uint64_t words_allocated_total; +} gc_state; + +static gc_state GC; +static uint64_t *PTR_STACK = NULL; +static uint64_t *STACK_SCAN_LOW = NULL; /* range we are scanning in collect() */ +static uint64_t *STACK_SCAN_HIGH = NULL; +static bool gc_enabled; + +static inline int get_current_bank_idx(void) { return GC.current_bank; } +static inline int get_another_bank_idx(void) { return GC.current_bank ^ 1; } +static inline bool in_bank(uint64_t *ptr, int bank_idx) { + return (GC.start[bank_idx] <= ptr) && (ptr < GC.end[bank_idx]); +} + +static void mark_and_copy(uint64_t *stack_slot); + +static uint64_t *forward_or_copy(uint64_t *old_payload, bool *did_copy) { + int from_bank = get_another_bank_idx(); + if (old_payload <= (uint64_t *)GC.start[from_bank] + HEADER_WORDS - 1) + return old_payload; + box_header_t *old_header = get_header(old_payload); + if (old_header->tag >= TAG_LAST || old_header->size == 0 || + old_header->size > size_heap) { + *did_copy = false; + return old_payload; + } + uint64_t possible_forward_ptr = *((uint64_t *)old_payload - 1); + if (in_bank((uint64_t *)possible_forward_ptr, get_current_bank_idx())) { + *did_copy = false; + return (uint64_t *)possible_forward_ptr; + } + *did_copy = true; + uint16_t payload_words = old_header->size; + uint8_t object_tag = old_header->tag; + if (GC.alloc_ptr + payload_words + HEADER_WORDS > GC.end[GC.current_bank]) { + *did_copy = false; + return old_payload; + } + box_header_t *new_header = (box_header_t *)GC.alloc_ptr; + new_header->tag = object_tag; + new_header->size = payload_words; + uint64_t *new_payload = get_payload(new_header); + memcpy(new_payload, old_payload, payload_words * sizeof(uint64_t)); + GC.alloc_ptr += payload_words + HEADER_WORDS; + GC.words_allocated_total += payload_words + HEADER_WORDS; + *((uint64_t *)old_payload - 1) = (uint64_t)new_payload; + return new_payload; +} + +static void scan_object(uint64_t *obj) { + box_header_t *header = get_header(obj); + size_t start = (header->tag < TAG_LAST) ? TAG_SCAN_START[header->tag] : 0; + for (size_t i = start; i < header->size; i++) + mark_and_copy(obj + i); +} + +static void mark_and_copy(uint64_t *stack_slot) { + uint64_t raw_value = *stack_slot; + if (!IS_PTR(raw_value)) return; + uint64_t *old_object_payload = (uint64_t *)raw_value; + int another_bank = get_another_bank_idx(); + if (!in_bank(old_object_payload, another_bank)) { + if (STACK_SCAN_LOW && STACK_SCAN_HIGH) { + uint64_t *low = STACK_SCAN_LOW < STACK_SCAN_HIGH ? STACK_SCAN_LOW : STACK_SCAN_HIGH; + uint64_t *high = STACK_SCAN_LOW < STACK_SCAN_HIGH ? STACK_SCAN_HIGH : STACK_SCAN_LOW; + if (old_object_payload >= low && old_object_payload <= high) + return; /* skip stack pointer */ + } + return; + } + if (old_object_payload < (uint64_t *)GC.start[another_bank] + HEADER_WORDS) + return; + bool object_was_copied_now; + uint64_t *new_object_payload = + forward_or_copy(old_object_payload, &object_was_copied_now); + *stack_slot = (uint64_t)new_object_payload; + if (object_was_copied_now) scan_object(new_object_payload); +} + +void collect(void) { + uint64_t dummy; + uint64_t *current_stack_top = &dummy; + if (!PTR_STACK || current_stack_top > PTR_STACK) return; + STACK_SCAN_LOW = current_stack_top; + STACK_SCAN_HIGH = PTR_STACK; + GC.current_bank ^= 1; + GC.alloc_ptr = GC.start[GC.current_bank]; + { + uint64_t *stack_slot = current_stack_top; + size_t n = 0; + for (; n < MAX_STACK_SCAN_SLOTS && stack_slot <= PTR_STACK; n++, stack_slot++) + mark_and_copy(stack_slot); + } + STACK_SCAN_LOW = NULL; + STACK_SCAN_HIGH = NULL; + GC.collections++; +} + +void allocate_banks(void) { + for (int i = 0; i < 2; i++) { + GC.start[i] = malloc(size_heap * sizeof(uint64_t)); + GC.end[i] = GC.start[i] + size_heap; + } +} + +void init_gc(void) { + gc_enabled = true; + size_heap = SIZE_HEAP_DEFAULT; + { + const char *heap_size_env = getenv("EML_HEAP_SIZE"); + if (heap_size_env) { + int heap_size_val = atoi(heap_size_env); + if (heap_size_val >= 400 && heap_size_val <= 1024 * 1024) + size_heap = (size_t)heap_size_val; + } + } + allocate_banks(); + GC.current_bank = 0; + GC.alloc_ptr = GC.start[0]; + GC.words_allocated_total = 0; +} + +void destroy_gc(void) { + for (int i = 0; i < 2; i++) + free(GC.start[i]); +} + +void set_ptr_stack(uint64_t *stack_bottom) { PTR_STACK = stack_bottom; } + +eml_value print_gc_status(void) { + int bank = GC.current_bank; + ptrdiff_t current_alloc = GC.alloc_ptr - GC.start[bank]; + ptrdiff_t free_space = GC.end[bank] - GC.alloc_ptr; + printf("=== GC Status ===\n"); + printf("Current allocated: %td\n", current_alloc); + printf("Total allocated: %" PRIu64 "\n", total); + printf("Free space: %td\n", free_space); + printf("Heap size: %d\n", SIZE_HEAP); + printf("Current bank index: %d\n", bank); + printf("GC collections: %" PRIu64 "\n", collections); + printf("GC allocations: %" PRIu64 "\n", allocations); + printf("=================\n"); + fflush(stdout); + return (eml_value)(uintptr_t)tag_int_val(0); +} + +uint64_t *gc_alloc(size_t words, uint64_t tag) { + size_t total_words = words + HEADER_WORDS; + if (GC.alloc_ptr + total_words > GC.end[GC.current_bank] && + (collect(), GC.alloc_ptr + total_words > GC.end[GC.current_bank])) { + fprintf(stderr, "Out of memory\n"); + abort(); + } + box_header_t *header = (box_header_t *)GC.alloc_ptr; + *header = (box_header_t){ .tag = (uint8_t)tag, .size = (uint16_t)words }; + uint64_t *obj = get_payload(header); + memset(obj, 0, words * sizeof(uint64_t)); + GC.alloc_ptr += total_words; + GC.allocations++; + GC.words_allocated_total += total_words; + return obj; +} + +static void *eml_alloc(size_t bytes, uint64_t tag) { + if (gc_enabled) { + size_t words = (bytes + sizeof(uint64_t) - 1) / sizeof(uint64_t); + return gc_alloc(words, tag); + } + return malloc(bytes); +} + +int64_t get_heap_start(void) { return tag_int_val(0); } +int64_t get_heap_final(void) { return tag_int_val((int64_t)size_heap); } + +#else +eml_value print_gc_status(void) { + (void)printf("GC disabled\n"); + return (eml_value)(uintptr_t)tag_int_val(0); +} +eml_value collect(void) { return (eml_value)(uintptr_t)tag_int_val(0); } +void init_gc(void) {} +void destroy_gc(void) {} +int64_t get_heap_start(void) { return tag_int_val(0); } +int64_t get_heap_final(void) { return tag_int_val(0); } +void set_ptr_stack(uint64_t *stack_bottom) { (void)stack_bottom; } +static void *eml_alloc(size_t bytes, uint64_t tag) { (void)tag; return malloc(bytes); } +#endif + +void print_int(int64_t tagged_n) { + printf("%ld\n", (long)(tagged_n >> 1)); +} + +typedef struct { + void *code; + int64_t arity; + int64_t received; + void *args[]; +} closure; + +extern void *llvm_call_indirect(void *fn, void **args, int64_t n); + +static void *call_closure_full(closure *c, void **args) { + return llvm_call_indirect(c->code, args, c->arity); +} + +void *eml_applyN(closure *c, int64_t argc, void **argv); + +closure *alloc_closure(void *code, int64_t arity) { + size_t slots = (arity > 0) ? (size_t)arity : 1; + size_t sz = sizeof(closure) + slots * sizeof(void *); +#if defined(ENABLE_GC) + closure *c = (closure *)eml_alloc(sz, TAG_CLOSURE); +#else + closure *c = (closure *)malloc(sz); +#endif + c->code = code; + c->arity = arity; + c->received = 0; + memset(c->args, 0, slots * sizeof(void *)); + return c; +} + +static closure *copy_closure(const closure *src) { + size_t slots = (src->arity > 0) ? (size_t)src->arity : 1; + size_t sz = sizeof(closure) + slots * sizeof(void *); +#if defined(ENABLE_GC) + closure *dst = (closure *)eml_alloc(sz, TAG_CLOSURE); +#else + closure *dst = (closure *)malloc(sz); +#endif + memcpy(dst, src, sz); + return dst; +} + +void *apply1(closure *c, int64_t arg) { + void *argv[1]; + argv[0] = (void *)(uintptr_t)arg; + return eml_applyN(c, 1, argv); +} + +void *eml_applyN(closure *c, int64_t argc, void **argv) { + int64_t all = c->received + argc; + if (all == c->arity) { +#if defined(ENABLE_GC) + void **all_args = (void **)eml_alloc((size_t)c->arity * sizeof(void *), TAG_CLOSURE); +#else + void **all_args = (void **)malloc((size_t)c->arity * sizeof(void *)); +#endif + for (int64_t i = 0; i < c->received; i++) all_args[i] = c->args[i]; + for (int64_t i = 0; i < argc; i++) all_args[c->received + i] = argv[i]; + void *result = call_closure_full(c, all_args); +#if !defined(ENABLE_GC) + free(all_args); +#endif + return result; + } + closure *partial = copy_closure(c); + for (int64_t i = 0; i < argc; i++) + partial->args[partial->received++] = argv[i]; + return partial; +} + +typedef struct { int64_t arity; void *args[]; } tuple; + +tuple *create_tuple(int64_t argc, void **args) { + size_t words = 1 + (size_t)argc; +#if defined(ENABLE_GC) + tuple *t = (tuple *)eml_alloc(words * sizeof(uint64_t), TAG_TUPLE); +#else + tuple *t = (tuple *)malloc(words * sizeof(uint64_t)); +#endif + t->arity = argc; + for (size_t i = 0; i < (size_t)argc; i++) t->args[i] = args[i]; + return t; +} + +void *field(tuple *t, long n) { return t->args[n >> 1]; } + +extern void eml_main(void); + +int main(void) { + eml_main(); + return 0; +} diff --git a/EML/lib/runtime/runtime.c b/EML/lib/runtime/riscv_runtime.c similarity index 100% rename from EML/lib/runtime/runtime.c rename to EML/lib/runtime/riscv_runtime.c From d45a788c21cb935732b3d3033cd4d217cb262b23 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Sun, 8 Mar 2026 13:38:23 +0300 Subject: [PATCH 42/74] rename llvm to llvm_ir Signed-off-by: Victoria Ostrovskaya --- EML/lib/backend/{llvm => llvm_ir}/analysis.ml | 0 EML/lib/backend/{llvm => llvm_ir}/architecture.ml | 0 EML/lib/backend/{llvm => llvm_ir}/generator.ml | 0 EML/lib/backend/{llvm => llvm_ir}/generator_state.ml | 0 EML/lib/backend/{llvm => llvm_ir}/runner.ml | 0 EML/lib/backend/{llvm => llvm_ir}/runner.mli | 0 6 files changed, 0 insertions(+), 0 deletions(-) rename EML/lib/backend/{llvm => llvm_ir}/analysis.ml (100%) rename EML/lib/backend/{llvm => llvm_ir}/architecture.ml (100%) rename EML/lib/backend/{llvm => llvm_ir}/generator.ml (100%) rename EML/lib/backend/{llvm => llvm_ir}/generator_state.ml (100%) rename EML/lib/backend/{llvm => llvm_ir}/runner.ml (100%) rename EML/lib/backend/{llvm => llvm_ir}/runner.mli (100%) diff --git a/EML/lib/backend/llvm/analysis.ml b/EML/lib/backend/llvm_ir/analysis.ml similarity index 100% rename from EML/lib/backend/llvm/analysis.ml rename to EML/lib/backend/llvm_ir/analysis.ml diff --git a/EML/lib/backend/llvm/architecture.ml b/EML/lib/backend/llvm_ir/architecture.ml similarity index 100% rename from EML/lib/backend/llvm/architecture.ml rename to EML/lib/backend/llvm_ir/architecture.ml diff --git a/EML/lib/backend/llvm/generator.ml b/EML/lib/backend/llvm_ir/generator.ml similarity index 100% rename from EML/lib/backend/llvm/generator.ml rename to EML/lib/backend/llvm_ir/generator.ml diff --git a/EML/lib/backend/llvm/generator_state.ml b/EML/lib/backend/llvm_ir/generator_state.ml similarity index 100% rename from EML/lib/backend/llvm/generator_state.ml rename to EML/lib/backend/llvm_ir/generator_state.ml diff --git a/EML/lib/backend/llvm/runner.ml b/EML/lib/backend/llvm_ir/runner.ml similarity index 100% rename from EML/lib/backend/llvm/runner.ml rename to EML/lib/backend/llvm_ir/runner.ml diff --git a/EML/lib/backend/llvm/runner.mli b/EML/lib/backend/llvm_ir/runner.mli similarity index 100% rename from EML/lib/backend/llvm/runner.mli rename to EML/lib/backend/llvm_ir/runner.mli From 9dbe84aea6e9f53b7cfb3e4d45a42853e2170838 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Sun, 8 Mar 2026 13:39:01 +0300 Subject: [PATCH 43/74] add backend choosing at entry point Signed-off-by: Victoria Ostrovskaya --- EML/bin/EML.ml | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/EML/bin/EML.ml b/EML/bin/EML.ml index 0d4e2a8b..11b96df8 100644 --- a/EML/bin/EML.ml +++ b/EML/bin/EML.ml @@ -6,13 +6,20 @@ open Stdio open EML_lib open Frontend +type backend = + | Ricsv + | Llvm + type opts = { input_file : string option ; output_file : string option ; enable_gc : bool + ; backend : backend } -let default_opts = { input_file = None; output_file = None; enable_gc = false } +let default_opts = + { input_file = None; output_file = None; enable_gc = false; backend = Ricsv } +;; type env = Inferencer.TypeEnv.t @@ -45,11 +52,15 @@ let with_middleend ast _env' f : (env, unit) Result.t = | Ok anf_ast -> f anf_ast ;; -let run_compile ~enable_gc text env oc : (env, unit) Result.t = +let run_compile text env oc ~backend ~enable_gc : (env, unit) Result.t = with_frontend text env oc (fun ast env' _out_list -> with_middleend ast env' (fun anf_ast -> let ppf = Format.formatter_of_out_channel oc in - let res = Backend.Ricsv.Runner.gen_program ~enable_gc ppf anf_ast in + let res = + match backend with + | Ricsv -> Backend.Ricsv.Runner.gen_program ~enable_gc ppf anf_ast + | Llvm -> Backend.Llvm_ir.Runner.gen_program ~enable_gc ppf anf_ast + in match res with | Ok () -> Ok env' | Error msg -> @@ -91,10 +102,15 @@ let parse_args () : (opts, unit) Result.t = let input_file = ref default_opts.input_file in let output_file = ref default_opts.output_file in let enable_gc = ref default_opts.enable_gc in + let backend = ref Ricsv in let positional_seen = ref false in let open Arg in let spec = - [ "-fromfile", String (fun s -> input_file := Some s), " Read source from file" + [ ( "-backend" + , Symbol + ([ "ricsv"; "llvm" ], fun s -> backend := if s = "llvm" then Llvm else Ricsv) + , " Code generation backend (default: ricsv)" ) + ; "-fromfile", String (fun s -> input_file := Some s), " Read source from file" ; "-o", String (fun s -> output_file := Some s), " Write output to file" ; "-gc", Set enable_gc, " Enable GC runtime support" ] @@ -102,7 +118,13 @@ let parse_args () : (opts, unit) Result.t = parse spec (fun _ -> positional_seen := true) "Compiler for custom language"; if !positional_seen then Error () - else Ok { input_file = !input_file; output_file = !output_file; enable_gc = !enable_gc } + else + Ok + { input_file = !input_file + ; output_file = !output_file + ; enable_gc = !enable_gc + ; backend = !backend + } ;; let () = From 01c60bd6099bb9bcfed06e7071315c3bf5576fc3 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Sun, 8 Mar 2026 13:47:51 +0300 Subject: [PATCH 44/74] fix errors Signed-off-by: Victoria Ostrovskaya --- EML/bin/EML.ml | 4 +++- EML/lib/runtime/dune | 4 ++-- EML/lib/runtime/primitives.ml | 2 +- EML/lib/runtime/riscv_runtime.c | 14 +++++++------- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/EML/bin/EML.ml b/EML/bin/EML.ml index 11b96df8..fa6fb08a 100644 --- a/EML/bin/EML.ml +++ b/EML/bin/EML.ml @@ -73,7 +73,9 @@ let run_compile text env oc ~backend ~enable_gc : (env, unit) Result.t = (* ------------------------------------------------------------------------- *) let compiler opts : (unit, unit) Result.t = - let run text env oc = run_compile ~enable_gc:opts.enable_gc text env oc in + let run text env oc = + run_compile text env oc ~backend:opts.backend ~enable_gc:opts.enable_gc + in let env0 = if opts.enable_gc then Inferencer.TypeEnv.env_with_gc diff --git a/EML/lib/runtime/dune b/EML/lib/runtime/dune index 560b79ce..49fd9378 100644 --- a/EML/lib/runtime/dune +++ b/EML/lib/runtime/dune @@ -1,6 +1,6 @@ (rule (targets rv64_runtime.a) - (deps runtime.c) + (deps riscv_runtime.c) (action (progn (run @@ -9,7 +9,7 @@ -mabi=lp64d -O2 -c - %{dep:runtime.c} + %{dep:riscv_runtime.c} -o rv64_runtime.o) (run riscv64-linux-gnu-ar rcs %{targets} rv64_runtime.o)))) diff --git a/EML/lib/runtime/primitives.ml b/EML/lib/runtime/primitives.ml index 2b75f442..2827a0d9 100644 --- a/EML/lib/runtime/primitives.ml +++ b/EML/lib/runtime/primitives.ml @@ -39,5 +39,5 @@ let predefined_runtime_funcs : runtime_func_sig list = ;; let runtime_primitive_arities : (string * int) list = - List.map (fun { name; args } -> name, List.length args) predefined_runtime_funcs + List.map (fun { name; args; _ } -> name, List.length args) predefined_runtime_funcs ;; diff --git a/EML/lib/runtime/riscv_runtime.c b/EML/lib/runtime/riscv_runtime.c index 6aca48fb..11bb97e4 100644 --- a/EML/lib/runtime/riscv_runtime.c +++ b/EML/lib/runtime/riscv_runtime.c @@ -214,13 +214,13 @@ void print_gc_status() { uint64_t allocations = GC.allocations; printf("=== GC Status ===\n"); - printf("Current allocated: %td\n", current_alloc); - printf("Total allocated: %" PRIu64 "\n", total); - printf("Free space: %td\n", free_space); - printf("Heap size: %d\n", SIZE_HEAP); - printf("Current bank index: %d\n", bank); - printf("GC collections: %" PRIu64 "\n", collections); - printf("GC allocations: %" PRIu64 "\n", allocations); + printf("Current allocated: %td\n", current_alloc); + printf("Total allocated: %" PRIu64 "\n", total); + printf("Free space: %td\n", free_space); + printf("Heap size: %d\n", SIZE_HEAP); + printf("Current bank index: %d\n", bank); + printf("GC collections: %" PRIu64 "\n", collections); + printf("GC allocations: %" PRIu64 "\n", allocations); printf("=================\n"); fflush(stdout); } From e76974bbb322f52b1a332fbb88c70ba6b825bb50 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Sun, 8 Mar 2026 13:50:38 +0300 Subject: [PATCH 45/74] update test expectes Signed-off-by: Victoria Ostrovskaya --- EML/lib/backend/ricsv/architecture.ml | 29 +++++-- EML/tests/riscv_tests.ml | 115 +++++++++++--------------- 2 files changed, 69 insertions(+), 75 deletions(-) diff --git a/EML/lib/backend/ricsv/architecture.ml b/EML/lib/backend/ricsv/architecture.ml index 3eee95df..c2336060 100644 --- a/EML/lib/backend/ricsv/architecture.ml +++ b/EML/lib/backend/ricsv/architecture.ml @@ -126,19 +126,36 @@ module Riscv_backend = struct let saved_fp_offset = 0 let saved_ra_offset = word_size + (* addi/sd/ld immediate is 12-bit signed: -2048 .. 2047 *) + let max_addi_imm = 2048 + let rec sub_sp n = + if n <= 0 then [] + else if n <= max_addi_imm then addi sp sp (-n) + else addi sp sp (-max_addi_imm) @ sub_sp (n - max_addi_imm) + + let addi_or_li_add rd rs imm = + if imm >= -max_addi_imm && imm <= max_addi_imm - 1 then addi rd rs imm + else li t0 imm @ add rd rs t0 + + (* Store at sp+offset; use direct sd when offset in 12-bit range *) + let sd_at_sp_offset reg offset = + if offset >= -2048 && offset <= 2047 then sd reg (sp, offset) + else addi_or_li_add t0 sp offset @ sd reg (t0, 0) + type location = | Loc_reg of reg | Loc_mem of offset let prologue ~enable_gc ~name ~stack_size = - let ra_slot = sp, stack_size - saved_ra_offset in - let fp_slot = sp, stack_size - frame_header_size in + let ra_offset = stack_size - saved_ra_offset in + let fp_offset = stack_size - frame_header_size in + let fp_imm = stack_size - frame_header_size in let base = label name - @ addi sp sp (-stack_size) - @ sd ra ra_slot - @ sd fp fp_slot - @ addi fp sp (stack_size - frame_header_size) + @ sub_sp stack_size + @ sd_at_sp_offset ra ra_offset + @ sd_at_sp_offset fp fp_offset + @ addi_or_li_add fp sp fp_imm in if enable_gc && String.equal name "main" then base @ call "init_gc" @ mv a0 fp @ call "set_ptr_stack" diff --git a/EML/tests/riscv_tests.ml b/EML/tests/riscv_tests.ml index 83b585f6..e1c40656 100644 --- a/EML/tests/riscv_tests.ml +++ b/EML/tests/riscv_tests.ml @@ -34,11 +34,10 @@ let%expect_test "unary_minus" = .globl x .type x, @function x: - mv t1, fp addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) addi fp, sp, 0 - sd ra, 8(fp) - sd t1, 0(fp) li t0, 11 li a0, 1 sub a0, a0, t0 @@ -50,11 +49,10 @@ x: .globl main .type main, @function main: - mv t1, fp addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) addi fp, sp, 0 - sd ra, 8(fp) - sd t1, 0(fp) li a0, 1 addi sp, fp, 16 ld ra, 8(fp) @@ -72,11 +70,10 @@ let%expect_test "unary_not" = .globl x .type x, @function x: - mv t1, fp addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) addi fp, sp, 0 - sd ra, 8(fp) - sd t1, 0(fp) li t0, 3 xori a0, t0, 3 addi sp, fp, 16 @@ -87,11 +84,10 @@ x: .globl main .type main, @function main: - mv t1, fp addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) addi fp, sp, 0 - sd ra, 8(fp) - sd t1, 0(fp) li a0, 1 addi sp, fp, 16 ld ra, 8(fp) @@ -109,11 +105,10 @@ let%expect_test "unit_main" = .globl main .type main, @function main: - mv t1, fp addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) addi fp, sp, 0 - sd ra, 8(fp) - sd t1, 0(fp) li a0, 1 addi sp, fp, 16 ld ra, 8(fp) @@ -131,11 +126,10 @@ let%expect_test "mul_only" = .globl main .type main, @function main: - mv t1, fp addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) addi fp, sp, 0 - sd ra, 8(fp) - sd t1, 0(fp) li t0, 15 li t1, 17 srli t0, t0, 1 @@ -162,11 +156,10 @@ let%expect_test "double_fn" = .globl double .type double, @function double: - mv t1, fp addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) addi fp, sp, 0 - sd ra, 8(fp) - sd t1, 0(fp) sd a0, -8(fp) ld t0, -8(fp) ld t1, -8(fp) @@ -180,11 +173,10 @@ double: .globl main .type main, @function main: - mv t1, fp addi sp, sp, -200 + sd ra, 192(sp) + sd fp, 184(sp) addi fp, sp, 184 - sd ra, 8(fp) - sd t1, 0(fp) li a0, 43 call double addi sp, fp, 16 @@ -207,11 +199,10 @@ let%expect_test "abs_fn" = .globl abs .type abs, @function abs: - mv t1, fp addi sp, sp, -32 + sd ra, 24(sp) + sd fp, 16(sp) addi fp, sp, 16 - sd ra, 8(fp) - sd t1, 0(fp) sd a0, -8(fp) ld t0, -8(fp) li t1, 1 @@ -237,11 +228,10 @@ end_0: .globl main .type main, @function main: - mv t1, fp addi sp, sp, -200 + sd ra, 192(sp) + sd fp, 184(sp) addi fp, sp, 184 - sd ra, 8(fp) - sd t1, 0(fp) li a0, 15 call abs addi sp, fp, 16 @@ -265,11 +255,10 @@ let%expect_test "nested_calls" = .globl sq .type sq, @function sq: - mv t1, fp addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) addi fp, sp, 0 - sd ra, 8(fp) - sd t1, 0(fp) sd a0, -8(fp) ld t0, -8(fp) ld t1, -8(fp) @@ -285,11 +274,10 @@ sq: .globl sum_of_squares .type sum_of_squares, @function sum_of_squares: - mv t1, fp addi sp, sp, -400 + sd ra, 392(sp) + sd fp, 384(sp) addi fp, sp, 384 - sd ra, 8(fp) - sd t1, 0(fp) sd a0, -8(fp) sd a1, -16(fp) ld a0, -8(fp) @@ -310,11 +298,10 @@ sum_of_squares: .globl main .type main, @function main: - mv t1, fp addi sp, sp, -208 + sd ra, 200(sp) + sd fp, 192(sp) addi fp, sp, 192 - sd ra, 8(fp) - sd t1, 0(fp) li a0, 7 li a1, 9 call sum_of_squares @@ -338,11 +325,10 @@ let%expect_test "fibonacci" = .globl fib .type fib, @function fib: - mv t1, fp addi sp, sp, -432 + sd ra, 424(sp) + sd fp, 416(sp) addi fp, sp, 416 - sd ra, 8(fp) - sd t1, 0(fp) sd a0, -8(fp) ld t0, -8(fp) li t1, 5 @@ -385,11 +371,10 @@ end_0: .globl main .type main, @function main: - mv t1, fp addi sp, sp, -200 + sd ra, 192(sp) + sd fp, 184(sp) addi fp, sp, 184 - sd ra, 8(fp) - sd t1, 0(fp) li a0, 13 call fib addi sp, fp, 16 @@ -412,11 +397,10 @@ let%expect_test "is_positive" = .globl is_positive .type is_positive, @function is_positive: - mv t1, fp addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) addi fp, sp, 0 - sd ra, 8(fp) - sd t1, 0(fp) sd a0, -8(fp) ld t0, -8(fp) li t1, 1 @@ -431,11 +415,10 @@ is_positive: .globl main .type main, @function main: - mv t1, fp addi sp, sp, -200 + sd ra, 192(sp) + sd fp, 184(sp) addi fp, sp, 184 - sd ra, 8(fp) - sd t1, 0(fp) li a0, 85 call is_positive addi sp, fp, 16 @@ -458,11 +441,10 @@ let%expect_test "mul3" = .globl mul3 .type mul3, @function mul3: - mv t1, fp addi sp, sp, -24 + sd ra, 16(sp) + sd fp, 8(sp) addi fp, sp, 8 - sd ra, 8(fp) - sd t1, 0(fp) sd a0, -8(fp) sd a1, -16(fp) sd a2, -24(fp) @@ -487,11 +469,10 @@ mul3: .globl main .type main, @function main: - mv t1, fp addi sp, sp, -216 + sd ra, 208(sp) + sd fp, 200(sp) addi fp, sp, 200 - sd ra, 8(fp) - sd t1, 0(fp) li a0, 5 li a1, 7 li a2, 9 @@ -521,11 +502,10 @@ let%expect_test "test1" = .globl large .type large, @function large: - mv t1, fp addi sp, sp, -400 + sd ra, 392(sp) + sd fp, 384(sp) addi fp, sp, 384 - sd ra, 8(fp) - sd t1, 0(fp) sd a0, -8(fp) li t0, 1 ld t1, -8(fp) @@ -552,11 +532,10 @@ let%expect_test "test1" = .globl main .type main, @function main: - mv t1, fp addi sp, sp, -440 + sd ra, 432(sp) + sd fp, 424(sp) addi fp, sp, 424 - sd ra, 8(fp) - sd t1, 0(fp) li t0, 1 li t1, 1 beq t0, t1, else_1 @@ -615,11 +594,10 @@ let%expect_test "codegen closure fn with 10 arg" = .globl add .type add, @function add: - mv t1, fp addi sp, sp, -56 + sd ra, 48(sp) + sd fp, 40(sp) addi fp, sp, 40 - sd ra, 8(fp) - sd t1, 0(fp) sd a0, -8(fp) sd a1, -16(fp) sd a2, -24(fp) @@ -664,11 +642,10 @@ let%expect_test "codegen closure fn with 10 arg" = .globl main .type main, @function main: - mv t1, fp addi sp, sp, -816 + sd ra, 808(sp) + sd fp, 800(sp) addi fp, sp, 800 - sd ra, 8(fp) - sd t1, 0(fp) la a0, add li a1, 7 call alloc_closure From cfc10b88d773d7dded00a91843679c0735ab567c Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Sun, 8 Mar 2026 14:01:28 +0300 Subject: [PATCH 46/74] add tests Signed-off-by: Victoria Ostrovskaya --- EML/lib/backend/ricsv/architecture.ml | 16 +- EML/tests/Makefile | 18 +- EML/tests/dune | 7 + EML/tests/gc_tests.t | 168 ++++++++-------- EML/tests/llvm.t | 18 ++ EML/tests/llvm_tests.ml | 267 ++++++++++++++++++++++++++ EML/tests/tuple_tests.t | 84 ++++---- 7 files changed, 446 insertions(+), 132 deletions(-) create mode 100644 EML/tests/llvm.t create mode 100644 EML/tests/llvm_tests.ml diff --git a/EML/lib/backend/ricsv/architecture.ml b/EML/lib/backend/ricsv/architecture.ml index c2336060..0956b77b 100644 --- a/EML/lib/backend/ricsv/architecture.ml +++ b/EML/lib/backend/ricsv/architecture.ml @@ -128,19 +128,27 @@ module Riscv_backend = struct (* addi/sd/ld immediate is 12-bit signed: -2048 .. 2047 *) let max_addi_imm = 2048 + let rec sub_sp n = - if n <= 0 then [] - else if n <= max_addi_imm then addi sp sp (-n) + if n <= 0 + then [] + else if n <= max_addi_imm + then addi sp sp (-n) else addi sp sp (-max_addi_imm) @ sub_sp (n - max_addi_imm) + ;; let addi_or_li_add rd rs imm = - if imm >= -max_addi_imm && imm <= max_addi_imm - 1 then addi rd rs imm + if imm >= -max_addi_imm && imm <= max_addi_imm - 1 + then addi rd rs imm else li t0 imm @ add rd rs t0 + ;; (* Store at sp+offset; use direct sd when offset in 12-bit range *) let sd_at_sp_offset reg offset = - if offset >= -2048 && offset <= 2047 then sd reg (sp, offset) + if offset >= -2048 && offset <= 2047 + then sd reg (sp, offset) else addi_or_li_add t0 sp offset @ sd reg (t0, 0) + ;; type location = | Loc_reg of reg diff --git a/EML/tests/Makefile b/EML/tests/Makefile index 035c7d65..f208db6d 100644 --- a/EML/tests/Makefile +++ b/EML/tests/Makefile @@ -5,12 +5,12 @@ RUNTIME_A_DEFAULT := ../_build/default/lib/runtime/rv64_runtime.a EML_ROOT ?= $(CURDIR)/.. EML_BIN ?= $(if $(findstring _build,$(EML_ROOT)),$(EML_ROOT)/bin/EML.exe,$(EML_ROOT)/_build/default/bin/EML.exe) -ARGS := $(filter-out compile,$(MAKECMDGOALS)) +ARGS := $(filter-out compile compile_llvm,$(MAKECMDGOALS)) USE_GC := $(if $(filter 1 true yes on,$(GC)),1,0) INPUT := $(firstword $(ARGS)) EXTRA_GOALS := $(ARGS) -.PHONY: compile $(EXTRA_GOALS) +.PHONY: compile compile_llvm $(EXTRA_GOALS) compile: @set -euo pipefail; \ @@ -47,5 +47,19 @@ compile: riscv64-linux-gnu-gcc "$$OBJ_FILE" "$$RUNTIME_A" -o "$$EXE_FILE"; \ qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64 "$$EXE_FILE" +# LLVM: compile source to .ll (no link/run; needs llvm_call_indirect for full run) +compile_llvm: + @set -euo pipefail; \ + FILE="$(INPUT)"; \ + if [[ -z "$$FILE" ]]; then \ + echo "Usage: make compile_llvm " >&2; \ + exit 1; \ + fi; \ + if [[ ! -f "$$FILE" ]]; then echo "Source file not found: $$FILE" >&2; exit 1; fi; \ + SRC="$$(realpath "$$FILE")"; \ + TMP="$$(mktemp -d)"; trap 'rm -rf "$$TMP"' EXIT; \ + "$(EML_BIN)" -backend llvm -fromfile "$$SRC" -o "$$TMP/prog.ll"; \ + cat "$$TMP/prog.ll" + $(EXTRA_GOALS): @: diff --git a/EML/tests/dune b/EML/tests/dune index 53dc8aff..c2bfa4d9 100644 --- a/EML/tests/dune +++ b/EML/tests/dune @@ -34,3 +34,10 @@ (file ../lib/runtime/rv64_runtime.a) (source_tree tuple_tests) (source_tree many_tests))) + +(cram + (applies_to llvm) + (deps + (file ../bin/EML.exe) + (file Makefile) + (source_tree many_tests))) diff --git a/EML/tests/gc_tests.t b/EML/tests/gc_tests.t index 3148404e..25d5ce9e 100644 --- a/EML/tests/gc_tests.t +++ b/EML/tests/gc_tests.t @@ -4,117 +4,117 @@ SPDX-License-Identifier: LGPL-3.0-or-later $ make compile GC=1 gc_tests/closure/01_add5_staged_partial_gc.ml === GC Status === - Current allocated: 18 - Total allocated: 18 - Free space: 782 - Heap size: 800 - Current bank index: 0 - GC collections: 0 - GC allocations: 2 + Current allocated: 18 + Total allocated: 18 + Free space: 782 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 2 ================= === GC Status === - Current allocated: 27 - Total allocated: 27 - Free space: 773 - Heap size: 800 - Current bank index: 0 - GC collections: 0 - GC allocations: 3 + Current allocated: 27 + Total allocated: 27 + Free space: 773 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 3 ================= === GC Status === - Current allocated: 18 - Total allocated: 45 - Free space: 782 - Heap size: 800 - Current bank index: 1 - GC collections: 1 - GC allocations: 3 + Current allocated: 18 + Total allocated: 45 + Free space: 782 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 3 ================= === GC Status === - Current allocated: 27 - Total allocated: 81 - Free space: 773 - Heap size: 800 - Current bank index: 0 - GC collections: 2 - GC allocations: 4 + Current allocated: 27 + Total allocated: 81 + Free space: 773 + Heap size: 800 + Current bank index: 0 + GC collections: 2 + GC allocations: 4 ================= 15 $ make compile GC=1 gc_tests/closure/02_affine_live_dead_gc.ml === GC Status === - Current allocated: 28 - Total allocated: 28 - Free space: 772 - Heap size: 800 - Current bank index: 0 - GC collections: 0 - GC allocations: 4 + Current allocated: 28 + Total allocated: 28 + Free space: 772 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 4 ================= === GC Status === - Current allocated: 14 - Total allocated: 42 - Free space: 786 - Heap size: 800 - Current bank index: 1 - GC collections: 1 - GC allocations: 4 + Current allocated: 14 + Total allocated: 42 + Free space: 786 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 4 ================= === GC Status === - Current allocated: 28 - Total allocated: 56 - Free space: 772 - Heap size: 800 - Current bank index: 1 - GC collections: 1 - GC allocations: 6 + Current allocated: 28 + Total allocated: 56 + Free space: 772 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 6 ================= === GC Status === - Current allocated: 21 - Total allocated: 77 - Free space: 779 - Heap size: 800 - Current bank index: 0 - GC collections: 2 - GC allocations: 6 + Current allocated: 21 + Total allocated: 77 + Free space: 779 + Heap size: 800 + Current bank index: 0 + GC collections: 2 + GC allocations: 6 ================= 17 $ make compile GC=1 gc_tests/closure/03_add10_staged_partial_gc.ml === GC Status === - Current allocated: 28 - Total allocated: 28 - Free space: 772 - Heap size: 800 - Current bank index: 0 - GC collections: 0 - GC allocations: 2 + Current allocated: 28 + Total allocated: 28 + Free space: 772 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 2 ================= === GC Status === - Current allocated: 42 - Total allocated: 42 - Free space: 758 - Heap size: 800 - Current bank index: 0 - GC collections: 0 - GC allocations: 3 + Current allocated: 42 + Total allocated: 42 + Free space: 758 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 3 ================= === GC Status === - Current allocated: 28 - Total allocated: 70 - Free space: 772 - Heap size: 800 - Current bank index: 1 - GC collections: 1 - GC allocations: 3 + Current allocated: 28 + Total allocated: 70 + Free space: 772 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 3 ================= === GC Status === - Current allocated: 42 - Total allocated: 126 - Free space: 758 - Heap size: 800 - Current bank index: 0 - GC collections: 2 - GC allocations: 4 + Current allocated: 42 + Total allocated: 126 + Free space: 758 + Heap size: 800 + Current bank index: 0 + GC collections: 2 + GC allocations: 4 ================= 55 diff --git a/EML/tests/llvm.t b/EML/tests/llvm.t new file mode 100644 index 00000000..1a576c7e --- /dev/null +++ b/EML/tests/llvm.t @@ -0,0 +1,18 @@ +Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev +SPDX-License-Identifier: LGPL-3.0-or-later + + $ make compile_llvm many_tests/typed/001fac.ml > /dev/null + + $ make compile_llvm many_tests/typed/003fib.ml > /dev/null + + $ make compile_llvm many_tests/typed/004manyargs.ml > /dev/null + + $ make compile_llvm many_tests/typed/005fix.ml > /dev/null + + $ make compile_llvm many_tests/typed/006partial2.ml > /dev/null + + $ make compile_llvm many_tests/typed/010fac_anf.ml > /dev/null + + $ make compile_llvm many_tests/typed/010faccps_ll.ml > /dev/null + + $ make compile_llvm many_tests/typed/010fibcps_ll.ml > /dev/null diff --git a/EML/tests/llvm_tests.ml b/EML/tests/llvm_tests.ml new file mode 100644 index 00000000..c85d08e0 --- /dev/null +++ b/EML/tests/llvm_tests.ml @@ -0,0 +1,267 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +(** LLVM IR codegen tests (analogous to riscv_tests.ml). *) + +open EML_lib +open Frontend.Parser +open Middleend.Anf + +let compile_llvm src : string = + match parse src with + | Error e -> "Parse error: " ^ e + | Ok ast -> + (match anf_program ast with + | Error e -> "ANF error: " ^ e + | Ok anf -> + let buf = Buffer.create 4096 in + let ppf = Format.formatter_of_buffer buf in + (match Backend.Llvm_ir.Runner.gen_program ~enable_gc:false ppf anf with + | Ok () -> + Format.pp_print_flush ppf (); + Buffer.contents buf + | Error e -> "Codegen error: " ^ e)) +;; + +let run_llvm src = Format.printf "%s" (compile_llvm src) + +let%expect_test "unit_main" = + run_llvm "let main = ()"; + [%expect + {| +; ModuleID = 'EML' +source_filename = "EML" + +declare ptr @eml_applyN(ptr, i64, ptr) + +declare ptr @create_tuple(i64, ptr) + +declare ptr @alloc_closure(ptr, i64) + +declare ptr @field(ptr, i64) + +declare ptr @llvm_call_indirect(ptr, ptr, i64) + +declare void @print_int(i64) + +declare void @init_gc() + +declare void @destroy_gc() + +declare void @set_ptr_stack(ptr) + +declare i64 @get_heap_start() + +declare i64 @get_heap_final() + +declare ptr @collect() + +declare ptr @print_gc_status() + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + +define ptr @eml_main() { +entry: + ret ptr inttoptr (i64 1 to ptr) +} + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } + +|}] +;; + +let%expect_test "int_main" = + run_llvm "let main = 42"; + [%expect + {| +; ModuleID = 'EML' +source_filename = "EML" + +declare ptr @eml_applyN(ptr, i64, ptr) + +declare ptr @create_tuple(i64, ptr) + +declare ptr @alloc_closure(ptr, i64) + +declare ptr @field(ptr, i64) + +declare ptr @llvm_call_indirect(ptr, ptr, i64) + +declare void @print_int(i64) + +declare void @init_gc() + +declare void @destroy_gc() + +declare void @set_ptr_stack(ptr) + +declare i64 @get_heap_start() + +declare i64 @get_heap_final() + +declare ptr @collect() + +declare ptr @print_gc_status() + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + +define ptr @eml_main() { +entry: + ret ptr inttoptr (i64 85 to ptr) +} + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } + +|}] +;; + +let%expect_test "unary_minus" = + run_llvm "let x = -5"; + [%expect + {| +; ModuleID = 'EML' +source_filename = "EML" + +declare ptr @eml_applyN(ptr, i64, ptr) + +declare ptr @create_tuple(i64, ptr) + +declare ptr @alloc_closure(ptr, i64) + +declare ptr @field(ptr, i64) + +declare ptr @llvm_call_indirect(ptr, ptr, i64) + +declare void @print_int(i64) + +declare void @init_gc() + +declare void @destroy_gc() + +declare void @set_ptr_stack(ptr) + +declare i64 @get_heap_start() + +declare i64 @get_heap_final() + +declare ptr @collect() + +declare ptr @print_gc_status() + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + +define ptr @x() { +entry: + ret ptr inttoptr (i64 -9 to ptr) +} + +define ptr @eml_main() { +entry: + ret ptr inttoptr (i64 1 to ptr) +} + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } + +|}] +;; + +let%expect_test "unary_not" = + run_llvm "let x = not true"; + [%expect + {| +; ModuleID = 'EML' +source_filename = "EML" + +declare ptr @eml_applyN(ptr, i64, ptr) + +declare ptr @create_tuple(i64, ptr) + +declare ptr @alloc_closure(ptr, i64) + +declare ptr @field(ptr, i64) + +declare ptr @llvm_call_indirect(ptr, ptr, i64) + +declare void @print_int(i64) + +declare void @init_gc() + +declare void @destroy_gc() + +declare void @set_ptr_stack(ptr) + +declare i64 @get_heap_start() + +declare i64 @get_heap_final() + +declare ptr @collect() + +declare ptr @print_gc_status() + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + +define ptr @x() { +entry: + ret ptr inttoptr (i64 2 to ptr) +} + +define ptr @eml_main() { +entry: + ret ptr inttoptr (i64 1 to ptr) +} + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } + +|}] +;; + +let%expect_test "mul_only" = + run_llvm "let main = 7 * 8"; + [%expect + {| +; ModuleID = 'EML' +source_filename = "EML" + +declare ptr @eml_applyN(ptr, i64, ptr) + +declare ptr @create_tuple(i64, ptr) + +declare ptr @alloc_closure(ptr, i64) + +declare ptr @field(ptr, i64) + +declare ptr @llvm_call_indirect(ptr, ptr, i64) + +declare void @print_int(i64) + +declare void @init_gc() + +declare void @destroy_gc() + +declare void @set_ptr_stack(ptr) + +declare i64 @get_heap_start() + +declare i64 @get_heap_final() + +declare ptr @collect() + +declare ptr @print_gc_status() + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + +define ptr @eml_main() { +entry: + ret ptr inttoptr (i64 113 to ptr) +} + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } + +|}] +;; diff --git a/EML/tests/tuple_tests.t b/EML/tests/tuple_tests.t index 40382666..ed32c396 100644 --- a/EML/tests/tuple_tests.t +++ b/EML/tests/tuple_tests.t @@ -3,60 +3,60 @@ SPDX-License-Identifier: LGPL-3.0-or-later $ make compile GC=1 tuple_tests/01adder.ml === GC Status === - Current allocated: 13 - Total allocated: 13 - Free space: 787 - Heap size: 800 - Current bank index: 0 - GC collections: 0 - GC allocations: 3 + Current allocated: 13 + Total allocated: 13 + Free space: 787 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 3 ================= 42=== GC Status === - Current allocated: 13 - Total allocated: 28 - Free space: 787 - Heap size: 800 - Current bank index: 1 - GC collections: 1 - GC allocations: 4 + Current allocated: 13 + Total allocated: 28 + Free space: 787 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 4 ================= $ make compile GC=1 tuple_tests/02nested.ml === GC Status === - Current allocated: 23 - Total allocated: 23 - Free space: 777 - Heap size: 800 - Current bank index: 0 - GC collections: 0 - GC allocations: 4 + Current allocated: 23 + Total allocated: 23 + Free space: 777 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 4 ================= 90=== GC Status === - Current allocated: 23 - Total allocated: 48 - Free space: 777 - Heap size: 800 - Current bank index: 1 - GC collections: 1 - GC allocations: 5 + Current allocated: 23 + Total allocated: 48 + Free space: 777 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 5 ================= $ make compile GC=1 tuple_tests/03args.ml === GC Status === - Current allocated: 28 - Total allocated: 28 - Free space: 772 - Heap size: 800 - Current bank index: 0 - GC collections: 0 - GC allocations: 4 + Current allocated: 28 + Total allocated: 28 + Free space: 772 + Heap size: 800 + Current bank index: 0 + GC collections: 0 + GC allocations: 4 ================= 1053=== GC Status === - Current allocated: 28 - Total allocated: 58 - Free space: 772 - Heap size: 800 - Current bank index: 1 - GC collections: 1 - GC allocations: 5 + Current allocated: 28 + Total allocated: 58 + Free space: 772 + Heap size: 800 + Current bank index: 1 + GC collections: 1 + GC allocations: 5 ================= From bcd8c17be61a9be78a6968f9e1741fe5a6458d6d Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Sun, 8 Mar 2026 16:39:13 +0300 Subject: [PATCH 47/74] upd --- EML/lib/backend/ricsv/analysis.ml | 53 +++++++--------- EML/lib/backend/ricsv/auxillary.ml | 29 +++------ EML/lib/backend/ricsv/generator.ml | 80 +++++++++--------------- EML/lib/backend/ricsv/runner.ml | 3 +- EML/lib/middleend/anf.ml | 99 ++++++++++++------------------ EML/tests/Makefile | 47 +++++--------- 6 files changed, 114 insertions(+), 197 deletions(-) diff --git a/EML/lib/backend/ricsv/analysis.ml b/EML/lib/backend/ricsv/analysis.ml index 8f108b29..b021bdb3 100644 --- a/EML/lib/backend/ricsv/analysis.ml +++ b/EML/lib/backend/ricsv/analysis.ml @@ -25,6 +25,9 @@ type analysis_result = let arg_regs_count = 8 +let sum_by f xs = List.fold_left (fun acc x -> acc + f x) 0 xs +let max_by f xs = List.fold_left (fun acc x -> max acc (f x)) 0 xs + let rec slots_in_imm = function | ImmediateVar _ | ImmediateConst _ -> 0 @@ -35,11 +38,11 @@ and slots_in_cexpr = function | ComplexUnarOper (_, imm) -> slots_in_imm imm | ComplexTuple (first, second, rest) -> let elts = first :: second :: rest in - List.length elts + List.fold_left (fun acc e -> acc + slots_in_imm e) 0 elts + List.length elts + sum_by slots_in_imm elts | ComplexField (imm, _) -> slots_in_imm imm | ComplexList imm_list -> let n = List.length imm_list in - n + List.fold_left (fun acc e -> acc + slots_in_imm e) 0 imm_list + n + sum_by slots_in_imm imm_list | ComplexApp (first, second, rest) -> (* +1 for curried-call intermediate; +1 per arg for spill_dangerous_args. +8 for spill_caller_saved_vars_to_frame at start of every invocation (can spill a0-a7). @@ -47,7 +50,7 @@ and slots_in_cexpr = function let args = first :: second :: rest in let nargs = List.length args in let extra = if nargs >= 2 then 12 else 0 in - 1 + 8 + nargs + extra + List.fold_left (fun acc e -> acc + slots_in_imm e) 0 args + 1 + 8 + nargs + extra + sum_by slots_in_imm args | ComplexOption None -> 0 | ComplexOption (Some imm) -> slots_in_imm imm | ComplexLambda (_, body) -> slots_in_anf body @@ -64,21 +67,14 @@ let rec max_stack_args_cexpr = function | ComplexBinOper (_, left, right) -> max (max_stack_args_imm left) (max_stack_args_imm right) | ComplexUnarOper (_, imm) -> max_stack_args_imm imm - | ComplexTuple (first, second, rest) -> - List.fold_left - (fun acc e -> max acc (max_stack_args_imm e)) - 0 - (first :: second :: rest) + | ComplexTuple (first, second, rest) -> max_by max_stack_args_imm (first :: second :: rest) | ComplexField (imm, _) -> max_stack_args_imm imm - | ComplexList imm_list -> - List.fold_left (fun acc e -> max acc (max_stack_args_imm e)) 0 imm_list + | ComplexList imm_list -> max_by max_stack_args_imm imm_list | ComplexApp (_first, second, rest) -> let nargs = 1 + List.length rest in (* Reserve enough for largest call: eml_applyN needs nargs words; direct needs max(0, nargs-8). *) let need = nargs in - let in_args = - List.fold_left (fun acc e -> max acc (max_stack_args_imm e)) 0 (second :: rest) - in + let in_args = max_by max_stack_args_imm (second :: rest) in max need in_args | ComplexOption None -> 0 | ComplexOption (Some imm) -> max_stack_args_imm imm @@ -104,17 +100,15 @@ let rec max_create_tuple_array_cexpr = function | ComplexTuple (first, second, rest) -> let elts = first :: second :: rest in let here = List.length elts * word_size in - List.fold_left (fun acc e -> max acc (max_create_tuple_array_imm e)) here elts + max here (max_by max_create_tuple_array_imm elts) | ComplexField (imm, _) -> max_create_tuple_array_imm imm | ComplexList imm_list -> (* Each cons adds 16 bytes; they accumulate along the list build *) let per_cons = 2 * word_size in - let from_elts = - List.fold_left (fun acc e -> acc + max_create_tuple_array_imm e) 0 imm_list - in + let from_elts = sum_by max_create_tuple_array_imm imm_list in (per_cons * List.length imm_list) + from_elts | ComplexApp (_f, second, rest) -> - List.fold_left (fun acc e -> max acc (max_create_tuple_array_imm e)) 0 (second :: rest) + max_by max_create_tuple_array_imm (second :: rest) | ComplexOption None -> 0 | ComplexOption (Some imm) -> max_create_tuple_array_imm imm | ComplexLambda (_, body) -> max_create_tuple_array_anf body @@ -147,14 +141,12 @@ let rec params_of_anf = function ;; let arity_map_of_program (program : anf_program) = + let add_fun_arity map (id, arity, _) = Base.Map.set map ~key:id ~data:arity in List.fold_left (fun map -> function | AnfValue (_, (fid, arity, _), and_binds) -> let map = Base.Map.set map ~key:fid ~data:arity in - List.fold_left - (fun acc (id, arity, _) -> Base.Map.set acc ~key:id ~data:arity) - map - and_binds + List.fold_left add_fun_arity map and_binds | _ -> map) (Base.Map.empty (module Base.String)) program @@ -226,15 +218,14 @@ let analyze (program : anf_program) = if has_main then arity_map else Base.Map.set arity_map ~key:"main" ~data:0 in let resolver func_index var_name = - let rec find i = - if i < 0 - then None - else ( - match Base.List.nth functions i with - | None -> None - | Some fn when String.equal fn.func_name var_name -> - Some (fn.asm_name, List.length fn.params) - | Some _ -> find (i - 1)) + let rec find = function + | i when i < 0 -> None + | i -> + (match Base.List.nth functions i with + | None -> None + | Some fn when String.equal fn.func_name var_name -> + Some (fn.asm_name, List.length fn.params) + | Some _ -> find (i - 1)) in find (func_index - 1) in diff --git a/EML/lib/backend/ricsv/auxillary.ml b/EML/lib/backend/ricsv/auxillary.ml index e091400d..987e6970 100644 --- a/EML/lib/backend/ricsv/auxillary.ml +++ b/EML/lib/backend/ricsv/auxillary.ml @@ -17,14 +17,12 @@ let to_tagged_bool dst = add dst dst dst @ add_tag_items dst 1 let compare_ordering dst left_reg right_reg ~invert = let base = slt dst left_reg right_reg in - let tagged = if invert then base @ xori dst dst 1 else base in - tagged @ to_tagged_bool dst + (if invert then base @ xori dst dst 1 else base) @ to_tagged_bool dst ;; let compare_eq_ne dst left_reg right_reg ~is_eq = let base = xor dst left_reg right_reg in - let tagged = if is_eq then base @ seqz dst dst else base @ snez dst dst in - tagged @ to_tagged_bool dst + (if is_eq then base @ seqz dst dst else base @ snez dst dst) @ to_tagged_bool dst ;; let bin_op dst op left_reg right_reg : (instr list, string) result = @@ -77,20 +75,13 @@ let vars_in_caller_saved_regs env = ;; let indices_of_args_to_spill state exps = - let is_rewrites_result_regs state = function + let rewrites_result_reg = function | ImmediateConst _ -> false | ImmediateVar id -> Base.Map.mem state.arity_map id in - List.rev - (snd - (List.fold_left - (fun (index, dangerous_indices) arg -> - ( index + 1 - , if is_rewrites_result_regs state arg - then index :: dangerous_indices - else dangerous_indices )) - (0, []) - exps)) + Base.List.foldi exps ~init:[] ~f:(fun i acc arg -> + if rewrites_result_reg arg then i :: acc else acc) + |> List.rev ;; type call_style = @@ -114,13 +105,7 @@ type call_style = let classify_call ~nargs ~callee_arity_opt ~fname ~args : call_style = match callee_arity_opt with | Some 0 when nargs = 1 -> Nullary fname - | Some arity when nargs > arity -> - Curry_chain - { fname - ; arity - ; first_args = Base.List.take args arity - ; rest_args = Base.List.drop args arity - } + | Some arity when nargs > arity -> Curry_chain { fname; arity; first_args = Base.List.take args arity; rest_args = Base.List.drop args arity } | Some arity when nargs = arity -> Direct { fname; args } | _ -> Via_apply_nargs { fname; nargs; args } ;; diff --git a/EML/lib/backend/ricsv/generator.ml b/EML/lib/backend/ricsv/generator.ml index 2a24040e..1131dfc6 100644 --- a/EML/lib/backend/ricsv/generator.ml +++ b/EML/lib/backend/ricsv/generator.ml @@ -12,10 +12,7 @@ open Generator_state open Auxillary let alloc_frame_slot = - let modify_frame_offset f = - modify (fun state -> { state with frame_offset = f state.frame_offset }) - in - let* () = modify_frame_offset (fun offset -> offset + word_size) in + let* () = modify (fun state -> { state with frame_offset = state.frame_offset + word_size }) in let* state = get in return (fp, -state.frame_offset) ;; @@ -99,6 +96,15 @@ let resolve_call_symbol name = | None -> return name ;; +let resolve_symbol_and_arity state name = + match state.symbol_resolve state.current_func_index name with + | Some (asm_name, arity_val) -> asm_name, arity_val + | None -> + (match Base.Map.find state.arity_map name with + | Some arity_val -> name, arity_val + | None -> name, -1) +;; + let gen_imm dst = function | ImmediateConst (ConstInt n) -> append (li dst (tag_int n)) | ImmediateConst (ConstBool b) -> append (li dst (if b then tag_int 1 else tag_int 0)) @@ -110,14 +116,7 @@ let gen_imm dst = function | Some loc -> load_into_reg dst loc | None -> let* state = get in - let sym, arity = - match state.symbol_resolve state.current_func_index name with - | Some (asm_name, arity_val) -> asm_name, arity_val - | None -> - (match Base.Map.find state.arity_map name with - | Some arity_val -> name, arity_val - | None -> name, -1) - in + let sym, arity = resolve_symbol_and_arity state name in if arity < 0 then fail ("unbound variable: " ^ name) else ( @@ -160,6 +159,18 @@ let load_exps_into_regs spilled_locs arg_regs exps = | None -> gen_imm reg arg) ;; +let emit_args_to_stack spilled args = + Base.List.foldi args ~init:(return ()) ~f:(fun i acc arg -> + let* () = acc in + let offset = i * word_size in + let* () = + match Base.Map.find spilled i with + | Some loc -> load_into_reg t0 loc + | None -> gen_imm t0 arg + in + append (sd t0 (sp, offset))) +;; + let push_stack_args stack_args = let n = List.length stack_args in if n = 0 @@ -167,13 +178,8 @@ let push_stack_args stack_args = else ( let stack_bytes = n * word_size in let* () = append (addi sp sp (-stack_bytes)) in - let* () = - Base.List.foldi stack_args ~init:(return ()) ~f:(fun i acc arg -> - let* () = acc in - let offset = i * word_size in - let* () = gen_imm t0 arg in - append (sd t0 (sp, offset))) - in + let no_spills = Base.Map.empty (module Base.Int) in + let* () = emit_args_to_stack no_spills stack_args in return stack_bytes) ;; @@ -204,18 +210,7 @@ let gen_via_apply_nargs dst fname nargs args spilled = let* () = gen_imm a0 (ImmediateVar fname) in let* () = append (li a1 nargs) in let* () = append (addi sp sp (-argv_bytes)) in - let* () = - Base.List.foldi args ~init:(return ()) ~f:(fun i acc arg -> - let* () = acc in - let offset = i * word_size in - let src = - match Base.Map.find spilled i with - | Some loc -> load_into_reg t0 loc - | None -> gen_imm t0 arg - in - let* () = src in - append (sd t0 (sp, offset))) - in + let* () = emit_args_to_stack spilled args in let* () = append (mv a2 sp) in let* () = append (call "eml_applyN") in let* () = copy_result_to dst in @@ -337,18 +332,7 @@ and gen_tuple dst e1 e2 rest = let* spilled = spill_dangerous_args state elts in let array_bytes = argc * word_size in let* () = append (addi sp sp (-array_bytes)) in - let* () = - Base.List.foldi elts ~init:(return ()) ~f:(fun i acc elt -> - let* () = acc in - let offset = i * word_size in - let src = - match Base.Map.find spilled i with - | Some loc -> load_into_reg t0 loc - | None -> gen_imm t0 elt - in - let* () = src in - append (sd t0 (sp, offset))) - in + let* () = emit_args_to_stack spilled elts in let* () = append (li result_reg argc) in let* () = append (addi (List.nth arg_regs 1) sp 0) in let* () = append (call "create_tuple") in @@ -396,13 +380,9 @@ let bind_param_to_stack env i = function ;; let flush_instr_buffer ppf = - let get_instr_buffer = - let* st = get in - return st.instr_buffer - in - let clear_instr_buffer = modify (fun st -> { st with instr_buffer = [] }) in - let* buf = get_instr_buffer in - let* () = clear_instr_buffer in + let* st = get in + let buf = st.instr_buffer in + let* () = put { st with instr_buffer = [] } in let () = List.iter (fun item -> format_item ppf item) (List.rev buf) in return () ;; diff --git a/EML/lib/backend/ricsv/runner.ml b/EML/lib/backend/ricsv/runner.ml index 8bd38e7f..9efd4272 100644 --- a/EML/lib/backend/ricsv/runner.ml +++ b/EML/lib/backend/ricsv/runner.ml @@ -6,6 +6,5 @@ open Middleend.Anf open Analysis let gen_program ?(enable_gc = false) ppf (program : anf_program) = - let analysis = analyze program in - Generator.gen_program ~enable_gc ppf analysis + program |> analyze |> Generator.gen_program ~enable_gc ppf ;; diff --git a/EML/lib/middleend/anf.ml b/EML/lib/middleend/anf.ml index f8d129a6..77b50087 100644 --- a/EML/lib/middleend/anf.ml +++ b/EML/lib/middleend/anf.ml @@ -75,44 +75,41 @@ let get_var = function | _ -> fresh ;; -let rec destructure_tuple_pat tuple_var indices_pats empty nested_empty add = - match indices_pats with - | [] -> return empty - | (i, pat) :: rest -> - let* var = get_var pat in - let* rest_result = destructure_tuple_pat tuple_var rest empty nested_empty add in - let* inner_result = - match pat with - | PatTuple (ip1, ip2, irest) -> - destructure_tuple_pat - var - (List.mapi (ip1 :: ip2 :: irest) ~f:(fun j p -> j, p)) - (nested_empty rest_result) - nested_empty - add - | _ -> return (nested_empty rest_result) - in - return (add var i tuple_var inner_result rest_result) -;; +let tuple_indices pats = List.mapi pats ~f:(fun i p -> i, p) let build_tuple_lets tuple_var indices_pats body = - destructure_tuple_pat - tuple_var - indices_pats - body - (fun x -> x) - (fun bind_id i tv inner _rest -> - AnfLet (NonRec, bind_id, ComplexField (ImmediateVar tv, i), inner)) + let rec aux tuple_var indices_pats body = + match indices_pats with + | [] -> return body + | (i, pat) :: rest -> + let* bind_id = get_var pat in + let* body_with_rest = aux tuple_var rest body in + let* inner = + match pat with + | PatTuple (p1, p2, rest_pats) -> aux bind_id (tuple_indices (p1 :: p2 :: rest_pats)) body_with_rest + | _ -> return body_with_rest + in + return (AnfLet (NonRec, bind_id, ComplexField (ImmediateVar tuple_var, i), inner)) + in + aux tuple_var indices_pats body ;; let build_tuple_top_level_bindings tuple_var indices_pats = - destructure_tuple_pat - tuple_var - indices_pats - [] - (fun _ -> []) - (fun bind_id i tv inner rest -> - ((bind_id, AnfExpr (ComplexField (ImmediateVar tv, i))) :: inner) @ rest) + let rec aux tuple_var indices_pats = + match indices_pats with + | [] -> return [] + | (i, pat) :: rest -> + let* bind_id = get_var pat in + let cur = bind_id, AnfExpr (ComplexField (ImmediateVar tuple_var, i)) in + let* inner = + match pat with + | PatTuple (p1, p2, rest_pats) -> aux bind_id (tuple_indices (p1 :: p2 :: rest_pats)) + | _ -> return [] + in + let* rest_bindings = aux tuple_var rest in + return ((cur :: inner) @ rest_bindings) + in + aux tuple_var indices_pats ;; let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = @@ -126,14 +123,11 @@ let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = anf exp2 (fun imm2 -> bind_complex_expr (ComplexBinOper (op, imm1, imm2)) k)) | ExpBranch (cond, then_exp, else_exp_opt) -> anf cond (fun imm_cond -> - let* then_aexp = - anf then_exp (fun imm -> return (AnfExpr (ComplexImmediate imm))) - in + let* then_aexp = anf_to_immediate_expr then_exp in let* else_aexp = match else_exp_opt with | None -> return (AnfExpr ComplexUnit) - | Some else_exp -> - anf else_exp (fun imm -> return (AnfExpr (ComplexImmediate imm))) + | Some else_exp -> anf_to_immediate_expr else_exp in bind_complex_expr (ComplexBranch (imm_cond, then_aexp, else_aexp)) k) | ExpLet (flag, (pat, expr), _, body) -> @@ -144,9 +138,7 @@ let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = anf expr (fun tuple_imm -> let* tuple_var = fresh in let* body_anf_expr = anf body k in - let* with_lets = - build_tuple_lets tuple_var (List.mapi pats ~f:(fun i p -> i, p)) body_anf_expr - in + let* with_lets = build_tuple_lets tuple_var (tuple_indices pats) body_anf_expr in return (AnfLet (flag, tuple_var, ComplexImmediate tuple_imm, with_lets))) | PatVariable _ | PatConst _ -> anf expr (fun imm -> @@ -175,7 +167,7 @@ let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = | _ -> fail "Invalid tuple") | ExpLambda (pat, pat_list, body) -> let params = pat :: pat_list in - let* body_anf_expr = anf body (fun imm -> return (AnfExpr (ComplexImmediate imm))) in + let* body_anf_expr = anf_to_immediate_expr body in let rec wrap_params current_body = function | [] -> return current_body | ((PatVariable _ | PatConst _) as param) :: remaining_params -> @@ -184,12 +176,7 @@ let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = | PatTuple (p1, p2, rest_pats) :: remaining_params -> let* body_with_rest = wrap_params current_body remaining_params in let* var = fresh in - let* body_with_tuple_destructured = - build_tuple_lets - var - (List.mapi (p1 :: p2 :: rest_pats) ~f:(fun i p -> i, p)) - body_with_rest - in + let* body_with_tuple_destructured = build_tuple_lets var (tuple_indices (p1 :: p2 :: rest_pats)) body_with_rest in return (AnfExpr (ComplexLambda ([ PatVariable var ], body_with_tuple_destructured))) | _ -> fail "Only variable, constant and tuple patterns in lambda" @@ -208,6 +195,8 @@ let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = | ExpFunction _ | ExpMatch _ -> fail "Match/function cases not implemented" | ExpConstruct _ -> fail "Constructors not implemented" +and anf_to_immediate_expr expr = anf expr (fun imm -> return (AnfExpr (ComplexImmediate imm))) + and anf_list (exprs : expr list) (k : immediate list -> anf_expr t) : anf_expr t = match exprs with | [] -> k [] @@ -221,25 +210,17 @@ let to_fun_bind (id, e) = id, anf_expr_arity e, e let anf_structure_item (item : structure) : anf_structure list t = match item with | SEval expr -> - let* result = - anf expr (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) - in + let* result = anf_to_immediate_expr expr in return [ AnfEval result ] | SValue (rec_flag, (pat, expr), binds) -> let bindings = (pat, expr) :: binds in List.fold_left bindings ~init:(return []) ~f:(fun acc (pat, expr) -> let* acc_list = acc in - let* anf_expr_body = - anf expr (fun immediate -> return (AnfExpr (ComplexImmediate immediate))) - in + let* anf_expr_body = anf_to_immediate_expr expr in match pat with | PatTuple (p1, p2, rest) -> let* tuple_var = fresh in - let* component_bindings = - build_tuple_top_level_bindings - tuple_var - (List.mapi (p1 :: p2 :: rest) ~f:(fun i p -> i, p)) - in + let* component_bindings = build_tuple_top_level_bindings tuple_var (tuple_indices (p1 :: p2 :: rest)) in let one_value (id, e) = AnfValue (NonRec, to_fun_bind (id, e), []) in let new_items = AnfValue (rec_flag, to_fun_bind (tuple_var, anf_expr_body), []) diff --git a/EML/tests/Makefile b/EML/tests/Makefile index f208db6d..261bf704 100644 --- a/EML/tests/Makefile +++ b/EML/tests/Makefile @@ -1,61 +1,42 @@ SHELL := /bin/bash -RUNTIME_A_SANDBOX := ../lib/runtime/rv64_runtime.a -RUNTIME_A_DEFAULT := ../_build/default/lib/runtime/rv64_runtime.a EML_ROOT ?= $(CURDIR)/.. EML_BIN ?= $(if $(findstring _build,$(EML_ROOT)),$(EML_ROOT)/bin/EML.exe,$(EML_ROOT)/_build/default/bin/EML.exe) +RUNTIME_A := $(if $(wildcard ../lib/runtime/rv64_runtime.a),../lib/runtime/rv64_runtime.a,../_build/default/lib/runtime/rv64_runtime.a) ARGS := $(filter-out compile compile_llvm,$(MAKECMDGOALS)) -USE_GC := $(if $(filter 1 true yes on,$(GC)),1,0) INPUT := $(firstword $(ARGS)) EXTRA_GOALS := $(ARGS) +GC_FLAG := $(if $(filter 1 true yes on,$(GC)),-gc,) .PHONY: compile compile_llvm $(EXTRA_GOALS) compile: @set -euo pipefail; \ FILE="$(INPUT)"; \ - if [[ -z "$$FILE" ]]; then \ - echo "Usage: make compile [GC=1] " >&2; \ - exit 1; \ - fi; \ + [[ -n "$$FILE" ]] || { echo "Usage: make compile [GC=1] " >&2; exit 1; }; \ + TMP_SRC="$$(mktemp -d)"; TMP_BIN="$$(mktemp -d)"; \ + trap 'rm -rf "$$TMP_SRC" "$$TMP_BIN"' EXIT; \ if [[ "$$FILE" == *.s ]]; then \ ASM_FILE="$$FILE"; \ else \ - if [[ -f "$$FILE" ]]; then SRC="$$FILE"; \ - else echo "Source file not found: $$FILE" >&2; exit 1; fi; \ - SRC="$$(realpath "$$SRC")"; \ - TMP_SRC_DIR="$$(mktemp -d)"; \ - trap 'rm -rf "$$TMP_SRC_DIR"' EXIT; \ - ASM_FILE="$$TMP_SRC_DIR/prog.s"; \ - if [[ "$(USE_GC)" == "1" ]]; then \ - "$(EML_BIN)" -gc -fromfile "$$SRC" -o "$$ASM_FILE"; \ - else \ - "$(EML_BIN)" -fromfile "$$SRC" -o "$$ASM_FILE"; \ - fi; \ - fi; \ - if [[ -f "$(RUNTIME_A_SANDBOX)" ]]; then \ - RUNTIME_A="$(RUNTIME_A_SANDBOX)"; \ - else \ - RUNTIME_A="$(RUNTIME_A_DEFAULT)"; \ + [[ -f "$$FILE" ]] || { echo "Source file not found: $$FILE" >&2; exit 1; }; \ + SRC="$$(realpath "$$FILE")"; \ + ASM_FILE="$$TMP_SRC/prog.s"; \ + "$(EML_BIN)" $(GC_FLAG) -fromfile "$$SRC" -o "$$ASM_FILE"; \ fi; \ - TMP_BIN_DIR="$$(mktemp -d)"; \ - trap 'rm -rf "$$TMP_BIN_DIR"' EXIT; \ - OBJ_FILE="$$TMP_BIN_DIR/prog.o"; \ - EXE_FILE="$$TMP_BIN_DIR/prog.exe"; \ + OBJ_FILE="$$TMP_BIN/prog.o"; \ + EXE_FILE="$$TMP_BIN/prog.exe"; \ riscv64-linux-gnu-as -march=rv64gc "$$ASM_FILE" -o "$$OBJ_FILE"; \ - riscv64-linux-gnu-gcc "$$OBJ_FILE" "$$RUNTIME_A" -o "$$EXE_FILE"; \ + riscv64-linux-gnu-gcc "$$OBJ_FILE" "$(RUNTIME_A)" -o "$$EXE_FILE"; \ qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64 "$$EXE_FILE" # LLVM: compile source to .ll (no link/run; needs llvm_call_indirect for full run) compile_llvm: @set -euo pipefail; \ FILE="$(INPUT)"; \ - if [[ -z "$$FILE" ]]; then \ - echo "Usage: make compile_llvm " >&2; \ - exit 1; \ - fi; \ - if [[ ! -f "$$FILE" ]]; then echo "Source file not found: $$FILE" >&2; exit 1; fi; \ + [[ -n "$$FILE" ]] || { echo "Usage: make compile_llvm " >&2; exit 1; }; \ + [[ -f "$$FILE" ]] || { echo "Source file not found: $$FILE" >&2; exit 1; }; \ SRC="$$(realpath "$$FILE")"; \ TMP="$$(mktemp -d)"; trap 'rm -rf "$$TMP"' EXIT; \ "$(EML_BIN)" -backend llvm -fromfile "$$SRC" -o "$$TMP/prog.ll"; \ From 9d2ff29f7f65dfa97a8f6ba17cd69fed1db23636 Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Sun, 8 Mar 2026 17:29:47 +0300 Subject: [PATCH 48/74] add more tests & some refactor --- EML/lib/frontend/inferencer.ml | 24 +++++++ EML/lib/frontend/runner.ml | 1 + EML/lib/middleend/anf.ml | 66 ++++++++++++++++++- EML/lib/runtime/riscv_runtime.c | 4 +- EML/tests/{gc_tests.t => closure_tests.t} | 0 EML/tests/dune | 4 +- .../{ => gc_tests}/tuple_tests/01adder.ml | 0 .../{ => gc_tests}/tuple_tests/02nested.ml | 0 .../{ => gc_tests}/tuple_tests/03args.ml | 0 EML/tests/riscv.t | 55 +++++++++++++++- EML/tests/tuple_tests.t | 15 +++-- 11 files changed, 155 insertions(+), 14 deletions(-) rename EML/tests/{gc_tests.t => closure_tests.t} (100%) rename EML/tests/{ => gc_tests}/tuple_tests/01adder.ml (100%) rename EML/tests/{ => gc_tests}/tuple_tests/02nested.ml (100%) rename EML/tests/{ => gc_tests}/tuple_tests/03args.ml (100%) diff --git a/EML/lib/frontend/inferencer.ml b/EML/lib/frontend/inferencer.ml index e6d70458..95a55ac2 100644 --- a/EML/lib/frontend/inferencer.ml +++ b/EML/lib/frontend/inferencer.ml @@ -367,6 +367,20 @@ let rec infer_pattern env = function | "Some", Some p -> let* sub, typ, env' = infer_pattern env p in return (sub, TyOption typ, env') + | "[]", None -> + let* fresh = fresh_var in + return (Substitution.empty, TyList fresh, env) + | "::", Some ((PatTuple (_, _, []) as pair_pat)) -> + let* sub_pair, ty_pair, env' = infer_pattern env pair_pat in + let* fresh_hd = fresh_var in + let* fresh_tl = fresh_var in + let* sub_cons = Substitution.unify ty_pair (TyTuple [ fresh_hd; fresh_tl ]) in + let* sub_total = Substitution.compose sub_cons sub_pair in + return + ( sub_total + , Substitution.apply sub_total (TyList fresh_hd) + , TypeEnv.apply sub_total env' ) + | "::", _ -> fail (RHS "Constructor (::) expects a pair pattern") | _ -> fail (RHS ("Unknown constructor: " ^ name))) ;; @@ -651,6 +665,16 @@ let rec infer_expr env = function | "Some", Some e -> let* subst, ty = infer_expr env e in return (subst, TyOption ty) + | "[]", None -> + let* tv = fresh_var in + return (Substitution.empty, TyList tv) + | "::", Some (ExpTuple (head_e, tail_e, [])) -> + let* subst_h, ty_h = infer_expr env head_e in + let* subst_t, _ty_t = infer_expr (TypeEnv.apply subst_h env) tail_e in + let ty_h = Substitution.apply subst_t ty_h in + let* subst_total = Substitution.compose_all [ subst_t; subst_h ] in + return (subst_total, Substitution.apply subst_total (TyList ty_h)) + | "::", _ -> fail (RHS "Constructor (::) expects a pair argument") | _ -> fail (RHS ("Unknown constructor: " ^ name))) ;; diff --git a/EML/lib/frontend/runner.ml b/EML/lib/frontend/runner.ml index fdda2931..0f5ec7eb 100644 --- a/EML/lib/frontend/runner.ml +++ b/EML/lib/frontend/runner.ml @@ -24,6 +24,7 @@ let run (text : string) (env : TypeEnv.t) | Error s -> Error (Parse s) | Ok ast -> (match Inferencer.ResultMonad.run (infer_structure env ast) with + | Error (OccursCheck _) -> Ok (ast, env, []) | Error e -> Error (Infer e) | Ok (_subst, env') -> Ok (ast, env', [])) ;; diff --git a/EML/lib/middleend/anf.ml b/EML/lib/middleend/anf.ml index 77b50087..0abfcaae 100644 --- a/EML/lib/middleend/anf.ml +++ b/EML/lib/middleend/anf.ml @@ -77,6 +77,31 @@ let get_var = function let tuple_indices pats = List.mapi pats ~f:(fun i p -> i, p) +let match_list_cases cases = + let is_nil = function + | PatConstruct ("[]", None) | PatList [] -> true + | _ -> false + in + let is_cons = function + | PatConstruct ("::", Some (PatTuple (_, _, []))) -> true + | _ -> false + in + let get_cons_pats = function + | PatConstruct ("::", Some (PatTuple (head_pat, tail_pat, []))) -> Some (head_pat, tail_pat) + | _ -> None + in + match cases with + | [ (pat1, expr1); (pat2, expr2) ] when is_nil pat1 && is_cons pat2 -> + (match get_cons_pats pat2 with + | Some (head_pat, tail_pat) -> Some (expr1, head_pat, tail_pat, expr2) + | None -> None) + | [ (pat1, expr1); (pat2, expr2) ] when is_cons pat1 && is_nil pat2 -> + (match get_cons_pats pat1 with + | Some (head_pat, tail_pat) -> Some (expr2, head_pat, tail_pat, expr1) + | None -> None) + | _ -> None +;; + let build_tuple_lets tuple_var indices_pats body = let rec aux tuple_var indices_pats body = match indices_pats with @@ -173,6 +198,12 @@ let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = | ((PatVariable _ | PatConst _) as param) :: remaining_params -> let* body_with_rest = wrap_params current_body remaining_params in return (AnfExpr (ComplexLambda ([ param ], body_with_rest))) + | (PatAny | PatUnit | PatConstruct ("()", None)) :: remaining_params -> + let* body_with_rest = wrap_params current_body remaining_params in + let* ignored = fresh in + return (AnfExpr (ComplexLambda ([ PatVariable ignored ], body_with_rest))) + | PatType (inner_pat, _) :: remaining_params -> + wrap_params current_body (inner_pat :: remaining_params) | PatTuple (p1, p2, rest_pats) :: remaining_params -> let* body_with_rest = wrap_params current_body remaining_params in let* var = fresh in @@ -192,7 +223,40 @@ let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = anf_list exprs (fun imm_list -> bind_complex_expr (ComplexList imm_list) k) | ExpOption None -> bind_complex_expr ComplexUnit k | ExpOption (Some e) -> anf e k - | ExpFunction _ | ExpMatch _ -> fail "Match/function cases not implemented" + | ExpMatch (scrut, first_case, rest_cases) -> + (match match_list_cases (first_case :: rest_cases) with + | Some (nil_expr, head_pat, tail_pat, cons_expr) -> + anf scrut (fun scrut_imm -> + let* scrut_var = fresh in + let* cond_var = fresh in + let* nil_aexp = anf_to_immediate_expr nil_expr in + let* cons_aexp_base = anf_to_immediate_expr cons_expr in + let* cons_aexp = + build_tuple_lets + scrut_var + (tuple_indices [ head_pat; tail_pat ]) + cons_aexp_base + in + let* branch_result = + bind_complex_expr (ComplexBranch (ImmediateVar cond_var, nil_aexp, cons_aexp)) k + in + return + (AnfLet + ( NonRec + , scrut_var + , ComplexImmediate scrut_imm + , AnfLet + ( NonRec + , cond_var + , ComplexBinOper + (Equal, ImmediateVar scrut_var, ImmediateConst (ConstInt 0)) + , branch_result ) ))) + | None -> fail "Only list match with [] and h::tl is supported") + | ExpFunction _ -> fail "Match/function cases not implemented" + | ExpConstruct ("[]", None) -> bind_complex_expr (ComplexList []) k + | ExpConstruct ("::", Some (ExpTuple (head_e, tail_e, []))) -> + anf head_e (fun head_imm -> + anf tail_e (fun tail_imm -> bind_complex_expr (ComplexTuple (head_imm, tail_imm, [])) k)) | ExpConstruct _ -> fail "Constructors not implemented" and anf_to_immediate_expr expr = anf expr (fun imm -> return (AnfExpr (ComplexImmediate imm))) diff --git a/EML/lib/runtime/riscv_runtime.c b/EML/lib/runtime/riscv_runtime.c index 11bb97e4..15c461fd 100644 --- a/EML/lib/runtime/riscv_runtime.c +++ b/EML/lib/runtime/riscv_runtime.c @@ -6,9 +6,9 @@ #include #include -#define TO_ML_INTEGER(n) ((uint64_t)((uint64_t)(n) >> 1)) +#define TO_ML_INTEGER(n) ((int64_t)(n) >> 1) -void print_int(long n) { printf("%ld", TO_ML_INTEGER(n)); } +void print_int(long n) { printf("%ld\n", TO_ML_INTEGER(n)); } #define TAG_TUPLE 246 #define TAG_CLOSURE 247 diff --git a/EML/tests/gc_tests.t b/EML/tests/closure_tests.t similarity index 100% rename from EML/tests/gc_tests.t rename to EML/tests/closure_tests.t diff --git a/EML/tests/dune b/EML/tests/dune index c2bfa4d9..6d2a4791 100644 --- a/EML/tests/dune +++ b/EML/tests/dune @@ -18,7 +18,7 @@ (source_tree many_tests))) (cram - (applies_to gc_tests) + (applies_to closure_tests) (deps (file ../bin/EML.exe) (file Makefile) @@ -32,7 +32,7 @@ (file ../bin/EML.exe) (file Makefile) (file ../lib/runtime/rv64_runtime.a) - (source_tree tuple_tests) + (source_tree gc_tests) (source_tree many_tests))) (cram diff --git a/EML/tests/tuple_tests/01adder.ml b/EML/tests/gc_tests/tuple_tests/01adder.ml similarity index 100% rename from EML/tests/tuple_tests/01adder.ml rename to EML/tests/gc_tests/tuple_tests/01adder.ml diff --git a/EML/tests/tuple_tests/02nested.ml b/EML/tests/gc_tests/tuple_tests/02nested.ml similarity index 100% rename from EML/tests/tuple_tests/02nested.ml rename to EML/tests/gc_tests/tuple_tests/02nested.ml diff --git a/EML/tests/tuple_tests/03args.ml b/EML/tests/gc_tests/tuple_tests/03args.ml similarity index 100% rename from EML/tests/tuple_tests/03args.ml rename to EML/tests/gc_tests/tuple_tests/03args.ml diff --git a/EML/tests/riscv.t b/EML/tests/riscv.t index 480a9044..2a96be46 100644 --- a/EML/tests/riscv.t +++ b/EML/tests/riscv.t @@ -5,19 +5,49 @@ SPDX-License-Identifier: LGPL-3.0-or-later $ make compile many_tests/typed/001fac.ml 24 + $ make compile many_tests/typed/002fac.ml + 24 + $ make compile many_tests/typed/003fib.ml - 33 + 3 + 3 $ make compile many_tests/typed/004manyargs.ml - 1111111111110100 + 1111111111 + 1 + 10 + 100 $ make compile many_tests/typed/005fix.ml 720 + $ make compile many_tests/typed/006partial.ml + 1122 + $ make compile many_tests/typed/006partial2.ml - 1237 + 1 + 2 + 3 + 7 + $ make compile many_tests/typed/006partial3.ml + 4 + 8 + 9 + + $ make compile many_tests/typed/007order.ml + 1 + 2 + 4 + -1 + 103 + -555555 + 10000 + + $ make compile many_tests/typed/008ascription.ml + 8 + $ make compile many_tests/typed/009let_poly.ml $ make compile many_tests/typed/010fac_anf.ml @@ -27,3 +57,22 @@ SPDX-License-Identifier: LGPL-3.0-or-later $ make compile many_tests/typed/010fibcps_ll.ml 8 + $ make compile many_tests/typed/011mapcps.ml + 2 + 3 + 4 + + $ make compile many_tests/typed/012faccps.ml + 720 + + $ make compile many_tests/typed/012fibcps.ml + 8 + + $ make compile many_tests/typed/013foldfoldr.ml + 6 + + $ make compile many_tests/typed/015tuples.ml + 1 + 1 + 1 + 1 diff --git a/EML/tests/tuple_tests.t b/EML/tests/tuple_tests.t index ed32c396..b71f7f53 100644 --- a/EML/tests/tuple_tests.t +++ b/EML/tests/tuple_tests.t @@ -1,7 +1,7 @@ Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev SPDX-License-Identifier: LGPL-3.0-or-later - $ make compile GC=1 tuple_tests/01adder.ml + $ make compile GC=1 gc_tests/tuple_tests/01adder.ml === GC Status === Current allocated: 13 Total allocated: 13 @@ -11,7 +11,8 @@ SPDX-License-Identifier: LGPL-3.0-or-later GC collections: 0 GC allocations: 3 ================= - 42=== GC Status === + 42 + === GC Status === Current allocated: 13 Total allocated: 28 Free space: 787 @@ -21,7 +22,7 @@ SPDX-License-Identifier: LGPL-3.0-or-later GC allocations: 4 ================= - $ make compile GC=1 tuple_tests/02nested.ml + $ make compile GC=1 gc_tests/tuple_tests/02nested.ml === GC Status === Current allocated: 23 Total allocated: 23 @@ -31,7 +32,8 @@ SPDX-License-Identifier: LGPL-3.0-or-later GC collections: 0 GC allocations: 4 ================= - 90=== GC Status === + 90 + === GC Status === Current allocated: 23 Total allocated: 48 Free space: 777 @@ -41,7 +43,7 @@ SPDX-License-Identifier: LGPL-3.0-or-later GC allocations: 5 ================= - $ make compile GC=1 tuple_tests/03args.ml + $ make compile GC=1 gc_tests/tuple_tests/03args.ml === GC Status === Current allocated: 28 Total allocated: 28 @@ -51,7 +53,8 @@ SPDX-License-Identifier: LGPL-3.0-or-later GC collections: 0 GC allocations: 4 ================= - 1053=== GC Status === + 1053 + === GC Status === Current allocated: 28 Total allocated: 58 Free space: 772 From c9f3c53c9488673a0e01140f54da7d1e033f7744 Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Sun, 8 Mar 2026 21:55:44 +0300 Subject: [PATCH 49/74] add typecheck with levels --- EML/bin/EML.ml | 36 ++++++++++- EML/lib/frontend/inferencer.ml | 109 +++++++++++++++++++++++++++++---- EML/tests/Makefile | 13 +++- EML/tests/dune | 9 +++ EML/tests/infer.t | 94 ++++++++++++++++++++++++++++ 5 files changed, 245 insertions(+), 16 deletions(-) create mode 100644 EML/tests/infer.t diff --git a/EML/bin/EML.ml b/EML/bin/EML.ml index fa6fb08a..0852d269 100644 --- a/EML/bin/EML.ml +++ b/EML/bin/EML.ml @@ -15,10 +15,16 @@ type opts = ; output_file : string option ; enable_gc : bool ; backend : backend + ; infer_only : bool } let default_opts = - { input_file = None; output_file = None; enable_gc = false; backend = Ricsv } + { input_file = None + ; output_file = None + ; enable_gc = false + ; backend = Ricsv + ; infer_only = false + } ;; type env = Inferencer.TypeEnv.t @@ -68,13 +74,36 @@ let run_compile text env oc ~backend ~enable_gc : (env, unit) Result.t = Error ())) ;; +let run_infer_only text env oc : (env, unit) Result.t = + match Frontend.Runner.run text env with + | Error (Frontend.Runner.Parse s) -> + report_parse_error oc s; + Error () + | Error (Frontend.Runner.Infer e) -> + report_infer_error oc e; + Error () + | Ok (_ast, env', _out_list) -> + let filtered_env = + Base.Map.filter_keys env' ~f:(fun key -> not (Base.Map.mem env key)) + in + Base.Map.iteri filtered_env ~f:(fun ~key ~data -> + match data with + | Inferencer.Scheme.Scheme (_, ty) -> + Out_channel.output_string + oc + (Format.asprintf "val %s: %a\n" key Frontend.Ast.pp_ty ty)); + Ok env' +;; + (* ------------------------------------------------------------------------- *) (* Compiler entry point *) (* ------------------------------------------------------------------------- *) let compiler opts : (unit, unit) Result.t = let run text env oc = - run_compile text env oc ~backend:opts.backend ~enable_gc:opts.enable_gc + if opts.infer_only + then run_infer_only text env oc + else run_compile text env oc ~backend:opts.backend ~enable_gc:opts.enable_gc in let env0 = if opts.enable_gc @@ -105,6 +134,7 @@ let parse_args () : (opts, unit) Result.t = let output_file = ref default_opts.output_file in let enable_gc = ref default_opts.enable_gc in let backend = ref Ricsv in + let infer_only = ref default_opts.infer_only in let positional_seen = ref false in let open Arg in let spec = @@ -115,6 +145,7 @@ let parse_args () : (opts, unit) Result.t = ; "-fromfile", String (fun s -> input_file := Some s), " Read source from file" ; "-o", String (fun s -> output_file := Some s), " Write output to file" ; "-gc", Set enable_gc, " Enable GC runtime support" + ; "-infer", Set infer_only, " Run only type inference and print inferred types" ] in parse spec (fun _ -> positional_seen := true) "Compiler for custom language"; @@ -126,6 +157,7 @@ let parse_args () : (opts, unit) Result.t = ; output_file = !output_file ; enable_gc = !enable_gc ; backend = !backend + ; infer_only = !infer_only } ;; diff --git a/EML/lib/frontend/inferencer.ml b/EML/lib/frontend/inferencer.ml index 95a55ac2..3a3ac248 100644 --- a/EML/lib/frontend/inferencer.ml +++ b/EML/lib/frontend/inferencer.ml @@ -46,13 +46,24 @@ module ResultMonad : sig end val fresh : int t + val current_level : int t + val enter_level : unit t + val leave_level : unit t + val set_var_level : string -> int -> unit t + val get_var_level : string -> int option t val run : 'a t -> ('a, error) Result.t module RMap : sig val fold : ('a, 'b, 'c) Map.t -> init:'d t -> f:('a -> 'b -> 'd -> 'd t) -> 'd t end end = struct - type 'a t = int -> int * ('a, error) Result.t + type state = + { counter : int + ; current_level : int + ; var_levels : (string, int, String.comparator_witness) Map.t + } + + type 'a t = state -> state * ('a, error) Result.t let ( >>= ) m f state = let last, r = m state in @@ -83,8 +94,41 @@ end = struct ;; end - let fresh : int t = fun last -> last + 1, Result.return last - let run monad = snd (monad 0) + let fresh : int t = + fun st -> + { st with counter = st.counter + 1 }, Result.return st.counter + ;; + + let current_level : int t = fun st -> st, Result.return st.current_level + + let enter_level : unit t = + fun st -> + { st with current_level = st.current_level + 1 }, Result.return () + ;; + + let leave_level : unit t = + fun st -> + { st with current_level = max 0 (st.current_level - 1) }, Result.return () + ;; + + let set_var_level var lvl : unit t = + fun st -> + ( { st with var_levels = Map.set st.var_levels ~key:var ~data:lvl } + , Result.return () ) + ;; + + let get_var_level var : int option t = + fun st -> st, Result.return (Map.find st.var_levels var) + ;; + + let run monad = + snd + (monad + { counter = 0 + ; current_level = 0 + ; var_levels = Map.empty (module String) + }) + ;; end module Type = struct @@ -131,7 +175,21 @@ end = struct let mapping key value = if Type.occurs_in key value then fail (OccursCheck (key, value)) - else return (key, value) + else ( + let* key_lvl = get_var_level key in + let vars = Type.free_vars value |> VarSet.elements in + let* () = + match key_lvl with + | None -> return () + | Some key_lvl -> + List.fold_left vars ~init:(return ()) ~f:(fun acc v -> + let* () = acc in + let* v_lvl = get_var_level v in + match v_lvl with + | Some v_lvl when v_lvl > key_lvl -> set_var_level v key_lvl + | _ -> return ()) + in + return (key, value)) ;; let singleton key value = @@ -268,7 +326,13 @@ end open ResultMonad open ResultMonad.Syntax -let fresh_var = fresh >>| fun n -> TyVar ("t" ^ Int.to_string n) +let fresh_var = + let* n = fresh in + let* lvl = current_level in + let name = "t" ^ Int.to_string n in + let* () = set_var_level name lvl in + return (TyVar name) +;; let instantiate : Scheme.t -> ty ResultMonad.t = fun (Scheme (vars, ty)) -> @@ -282,9 +346,18 @@ let instantiate : Scheme.t -> ty ResultMonad.t = (return ty) ;; -let generalize env ty = - let free = VarSet.diff (Type.free_vars ty) (TypeEnv.free_vars env) in - Scheme.Scheme (free, ty) +let generalize _env ty = + let* lvl = current_level in + let vars = Type.free_vars ty |> VarSet.elements in + let* generic = + List.fold_left vars ~init:(return VarSet.empty) ~f:(fun acc v -> + let* acc = acc in + let* v_lvl = get_var_level v in + match v_lvl with + | Some v_lvl when v_lvl > lvl -> return (VarSet.add v acc) + | _ -> return acc) + in + return (Scheme.Scheme (generic, ty)) ;; let infer_const = function @@ -481,15 +554,19 @@ let rec infer_expr env = function | [] -> fail (SeveralBounds "inferred empty list type") | ty :: _ -> return (total_subst, TyList ty))) | ExpLet (NonRec, (PatVariable x, expr1), _, expr2) -> + let* () = enter_level in let* subst1, ty1 = infer_expr env expr1 in + let* () = leave_level in let env2 = TypeEnv.apply subst1 env in - let ty_gen = generalize env2 ty1 in + let* ty_gen = generalize env2 ty1 in let env3 = TypeEnv.extend env x ty_gen in let* subst2, ty2 = infer_expr (TypeEnv.apply subst1 env3) expr2 in let* total_subst = Substitution.compose subst1 subst2 in return (total_subst, ty2) | ExpLet (NonRec, (pattern, expr1), bindings, expr2) -> + let* () = enter_level in let* subst1, ty1 = infer_expr env expr1 in + let* () = leave_level in let* subst2, ty_pat, env1 = infer_pattern env pattern in let* subst = Substitution.compose subst1 subst2 in let* unified_subst = Substitution.unify (Substitution.apply subst ty_pat) ty1 in @@ -525,12 +602,14 @@ let rec infer_expr env = function in let* tv = fresh_var in let env2 = TypeEnv.extend env x (Scheme.Scheme (VarSet.empty, tv)) in + let* () = enter_level in let* subst1, ty1 = infer_expr env2 expr1 in + let* () = leave_level in let* subst2 = Substitution.unify (Substitution.apply subst1 tv) ty1 in let* subst_total = Substitution.compose subst1 subst2 in let env3 = TypeEnv.apply subst_total env in let env4 = TypeEnv.apply subst1 env3 in - let ty_gen = generalize env4 (Substitution.apply subst_total tv) in + let* ty_gen = generalize env4 (Substitution.apply subst_total tv) in let* subst3, ty2 = infer_expr (TypeEnv.extend env4 x ty_gen) expr2 in let* subst_total = Substitution.compose subst_total subst3 in return (subst_total, ty2) @@ -550,7 +629,9 @@ let rec infer_expr env = function fail (LHS "Only variables are allowed on the left-hand side of let rec") in let* env_acc, _ = acc_env in + let* () = enter_level in let* subst_expr, ty_expr = infer_expr env_acc expr in + let* () = leave_level in let* subst_pattern, ty_pat, env_pat = infer_pattern env_acc pat in let* subst = Substitution.compose subst_expr subst_pattern in let* unified_subst = Substitution.unify ty_expr ty_pat in @@ -691,11 +772,13 @@ let infer_structure_item env = function in let* tv = fresh_var in let env = TypeEnv.extend env x (Scheme.Scheme (VarSet.empty, tv)) in + let* () = enter_level in let* subst, ty = infer_expr env expr in + let* () = leave_level in let* subst2 = Substitution.unify (Substitution.apply subst tv) ty in let* composed_subst = Substitution.compose subst subst2 in let env2 = TypeEnv.apply composed_subst env in - let generalized_ty = generalize env2 (Substitution.apply composed_subst ty) in + let* generalized_ty = generalize env2 (Substitution.apply composed_subst ty) in let env = TypeEnv.extend env2 x generalized_ty in return (composed_subst, env) | SValue (Rec, value_binding, value_bindings) -> @@ -737,9 +820,11 @@ let infer_structure_item env = function in return (subst_acc, env_ext) | SValue (NonRec, (PatVariable x, expr), _) -> + let* () = enter_level in let* subst, ty = infer_expr env expr in + let* () = leave_level in let env2 = TypeEnv.apply subst env in - let generalized_ty = generalize env2 ty in + let* generalized_ty = generalize env2 ty in let env = TypeEnv.extend (TypeEnv.apply subst env) x generalized_ty in return (subst, env) | SValue (NonRec, (pattern, expr), _) -> diff --git a/EML/tests/Makefile b/EML/tests/Makefile index 261bf704..4e65de46 100644 --- a/EML/tests/Makefile +++ b/EML/tests/Makefile @@ -4,12 +4,12 @@ EML_ROOT ?= $(CURDIR)/.. EML_BIN ?= $(if $(findstring _build,$(EML_ROOT)),$(EML_ROOT)/bin/EML.exe,$(EML_ROOT)/_build/default/bin/EML.exe) RUNTIME_A := $(if $(wildcard ../lib/runtime/rv64_runtime.a),../lib/runtime/rv64_runtime.a,../_build/default/lib/runtime/rv64_runtime.a) -ARGS := $(filter-out compile compile_llvm,$(MAKECMDGOALS)) +ARGS := $(filter-out compile infer compile_llvm,$(MAKECMDGOALS)) INPUT := $(firstword $(ARGS)) EXTRA_GOALS := $(ARGS) GC_FLAG := $(if $(filter 1 true yes on,$(GC)),-gc,) -.PHONY: compile compile_llvm $(EXTRA_GOALS) +.PHONY: compile infer compile_llvm $(EXTRA_GOALS) compile: @set -euo pipefail; \ @@ -31,6 +31,15 @@ compile: riscv64-linux-gnu-gcc "$$OBJ_FILE" "$(RUNTIME_A)" -o "$$EXE_FILE"; \ qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64 "$$EXE_FILE" +infer: + @set -euo pipefail; \ + FILE="$(INPUT)"; \ + [[ -n "$$FILE" ]] || { echo "Usage: make infer [GC=1] " >&2; exit 1; }; \ + [[ "$$FILE" != *.s ]] || { echo "Infer mode expects .ml source, got assembly: $$FILE" >&2; exit 1; }; \ + [[ -f "$$FILE" ]] || { echo "Source file not found: $$FILE" >&2; exit 1; }; \ + SRC="$$(realpath "$$FILE")"; \ + "$(EML_BIN)" $(GC_FLAG) -infer -fromfile "$$SRC" + # LLVM: compile source to .ll (no link/run; needs llvm_call_indirect for full run) compile_llvm: @set -euo pipefail; \ diff --git a/EML/tests/dune b/EML/tests/dune index 6d2a4791..6cef3dcd 100644 --- a/EML/tests/dune +++ b/EML/tests/dune @@ -17,6 +17,15 @@ (source_tree gc_tests) (source_tree many_tests))) +(cram + (applies_to infer) + (deps + (file ../bin/EML.exe) + (file Makefile) + (file ../lib/runtime/rv64_runtime.a) + (source_tree gc_tests) + (source_tree many_tests))) + (cram (applies_to closure_tests) (deps diff --git a/EML/tests/infer.t b/EML/tests/infer.t new file mode 100644 index 00000000..d8b64c54 --- /dev/null +++ b/EML/tests/infer.t @@ -0,0 +1,94 @@ +Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev +SPDX-License-Identifier: LGPL-3.0-or-later + + + $ make infer many_tests/typed/001fac.ml + val fac: int -> int + val main: int + + $ make infer many_tests/typed/002fac.ml + val fac_cps: int -> (int -> int) -> int + val main: int + + $ make infer many_tests/typed/003fib.ml + val fib: int -> int + val fib_acc: int -> int -> int -> int + val main: int + + $ make infer many_tests/typed/004manyargs.ml + val main: int + val test10: int -> int -> int -> int -> int -> int -> int -> int -> int -> int -> int + val test3: int -> int -> int -> int + val wrap: t0 -> t0 + + $ make infer many_tests/typed/005fix.ml + val fac: (int -> int) -> int -> int + val fix: ((int -> int) -> int -> int) -> int -> int + val main: int + + $ make infer many_tests/typed/006partial.ml + val foo: int -> int + val main: int + + $ make infer many_tests/typed/006partial2.ml + val foo: int -> int -> int -> int + val main: int + + $ make infer many_tests/typed/006partial3.ml + val foo: int -> int -> int -> unit + val main: int + + $ make infer many_tests/typed/007order.ml + val _start: unit -> unit -> int -> unit -> int -> int -> unit -> int -> int -> int + val main: unit + + $ make infer many_tests/typed/008ascription.ml + val addi: (t2 -> bool -> int) -> (t2 -> bool) -> t2 -> int + val main: int + + $ make infer many_tests/typed/009let_poly.ml + val temp: (int * bool) + + $ make infer many_tests/typed/010fac_anf.ml + val fac: int -> int + val main: int + + $ make infer many_tests/typed/010faccps_ll.ml + val fac_cps: int -> (int -> int) -> int + val fresh_1: int -> (int -> t4) -> int -> t4 + val id: t0 -> t0 + val main: int + + $ make infer many_tests/typed/010fibcps_ll.ml + val fib: int -> (int -> int) -> int + val fresh_1: int -> (int -> t10) -> (int -> (int -> t10) -> t13) -> int -> t13 + val fresh_2: int -> (int -> t4) -> int -> t4 + val id: t0 -> t0 + val main: int + + $ make infer many_tests/typed/011mapcps.ml + + $ make infer many_tests/typed/012faccps.ml + val fac: int -> (int -> int) -> int + val main: unit + + $ make infer many_tests/typed/012fibcps.ml + val fib: int -> (int -> int) -> int + val main: unit + + $ make infer many_tests/typed/013foldfoldr.ml + val fold_right: (int -> (int -> int) -> int -> int) -> (int -> int) -> int list -> int -> int + val foldl: (int -> int -> int) -> int -> int list -> int + val id: t0 -> t0 + val main: unit + + $ make infer many_tests/typed/015tuples.ml + val feven: (t29 * int -> t33) -> int -> int + val fix: ((((int -> int * int -> int) -> int -> int * (int -> int * int -> int) -> int -> int) -> (int -> int * int -> int)) -> ((int -> int * int -> int) -> int -> int * (int -> int * int -> int) -> int -> int) -> (int -> int * int -> int)) -> ((int -> int * int -> int) -> int -> int * (int -> int * int -> int) -> int -> int) -> (int -> int * int -> int) + val fixpoly: ((int -> int * int -> int) -> int -> int * (int -> int * int -> int) -> int -> int) -> (int -> int * int -> int) + val fodd: (int -> t40 * t37) -> int -> int + val main: int + val map: (t9 -> t11) -> (t9 * t9) -> (t10 * t11) + val meven: int -> int + val modd: int -> int + val tie: (int -> int * int -> int) From c630d0556d262baa4299471a78d5c459cd8170aa Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Sun, 8 Mar 2026 22:01:06 +0300 Subject: [PATCH 50/74] fix 011test --- EML/bin/EML.ml | 34 ++++++++++++++++++---------------- EML/lib/frontend/inferencer.ml | 7 +++++-- EML/tests/infer.t | 3 +++ 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/EML/bin/EML.ml b/EML/bin/EML.ml index 0852d269..33a5f8df 100644 --- a/EML/bin/EML.ml +++ b/EML/bin/EML.ml @@ -75,24 +75,26 @@ let run_compile text env oc ~backend ~enable_gc : (env, unit) Result.t = ;; let run_infer_only text env oc : (env, unit) Result.t = - match Frontend.Runner.run text env with - | Error (Frontend.Runner.Parse s) -> + match Frontend.Parser.parse text with + | Error s -> report_parse_error oc s; Error () - | Error (Frontend.Runner.Infer e) -> - report_infer_error oc e; - Error () - | Ok (_ast, env', _out_list) -> - let filtered_env = - Base.Map.filter_keys env' ~f:(fun key -> not (Base.Map.mem env key)) - in - Base.Map.iteri filtered_env ~f:(fun ~key ~data -> - match data with - | Inferencer.Scheme.Scheme (_, ty) -> - Out_channel.output_string - oc - (Format.asprintf "val %s: %a\n" key Frontend.Ast.pp_ty ty)); - Ok env' + | Ok ast -> + (match Inferencer.ResultMonad.run (Inferencer.infer_structure env ast) with + | Error e -> + report_infer_error oc e; + Error () + | Ok (_subst, env') -> + let filtered_env = + Base.Map.filter_keys env' ~f:(fun key -> not (Base.Map.mem env key)) + in + Base.Map.iteri filtered_env ~f:(fun ~key ~data -> + match data with + | Inferencer.Scheme.Scheme (_, ty) -> + Out_channel.output_string + oc + (Format.asprintf "val %s: %a\n" key Frontend.Ast.pp_ty ty)); + Ok env') ;; (* ------------------------------------------------------------------------- *) diff --git a/EML/lib/frontend/inferencer.ml b/EML/lib/frontend/inferencer.ml index 3a3ac248..cc8b4d00 100644 --- a/EML/lib/frontend/inferencer.ml +++ b/EML/lib/frontend/inferencer.ml @@ -193,8 +193,11 @@ end = struct ;; let singleton key value = - let* key, value = mapping key value in - return (Map.singleton (module String) key value) + match value with + | TyVar v when String.equal v key -> return empty + | _ -> + let* key, value = mapping key value in + return (Map.singleton (module String) key value) ;; let find = Map.find diff --git a/EML/tests/infer.t b/EML/tests/infer.t index d8b64c54..122d6b71 100644 --- a/EML/tests/infer.t +++ b/EML/tests/infer.t @@ -67,6 +67,9 @@ SPDX-License-Identifier: LGPL-3.0-or-later val main: int $ make infer many_tests/typed/011mapcps.ml + val iter: (int -> unit) -> int list -> unit + val main: unit + val map: (int -> int) -> int list -> (int list -> int list) -> int list $ make infer many_tests/typed/012faccps.ml val fac: int -> (int -> int) -> int From 4875fdf008e541868f67a171eb267bbfa4c27902 Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Sun, 8 Mar 2026 22:51:36 +0300 Subject: [PATCH 51/74] refactor --- EML/lib/backend/ricsv/auxillary.ml | 16 +- EML/lib/middleend/anf.ml | 4 +- EML/lib/middleend/anf_pp.ml | 23 +-- EML/lib/{util/utils.ml => utils/helpers.ml} | 0 EML/lib/{util => utils}/monads.ml | 0 EML/lib/{util => utils}/monads.mli | 0 EML/lib/utils/pretty_printer.ml | 163 ++++++++++++++++++++ EML/tests/anf_tests.ml | 80 ++++++++++ EML/tests/inferencer_tests.ml | 119 ++++++++++++-- EML/tests/ll_tests.ml | 118 ++++++++++++++ 10 files changed, 473 insertions(+), 50 deletions(-) rename EML/lib/{util/utils.ml => utils/helpers.ml} (100%) rename EML/lib/{util => utils}/monads.ml (100%) rename EML/lib/{util => utils}/monads.mli (100%) create mode 100644 EML/lib/utils/pretty_printer.ml create mode 100644 EML/tests/ll_tests.ml diff --git a/EML/lib/backend/ricsv/auxillary.ml b/EML/lib/backend/ricsv/auxillary.ml index 987e6970..d4db632f 100644 --- a/EML/lib/backend/ricsv/auxillary.ml +++ b/EML/lib/backend/ricsv/auxillary.ml @@ -6,7 +6,6 @@ open Middleend.Anf open Architecture open Riscv_backend open Generator_state -open Frontend.Ast let is_caller_saved = function | A _ | T _ -> true @@ -51,20 +50,7 @@ let bin_op dst op left_reg right_reg : (instr list, string) result = | _ -> Error ("unsupported binary operator: " ^ op) ;; -let bin_oper_to_string = function - | Plus -> "+" - | Minus -> "-" - | Multiply -> "*" - | Division -> "/" - | And -> "&&" - | Or -> "||" - | Equal -> "=" - | NotEqual -> "<>" - | GreaterThan -> ">" - | LowerThan -> "<" - | GretestEqual -> ">=" - | LowestEqual -> "<=" -;; +let bin_oper_to_string = Utils.Pretty_printer.string_of_bin_op let vars_in_caller_saved_regs env = Base.Map.to_alist env diff --git a/EML/lib/middleend/anf.ml b/EML/lib/middleend/anf.ml index 0abfcaae..b6fee33e 100644 --- a/EML/lib/middleend/anf.ml +++ b/EML/lib/middleend/anf.ml @@ -8,8 +8,8 @@ open Frontend.Ast open Base -open Util.Monads.ANFMonad -open Util.Monads.ANFMonad.Syntax +open Utils.Monads.ANFMonad +open Utils.Monads.ANFMonad.Syntax type immediate = | ImmediateConst of const diff --git a/EML/lib/middleend/anf_pp.ml b/EML/lib/middleend/anf_pp.ml index 9fbaf68a..c3635e1c 100644 --- a/EML/lib/middleend/anf_pp.ml +++ b/EML/lib/middleend/anf_pp.ml @@ -3,6 +3,7 @@ open Stdlib.Format open Frontend open Ast open Anf +open Utils.Pretty_printer let pp_ty = Frontend.Ast.pp_ty @@ -20,28 +21,10 @@ and pp_complex_expr fmt = function | ComplexUnit -> fprintf fmt "()" | ComplexField (imm, i) -> fprintf fmt "%a.%d" pp_immediate imm i | ComplexBinOper (op, e1, e2) -> - let op_str = - match op with - | Plus -> "+" - | Minus -> "-" - | Multiply -> "*" - | Division -> "/" - | And -> "&&" - | Or -> "||" - | GretestEqual -> ">=" - | LowestEqual -> "<=" - | GreaterThan -> ">" - | LowerThan -> "<" - | Equal -> "=" - | NotEqual -> "<>" - in + let op_str = string_of_bin_op op in fprintf fmt "(%a %s %a)" pp_immediate e1 op_str pp_immediate e2 | ComplexUnarOper (op, e) -> - let op_str = - match op with - | Negative -> "-" - | Not -> "not" - in + let op_str = string_of_unary_op op in fprintf fmt "(%s %a)" op_str pp_immediate e | ComplexTuple (e1, e2, rest) -> let all_exprs = e1 :: e2 :: rest in diff --git a/EML/lib/util/utils.ml b/EML/lib/utils/helpers.ml similarity index 100% rename from EML/lib/util/utils.ml rename to EML/lib/utils/helpers.ml diff --git a/EML/lib/util/monads.ml b/EML/lib/utils/monads.ml similarity index 100% rename from EML/lib/util/monads.ml rename to EML/lib/utils/monads.ml diff --git a/EML/lib/util/monads.mli b/EML/lib/utils/monads.mli similarity index 100% rename from EML/lib/util/monads.mli rename to EML/lib/utils/monads.mli diff --git a/EML/lib/utils/pretty_printer.ml b/EML/lib/utils/pretty_printer.ml new file mode 100644 index 00000000..21a3ab6a --- /dev/null +++ b/EML/lib/utils/pretty_printer.ml @@ -0,0 +1,163 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Frontend.Ast + +let string_of_bin_op = function + | Plus -> "+" + | Minus -> "-" + | Multiply -> "*" + | Division -> "/" + | And -> "&&" + | Or -> "||" + | GretestEqual -> ">=" + | LowestEqual -> "<=" + | GreaterThan -> ">" + | LowerThan -> "<" + | Equal -> "=" + | NotEqual -> "<>" +;; + +let string_of_unary_op = function + | Negative -> "-" + | Not -> "not" +;; + +let pp_bin_op ppf op = Format.fprintf ppf "%s" (string_of_bin_op op) +let pp_unary_op ppf op = Format.fprintf ppf "%s" (string_of_unary_op op) + +let pp_const ppf = function + | ConstInt i -> Format.fprintf ppf "%d" i + | ConstBool b -> Format.fprintf ppf "%b" b + | ConstString s -> Format.fprintf ppf "%S" s + | ConstChar c -> Format.fprintf ppf "'%c'" c +;; + +let rec pp_pattern ppf = function + | PatVariable v -> Format.fprintf ppf "%s" v + | PatConst c -> pp_const ppf c + | PatAny -> Format.fprintf ppf "_" + | PatUnit -> Format.fprintf ppf "()" + | PatType (p, t) -> Format.fprintf ppf "(%a : %a)" pp_pattern p pp_ty t + | PatTuple (p1, p2, rest) -> + Format.fprintf + ppf + "(%a)" + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") pp_pattern) + (p1 :: p2 :: rest) + | PatList ps -> + Format.fprintf + ppf + "[%a]" + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf "; ") pp_pattern) + ps + | PatOption None -> Format.fprintf ppf "None" + | PatOption (Some p) -> Format.fprintf ppf "Some (%a)" pp_pattern p + | PatConstruct ("[]", None) -> Format.fprintf ppf "[]" + | PatConstruct ("::", Some (PatTuple (h, t, []))) -> + Format.fprintf ppf "%a::%a" pp_pattern h pp_pattern t + | PatConstruct (id, None) -> Format.fprintf ppf "%s" id + | PatConstruct (id, Some p) -> Format.fprintf ppf "%s (%a)" id pp_pattern p + +and pp_expr ppf = function + | ExpIdent v -> Format.fprintf ppf "%s" v + | ExpConst c -> pp_const ppf c + | ExpBranch (c, t, None) -> Format.fprintf ppf "if %a then %a" pp_expr c pp_expr t + | ExpBranch (c, t, Some e) -> + Format.fprintf ppf "if %a then %a else %a" pp_expr c pp_expr t pp_expr e + | ExpBinOper (op, l, r) -> Format.fprintf ppf "(%a %a %a)" pp_expr l pp_bin_op op pp_expr r + | ExpUnarOper (op, e) -> Format.fprintf ppf "(%a %a)" pp_unary_op op pp_expr e + | ExpTuple (e1, e2, rest) -> + Format.fprintf + ppf + "(%a)" + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") pp_expr) + (e1 :: e2 :: rest) + | ExpList es -> + Format.fprintf + ppf + "[%a]" + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf "; ") pp_expr) + es + | ExpLambda (p, ps, body) -> + Format.fprintf + ppf + "fun %a -> %a" + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf " ") pp_pattern) + (p :: ps) + pp_expr + body + | ExpTypeAnnotation (e, t) -> Format.fprintf ppf "(%a : %a)" pp_expr e pp_ty t + | ExpLet (is_rec, bind, more, body) -> + let kw = match is_rec with Rec -> "let rec" | NonRec -> "let" in + Format.fprintf ppf "%s %a in %a" kw pp_binds (bind, more) pp_expr body + | ExpApply _ as e -> + let f, args = flatten_apply e in + Format.fprintf + ppf + "%a %a" + pp_atomic_expr + f + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf " ") pp_atomic_expr) + args + | ExpOption None -> Format.fprintf ppf "None" + | ExpOption (Some e) -> Format.fprintf ppf "Some (%a)" pp_expr e + | ExpFunction (first, more) -> + Format.fprintf + ppf + "function %a" + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf " ") pp_case) + (first :: more) + | ExpMatch (e, first, more) -> + Format.fprintf + ppf + "match %a with %a" + pp_expr + e + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf " ") pp_case) + (first :: more) + | ExpConstruct ("()", None) -> Format.fprintf ppf "()" + | ExpConstruct ("[]", None) -> Format.fprintf ppf "[]" + | ExpConstruct ("::", Some (ExpTuple (h, t, []))) -> + Format.fprintf ppf "%a::%a" pp_expr h pp_expr t + | ExpConstruct (id, None) -> Format.fprintf ppf "%s" id + | ExpConstruct (id, Some e) -> Format.fprintf ppf "%s (%a)" id pp_expr e + +and pp_atomic_expr ppf = function + | (ExpIdent _ | ExpConst _ | ExpOption None | ExpConstruct (_, None)) as e -> pp_expr ppf e + | e -> Format.fprintf ppf "(%a)" pp_expr e + +and flatten_apply e = + let rec go f args = + match f with + | ExpApply (f', a) -> go f' (a :: args) + | _ -> f, args + in + go e [] + +and pp_case ppf (p, e) = Format.fprintf ppf "| %a -> %a" pp_pattern p pp_expr e + +and pp_bind ppf (p, e) = Format.fprintf ppf "%a = %a" pp_pattern p pp_expr e + +and pp_binds ppf (first, more) = + Format.fprintf ppf "%a" pp_bind first; + List.iter (fun b -> Format.fprintf ppf "@ and %a" pp_bind b) more +;; + +let pp_structure_item ppf = function + | SEval e -> Format.fprintf ppf "%a;;" pp_expr e + | SValue (is_rec, bind, more) -> + let kw = match is_rec with Rec -> "let rec" | NonRec -> "let" in + Format.fprintf ppf "%s %a;;" kw pp_binds (bind, more) +;; + +let pp_structure ppf (lst : structure list) = + Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.fprintf ppf "@\n") + pp_structure_item + ppf + lst +;; + +let pp_program = pp_structure diff --git a/EML/tests/anf_tests.ml b/EML/tests/anf_tests.ml index 113b2361..d38e5890 100644 --- a/EML/tests/anf_tests.ml +++ b/EML/tests/anf_tests.ml @@ -171,3 +171,83 @@ let%expect_test "pretty_printer_test2" = let main = fibo 10|}] ;; + +let%expect_test "anf_match_list_lowering_nil_cons_order" = + parse_and_anf + {| let rec h xs = + match xs with + | [] -> 0 + | hd::tl -> hd |}; + [%expect + {| +[(AnfValue (Rec, + ("h", 1, + (AnfExpr + (ComplexLambda ([(PatVariable "xs")], + (AnfLet (NonRec, "anf_t0", (ComplexImmediate (ImmediateVar "xs")), + (AnfLet (NonRec, "anf_t1", + (ComplexBinOper (Equal, (ImmediateVar "anf_t0"), + (ImmediateConst (ConstInt 0)))), + (AnfExpr + (ComplexBranch ((ImmediateVar "anf_t1"), + (AnfExpr + (ComplexImmediate (ImmediateConst (ConstInt 0)))), + (AnfLet (NonRec, "hd", + (ComplexField ((ImmediateVar "anf_t0"), 0)), + (AnfLet (NonRec, "tl", + (ComplexField ((ImmediateVar "anf_t0"), 1)), + (AnfExpr (ComplexImmediate (ImmediateVar "hd"))) + )) + )) + ))) + )) + )) + )))), + [])) + ]|}] +;; + +let%expect_test "anf_match_list_lowering_cons_nil_order" = + parse_and_anf + {| let rec h xs = + match xs with + | hd::tl -> hd + | [] -> 0 |}; + [%expect + {| +[(AnfValue (Rec, + ("h", 1, + (AnfExpr + (ComplexLambda ([(PatVariable "xs")], + (AnfLet (NonRec, "anf_t0", (ComplexImmediate (ImmediateVar "xs")), + (AnfLet (NonRec, "anf_t1", + (ComplexBinOper (Equal, (ImmediateVar "anf_t0"), + (ImmediateConst (ConstInt 0)))), + (AnfExpr + (ComplexBranch ((ImmediateVar "anf_t1"), + (AnfExpr + (ComplexImmediate (ImmediateConst (ConstInt 0)))), + (AnfLet (NonRec, "hd", + (ComplexField ((ImmediateVar "anf_t0"), 0)), + (AnfLet (NonRec, "tl", + (ComplexField ((ImmediateVar "anf_t0"), 1)), + (AnfExpr (ComplexImmediate (ImmediateVar "hd"))) + )) + )) + ))) + )) + )) + )))), + [])) + ]|}] +;; + +let%expect_test "anf_unsupported_match_shape_error" = + parse_and_anf + {| let rec h xs = + match xs with + | [] -> 0 + | [x] -> x + | hd::tl -> hd |}; + [%expect {|ANF error: Only list match with [] and h::tl is supported|}] +;; diff --git a/EML/tests/inferencer_tests.ml b/EML/tests/inferencer_tests.ml index 2b4f49e4..4064f68e 100644 --- a/EML/tests/inferencer_tests.ml +++ b/EML/tests/inferencer_tests.ml @@ -21,19 +21,6 @@ let pretty_printer_parse_and_infer s = | Error e -> Format.printf "Parsing error. %s\n" e ;; -(* let pretty_printer_parse_and_infer_simple s = - match parse s with - | Ok parsed -> - (match parsed with - | [ SEval expr ] -> - (match infer_simple_expression expr with - | Ok ty -> Format.printf "%a\n" pp_ty ty - | Error e -> Format.printf "Infer error. %a\n" pp_error e) - | _ -> - Format.printf - "Expected a single expression, but got a program with multiple structures.\n") - | Error e -> Format.printf "Parsing error. %s\n" e -;; *) let%expect_test "test_factorial" = pretty_printer_parse_and_infer @@ -50,3 +37,109 @@ let main = fac 4 |}; val fac: int -> int val main: int|}] ;; + +let%expect_test "test_primitives_and_data" = + pretty_printer_parse_and_infer + {| let a = 1 + 2 +let b = true && false +let c = if true then 1 else 2 +let d = (1, true, 'a') +let e = [1; 2; 3] +let f = Some 1 +let g = None +|}; + [%expect + {| + val a: int + val b: bool + val c: int + val d: (int * bool * char) + val e: int list + val f: int option + val g: t1 option|}] +;; + +let%expect_test "test_match_and_recursion" = + pretty_printer_parse_and_infer + {| let rec len xs = + match xs with + | [] -> 0 + | _::tl -> 1 + len tl + +let main = len [1;2;3] |}; + [%expect + {| + val len: int list -> int + val main: int|}] +;; + +let%expect_test "test_mutual_recursion" = + pretty_printer_parse_and_infer + {| let rec even n = + if n = 0 then true else odd (n - 1) +and odd n = + if n = 0 then false else even (n - 1) + +let main = even 4 |}; + [%expect + {| + val even: int -> bool + val main: bool + val odd: int -> bool|}] +;; + +let%expect_test "test_annotations" = + pretty_printer_parse_and_infer + {| let id = ((fun x -> x) : int -> int) +let main = id 10 |}; + [%expect + {| + val id: int -> int + val main: int|}] +;; + +let%expect_test "test_unbound_variable_error" = + pretty_printer_parse_and_infer {| let main = x |}; + [%expect {|Infer error. Unbound variable 'x'.|}] +;; + +let%expect_test "test_rec_rhs_error" = + pretty_printer_parse_and_infer {| let rec x = 1 |}; + [%expect + {|Infer error. Right-hand side error: Right-hand side of let rec must be a lambda expression.|}] +;; + +let%expect_test "test_list_constructors_and_match" = + pretty_printer_parse_and_infer + {| let rec head_or_zero xs = + match xs with + | [] -> 0 + | h::tl -> h + +let x = 1 :: [] +let main = head_or_zero x |}; + [%expect + {| + val head_or_zero: int list -> int + val main: int + val x: int list|}] +;; + +let%expect_test "test_pattern_option_and_list" = + pretty_printer_parse_and_infer + {| let f = function + | Some (h::tl) -> h +|}; + [%expect {|val f: t2 list option -> t2|}] +;; + +let%expect_test "test_annotation_mismatch_error" = + pretty_printer_parse_and_infer {| let x = (1 : bool) |}; + [%expect {|Infer error. Failed to unify types: int and bool.|}] +;; + +let%expect_test "test_unexpected_function_error_branch" = + pretty_printer_parse_and_infer {| let x = not 1 |}; + [%expect {|Infer error. Failed to unify types: int and bool.|}] +;; + diff --git a/EML/tests/ll_tests.ml b/EML/tests/ll_tests.ml new file mode 100644 index 00000000..8c484c85 --- /dev/null +++ b/EML/tests/ll_tests.ml @@ -0,0 +1,118 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open EML_lib +open Frontend +open Middleend +open Parser +open Utils.Pretty_printer + +let run str = + match parse str with + | Error error -> Format.printf "%s" error + | Ok ast -> + (match Ll.lambda_lifting_result ast with + | Error e -> Format.eprintf "%a\n" Ll.pp_error e + | Ok lst -> Format.printf "%a\n" pp_structure lst) +;; + +let%expect_test "simple ll" = + run + {| + let foo a = + let fn = (fun a b -> a + b) a in + fn 3 + ;; + |}; + [%expect {| + let lifted_0 = fun a b -> (a + b);; + let foo = fun a -> let fn = lifted_0 a in fn 3;; |}] +;; + +let%expect_test "let in ll" = + run + {| + let test1 x y = let test2 x y z = x, y, z in test2 x y;; + |}; + [%expect {| + let lifted_0 = fun x y z -> (x, y, z);; + let test1 = fun x y -> let test2 = lifted_0 in test2 x y;; |}] +;; + +let%expect_test "fac ll" = + run + {| + let fac n = + let rec fack n k = + if ( <= ) n 1 then k 1 + else fack (( - ) n 1) ((fun k n m -> k (( * ) m n)) k n) in + fack n (fun x -> x) + ;; + |}; + [%expect {| + let lifted_2 = fun k n m -> k (* m n);; + let lifted_1 = fun n k -> if <= n 1 then k 1 else lifted_0 (- n 1) (lifted_2 k n);; + let rec lifted_0 = lifted_1;; + let lifted_3 = fun x -> x;; + let fac = fun n -> lifted_0 n lifted_3;; |}] +;; + +let%expect_test "nested ll" = + run + {| + let outer x = + let mid x y = let inner x y z = ( + ) (( + ) x y) z in inner x y 3 in + mid x 4 + ;; + |}; + [%expect {| + let lifted_1 = fun x y z -> + (+ x y) z;; + let lifted_0 = fun x y -> let inner = lifted_1 in inner x y 3;; + let outer = fun x -> let mid = lifted_0 in mid x 4;; |}] +;; + +let%expect_test "if then else with ll" = + run + {| + let foo flag a b = if flag then (fun a x -> a + x) a else (fun b x -> b + x) b + |}; + [%expect {| + let lifted_0 = fun a x -> (a + x);; + let lifted_1 = fun b x -> (b + x);; + let foo = fun flag a b -> if flag then lifted_0 a else lifted_1 b;; |}] +;; + +let%expect_test "function ll" = + run + {| + let foo = function + | 0 -> 0 + | _ -> + let rec fn = function + | 1 -> 1 + | a -> fn (a - 1) + in + fn 3 + ;; + |}; + [%expect {| + let lifted_1 = function | 1 -> 1 | a -> lifted_0 ((a - 1));; + let rec lifted_0 = lifted_1;; + let foo = function | 0 -> 0 | _ -> lifted_0 3;; |}] +;; + +let%expect_test "match exp ll" = + run + {| + let f x = + match x with + | Some y -> (fun y z -> y + z) y + | None -> fun z -> z + ;; + |}; + [%expect {| + let lifted_0 = fun y z -> (y + z);; + let lifted_1 = fun z -> z;; + let f = fun x -> match x with | Some (y) -> lifted_0 y | None -> lifted_1;; |}] +;; From 34c66256225f6c78f41d8bd831e795b483625a2b Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Mon, 9 Mar 2026 01:49:21 +0300 Subject: [PATCH 52/74] add tests ll & cc --- EML/tests/anf_tests.ml | 10 --- EML/tests/cc_tests.ml | 143 +++++++++++++++++++++++++++++++++++++++++ EML/tests/ll_tests.ml | 109 +++++++++++++++++-------------- 3 files changed, 202 insertions(+), 60 deletions(-) create mode 100644 EML/tests/cc_tests.ml diff --git a/EML/tests/anf_tests.ml b/EML/tests/anf_tests.ml index d38e5890..ea67aed4 100644 --- a/EML/tests/anf_tests.ml +++ b/EML/tests/anf_tests.ml @@ -241,13 +241,3 @@ let%expect_test "anf_match_list_lowering_cons_nil_order" = [])) ]|}] ;; - -let%expect_test "anf_unsupported_match_shape_error" = - parse_and_anf - {| let rec h xs = - match xs with - | [] -> 0 - | [x] -> x - | hd::tl -> hd |}; - [%expect {|ANF error: Only list match with [] and h::tl is supported|}] -;; diff --git a/EML/tests/cc_tests.ml b/EML/tests/cc_tests.ml new file mode 100644 index 00000000..6e67947f --- /dev/null +++ b/EML/tests/cc_tests.ml @@ -0,0 +1,143 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open EML_lib +open Frontend +open Middleend +open Parser +open Utils.Pretty_printer + +let run str = + match parse str with + | Error error -> Format.printf "%s" error + | Ok ast -> + (match Cc.closure_conversion_result ast with + | Error e -> Format.eprintf "%a\n" Cc.pp_error e + | Ok lst -> Format.printf "%a\n" pp_structure lst) +;; + +let%expect_test "captured_lambda_in_nonrec_let" = + run + {| + let mk_adder a = + let add b = a + b in + add + ;; + |}; + [%expect {| let mk_adder = fun a -> let add = fun a b -> (a + b) in add a;; |}] +;; + +let%expect_test "top_level_nonrec_and_group" = + run + {| + let f x = + let id y = y + and addk z = x + z in + id 3 + addk 4 + ;; + |}; + [%expect {| + let f = fun x -> let id = fun y -> y + and addk = fun x z -> (x + z) in (id 3 + addk x 4);; |}] +;; + +let%expect_test "recursive_and_with_external_capture" = + run + {| + let solve bound = + let rec go n = if n <= bound then true else stop (n - 1) + and stop n = if n > bound then false else go (n - 1) in + go 20 + ;; + |}; + [%expect {| + let solve = fun bound -> let rec go = fun bound n -> if (n <= bound) then true else stop bound ((n - 1)) + and stop = fun bound n -> if (n > bound) then false else go bound ((n - 1)) in go bound 20;; |}] +;; + +let%expect_test "recursive_local_function_value_capture" = + run + {| + let run x = + let rec plus y = x + y + and call c = c + plus 7 in + plus 1 + call 2 + ;; + |}; + [%expect {| + let run = fun x -> let rec plus = fun x y -> (x + y) + and call = fun x c -> (c + plus x 7) in (plus x 1 + call x 2);; |}] +;; + +let%expect_test "nested_levels_of_captures" = + run + {| + let outer x = + let middle y = + let inner z = + let deepest w = x + y + z + w in + deepest 1 + in + inner 2 + in + middle 3 + ;; + |}; + [%expect {| let outer = fun x -> let middle = fun x y -> let inner = fun x y z -> let deepest = fun x y z w -> (((x + y) + z) + w) in deepest x y z 1 in inner x y 2 in middle x 3;; |}] +;; + +let%expect_test "if_with_lambda_in_both_branches" = + run + {| + let choose flag base alt = + if flag then (fun v -> base + v) else (fun v -> alt + v) + |}; + [%expect {| let choose = fun flag base alt -> if flag then (fun base v -> (base + v)) base else (fun alt v -> (alt + v)) alt;; |}] +;; + +let%expect_test "match_with_option_lambda_capture" = + run + {| + let mapper x = + match x with + | Some y -> fun z -> y + z + | None -> fun z -> z + ;; + |}; + [%expect {| let mapper = fun x -> match x with | Some (y) -> (fun y z -> (y + z)) y | None -> fun z -> z;; |}] +;; + +let%expect_test "sequence_and_tuple_pattern_capture" = + run + {| + let consume a b = + print_int a; + let use_pair (x, y) = a + b + x + y in + use_pair (3, 4) + ;; + |}; + [%expect {| let consume = fun a b -> let () = print_int a in let use_pair = fun a b (x, y) -> (((a + b) + x) + y) in use_pair a b ((3, 4));; |}] +;; + +let%expect_test "list_and_option_expressions" = + run + {| + let build seed = + let f x = Some (seed + x) in + [f 1; f 2] + ;; + |}; + [%expect {| let build = fun seed -> let f = fun seed x -> Some ((seed + x)) in f seed 1::f seed 2::[];; |}] +;; + +let%expect_test "type_annotation_inside_capture" = + run + {| + let annotated base = + let g x = ((base + x) : int) in + g 5 + ;; + |}; + [%expect {| let annotated = fun base -> let g = fun base x -> ((base + x) : int) in g base 5;; |}] +;; diff --git a/EML/tests/ll_tests.ml b/EML/tests/ll_tests.ml index 8c484c85..9b3d57b0 100644 --- a/EML/tests/ll_tests.ml +++ b/EML/tests/ll_tests.ml @@ -17,102 +17,111 @@ let run str = | Ok lst -> Format.printf "%a\n" pp_structure lst) ;; -let%expect_test "simple ll" = +let%expect_test "nonrecursive_multiple_lets" = run {| - let foo a = - let fn = (fun a b -> a + b) a in - fn 3 + let foo x = + let bar x y = x + y + and baz = 2 in + bar x 2 + baz ;; |}; [%expect {| - let lifted_0 = fun a b -> (a + b);; - let foo = fun a -> let fn = lifted_0 a in fn 3;; |}] + let lifted_0 = fun x y -> (x + y);; + let foo = fun x -> let bar = lifted_0 + and baz = 2 in (bar x 2 + baz);; |}] ;; -let%expect_test "let in ll" = +let%expect_test "nonrecursive_multiple_functions" = run {| - let test1 x y = let test2 x y z = x, y, z in test2 x y;; + let foo x = + let bar y = y + and baz x c = x + c in + bar 2 + baz x 5 + ;; |}; [%expect {| - let lifted_0 = fun x y z -> (x, y, z);; - let test1 = fun x y -> let test2 = lifted_0 in test2 x y;; |}] + let lifted_0 = fun y -> y;; + let lifted_1 = fun x c -> (x + c);; + let foo = fun x -> let bar = lifted_0 + and baz = lifted_1 in (bar 2 + baz x 5);; |}] ;; -let%expect_test "fac ll" = +let%expect_test "mutual_recursion_in_let_rec_and" = run {| - let fac n = - let rec fack n k = - if ( <= ) n 1 then k 1 - else fack (( - ) n 1) ((fun k n m -> k (( * ) m n)) k n) in - fack n (fun x -> x) + let foo = + let limit = 10 in + let rec is_small limit n = + if n <= limit then true else is_big limit (n - 1) + and is_big limit n = + if n > limit then false else is_small limit (n - 1) in + is_small limit 13 ;; |}; [%expect {| - let lifted_2 = fun k n m -> k (* m n);; - let lifted_1 = fun n k -> if <= n 1 then k 1 else lifted_0 (- n 1) (lifted_2 k n);; - let rec lifted_0 = lifted_1;; - let lifted_3 = fun x -> x;; - let fac = fun n -> lifted_0 n lifted_3;; |}] + let lifted_2 = fun limit n -> if (n <= limit) then true else lifted_1 limit ((n - 1));; + let lifted_3 = fun limit n -> if (n > limit) then false else lifted_0 limit ((n - 1));; + let rec lifted_0 = lifted_2 + and lifted_1 = lifted_3;; + let foo = let limit = 10 in lifted_0 limit 13;; |}] ;; -let%expect_test "nested ll" = +let%expect_test "recursive_local_bindings_use_renamed_functions" = run {| - let outer x = - let mid x y = let inner x y z = ( + ) (( + ) x y) z in inner x y 3 in - mid x 4 + let foo x = + let rec bar x y = x + y + and baz x c = c + bar x 5 in + bar x 5 + baz x 6 ;; |}; [%expect {| - let lifted_1 = fun x y z -> + (+ x y) z;; - let lifted_0 = fun x y -> let inner = lifted_1 in inner x y 3;; - let outer = fun x -> let mid = lifted_0 in mid x 4;; |}] + let lifted_2 = fun x y -> (x + y);; + let lifted_3 = fun x c -> (c + lifted_0 x 5);; + let rec lifted_0 = lifted_2 + and lifted_1 = lifted_3;; + let foo = fun x -> (lifted_0 x 5 + lifted_1 x 6);; |}] ;; -let%expect_test "if then else with ll" = +let%expect_test "sequence_with_local_lambda" = run {| - let foo flag a b = if flag then (fun a x -> a + x) a else (fun b x -> b + x) b + let g x = + print_int x; + (let h x y = x + y in h x 10) + ;; |}; [%expect {| - let lifted_0 = fun a x -> (a + x);; - let lifted_1 = fun b x -> (b + x);; - let foo = fun flag a b -> if flag then lifted_0 a else lifted_1 b;; |}] + let lifted_0 = fun x y -> (x + y);; + let g = fun x -> let () = print_int x in let h = lifted_0 in h x 10;; |}] ;; -let%expect_test "function ll" = +let%expect_test "tuple_pattern_lambda_lifting" = run {| - let foo = function - | 0 -> 0 - | _ -> - let rec fn = function - | 1 -> 1 - | a -> fn (a - 1) - in - fn 3 + let pair_sum a b = + let f a b (x, y) = a + b + x + y in + f a b (1, 2) ;; |}; [%expect {| - let lifted_1 = function | 1 -> 1 | a -> lifted_0 ((a - 1));; - let rec lifted_0 = lifted_1;; - let foo = function | 0 -> 0 | _ -> lifted_0 3;; |}] + let lifted_0 = fun a b (x, y) -> (((a + b) + x) + y);; + let pair_sum = fun a b -> let f = lifted_0 in f a b ((1, 2));; |}] ;; -let%expect_test "match exp ll" = +let%expect_test "match_with_option_and_inline_lambdas" = run {| let f x = match x with - | Some y -> (fun y z -> y + z) y - | None -> fun z -> z + | Some y -> (fun y z -> y - z) y + | None -> fun z -> z + 1 ;; |}; [%expect {| - let lifted_0 = fun y z -> (y + z);; - let lifted_1 = fun z -> z;; + let lifted_0 = fun y z -> (y - z);; + let lifted_1 = fun z -> (z + 1);; let f = fun x -> match x with | Some (y) -> lifted_0 y | None -> lifted_1;; |}] ;; From 5ceb247021d850b249b4512647092182b3cfaf11 Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Mon, 9 Mar 2026 02:17:55 +0300 Subject: [PATCH 53/74] add some tests --- EML/tests/infer.t | 19 ++++ EML/tests/inferencer_tests.ml | 63 +++++++++++- EML/tests/parser_tests.ml | 179 ++++++++++++++++++++++++++++++++++ 3 files changed, 257 insertions(+), 4 deletions(-) diff --git a/EML/tests/infer.t b/EML/tests/infer.t index 122d6b71..08e22066 100644 --- a/EML/tests/infer.t +++ b/EML/tests/infer.t @@ -95,3 +95,22 @@ SPDX-License-Identifier: LGPL-3.0-or-later val meven: int -> int val modd: int -> int val tie: (int -> int * int -> int) + + $ make infer many_tests/do_not_type/001.ml 2>&1 | sed -n '1p' + Inferencer error: Unbound variable 'fac'. + + $ make infer many_tests/do_not_type/002if.ml 2>&1 | sed -n '1p' + Inferencer error: Failed to unify types: int and bool. + + $ make infer many_tests/do_not_type/003occurs.ml 2>&1 | sed -n '1p' + Inferencer error: Occurs check failed. Type variable 't1' occurs inside t1 -> t3. + + $ make infer many_tests/do_not_type/004let_poly.ml 2>&1 | sed -n '1p' + Inferencer error: Failed to unify types: int and bool. + + $ make infer many_tests/do_not_type/015tuples.ml 2>&1 | sed -n '1p' + Inferencer error: Left-hand side error: Only variables are allowed on the left-hand side of let rec. + + $ make infer many_tests/do_not_type/099.ml 2>&1 | sed -n '1p' + Inferencer error: Left-hand side error: Only variables are allowed on the left-hand side of let rec. + diff --git a/EML/tests/inferencer_tests.ml b/EML/tests/inferencer_tests.ml index 4064f68e..f19dbd3b 100644 --- a/EML/tests/inferencer_tests.ml +++ b/EML/tests/inferencer_tests.ml @@ -98,10 +98,6 @@ let main = id 10 |}; val main: int|}] ;; -let%expect_test "test_unbound_variable_error" = - pretty_printer_parse_and_infer {| let main = x |}; - [%expect {|Infer error. Unbound variable 'x'.|}] -;; let%expect_test "test_rec_rhs_error" = pretty_printer_parse_and_infer {| let rec x = 1 |}; @@ -143,3 +139,62 @@ let%expect_test "test_unexpected_function_error_branch" = [%expect {|Infer error. Failed to unify types: int and bool.|}] ;; +let%expect_test "test_if_without_else_returns_unit_branch" = + pretty_printer_parse_and_infer {| let x = if true then 1 |}; + [%expect {|Infer error. Failed to unify types: int and unit.|}] +;; + +let%expect_test "test_unbound_var" = + pretty_printer_parse_and_infer "let f = x"; + [%expect {|Infer error. Unbound variable 'x'.|}] +;; + +let%expect_test "test_annotate" = + pretty_printer_parse_and_infer "let sum = fun (x : int) (y : int) -> x + y"; + [%expect {|val sum: int -> int -> int|}] +;; + +let%expect_test "test_annotate_fac" = + pretty_printer_parse_and_infer + "let rec fac = fun (n : int) (acc : int) -> if n < 2 then acc else fac (n-1) (acc * \ + n);;"; + [%expect {|val fac: int -> int -> int|}] +;; + +let%expect_test "test_program_1" = + pretty_printer_parse_and_infer + "let div = fun x y -> x / y \n\ + \ let sum = fun x y -> x + y\n\ + \ let res = fun x y z -> div x (sum y z)"; + [%expect + {| + val div: int -> int -> int + val res: int -> int -> int -> int + val sum: int -> int -> int|}] +;; + +let%expect_test "test_program_2" = + pretty_printer_parse_and_infer + "let square = fun x -> x * x\n\ + \ let result = square 10"; + [%expect {| + val result: int + val square: int -> int|}] +;; + +let%expect_test "test_annotate_error" = + pretty_printer_parse_and_infer "let sum (x : int) (y : string) = x + y"; + [%expect {|Infer error. Failed to unify types: string and int.|}] +;; + +let%expect_test "test_unification_types" = + pretty_printer_parse_and_infer "fun x -> x + true"; + [%expect {|Infer error. Failed to unify types: bool and int.|}] +;; + +let%expect_test "test_option_type_error" = + pretty_printer_parse_and_infer + "let f x = Some (x + 1) in let g y = Some (y && true) in f = g"; + [%expect {|Infer error. Failed to unify types: bool and int.|}] +;; + diff --git a/EML/tests/parser_tests.ml b/EML/tests/parser_tests.ml index 67e6349d..edee0c37 100644 --- a/EML/tests/parser_tests.ml +++ b/EML/tests/parser_tests.ml @@ -53,3 +53,182 @@ let main = fac 4 |}; ] |}] ;; + + +let%expect_test "factorial" = + parse_test "let rec factorial n = if n < 2 then 1 else n * factorial(n - 1);;"; + [%expect + {| + [(SValue (Rec, + ((PatVariable "factorial"), + (ExpLambda ((PatVariable "n"), [], + (ExpBranch ( + (ExpBinOper (LowerThan, (ExpIdent "n"), (ExpConst (ConstInt 2)))), + (ExpConst (ConstInt 1)), + (Some (ExpBinOper (Multiply, (ExpIdent "n"), + (ExpApply ((ExpIdent "factorial"), + (ExpBinOper (Minus, (ExpIdent "n"), + (ExpConst (ConstInt 1)))) + )) + ))) + )) + ))), + [])) + ] +|}] +;; + +let%expect_test "fibonacci" = + parse_test "let rec fibo n = if n < 2 then 1 else fibo(n - 1) + fibo(n - 2) ;;"; + [%expect + {| + [(SValue (Rec, + ((PatVariable "fibo"), + (ExpLambda ((PatVariable "n"), [], + (ExpBranch ( + (ExpBinOper (LowerThan, (ExpIdent "n"), (ExpConst (ConstInt 2)))), + (ExpConst (ConstInt 1)), + (Some (ExpBinOper (Plus, + (ExpApply ((ExpIdent "fibo"), + (ExpBinOper (Minus, (ExpIdent "n"), + (ExpConst (ConstInt 1)))) + )), + (ExpApply ((ExpIdent "fibo"), + (ExpBinOper (Minus, (ExpIdent "n"), + (ExpConst (ConstInt 2)))) + )) + ))) + )) + ))), + [])) + ] +|}] +;; + +let%expect_test "lambda_test" = + parse_test "let add x = fun y -> x + y;;"; + [%expect + {| + [(SValue (NonRec, + ((PatVariable "add"), + (ExpLambda ((PatVariable "x"), [], + (ExpLambda ((PatVariable "y"), [], + (ExpBinOper (Plus, (ExpIdent "x"), (ExpIdent "y"))))) + ))), + [])) + ] +|}] +;; + +let%expect_test "test_tuple" = + parse_test "let x = (1, 2, true) in x;;"; + [%expect + {| + [(SEval + (ExpLet (NonRec, + ((PatVariable "x"), + (ExpTuple ((ExpConst (ConstInt 1)), (ExpConst (ConstInt 2)), + [(ExpConst (ConstBool true))]))), + [], (ExpIdent "x")))) + ] +|}] +;; + +let%expect_test "test_list" = + parse_test "let arr = [1;2;true]"; + [%expect + {| + [(SValue (NonRec, + ((PatVariable "arr"), + (ExpConstruct ("::", + (Some (ExpTuple ((ExpConst (ConstInt 1)), + (ExpConstruct ("::", + (Some (ExpTuple ((ExpConst (ConstInt 2)), + (ExpConstruct ("::", + (Some (ExpTuple ((ExpConst (ConstBool true)), + (ExpConstruct ("[]", None)), + []))) + )), + []))) + )), + []))) + ))), + [])) + ] +|}] +;; + +let%expect_test "test_one_element_in_tuple" = + parse_test "let x = (666)"; + [%expect + {| + [(SValue (NonRec, ((PatVariable "x"), (ExpConst (ConstInt 666))), []))] +|}] +;; + +let%expect_test "test_sum_two_args" = + parse_test "let sum x y = x + y"; + [%expect + {| +[(SValue (NonRec, + ((PatVariable "sum"), + (ExpLambda ((PatVariable "x"), [(PatVariable "y")], + (ExpBinOper (Plus, (ExpIdent "x"), (ExpIdent "y")))))), + [])) + ] +|}] +;; + +let%expect_test "test_annotate_type_1" = + parse_test "let sum (x:int) (y:int) = x + y"; + [%expect + {| +[(SValue (NonRec, + ((PatVariable "sum"), + (ExpLambda ((PatType ((PatVariable "x"), (TyPrim "int"))), + [(PatType ((PatVariable "y"), (TyPrim "int")))], + (ExpBinOper (Plus, (ExpIdent "x"), (ExpIdent "y")))))), + [])) + ] +|}] +;; + +let%expect_test "test_annotate_type_2" = + parse_test "let (a : int list) = [] "; + [%expect + {| +[(SValue (NonRec, + ((PatType ((PatVariable "a"), (TyList (TyPrim "int")))), + (ExpConstruct ("[]", None))), + [])) + ] +|}] +;; + +let%expect_test "test_minus" = + parse_test "-1 -2 - (-1) -(3)"; + [%expect + {| +[(SEval + (ExpBinOper (Minus, + (ExpBinOper (Minus, + (ExpBinOper (Minus, + (ExpUnarOper (Negative, (ExpConst (ConstInt 1)))), + (ExpConst (ConstInt 2)))), + (ExpUnarOper (Negative, (ExpConst (ConstInt 1)))))), + (ExpConst (ConstInt 3))))) + ] + |}] +;; + +let%expect_test "test_unit" = + parse_test "let () = print_int 5"; + [%expect + {| +[(SValue (NonRec, + ((PatConstruct ("()", None)), + (ExpApply ((ExpIdent "print_int"), (ExpConst (ConstInt 5))))), + [])) + ] + |}] +;; \ No newline at end of file From b3fdcc8278f55d9f896f58a0368461688be15161 Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Mon, 9 Mar 2026 02:53:12 +0300 Subject: [PATCH 54/74] add some infer tests --- EML/tests/inferencer_tests.ml | 113 ++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/EML/tests/inferencer_tests.ml b/EML/tests/inferencer_tests.ml index f19dbd3b..4a77be5b 100644 --- a/EML/tests/inferencer_tests.ml +++ b/EML/tests/inferencer_tests.ml @@ -198,3 +198,116 @@ let%expect_test "test_option_type_error" = [%expect {|Infer error. Failed to unify types: bool and int.|}] ;; + +let%expect_test "test_polymorphic_identity" = + pretty_printer_parse_and_infer + {| let id x = x +let a = id 1 +let b = id true |}; + [%expect + {| + val a: int + val b: bool + val id: t0 -> t0|}] +;; + +let%expect_test "test_polymorphic_tuple_use" = + pretty_printer_parse_and_infer + {| let id x = x +let pair = (id 1, id true) |}; + [%expect + {| + val id: t0 -> t0 + val pair: (int * bool)|}] +;; + +let%expect_test "test_higher_order_function" = + pretty_printer_parse_and_infer + {| let apply f x = f x +let inc x = x + 1 +let main = apply inc 10 |}; + [%expect + {| + val apply: (t1 -> t2) -> t1 -> t2 + val inc: int -> int + val main: int|}] +;; + +let%expect_test "test_lambda_returning_lambda" = + pretty_printer_parse_and_infer + {| let add x = fun y -> x + y +let f = add 5 |}; + [%expect + {| + val add: int -> int -> int + val f: int -> int|}] +;; + +let%expect_test "test_partial_application" = + pretty_printer_parse_and_infer + {| let add x y = x + y +let inc = add 1 +let main = inc 10 |}; + [%expect + {| + val add: int -> int -> int + val inc: int -> int + val main: int|}] +;; + +let%expect_test "test_tuple_pattern" = + pretty_printer_parse_and_infer + {| let sum_pair (x, y) = x + y +let main = sum_pair (3, 4) |}; + [%expect + {| + val main: int + val sum_pair: (int * int) -> int|}] +;; + +let%expect_test "test_nested_let_scope" = + pretty_printer_parse_and_infer + {| let x = 10 +let f y = + let x = y + 1 in + x + y +let main = f 5 |}; + [%expect + {| + val f: int -> int + val main: int + val x: int|}] +;; + +let%expect_test "test_function_composition" = + pretty_printer_parse_and_infer + {| let compose f g x = f (g x) +let inc x = x + 1 +let double x = x * 2 +let main = compose inc double 10 |}; + [%expect + {| + val compose: (t3 -> t4) -> (t2 -> t3) -> t2 -> t4 + val double: int -> int + val inc: int -> int + val main: int|}] +;; + +let%expect_test "test_occurs_check_error" = + pretty_printer_parse_and_infer + {| fun x -> x x |}; + [%expect + {|Infer error. Occurs check failed. Type variable 't0' occurs inside t0 -> t1.|}] +;; + +let%expect_test "test_list_polymorphism" = + pretty_printer_parse_and_infer + {| let singleton x = [x] +let a = singleton 1 +let b = singleton true |}; + [%expect + {| + val a: int list + val b: bool list + val singleton: t0 -> t0 list|}] +;; \ No newline at end of file From fd3b9e633664b419a01cca81c5595ffb2954c30a Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Mon, 9 Mar 2026 03:16:27 +0300 Subject: [PATCH 55/74] add infer tests 2 --- EML/tests/inferencer_tests.ml | 133 ++++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) diff --git a/EML/tests/inferencer_tests.ml b/EML/tests/inferencer_tests.ml index 4a77be5b..16d917e1 100644 --- a/EML/tests/inferencer_tests.ml +++ b/EML/tests/inferencer_tests.ml @@ -21,6 +21,12 @@ let pretty_printer_parse_and_infer s = | Error e -> Format.printf "Parsing error. %s\n" e ;; +let pretty_printer_infer_simple_expression expr = + match infer_simple_expression expr with + | Ok ty -> Format.printf "%a\n" pp_ty ty + | Error e -> Format.printf "Infer error. %a\n" pp_error e +;; + let%expect_test "test_factorial" = pretty_printer_parse_and_infer @@ -310,4 +316,131 @@ let b = singleton true |}; val a: int list val b: bool list val singleton: t0 -> t0 list|}] +;; + +let%expect_test "test_nonrec_tuple_pattern_binding" = + pretty_printer_parse_and_infer + {| let (x, y) = (1, true) +let main = x |}; + [%expect + {| + val main: int + val x: int + val y: bool|}] +;; + +let%expect_test "test_match_option_none_some" = + pretty_printer_parse_and_infer + {| let unwrap_or_zero o = + match o with + | None -> 0 + | Some x -> x +let main = unwrap_or_zero None |}; + [%expect + {| + val main: int + val unwrap_or_zero: int option -> int|}] +;; + +let%expect_test "test_match_list_literal_pattern" = + pretty_printer_parse_and_infer + {| let sum2 xs = + match xs with + | [a; b] -> a + b + | _ -> 0 +let main = sum2 [1; 2] |}; + [%expect + {| + val main: int + val sum2: int list -> int|}] +;; + +let%expect_test "test_lambda_wildcard_and_unit_pattern" = + pretty_printer_parse_and_infer + {| let ignore_first _ y = y +let run () = ignore_first 1 42 +let main = run () |}; + [%expect + {| + val ignore_first: t0 -> t1 -> t1 + val main: int + val run: unit -> int|}] +;; + + +let%expect_test "test_rec_lhs_not_variable_error" = + pretty_printer_parse_and_infer {| let rec Some x = Some 1 |}; + [%expect {|Infer error. Left-hand side error: Only variables are allowed on the left-hand side of let rec.|}] +;; + +let%expect_test "test_expr_let_rec_in" = + pretty_printer_parse_and_infer + {| let main = + let rec fact n = if n = 0 then 1 else n * fact (n - 1) in + fact 4 |}; + [%expect + {| + val main: int|}] +;; + +let%expect_test "test_expr_let_rec_and_in" = + pretty_printer_infer_simple_expression + (ExpLet + ( Rec + , (PatVariable "f", ExpLambda (PatVariable "x", [], ExpIdent "x")) + , [ PatVariable "g", ExpLambda (PatVariable "y", [], ExpIdent "y") ] + , ExpApply (ExpIdent "g", ExpConst (ConstInt 1)) )); + [%expect {|int|}] +;; + + +let%expect_test "test_string_const_and_const_pattern" = + pretty_printer_parse_and_infer + {| let is_hi s = + match s with + | "hi" -> true + | _ -> false +let main = is_hi "hello" |}; + [%expect + {| + val is_hi: string -> bool + val main: bool|}] +;; + +let%expect_test "test_ast_exp_list_empty" = + pretty_printer_infer_simple_expression (ExpList []); + [%expect {|t0 list|}] +;; + +let%expect_test "test_ast_exp_list_non_empty" = + pretty_printer_infer_simple_expression + (ExpList [ ExpConst (ConstInt 1); ExpConst (ConstInt 2) ]); + [%expect {|int list|}] +;; + +let%expect_test "test_ast_exp_option_none" = + pretty_printer_infer_simple_expression (ExpOption None); + [%expect {|t0 option|}] +;; + +let%expect_test "test_ast_exp_option_some" = + pretty_printer_infer_simple_expression (ExpOption (Some (ExpConst (ConstInt 1)))); + [%expect {|int option|}] +;; + +let%expect_test "test_ast_pattern_option_lambda" = + pretty_printer_infer_simple_expression + (ExpLambda (PatOption (Some (PatVariable "x")), [], ExpIdent "x")); + [%expect {|t0 option -> t0|}] +;; + +let%expect_test "test_ast_pattern_list_lambda" = + pretty_printer_infer_simple_expression + (ExpLambda (PatList [ PatVariable "x"; PatVariable "y" ], [], ExpIdent "x")); + [%expect {|t1 list -> t1|}] +;; + +let%expect_test "test_ast_pattern_unit_lambda" = + pretty_printer_infer_simple_expression (ExpLambda (PatUnit, [], ExpConst (ConstInt 1))); + [%expect {|unit -> int|}] ;; \ No newline at end of file From f15711cc2b09e3a478584c3fc08463bb5350ae01 Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Mon, 9 Mar 2026 18:05:46 +0300 Subject: [PATCH 56/74] ref --- EML/lib/backend/ricsv/analysis.ml | 61 ++--- EML/lib/backend/ricsv/architecture.ml | 20 +- EML/lib/backend/ricsv/auxillary.ml | 64 +++-- EML/lib/backend/ricsv/generator.ml | 355 ++++++++++++++++---------- 4 files changed, 300 insertions(+), 200 deletions(-) diff --git a/EML/lib/backend/ricsv/analysis.ml b/EML/lib/backend/ricsv/analysis.ml index b021bdb3..c3b68500 100644 --- a/EML/lib/backend/ricsv/analysis.ml +++ b/EML/lib/backend/ricsv/analysis.ml @@ -48,9 +48,9 @@ and slots_in_cexpr = function +8 for spill_caller_saved_vars_to_frame at start of every invocation (can spill a0-a7). +N when nargs >= 2: margin so partial stays above argv (confirmed: overwrite → eml_applyN gets c=0x3). *) let args = first :: second :: rest in - let nargs = List.length args in - let extra = if nargs >= 2 then 12 else 0 in - 1 + 8 + nargs + extra + sum_by slots_in_imm args + let argument_count = List.length args in + let additional_margin = if argument_count >= 2 then 12 else 0 in + 1 + 8 + argument_count + additional_margin + sum_by slots_in_imm args | ComplexOption None -> 0 | ComplexOption (Some imm) -> slots_in_imm imm | ComplexLambda (_, body) -> slots_in_anf body @@ -71,11 +71,11 @@ let rec max_stack_args_cexpr = function | ComplexField (imm, _) -> max_stack_args_imm imm | ComplexList imm_list -> max_by max_stack_args_imm imm_list | ComplexApp (_first, second, rest) -> - let nargs = 1 + List.length rest in + let argument_count = 1 + List.length rest in (* Reserve enough for largest call: eml_applyN needs nargs words; direct needs max(0, nargs-8). *) - let need = nargs in - let in_args = max_by max_stack_args_imm (second :: rest) in - max need in_args + let required_stack_words = argument_count in + let max_nested_argument_pressure = max_by max_stack_args_imm (second :: rest) in + max required_stack_words max_nested_argument_pressure | ComplexOption None -> 0 | ComplexOption (Some imm) -> max_stack_args_imm imm | ComplexLambda (_, body) -> max_stack_args_anf body @@ -104,9 +104,9 @@ let rec max_create_tuple_array_cexpr = function | ComplexField (imm, _) -> max_create_tuple_array_imm imm | ComplexList imm_list -> (* Each cons adds 16 bytes; they accumulate along the list build *) - let per_cons = 2 * word_size in - let from_elts = sum_by max_create_tuple_array_imm imm_list in - (per_cons * List.length imm_list) + from_elts + let bytes_per_cons_cell = 2 * word_size in + let bytes_from_elements = sum_by max_create_tuple_array_imm imm_list in + (bytes_per_cons_cell * List.length imm_list) + bytes_from_elements | ComplexApp (_f, second, rest) -> max_by max_create_tuple_array_imm (second :: rest) | ComplexOption None -> 0 @@ -135,18 +135,20 @@ let rec params_of_anf = function | _ -> None) pats in - let rest, inner = params_of_anf body in - imms @ rest, inner + let remaining_parameters, inner_body = params_of_anf body in + imms @ remaining_parameters, inner_body | other -> [], other ;; let arity_map_of_program (program : anf_program) = - let add_fun_arity map (id, arity, _) = Base.Map.set map ~key:id ~data:arity in + let add_function_arity map (function_identifier, arity, _) = + Base.Map.set map ~key:function_identifier ~data:arity + in List.fold_left (fun map -> function - | AnfValue (_, (fid, arity, _), and_binds) -> - let map = Base.Map.set map ~key:fid ~data:arity in - List.fold_left add_fun_arity map and_binds + | AnfValue (_, (function_identifier, arity, _), and_binds) -> + let map = Base.Map.set map ~key:function_identifier ~data:arity in + List.fold_left add_function_arity map and_binds | _ -> map) (Base.Map.empty (module Base.String)) program @@ -154,7 +156,7 @@ let arity_map_of_program (program : anf_program) = let analyze (program : anf_program) = let arity_map = arity_map_of_program program in - let raw = + let analyzed_functions_raw = List.filter_map (function | AnfValue (_, (func_name, arity, body), _) -> @@ -170,13 +172,14 @@ let analyze (program : anf_program) = | AnfEval _ -> None) program in - let counts = ref (Base.Map.empty (module Base.String)) in + let generated_name_counts = ref (Base.Map.empty (module Base.String)) in let mangle_reserved name = if String.equal name "_start" then "eml_start" else name in - let asm_name name = + let build_asm_name name = let base = mangle_reserved name in - let n = Base.Map.find !counts name |> Option.value ~default:0 in - counts := Base.Map.set !counts ~key:name ~data:(n + 1); - if n = 0 then base else base ^ "_" ^ Int.to_string n + let duplicate_index = Base.Map.find !generated_name_counts name |> Option.value ~default:0 in + generated_name_counts := + Base.Map.set !generated_name_counts ~key:name ~data:(duplicate_index + 1); + if duplicate_index = 0 then base else base ^ "_" ^ Int.to_string duplicate_index in let functions = List.map @@ -188,14 +191,14 @@ let analyze (program : anf_program) = , max_stack_args , max_create_tuple_array_bytes ) -> { func_name - ; asm_name = asm_name func_name + ; asm_name = build_asm_name func_name ; params ; body ; slots_count ; max_stack_args ; max_create_tuple_array_bytes }) - raw + analyzed_functions_raw in let has_main = List.exists (fun fn -> String.equal fn.func_name "main") functions in let functions = @@ -217,17 +220,17 @@ let analyze (program : anf_program) = let arity_map = if has_main then arity_map else Base.Map.set arity_map ~key:"main" ~data:0 in - let resolver func_index var_name = - let rec find = function + let resolver current_function_index variable_name = + let rec find_visible_function = function | i when i < 0 -> None | i -> (match Base.List.nth functions i with | None -> None - | Some fn when String.equal fn.func_name var_name -> + | Some fn when String.equal fn.func_name variable_name -> Some (fn.asm_name, List.length fn.params) - | Some _ -> find (i - 1)) + | Some _ -> find_visible_function (i - 1)) in - find (func_index - 1) + find_visible_function (current_function_index - 1) in { arity_map; functions; resolve = resolver } ;; diff --git a/EML/lib/backend/ricsv/architecture.ml b/EML/lib/backend/ricsv/architecture.ml index 0956b77b..b13ed18d 100644 --- a/EML/lib/backend/ricsv/architecture.ml +++ b/EML/lib/backend/ricsv/architecture.ml @@ -77,7 +77,7 @@ module Riscv_backend = struct | Ret -> fprintf ppf "ret" ;; - let tag_int n = 1 + (n lsl 1) + let tag_int n = 2 * n + 1 let fp = S 0 let sp = SP let ra = RA @@ -129,18 +129,18 @@ module Riscv_backend = struct (* addi/sd/ld immediate is 12-bit signed: -2048 .. 2047 *) let max_addi_imm = 2048 - let rec sub_sp n = - if n <= 0 + let rec sub_sp bytes_to_subtract = + if bytes_to_subtract <= 0 then [] - else if n <= max_addi_imm - then addi sp sp (-n) - else addi sp sp (-max_addi_imm) @ sub_sp (n - max_addi_imm) + else if bytes_to_subtract <= max_addi_imm + then addi sp sp (-bytes_to_subtract) + else addi sp sp (-max_addi_imm) @ sub_sp (bytes_to_subtract - max_addi_imm) ;; - let addi_or_li_add rd rs imm = - if imm >= -max_addi_imm && imm <= max_addi_imm - 1 - then addi rd rs imm - else li t0 imm @ add rd rs t0 + let addi_or_li_add destination_register source_register immediate_value = + if immediate_value >= -max_addi_imm && immediate_value <= max_addi_imm - 1 + then addi destination_register source_register immediate_value + else li t0 immediate_value @ add destination_register source_register t0 ;; (* Store at sp+offset; use direct sd when offset in 12-bit range *) diff --git a/EML/lib/backend/ricsv/auxillary.ml b/EML/lib/backend/ricsv/auxillary.ml index d4db632f..54ef2551 100644 --- a/EML/lib/backend/ricsv/auxillary.ml +++ b/EML/lib/backend/ricsv/auxillary.ml @@ -52,46 +52,60 @@ let bin_op dst op left_reg right_reg : (instr list, string) result = let bin_oper_to_string = Utils.Pretty_printer.string_of_bin_op -let vars_in_caller_saved_regs env = - Base.Map.to_alist env - |> List.filter_map (fun (name, loc) -> - match loc with - | Loc_reg r when is_caller_saved r -> Some (name, r) +let vars_in_caller_saved_regs environment = + Base.Map.to_alist environment + |> List.filter_map (fun (variable_name, variable_location) -> + match variable_location with + | Loc_reg register when is_caller_saved register -> Some (variable_name, register) | _ -> None) ;; -let indices_of_args_to_spill state exps = - let rewrites_result_reg = function +let indices_of_args_to_spill generation_state immediate_arguments = + let argument_overwrites_result_register = function | ImmediateConst _ -> false - | ImmediateVar id -> Base.Map.mem state.arity_map id + | ImmediateVar function_name -> Base.Map.mem generation_state.arity_map function_name in - Base.List.foldi exps ~init:[] ~f:(fun i acc arg -> - if rewrites_result_reg arg then i :: acc else acc) + Base.List.foldi immediate_arguments ~init:[] ~f:(fun argument_index spilled_indices immediate_argument -> + if argument_overwrites_result_register immediate_argument + then argument_index :: spilled_indices + else spilled_indices) |> List.rev ;; type call_style = | Nullary of string - | Curry_chain of - { fname : string + | CurryChain of + { function_name : string ; arity : int - ; first_args : immediate list - ; rest_args : immediate list + ; initial_arguments : immediate list + ; remaining_arguments : immediate list } | Direct of - { fname : string - ; args : immediate list + { function_name : string + ; arguments : immediate list } - | Via_apply_nargs of - { fname : string - ; nargs : int - ; args : immediate list + | ViaApplyNargs of + { function_name : string + ; argument_count : int + ; arguments : immediate list } -let classify_call ~nargs ~callee_arity_opt ~fname ~args : call_style = +let classify_call + ~argument_count + ~callee_arity_opt + ~function_name + ~arguments + : call_style + = match callee_arity_opt with - | Some 0 when nargs = 1 -> Nullary fname - | Some arity when nargs > arity -> Curry_chain { fname; arity; first_args = Base.List.take args arity; rest_args = Base.List.drop args arity } - | Some arity when nargs = arity -> Direct { fname; args } - | _ -> Via_apply_nargs { fname; nargs; args } + | Some 0 when argument_count = 1 -> Nullary function_name + | Some arity when argument_count > arity -> + CurryChain + { function_name + ; arity + ; initial_arguments = Base.List.take arguments arity + ; remaining_arguments = Base.List.drop arguments arity + } + | Some arity when argument_count = arity -> Direct { function_name; arguments } + | _ -> ViaApplyNargs { function_name; argument_count; arguments } ;; diff --git a/EML/lib/backend/ricsv/generator.ml b/EML/lib/backend/ricsv/generator.ml index 1131dfc6..705fc4ff 100644 --- a/EML/lib/backend/ricsv/generator.ml +++ b/EML/lib/backend/ricsv/generator.ml @@ -17,20 +17,20 @@ let alloc_frame_slot = return (fp, -state.frame_offset) ;; -let store_reg_into_frame reg = +let store_reg_into_frame source_register = let* slot = alloc_frame_slot in - let* () = append (sd reg slot) in + let* () = append (sd source_register slot) in return (Loc_mem slot) ;; -let load_into_reg dst_reg loc = - let instrs = - match loc with - | Loc_reg src_reg when equal_reg src_reg dst_reg -> [] - | Loc_reg src_reg -> mv dst_reg src_reg - | Loc_mem ofs -> ld dst_reg ofs +let load_into_reg destination_register source_location = + let instructions = + match source_location with + | Loc_reg source_register when equal_reg source_register destination_register -> [] + | Loc_reg source_register -> mv destination_register source_register + | Loc_mem source_offset -> ld destination_register source_offset in - let* () = append instrs in + let* () = append instructions in return () ;; @@ -41,52 +41,62 @@ let spill_params_to_frame params_reg = let* () = acc in match param with | ImmediateVar name -> - let reg = List.nth arg_regs index in - let* slot = store_reg_into_frame reg in + let argument_register = List.nth arg_regs index in + let* slot = store_reg_into_frame argument_register in modify_env (fun env -> Base.Map.set env ~key:name ~data:slot) | _ -> return ()) ;; let spill_caller_saved_vars_to_frame = let* env = get_env in - let vars = vars_in_caller_saved_regs env in - let frame_bytes = List.length vars * word_size in + let caller_saved_variables = vars_in_caller_saved_regs env in + let frame_bytes = List.length caller_saved_variables * word_size in let* () = if frame_bytes > 0 then append (addi sp sp (-frame_bytes)) else return () in - let rec spill env = function - | [] -> return env - | (name, r) :: rest -> - let* new_loc = store_reg_into_frame r in - spill (Base.Map.set env ~key:name ~data:new_loc) rest + let rec spill environment = function + | [] -> return environment + | (variable_name, register) :: remaining_variables -> + let* new_location = store_reg_into_frame register in + spill + (Base.Map.set environment ~key:variable_name ~data:new_location) + remaining_variables in - let* new_env = spill env vars in - set_env new_env + let* updated_environment = spill env caller_saved_variables in + set_env updated_environment ;; -let evacuate_reg dst = - let is_reg_used env r = - Base.Map.exists env ~f:(fun loc -> - match loc with - | Loc_reg r' -> equal_reg r r' +let evacuate_reg destination_register = + let is_register_used environment register = + Base.Map.exists environment ~f:(fun location -> + match location with + | Loc_reg mapped_register -> equal_reg register mapped_register | Loc_mem _ -> false) in - let rewrite_loc_in_env env from_reg to_loc = - Base.Map.map env ~f:(function - | Loc_reg r when equal_reg r from_reg -> to_loc - | loc -> loc) + let rewrite_location_in_environment environment from_register to_location = + Base.Map.map environment ~f:(function + | Loc_reg mapped_register when equal_reg mapped_register from_register -> to_location + | location -> location) in let* env = get_env in - if not (is_reg_used env dst) + if not (is_register_used env destination_register) then return () else ( - match List.find_opt (fun r -> not (is_reg_used env r)) candidate_regs_for_spill with - | Some new_reg -> - let* () = append (mv new_reg dst) in - let new_env = rewrite_loc_in_env env dst (Loc_reg new_reg) in - set_env new_env + match + List.find_opt + (fun candidate_register -> not (is_register_used env candidate_register)) + candidate_regs_for_spill + with + | Some free_register -> + let* () = append (mv free_register destination_register) in + let updated_environment = + rewrite_location_in_environment env destination_register (Loc_reg free_register) + in + set_env updated_environment | None -> - let* new_loc = store_reg_into_frame dst in - let new_env = rewrite_loc_in_env env dst new_loc in - set_env new_env) + let* spilled_location = store_reg_into_frame destination_register in + let updated_environment = + rewrite_location_in_environment env destination_register spilled_location + in + set_env updated_environment) ;; let resolve_call_symbol name = @@ -132,126 +142,197 @@ let copy_result_to dst = if equal_reg dst result_reg then return () else append (mv dst result_reg) ;; -let spill_dangerous_args state exps = - let dangerous_idxs = indices_of_args_to_spill state exps in - let spill_slots = List.length dangerous_idxs * word_size in +let spill_dangerous_args state arguments = + let dangerous_argument_indices = indices_of_args_to_spill state arguments in + let spill_slots = List.length dangerous_argument_indices * word_size in let* () = if spill_slots > 0 then append (addi sp sp (-spill_slots)) else return () in Base.List.foldi - exps + arguments ~init:(return (Base.Map.empty (module Base.Int))) - ~f:(fun i acc arg -> - let* spilled = acc in - if List.mem i dangerous_idxs + ~f:(fun argument_index acc argument -> + let* spilled_locations_by_index = acc in + if List.mem argument_index dangerous_argument_indices then - let* () = gen_imm result_reg arg in - let* loc = store_reg_into_frame result_reg in - return (Base.Map.add_exn spilled ~key:i ~data:loc) - else return spilled) + let* () = gen_imm result_reg argument in + let* spilled_location = store_reg_into_frame result_reg in + return + (Base.Map.add_exn + spilled_locations_by_index + ~key:argument_index + ~data:spilled_location) + else return spilled_locations_by_index) ;; -let load_exps_into_regs spilled_locs arg_regs exps = - let n = min (List.length exps) (List.length arg_regs) in - Base.List.foldi (Base.List.take exps n) ~init:(return ()) ~f:(fun i acc arg -> +let load_immediates_into_registers spilled_locations argument_registers immediate_arguments = + let immediate_count_to_load = + min (List.length immediate_arguments) (List.length argument_registers) + in + Base.List.foldi + (Base.List.take immediate_arguments immediate_count_to_load) + ~init:(return ()) + ~f:(fun argument_index acc immediate_argument -> let* () = acc in - let reg = List.nth arg_regs i in - match Base.Map.find spilled_locs i with - | Some loc -> load_into_reg reg loc - | None -> gen_imm reg arg) + let destination_register = List.nth argument_registers argument_index in + match Base.Map.find spilled_locations argument_index with + | Some spilled_location -> load_into_reg destination_register spilled_location + | None -> gen_imm destination_register immediate_argument) ;; -let emit_args_to_stack spilled args = - Base.List.foldi args ~init:(return ()) ~f:(fun i acc arg -> +let emit_arguments_to_stack spilled_arguments arguments = + Base.List.foldi arguments ~init:(return ()) ~f:(fun argument_index acc argument -> let* () = acc in - let offset = i * word_size in + let stack_offset = argument_index * word_size in let* () = - match Base.Map.find spilled i with - | Some loc -> load_into_reg t0 loc - | None -> gen_imm t0 arg + match Base.Map.find spilled_arguments argument_index with + | Some spilled_location -> load_into_reg t0 spilled_location + | None -> gen_imm t0 argument in - append (sd t0 (sp, offset))) + append (sd t0 (sp, stack_offset))) ;; let push_stack_args stack_args = - let n = List.length stack_args in - if n = 0 + let stack_argument_count = List.length stack_args in + if stack_argument_count = 0 then return 0 else ( - let stack_bytes = n * word_size in + let stack_bytes = stack_argument_count * word_size in let* () = append (addi sp sp (-stack_bytes)) in - let no_spills = Base.Map.empty (module Base.Int) in - let* () = emit_args_to_stack no_spills stack_args in + let no_spilled_arguments = Base.Map.empty (module Base.Int) in + let* () = emit_arguments_to_stack no_spilled_arguments stack_args in return stack_bytes) ;; -let gen_call_with_regs dst regs args spilled symbol = - let* () = load_exps_into_regs spilled regs args in - let stack_args = Base.List.drop args (List.length regs) in - let* reserved = push_stack_args stack_args in - let* () = append (call symbol) in - let* () = copy_result_to dst in - if reserved > 0 then append (addi sp sp reserved) else return () +let gen_call_with_regs + destination_register + argument_registers + call_arguments + spilled_arguments + function_symbol + = + let* () = + load_immediates_into_registers + spilled_arguments + argument_registers + call_arguments + in + let stack_arguments = Base.List.drop call_arguments (List.length argument_registers) in + let* reserved_stack_bytes = push_stack_args stack_arguments in + let* () = append (call function_symbol) in + let* () = copy_result_to destination_register in + if reserved_stack_bytes > 0 + then append (addi sp sp reserved_stack_bytes) + else return () ;; (* let foo = ... in foo () *) -let gen_nullary dst fname = - let* sym = resolve_call_symbol fname in - let* () = append (call sym) in - copy_result_to dst +let gen_nullary destination_register function_name = + let* resolved_symbol = resolve_call_symbol function_name in + let* () = append (call resolved_symbol) in + copy_result_to destination_register ;; -let gen_direct_call dst fname args spilled = - let* sym = resolve_call_symbol fname in - gen_call_with_regs dst arg_regs args spilled sym +let gen_direct_call destination_register function_name call_arguments spilled_arguments = + let* resolved_symbol = resolve_call_symbol function_name in + gen_call_with_regs + destination_register + arg_regs + call_arguments + spilled_arguments + resolved_symbol ;; -let gen_via_apply_nargs dst fname nargs args spilled = - let argv_bytes = nargs * word_size in - let* () = gen_imm a0 (ImmediateVar fname) in - let* () = append (li a1 nargs) in +let gen_via_apply_nargs + destination_register + function_name + argument_count + call_arguments + spilled_arguments + = + let argv_bytes = argument_count * word_size in + let* () = gen_imm a0 (ImmediateVar function_name) in + let* () = append (li a1 argument_count) in let* () = append (addi sp sp (-argv_bytes)) in - let* () = emit_args_to_stack spilled args in + let* () = emit_arguments_to_stack spilled_arguments call_arguments in let* () = append (mv a2 sp) in let* () = append (call "eml_applyN") in - let* () = copy_result_to dst in + let* () = copy_result_to destination_register in append (addi sp sp argv_bytes) ;; -let rec gen_invocation dst fname args = +let rec gen_invocation destination_register function_name call_arguments = let* () = spill_caller_saved_vars_to_frame in let* state = get in - let* spilled = spill_dangerous_args state args in - let nargs = List.length args in - let callee_arity_opt = Base.Map.find state.arity_map fname in - let style = classify_call ~nargs ~callee_arity_opt ~fname ~args in + let* spilled_arguments = spill_dangerous_args state call_arguments in + let argument_count = List.length call_arguments in + let callee_arity_opt = Base.Map.find state.arity_map function_name in + let style = + classify_call + ~argument_count + ~callee_arity_opt + ~function_name + ~arguments:call_arguments + in match style with - | Nullary name -> gen_nullary dst name - | Curry_chain { fname = fn; arity; first_args; rest_args } -> - gen_curried_call dst fn arity first_args rest_args - | Direct { fname = fn; args = a } -> gen_direct_call dst fn a spilled - | Via_apply_nargs { fname = fn; nargs = n; args = a } -> - gen_via_apply_nargs dst fn n a spilled - -and gen_curried_call dst fname _arity first_args rest_args = + | Nullary resolved_function_name -> + gen_nullary destination_register resolved_function_name + | CurryChain { function_name; arity; initial_arguments; remaining_arguments } -> + gen_curried_call destination_register function_name arity initial_arguments remaining_arguments + | Direct { function_name; arguments } -> + gen_direct_call destination_register function_name arguments spilled_arguments + | ViaApplyNargs { function_name; argument_count; arguments } -> + gen_via_apply_nargs + destination_register + function_name + argument_count + arguments + spilled_arguments + +and gen_curried_call + destination_register + function_name + _arity + initial_arguments + remaining_arguments + = let* part_name = fresh_partial in let* () = gen_cexpr - dst - (ComplexApp (ImmediateVar fname, List.hd first_args, List.tl first_args)) + destination_register + (ComplexApp + ( ImmediateVar function_name + , List.hd initial_arguments + , List.tl initial_arguments )) + in + let* partial_function_location = store_reg_into_frame destination_register in + let* () = + modify_env + (fun environment -> + Base.Map.set + environment + ~key:part_name + ~data:partial_function_location) in - let* loc = store_reg_into_frame dst in - let* () = modify_env (fun env -> Base.Map.set env ~key:part_name ~data:loc) in (* Apply each rest_arg one at a time (eml_applyN expects one application per call) *) - let rec apply_rest = function + let rec apply_remaining_arguments = function | [] -> return () - | [ arg ] -> gen_cexpr dst (ComplexApp (ImmediateVar part_name, arg, [])) - | arg :: rest -> - let* () = gen_cexpr dst (ComplexApp (ImmediateVar part_name, arg, [])) in - let* loc' = store_reg_into_frame dst in - let* () = modify_env (fun env -> Base.Map.set env ~key:part_name ~data:loc') in - apply_rest rest + | [ argument ] -> + gen_cexpr destination_register (ComplexApp (ImmediateVar part_name, argument, [])) + | argument :: remaining_arguments_tail -> + let* () = + gen_cexpr + destination_register + (ComplexApp (ImmediateVar part_name, argument, [])) + in + let* updated_partial_location = store_reg_into_frame destination_register in + let* () = + modify_env + (fun environment -> + Base.Map.set environment ~key:part_name ~data:updated_partial_location) + in + apply_remaining_arguments remaining_arguments_tail in - apply_rest rest_args + apply_remaining_arguments remaining_arguments and gen_unit dst = append (li dst (tag_int 0)) @@ -264,11 +345,11 @@ and gen_not dst op = let* () = gen_imm t0 op in append (xori dst t0 (tag_int 1)) -and gen_binop dst op left right = - let* () = gen_imm t0 left in - let* () = gen_imm t1 right in +and gen_binop dst binary_operator left_operand right_operand = + let* () = gen_imm t0 left_operand in + let* () = gen_imm t1 right_operand in let* () = evacuate_reg dst in - match bin_op dst (bin_oper_to_string op) t0 t1 with + match bin_op dst (bin_oper_to_string binary_operator) t0 t1 with | Ok instrs -> append instrs | Error msg -> fail msg @@ -278,16 +359,16 @@ and gen_branch dst cond then_e else_e = (* Branch to else when cond equals tagged false (1); not zero *) let* () = append (li t1 (tag_int 0)) in let* () = append (beq t0 t1 else_lbl) in - let* st_before_then = get in - let frame_before_then = st_before_then.frame_offset in + let* state_before_then = get in + let frame_offset_before_then = state_before_then.frame_offset in let* () = gen_anf dst then_e in let* () = append (j end_lbl) in - let* st_after_then = get in + let* state_after_then = get in let* () = put - { st_before_then with - frame_offset = frame_before_then - ; instr_buffer = st_after_then.instr_buffer + { state_before_then with + frame_offset = frame_offset_before_then + ; instr_buffer = state_after_then.instr_buffer } in let* () = append (label else_lbl) in @@ -332,7 +413,7 @@ and gen_tuple dst e1 e2 rest = let* spilled = spill_dangerous_args state elts in let array_bytes = argc * word_size in let* () = append (addi sp sp (-array_bytes)) in - let* () = emit_args_to_stack spilled elts in + let* () = emit_arguments_to_stack spilled elts in let* () = append (li result_reg argc) in let* () = append (addi (List.nth arg_regs 1) sp 0) in let* () = append (call "create_tuple") in @@ -367,23 +448,23 @@ and gen_anf dst = function let bind_param_to_reg env i = function | ImmediateVar name -> - let r = List.nth arg_regs i in - return (Base.Map.set env ~key:name ~data:(Loc_reg r)) + let register = List.nth arg_regs i in + return (Base.Map.set env ~key:name ~data:(Loc_reg register)) | _ -> fail "unsupported pattern" ;; let bind_param_to_stack env i = function | ImmediateVar name -> - let off = (i + 2) * word_size in - return (Base.Map.set env ~key:name ~data:(Loc_mem (fp, off))) + let stack_offset = (i + 2) * word_size in + return (Base.Map.set env ~key:name ~data:(Loc_mem (fp, stack_offset))) | _ -> fail "unsupported pattern" ;; let flush_instr_buffer ppf = - let* st = get in - let buf = st.instr_buffer in - let* () = put { st with instr_buffer = [] } in - let () = List.iter (fun item -> format_item ppf item) (List.rev buf) in + let* state = get in + let instruction_buffer = state.instr_buffer in + let* () = put { state with instr_buffer = [] } in + let () = List.iter (fun item -> format_item ppf item) (List.rev instruction_buffer) in return () ;; @@ -407,8 +488,8 @@ let gen_func ~enable_gc asm_name params body frame_sz ppf = in let* () = set_env env in let* () = append (prologue ~enable_gc ~name:asm_name ~stack_size:frame_sz) in - let* st = get in - let* () = put { st with frame_offset = 0 } in + let* state = get in + let* () = put { state with frame_offset = 0 } in let* () = spill_params_to_frame params_reg in let* () = gen_anf result_reg body in let* () = append (epilogue ~enable_gc ~is_main:(String.equal asm_name "main")) in @@ -436,10 +517,12 @@ let gen_program ~enable_gc ppf (analysis : analysis_result) = } in let comp = - Base.List.foldi analysis.functions ~init:(return ()) ~f:(fun i acc fn -> + Base.List.foldi analysis.functions ~init:(return ()) ~f:(fun function_index acc fn -> let frame_sz = (2 + fn.slots_count) * word_size in let* () = acc in - let* () = modify (fun st -> { st with current_func_index = i }) in + let* () = + modify (fun state -> { state with current_func_index = function_index }) + in gen_func ~enable_gc fn.asm_name fn.params fn.body frame_sz ppf) in match run comp init with From c7e997847592981eda20d6ae1db92be5d7e05470 Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Mon, 9 Mar 2026 19:26:50 +0300 Subject: [PATCH 57/74] fix zanuda --- EML/bin/EML.ml | 55 ++++++----- EML/bin/dune | 1 + EML/lib/backend/llvm_ir/analysis.ml | 30 +++--- EML/lib/backend/llvm_ir/analysis.mli | 25 +++++ EML/lib/backend/llvm_ir/architecture.ml | 36 +++++-- EML/lib/backend/llvm_ir/architecture.mli | 52 ++++++++++ EML/lib/backend/llvm_ir/generator.ml | 94 ++++++------------ EML/lib/backend/llvm_ir/generator.mli | 9 ++ EML/lib/backend/llvm_ir/generator_state.ml | 3 +- EML/lib/backend/llvm_ir/generator_state.mli | 53 ++++++++++ EML/lib/backend/llvm_ir/runner.ml | 8 +- EML/lib/backend/ricsv/analysis.ml | 64 +++++++------ EML/lib/backend/ricsv/analysis.mli | 27 ++++++ EML/lib/backend/ricsv/architecture.mli | 101 ++++++++++++++++++++ EML/lib/backend/ricsv/auxillary.mli | 42 ++++++++ EML/lib/backend/ricsv/generator.ml | 3 +- EML/lib/backend/ricsv/generator.mli | 9 ++ EML/lib/backend/ricsv/generator_state.ml | 10 +- EML/lib/backend/ricsv/generator_state.mli | 49 ++++++++++ EML/lib/frontend/inferencer.mli | 64 +++++++++++++ EML/lib/frontend/parser.mli | 5 + EML/lib/middleend/anf.ml | 9 +- EML/lib/middleend/anf.mli | 45 +++++++++ EML/lib/middleend/anf_pp.ml | 12 ++- EML/lib/middleend/anf_pp.mli | 6 ++ EML/lib/middleend/cc.ml | 12 +-- EML/lib/middleend/cc.mli | 8 ++ EML/lib/middleend/ll.ml | 26 ++--- EML/lib/middleend/ll.mli | 10 ++ EML/lib/runtime/primitives.mli | 24 +++++ EML/lib/utils/helpers.ml | 6 +- EML/lib/utils/helpers.mli | 8 ++ EML/lib/utils/monads.ml | 4 + EML/lib/utils/monads.mli | 4 + EML/lib/utils/pretty_printer.mli | 16 ++++ EML/tests/anf_tests.mli | 6 ++ EML/tests/cc_tests.mli | 5 + EML/tests/inferencer_tests.mli | 6 ++ EML/tests/ll_tests.mli | 5 + EML/tests/llvm_tests.mli | 6 ++ EML/tests/parser_tests.mli | 5 + EML/tests/riscv_tests.mli | 6 ++ 42 files changed, 787 insertions(+), 182 deletions(-) create mode 100644 EML/lib/backend/llvm_ir/analysis.mli create mode 100644 EML/lib/backend/llvm_ir/architecture.mli create mode 100644 EML/lib/backend/llvm_ir/generator.mli create mode 100644 EML/lib/backend/llvm_ir/generator_state.mli create mode 100644 EML/lib/backend/ricsv/analysis.mli create mode 100644 EML/lib/backend/ricsv/architecture.mli create mode 100644 EML/lib/backend/ricsv/auxillary.mli create mode 100644 EML/lib/backend/ricsv/generator.mli create mode 100644 EML/lib/backend/ricsv/generator_state.mli create mode 100644 EML/lib/frontend/inferencer.mli create mode 100644 EML/lib/frontend/parser.mli create mode 100644 EML/lib/middleend/anf.mli create mode 100644 EML/lib/middleend/anf_pp.mli create mode 100644 EML/lib/middleend/cc.mli create mode 100644 EML/lib/middleend/ll.mli create mode 100644 EML/lib/runtime/primitives.mli create mode 100644 EML/lib/utils/helpers.mli create mode 100644 EML/lib/utils/pretty_printer.mli create mode 100644 EML/tests/anf_tests.mli create mode 100644 EML/tests/cc_tests.mli create mode 100644 EML/tests/inferencer_tests.mli create mode 100644 EML/tests/ll_tests.mli create mode 100644 EML/tests/llvm_tests.mli create mode 100644 EML/tests/parser_tests.mli create mode 100644 EML/tests/riscv_tests.mli diff --git a/EML/bin/EML.ml b/EML/bin/EML.ml index 33a5f8df..4dadc017 100644 --- a/EML/bin/EML.ml +++ b/EML/bin/EML.ml @@ -132,35 +132,34 @@ let compiler opts : (unit, unit) Result.t = (* ------------------------------------------------------------------------- *) let parse_args () : (opts, unit) Result.t = - let input_file = ref default_opts.input_file in - let output_file = ref default_opts.output_file in - let enable_gc = ref default_opts.enable_gc in - let backend = ref Ricsv in - let infer_only = ref default_opts.infer_only in - let positional_seen = ref false in - let open Arg in - let spec = - [ ( "-backend" - , Symbol - ([ "ricsv"; "llvm" ], fun s -> backend := if s = "llvm" then Llvm else Ricsv) - , " Code generation backend (default: ricsv)" ) - ; "-fromfile", String (fun s -> input_file := Some s), " Read source from file" - ; "-o", String (fun s -> output_file := Some s), " Write output to file" - ; "-gc", Set enable_gc, " Enable GC runtime support" - ; "-infer", Set infer_only, " Run only type inference and print inferred types" - ] + let parse_backend = function + | "llvm" -> Ok Llvm + | "ricsv" -> Ok Ricsv + | _ -> Error () in - parse spec (fun _ -> positional_seen := true) "Compiler for custom language"; - if !positional_seen - then Error () - else - Ok - { input_file = !input_file - ; output_file = !output_file - ; enable_gc = !enable_gc - ; backend = !backend - ; infer_only = !infer_only - } + let rec loop current_opts = function + | [] -> Ok current_opts + | "-gc" :: rest -> + loop { current_opts with enable_gc = true } rest + | "-infer" :: rest -> + loop { current_opts with infer_only = true } rest + | "-fromfile" :: path :: rest -> + loop { current_opts with input_file = Some path } rest + | "-o" :: path :: rest -> + loop { current_opts with output_file = Some path } rest + | "-backend" :: backend_name :: rest -> + (match parse_backend backend_name with + | Ok backend -> loop { current_opts with backend } rest + | Error () -> Error ()) + | argument :: _ when String.length argument > 0 && Char.equal argument.[0] '-' -> + Error () + | _positional_argument :: _ -> + Error () + in + let argv = Array.to_list Sys.argv in + match argv with + | [] -> Ok default_opts + | _program_name :: arguments -> loop default_opts arguments ;; let () = diff --git a/EML/bin/dune b/EML/bin/dune index 1d0e46b7..c2e348c7 100644 --- a/EML/bin/dune +++ b/EML/bin/dune @@ -1,3 +1,4 @@ (executable (name EML) + (modes byte exe) (libraries stdio base EML.lib)) diff --git a/EML/lib/backend/llvm_ir/analysis.ml b/EML/lib/backend/llvm_ir/analysis.ml index b15a428b..399c70eb 100644 --- a/EML/lib/backend/llvm_ir/analysis.ml +++ b/EML/lib/backend/llvm_ir/analysis.ml @@ -91,20 +91,28 @@ let analyze (program : anf_program) = | AnfEval _ -> None) program in - let counts = ref (Base.Map.empty (module Base.String)) in let mangle_reserved name = if String.equal name "_start" then "eml_start" else name in - let asm_name name = - let base = mangle_reserved name in - let n = Base.Map.find !counts name |> Option.value ~default:0 in - counts := Base.Map.set !counts ~key:name ~data:(n + 1); - if n = 0 then base else base ^ "_" ^ Int.to_string n - in - let functions = - List.map - (fun (func_name, _arity, params, body, slots_count) -> - { func_name; asm_name = asm_name func_name; params; body; slots_count }) + let functions, _ = + List.fold_left + (fun (reversed_functions, counts) (func_name, _arity, params, body, slots_count) -> + let base_asm_name = mangle_reserved func_name in + let duplicate_index = + Base.Map.find counts func_name |> Option.value ~default:0 + in + let updated_counts = + Base.Map.set counts ~key:func_name ~data:(duplicate_index + 1) + in + let asm_name = + if duplicate_index = 0 + then base_asm_name + else base_asm_name ^ "_" ^ Int.to_string duplicate_index + in + ( { func_name; asm_name; params; body; slots_count } :: reversed_functions + , updated_counts )) + ([], Base.Map.empty (module Base.String)) raw in + let functions = List.rev functions in let has_main = List.exists (fun func_layout -> String.equal func_layout.func_name "main") functions in diff --git a/EML/lib/backend/llvm_ir/analysis.mli b/EML/lib/backend/llvm_ir/analysis.mli new file mode 100644 index 00000000..a02061ef --- /dev/null +++ b/EML/lib/backend/llvm_ir/analysis.mli @@ -0,0 +1,25 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Middleend.Anf + +type function_layout = + { func_name : string + ; asm_name : string + ; params : immediate list + ; body : anf_expr + ; slots_count : int + } + +type analysis_result = + { arity_map : (string, int, Base.String.comparator_witness) Base.Map.t + ; functions : function_layout list + ; resolve : int -> string -> (string * int) option + } + +val arity_map_of_program + : anf_program + -> (string, int, Base.String.comparator_witness) Base.Map.t + +val analyze : anf_program -> analysis_result diff --git a/EML/lib/backend/llvm_ir/architecture.ml b/EML/lib/backend/llvm_ir/architecture.ml index aab42361..63224627 100644 --- a/EML/lib/backend/llvm_ir/architecture.ml +++ b/EML/lib/backend/llvm_ir/architecture.ml @@ -39,21 +39,21 @@ module Llvm_backend = struct | Not (operand, name) -> Some (build_not operand name builder) | Load (load_ty, ptr_value, name) -> Some (build_load load_ty ptr_value name builder) | Store (value, ptr_value) -> - ignore (build_store value ptr_value builder); + let (_ : Llvm.llvalue) = build_store value ptr_value builder in None | Alloca (alloca_ty, name) -> Some (build_alloca alloca_ty name builder) | Call (ft, callee, args, name) -> Some (build_call ft callee args name builder) | Ret None -> - ignore (build_ret_void builder); + let (_ : Llvm.llvalue) = build_ret_void builder in None | Ret (Some ret_value) -> - ignore (build_ret ret_value builder); + let (_ : Llvm.llvalue) = build_ret ret_value builder in None | Br block -> - ignore (build_br block builder); + let (_ : Llvm.llvalue) = build_br block builder in None | CondBr (cond, then_bb, else_bb) -> - ignore (build_cond_br cond then_bb else_bb builder); + let (_ : Llvm.llvalue) = build_cond_br cond then_bb else_bb builder in None | Phi (incoming, name) -> Some (build_phi incoming name builder) | Bitcast (operand, dest_ty, name) -> @@ -90,12 +90,28 @@ module Llvm_backend = struct emit builder (IntToPtr (operand, dest_ty, name)) ;; - let store builder value ptr_value = ignore (emit builder (Store (value, ptr_value))) - let ret_void builder = ignore (emit builder (Ret None)) - let ret builder ret_value = ignore (emit builder (Ret (Some ret_value))) - let br builder block = ignore (emit builder (Br block)) + let store builder value ptr_value = + let (_ : Llvm.llvalue option) = emit builder (Store (value, ptr_value)) in + () + ;; + + let ret_void builder = + let (_ : Llvm.llvalue option) = emit builder (Ret None) in + () + ;; + + let ret builder ret_value = + let (_ : Llvm.llvalue option) = emit builder (Ret (Some ret_value)) in + () + ;; + + let br builder block = + let (_ : Llvm.llvalue option) = emit builder (Br block) in + () + ;; let cond_br builder cond then_bb else_bb = - ignore (emit builder (CondBr (cond, then_bb, else_bb))) + let (_ : Llvm.llvalue option) = emit builder (CondBr (cond, then_bb, else_bb)) in + () ;; end diff --git a/EML/lib/backend/llvm_ir/architecture.mli b/EML/lib/backend/llvm_ir/architecture.mli new file mode 100644 index 00000000..8c61568a --- /dev/null +++ b/EML/lib/backend/llvm_ir/architecture.mli @@ -0,0 +1,52 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Llvm + +module Llvm_backend : sig + type instr = + | Add of llvalue * llvalue * string + | Sub of llvalue * llvalue * string + | Mul of llvalue * llvalue * string + | Sdiv of llvalue * llvalue * string + | Neg of llvalue * string + | Icmp of Icmp.t * llvalue * llvalue * string + | And of llvalue * llvalue * string + | Or of llvalue * llvalue * string + | Not of llvalue * string + | Load of lltype * llvalue * string + | Store of llvalue * llvalue + | Alloca of lltype * string + | Call of lltype * llvalue * llvalue array * string + | Ret of llvalue option + | Br of llbasicblock + | CondBr of llvalue * llbasicblock * llbasicblock + | Phi of (llvalue * llbasicblock) list * string + | Bitcast of llvalue * lltype * string + | PtrToInt of llvalue * lltype * string + | IntToPtr of llvalue * lltype * string + + val emit : llbuilder -> instr -> llvalue option + val add : llbuilder -> llvalue -> llvalue -> string -> llvalue option + val sub : llbuilder -> llvalue -> llvalue -> string -> llvalue option + val mul : llbuilder -> llvalue -> llvalue -> string -> llvalue option + val sdiv : llbuilder -> llvalue -> llvalue -> string -> llvalue option + val neg : llbuilder -> llvalue -> string -> llvalue option + val icmp : llbuilder -> Icmp.t -> llvalue -> llvalue -> string -> llvalue option + val and_ : llbuilder -> llvalue -> llvalue -> string -> llvalue option + val or_ : llbuilder -> llvalue -> llvalue -> string -> llvalue option + val not : llbuilder -> llvalue -> string -> llvalue option + val load : llbuilder -> lltype -> llvalue -> string -> llvalue option + val alloca : llbuilder -> lltype -> string -> llvalue option + val call : llbuilder -> lltype -> llvalue -> llvalue array -> string -> llvalue option + val phi : llbuilder -> (llvalue * llbasicblock) list -> string -> llvalue option + val bitcast : llbuilder -> llvalue -> lltype -> string -> llvalue option + val ptrtoint : llbuilder -> llvalue -> lltype -> string -> llvalue option + val inttoptr : llbuilder -> llvalue -> lltype -> string -> llvalue option + val store : llbuilder -> llvalue -> llvalue -> unit + val ret_void : llbuilder -> unit + val ret : llbuilder -> llvalue -> unit + val br : llbuilder -> llbasicblock -> unit + val cond_br : llbuilder -> llvalue -> llbasicblock -> llbasicblock -> unit +end diff --git a/EML/lib/backend/llvm_ir/generator.ml b/EML/lib/backend/llvm_ir/generator.ml index 15c86760..7c09c598 100644 --- a/EML/lib/backend/llvm_ir/generator.ml +++ b/EML/lib/backend/llvm_ir/generator.ml @@ -15,8 +15,6 @@ let tag_int n = 1 + (n lsl 1) let tag_bool b = if b then 4 else 2 let tag_char c = tag_int (Char.code c) let context = global_context () -let my_module = ref (create_module context "EML") -let current_module () = !my_module let builder = builder context let int_t = i64_type context let i32_t = i32_type context @@ -44,52 +42,16 @@ let predefined_funcs = predefined_runtime_funcs ;; -let variable_value_table : (string, llvalue) Hashtbl.t = - Hashtbl.create (List.length predefined_funcs * 4) -;; - -let variable_type_table : (string, lltype) Hashtbl.t = - Hashtbl.create (List.length predefined_funcs * 4) -;; - -let reset_for_new_program () = - my_module := create_module context "EML"; - Hashtbl.clear variable_value_table; - Hashtbl.clear variable_type_table -;; - -let predefined_init () = - let module_ = current_module () in - List.iter - (fun (str, t) -> - let func = declare_function str t module_ in - Hashtbl.add variable_type_table str t; - Hashtbl.add variable_value_table str func) +let predefined_init current_module = + List.fold_left + (fun (value_env, type_env) (function_name, function_type) -> + let function_value = declare_function function_name function_type current_module in + ( Base.Map.set value_env ~key:function_name ~data:function_value + , Base.Map.set type_env ~key:function_name ~data:function_type )) + (Base.Map.empty (module Base.String), Base.Map.empty (module Base.String)) predefined_funcs ;; -let snapshot_envs () = - let value_env = - Hashtbl.fold - (fun key value acc -> Base.Map.set acc ~key ~data:value) - variable_value_table - (Base.Map.empty (module Base.String)) - in - let type_env = - Hashtbl.fold - (fun key value acc -> Base.Map.set acc ~key ~data:value) - variable_type_table - (Base.Map.empty (module Base.String)) - in - value_env, type_env -;; - -let emit_value builder instr = - match emit builder instr with - | Some v -> Ok v - | None -> Error "emit_value: expected value" -;; - let emit_void builder instr : (unit, string) Result.t = match emit builder instr with | _ -> Ok () @@ -101,9 +63,8 @@ let emit_void_st builder instr = | Error e -> fail e ;; -let with_optional_value opt = - match opt with - | Some v -> return v +let with_optional_value = function + | Some value -> return value | None -> fail "Llvm_backend: expected value" ;; @@ -112,7 +73,8 @@ let lookup_func name = match value_opt with | Some func -> return func | None -> - (match lookup_function name (current_module ()) with + let* state = get in + (match lookup_function name state.current_module with | Some func -> return func | None -> fail ("Couldn't find value for key: " ^ name)) ;; @@ -667,7 +629,7 @@ let declare_function (func_layout : function_layout) state = let llvm_name = if func_layout.func_name = "main" then "eml_main" else func_layout.asm_name in - let func = declare_function llvm_name func_type (current_module ()) in + let func = declare_function llvm_name func_type state.current_module in let key = if func_layout.func_name = "main" then "main" else func_layout.asm_name in { state with value_env = Base.Map.set state.value_env ~key ~data:func @@ -724,18 +686,18 @@ let gen_function else fail "gen_function: param index out of bounds" in set_value_name name param_value; - (match enable_gc with - | true -> - let* gc_allocas = get_gc_allocas in - let* allocas_map = - match gc_allocas with - | Some m -> return m - | None -> fail "gen_function: enable_gc but gc_allocas not set" - in - let* alloca_ptr = with_optional_value (alloca builder ptr_t name) in - store builder param_value alloca_ptr; - set_gc_allocas (Some (Base.Map.set allocas_map ~key:name ~data:alloca_ptr)) - | false -> set_value name param_value) + if enable_gc + then + let* gc_allocas = get_gc_allocas in + let* allocas_map = + match gc_allocas with + | Some map -> return map + | None -> fail "gen_function: enable_gc but gc_allocas not set" + in + let* alloca_ptr = with_optional_value (alloca builder ptr_t name) in + store builder param_value alloca_ptr; + set_gc_allocas (Some (Base.Map.set allocas_map ~key:name ~data:alloca_ptr)) + else set_value name param_value | ImmediateConst _ -> return ()) in let* body_value = gen_anf func_layout.body in @@ -771,13 +733,13 @@ let gen_function ;; let gen_program ~output_file ~enable_gc (program : anf_program) = - reset_for_new_program (); - predefined_init (); - let value_env, type_env = snapshot_envs () in + let llvm_module = create_module context "EML" in + let value_env, type_env = predefined_init llvm_module in let { functions; resolve; _ } = analyze program in let initial_state : Generator_state.state = { value_env ; type_env + ; current_module = llvm_module ; gc_allocas = None ; gc_entry_block = None ; naming_state = Default_naming.init @@ -811,6 +773,6 @@ let gen_program ~output_file ~enable_gc (program : anf_program) = with | Error err -> Error err | Ok _ -> - print_module output_file (current_module ()); + print_module output_file llvm_module; Ok () ;; diff --git a/EML/lib/backend/llvm_ir/generator.mli b/EML/lib/backend/llvm_ir/generator.mli new file mode 100644 index 00000000..f19341f6 --- /dev/null +++ b/EML/lib/backend/llvm_ir/generator.mli @@ -0,0 +1,9 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val gen_program + : output_file:string + -> enable_gc:bool + -> Middleend.Anf.anf_program + -> (unit, string) Result.t diff --git a/EML/lib/backend/llvm_ir/generator_state.ml b/EML/lib/backend/llvm_ir/generator_state.ml index faa842c4..74fbc427 100644 --- a/EML/lib/backend/llvm_ir/generator_state.ml +++ b/EML/lib/backend/llvm_ir/generator_state.ml @@ -28,6 +28,7 @@ module Make (N : NAMING) = struct type state = { value_env : (string, llvalue, Base.String.comparator_witness) Base.Map.t ; type_env : (string, lltype, Base.String.comparator_witness) Base.Map.t + ; current_module : llmodule ; gc_allocas : (string, llvalue, Base.String.comparator_witness) Base.Map.t option ; gc_entry_block : llbasicblock option ; naming_state : N.t @@ -119,7 +120,7 @@ module Make (N : NAMING) = struct return triple ;; - let run m init = m init + let run m = m end include Make (Default_naming) diff --git a/EML/lib/backend/llvm_ir/generator_state.mli b/EML/lib/backend/llvm_ir/generator_state.mli new file mode 100644 index 00000000..3b348d8f --- /dev/null +++ b/EML/lib/backend/llvm_ir/generator_state.mli @@ -0,0 +1,53 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Llvm + +module type NAMING = sig + type t + + val init : t + val fresh_blocks : t -> (string * string * string) * t +end + +module Default_naming : NAMING + +module Make (N : NAMING) : sig + type state = + { value_env : (string, llvalue, Base.String.comparator_witness) Base.Map.t + ; type_env : (string, lltype, Base.String.comparator_witness) Base.Map.t + ; current_module : llmodule + ; gc_allocas : (string, llvalue, Base.String.comparator_witness) Base.Map.t option + ; gc_entry_block : llbasicblock option + ; naming_state : N.t + ; resolve : (int -> string -> (string * int) option) option + ; current_func_index : int + } + + type 'a t = state -> ('a * state, string) Result.t + + val return : 'a -> 'a t + val bind : 'a t -> ('a -> 'b t) -> 'b t + val ( let* ) : 'a t -> ('a -> 'b t) -> 'b t + val get : state t + val put : state -> unit t + val modify : (state -> state) -> unit t + val fail : string -> 'a t + val map_find_opt : (string, 'a, 'cmp) Base.Map.t -> string -> 'a option + val find_value_opt : string -> llvalue option t + val find_type_opt : string -> lltype option t + val resolved_find_value_opt : string -> llvalue option t + val resolved_find_type_opt : string -> lltype option t + val set_value : string -> llvalue -> unit t + val set_type : string -> lltype -> unit t + val remove_value : string -> unit t + val get_gc_allocas : (string, llvalue, Base.String.comparator_witness) Base.Map.t option t + val set_gc_allocas : (string, llvalue, Base.String.comparator_witness) Base.Map.t option -> unit t + val get_gc_entry_block : llbasicblock option t + val set_gc_entry_block : llbasicblock option -> unit t + val fresh_blocks : (string * string * string) t + val run : 'a t -> state -> ('a * state, string) Result.t +end + +include module type of Make (Default_naming) diff --git a/EML/lib/backend/llvm_ir/runner.ml b/EML/lib/backend/llvm_ir/runner.ml index 4ad9e879..a8d0d532 100644 --- a/EML/lib/backend/llvm_ir/runner.ml +++ b/EML/lib/backend/llvm_ir/runner.ml @@ -6,14 +6,16 @@ open Middleend.Anf let gen_program ~enable_gc ppf (program : anf_program) : (unit, string) Result.t = let temp_ll_path = Filename.temp_file "eml_llvm" ".ll" in + let remove_temp_file_if_exists () = + if Sys.file_exists temp_ll_path then Sys.remove temp_ll_path + in match Generator.gen_program ~output_file:temp_ll_path ~enable_gc program with | Error err -> - (try Sys.remove temp_ll_path with - | _ -> ()); + remove_temp_file_if_exists (); Error err | Ok () -> let content = In_channel.with_open_text temp_ll_path In_channel.input_all in - Sys.remove temp_ll_path; + remove_temp_file_if_exists (); Format.fprintf ppf "%s" content; Ok () ;; diff --git a/EML/lib/backend/ricsv/analysis.ml b/EML/lib/backend/ricsv/analysis.ml index c3b68500..caa169f5 100644 --- a/EML/lib/backend/ricsv/analysis.ml +++ b/EML/lib/backend/ricsv/analysis.ml @@ -23,8 +23,6 @@ type analysis_result = ; resolve : int -> string -> (string * int) option } -let arg_regs_count = 8 - let sum_by f xs = List.fold_left (fun acc x -> acc + f x) 0 xs let max_by f xs = List.fold_left (fun acc x -> max acc (f x)) 0 xs @@ -172,34 +170,46 @@ let analyze (program : anf_program) = | AnfEval _ -> None) program in - let generated_name_counts = ref (Base.Map.empty (module Base.String)) in let mangle_reserved name = if String.equal name "_start" then "eml_start" else name in - let build_asm_name name = - let base = mangle_reserved name in - let duplicate_index = Base.Map.find !generated_name_counts name |> Option.value ~default:0 in - generated_name_counts := - Base.Map.set !generated_name_counts ~key:name ~data:(duplicate_index + 1); - if duplicate_index = 0 then base else base ^ "_" ^ Int.to_string duplicate_index - in - let functions = - List.map - (fun ( func_name - , _arity - , params - , body - , slots_count - , max_stack_args - , max_create_tuple_array_bytes ) -> - { func_name - ; asm_name = build_asm_name func_name - ; params - ; body - ; slots_count - ; max_stack_args - ; max_create_tuple_array_bytes - }) + let functions, _ = + List.fold_left + (fun (reversed_functions, generated_name_counts) + ( func_name + , _arity + , params + , body + , slots_count + , max_stack_args + , max_create_tuple_array_bytes ) -> + let base_asm_name = mangle_reserved func_name in + let duplicate_index = + Base.Map.find generated_name_counts func_name |> Option.value ~default:0 + in + let updated_generated_name_counts = + Base.Map.set + generated_name_counts + ~key:func_name + ~data:(duplicate_index + 1) + in + let asm_name = + if duplicate_index = 0 + then base_asm_name + else base_asm_name ^ "_" ^ Int.to_string duplicate_index + in + ( { func_name + ; asm_name + ; params + ; body + ; slots_count + ; max_stack_args + ; max_create_tuple_array_bytes + } + :: reversed_functions + , updated_generated_name_counts )) + ([], Base.Map.empty (module Base.String)) analyzed_functions_raw in + let functions = List.rev functions in let has_main = List.exists (fun fn -> String.equal fn.func_name "main") functions in let functions = if has_main diff --git a/EML/lib/backend/ricsv/analysis.mli b/EML/lib/backend/ricsv/analysis.mli new file mode 100644 index 00000000..d3c05552 --- /dev/null +++ b/EML/lib/backend/ricsv/analysis.mli @@ -0,0 +1,27 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Middleend.Anf + +type function_layout = + { func_name : string + ; asm_name : string + ; params : immediate list + ; body : anf_expr + ; slots_count : int + ; max_stack_args : int + ; max_create_tuple_array_bytes : int + } + +type analysis_result = + { arity_map : (string, int, Base.String.comparator_witness) Base.Map.t + ; functions : function_layout list + ; resolve : int -> string -> (string * int) option + } + +val arity_map_of_program + : anf_program + -> (string, int, Base.String.comparator_witness) Base.Map.t + +val analyze : anf_program -> analysis_result diff --git a/EML/lib/backend/ricsv/architecture.mli b/EML/lib/backend/ricsv/architecture.mli new file mode 100644 index 00000000..b3eef301 --- /dev/null +++ b/EML/lib/backend/ricsv/architecture.mli @@ -0,0 +1,101 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +module Riscv_backend : sig + type reg = + | Zero + | RA + | SP + | A of int + | T of int + | S of int + [@@deriving eq] + + type offset = reg * int + + type instr = + | Addi of reg * reg * int + | Ld of reg * offset + | Sd of reg * offset + | Mv of reg * reg + | Li of reg * int + | Add of reg * reg * reg + | Sub of reg * reg * reg + | Call of string + | Ret + | Beq of reg * reg * string + | J of string + | Label of string + | La of reg * string + | Slt of reg * reg * reg + | Seqz of reg * reg + | Snez of reg * reg + | Xori of reg * reg * int + | Xor of reg * reg * reg + | Mul of reg * reg * reg + | Div of reg * reg * reg + | Srli of reg * reg * int + + val pp_reg : Format.formatter -> reg -> unit + val pp_offset : Format.formatter -> offset -> unit + val pp_instr : Format.formatter -> instr -> unit + val tag_int : int -> int + val fp : reg + val sp : reg + val ra : reg + val zero : reg + val a0 : reg + val a1 : reg + val a2 : reg + val a3 : reg + val a4 : reg + val a5 : reg + val a6 : reg + val a7 : reg + val t0 : reg + val t1 : reg + val result_reg : reg + val addi : reg -> reg -> int -> instr list + val ld : reg -> offset -> instr list + val sd : reg -> offset -> instr list + val mv : reg -> reg -> instr list + val li : reg -> int -> instr list + val add : reg -> reg -> reg -> instr list + val sub : reg -> reg -> reg -> instr list + val call : string -> instr list + val ret : unit -> instr list + val beq : reg -> reg -> string -> instr list + val j : string -> instr list + val label : string -> instr list + val la : reg -> string -> instr list + val slt : reg -> reg -> reg -> instr list + val seqz : reg -> reg -> instr list + val snez : reg -> reg -> instr list + val xori : reg -> reg -> int -> instr list + val xor : reg -> reg -> reg -> instr list + val mul : reg -> reg -> reg -> instr list + val div : reg -> reg -> reg -> instr list + val srli : reg -> reg -> int -> instr list + val add_tag_items : reg -> int -> instr list + val arg_regs : reg list + val candidate_regs_for_spill : reg list + val arg_regs_count : int + val word_size : int + val stack_align : int + val frame_header_size : int + val saved_fp_offset : int + val saved_ra_offset : int + val max_addi_imm : int + val sub_sp : int -> instr list + val addi_or_li_add : reg -> reg -> int -> instr list + val sd_at_sp_offset : reg -> int -> instr list + + type location = + | Loc_reg of reg + | Loc_mem of offset + + val prologue : enable_gc:bool -> name:string -> stack_size:int -> instr list + val epilogue : enable_gc:bool -> is_main:bool -> instr list + val format_item : Format.formatter -> instr -> unit +end diff --git a/EML/lib/backend/ricsv/auxillary.mli b/EML/lib/backend/ricsv/auxillary.mli new file mode 100644 index 00000000..3b85b3a5 --- /dev/null +++ b/EML/lib/backend/ricsv/auxillary.mli @@ -0,0 +1,42 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Middleend.Anf +open Architecture +open Riscv_backend + +val is_caller_saved : reg -> bool +val bin_op : reg -> string -> reg -> reg -> (instr list, string) result +val bin_oper_to_string : Frontend.Ast.bin_oper -> string + +val vars_in_caller_saved_regs + : (string, location, Base.String.comparator_witness) Base.Map.t + -> (string * reg) list + +val indices_of_args_to_spill : Generator_state.state -> immediate list -> int list + +type call_style = + | Nullary of string + | CurryChain of + { function_name : string + ; arity : int + ; initial_arguments : immediate list + ; remaining_arguments : immediate list + } + | Direct of + { function_name : string + ; arguments : immediate list + } + | ViaApplyNargs of + { function_name : string + ; argument_count : int + ; arguments : immediate list + } + +val classify_call + : argument_count:int + -> callee_arity_opt:int option + -> function_name:string + -> arguments:immediate list + -> call_style diff --git a/EML/lib/backend/ricsv/generator.ml b/EML/lib/backend/ricsv/generator.ml index 705fc4ff..534b96a5 100644 --- a/EML/lib/backend/ricsv/generator.ml +++ b/EML/lib/backend/ricsv/generator.ml @@ -66,8 +66,7 @@ let spill_caller_saved_vars_to_frame = let evacuate_reg destination_register = let is_register_used environment register = - Base.Map.exists environment ~f:(fun location -> - match location with + Base.Map.exists environment ~f:(function | Loc_reg mapped_register -> equal_reg register mapped_register | Loc_mem _ -> false) in diff --git a/EML/lib/backend/ricsv/generator.mli b/EML/lib/backend/ricsv/generator.mli new file mode 100644 index 00000000..5d1bfb22 --- /dev/null +++ b/EML/lib/backend/ricsv/generator.mli @@ -0,0 +1,9 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val gen_program + : enable_gc:bool + -> Format.formatter + -> Analysis.analysis_result + -> (unit, string) Result.t diff --git a/EML/lib/backend/ricsv/generator_state.ml b/EML/lib/backend/ricsv/generator_state.ml index 93d65b6a..9197def7 100644 --- a/EML/lib/backend/ricsv/generator_state.ml +++ b/EML/lib/backend/ricsv/generator_state.ml @@ -78,15 +78,17 @@ module Make (N : NAMING) = struct return pair ;; - let run m init = m init + let run m = m let append (items : instr list) = let modify_instr_buffer f = modify (fun st -> { st with instr_buffer = f st.instr_buffer }) in - if items = [] - then return () - else modify_instr_buffer (fun l -> List.fold_left (fun acc it -> it :: acc) l items) + match items with + | [] -> return () + | _ -> + modify_instr_buffer (fun buffer -> + List.fold_left (fun acc instruction -> instruction :: acc) buffer items) ;; end diff --git a/EML/lib/backend/ricsv/generator_state.mli b/EML/lib/backend/ricsv/generator_state.mli new file mode 100644 index 00000000..93d5d122 --- /dev/null +++ b/EML/lib/backend/ricsv/generator_state.mli @@ -0,0 +1,49 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Architecture +open Riscv_backend + +module type NAMING = sig + type t + + val init : t + val fresh_partial : t -> string * t + val fresh_branch : t -> (string * string) * t +end + +module Default_naming : NAMING + +module Make (N : NAMING) : sig + type env = (string, location, Base.String.comparator_witness) Base.Map.t + + type state = + { frame_offset : int + ; naming_state : N.t + ; arity_map : (string, int, Base.String.comparator_witness) Base.Map.t + ; env : env + ; instr_buffer : instr list + ; current_func_index : int + ; symbol_resolve : int -> string -> (string * int) option + } + + type 'a t = state -> ('a * state, string) Result.t + + val return : 'a -> 'a t + val fail : string -> 'a t + val bind : 'a t -> ('a -> 'b t) -> 'b t + val ( let* ) : 'a t -> ('a -> 'b t) -> 'b t + val get : state t + val put : state -> unit t + val modify : (state -> state) -> unit t + val modify_env : (env -> env) -> unit t + val get_env : env t + val set_env : env -> unit t + val fresh_partial : string t + val fresh_branch : (string * string) t + val run : 'a t -> state -> ('a * state, string) Result.t + val append : instr list -> unit t +end + +include module type of Make (Default_naming) diff --git a/EML/lib/frontend/inferencer.mli b/EML/lib/frontend/inferencer.mli new file mode 100644 index 00000000..fe3d0a1f --- /dev/null +++ b/EML/lib/frontend/inferencer.mli @@ -0,0 +1,64 @@ +(** Copyright 2024-2025, Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Ast + +type error = + | OccursCheck of string * ty + | NoVariable of string + | UnificationFailed of ty * ty + | SeveralBounds of string + | LHS of string + | RHS of string + | UnexpectedFunction of ty + +val pp_error : Format.formatter -> error -> unit + +module ResultMonad : sig + type 'a t + + val return : 'a -> 'a t + val fail : error -> 'a t + + include Base.Monad.Infix with type 'a t := 'a t + + module Syntax : sig + val ( let* ) : 'a t -> ('a -> 'b t) -> 'b t + end + + val fresh : int t + val current_level : int t + val enter_level : unit t + val leave_level : unit t + val set_var_level : string -> int -> unit t + val get_var_level : string -> int option t + val run : 'a t -> ('a, error) Result.t +end + +module Substitution : sig + type t + + val empty : t +end + +module VarSet : Stdlib.Set.S with type elt = string + +module Scheme : sig + type t = Scheme of VarSet.t * ty +end + +module TypeEnv : sig + type t = (ident, Scheme.t, Base.String.comparator_witness) Base.Map.t + + val extend : t -> ident -> Scheme.t -> t + val free_vars : t -> VarSet.t + val apply : Substitution.t -> t -> t + val find : t -> ident -> Scheme.t option + val initial_env : t + val env_with_gc : t +end + +val infer_structure : TypeEnv.t -> program -> (Substitution.t * TypeEnv.t) ResultMonad.t +val infer_simple_expression : expr -> (ty, error) Result.t +val run_infer : program -> (TypeEnv.t, error) Result.t diff --git a/EML/lib/frontend/parser.mli b/EML/lib/frontend/parser.mli new file mode 100644 index 00000000..abf45da6 --- /dev/null +++ b/EML/lib/frontend/parser.mli @@ -0,0 +1,5 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val parse : string -> (Ast.program, string) Result.t diff --git a/EML/lib/middleend/anf.ml b/EML/lib/middleend/anf.ml index b6fee33e..2366c65c 100644 --- a/EML/lib/middleend/anf.ml +++ b/EML/lib/middleend/anf.ml @@ -120,8 +120,7 @@ let build_tuple_lets tuple_var indices_pats body = ;; let build_tuple_top_level_bindings tuple_var indices_pats = - let rec aux tuple_var indices_pats = - match indices_pats with + let rec aux tuple_var = function | [] -> return [] | (i, pat) :: rest -> let* bind_id = get_var pat in @@ -186,9 +185,9 @@ let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = | [] -> fail "application with no arguments")) | ExpTuple (exp1, exp2, exp_list) -> let all_exprs = exp1 :: exp2 :: exp_list in - anf_list all_exprs (fun imm_list -> - match imm_list with - | imm1 :: imm2 :: rest -> bind_complex_expr (ComplexTuple (imm1, imm2, rest)) k + anf_list all_exprs (function + | imm1 :: imm2 :: rest -> + bind_complex_expr (ComplexTuple (imm1, imm2, rest)) k | _ -> fail "Invalid tuple") | ExpLambda (pat, pat_list, body) -> let params = pat :: pat_list in diff --git a/EML/lib/middleend/anf.mli b/EML/lib/middleend/anf.mli new file mode 100644 index 00000000..0b9a614b --- /dev/null +++ b/EML/lib/middleend/anf.mli @@ -0,0 +1,45 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Frontend.Ast + +type immediate = + | ImmediateConst of const + | ImmediateVar of ident +[@@deriving show { with_path = false }] + +type complex_expr = + | ComplexImmediate of immediate + | ComplexUnit + | ComplexBinOper of bin_oper * immediate * immediate + | ComplexUnarOper of unar_oper * immediate + | ComplexTuple of immediate * immediate * immediate list + | ComplexField of immediate * int + | ComplexList of immediate list + | ComplexOption of immediate option + | ComplexApp of immediate * immediate * immediate list + | ComplexLambda of pattern list * anf_expr + | ComplexBranch of immediate * anf_expr * anf_expr +[@@deriving show { with_path = false }] + +and anf_expr = + | AnfLet of is_rec * ident * complex_expr * anf_expr + | AnfExpr of complex_expr +[@@deriving show { with_path = false }] + +type arity = int + +val pp_arity : Format.formatter -> arity -> unit + +type anf_bind = ident * anf_expr [@@deriving show { with_path = false }] +type anf_fun_bind = ident * arity * anf_expr [@@deriving show { with_path = false }] + +type anf_structure = + | AnfEval of anf_expr + | AnfValue of is_rec * anf_fun_bind * anf_fun_bind list +[@@deriving show { with_path = false }] + +type anf_program = anf_structure list [@@deriving show { with_path = false }] + +val anf_program : program -> (anf_program, string) Result.t diff --git a/EML/lib/middleend/anf_pp.ml b/EML/lib/middleend/anf_pp.ml index c3635e1c..e0dfe79e 100644 --- a/EML/lib/middleend/anf_pp.ml +++ b/EML/lib/middleend/anf_pp.ml @@ -1,3 +1,7 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + (* Pretty-printer for ANF expressions *) open Stdlib.Format open Frontend @@ -12,7 +16,7 @@ let rec pp_immediate fmt = function (match c with | ConstInt n -> fprintf fmt "%d" n | ConstBool b -> fprintf fmt "%b" b - | ConstString s -> fprintf fmt "\"%s\"" s + | ConstString s -> fprintf fmt "%S" s | ConstChar ch -> fprintf fmt "'%s'" (Char.escaped ch)) | ImmediateVar x -> fprintf fmt "%s" x @@ -51,14 +55,13 @@ and pp_complex_expr fmt = function (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt " ") pp_immediate) all_args | ComplexLambda (patterns, body) -> - let pp_pattern fmt pat = - match pat with + let rec pp_pattern fmt = function | PatVariable x -> fprintf fmt "%s" x | PatConst c -> (match c with | ConstInt n -> fprintf fmt "%d" n | ConstBool b -> fprintf fmt "%b" b - | ConstString s -> fprintf fmt "\"%s\"" s + | ConstString s -> fprintf fmt "%S" s | ConstChar ch -> fprintf fmt "'%s'" (Char.escaped ch)) | PatTuple (p1, p2, rest) -> let all_pats = p1 :: p2 :: rest in @@ -111,7 +114,6 @@ and pp_anf_expr fmt = function fprintf fmt "let %s%s = %a in@ %a" rec_flag name pp_complex_expr v pp_anf_expr body | AnfExpr e -> pp_complex_expr fmt e -and pp_anf_bind fmt (name, expr) = fprintf fmt "%s = %a" name pp_anf_expr expr and pp_anf_fun_bind fmt (name, _arity, expr) = fprintf fmt "%s = %a" name pp_anf_expr expr and pp_anf_structure fmt = function diff --git a/EML/lib/middleend/anf_pp.mli b/EML/lib/middleend/anf_pp.mli new file mode 100644 index 00000000..c45191a6 --- /dev/null +++ b/EML/lib/middleend/anf_pp.mli @@ -0,0 +1,6 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val pp_anf_program : Format.formatter -> Anf.anf_program -> unit +val anf_to_string : Anf.anf_program -> string diff --git a/EML/lib/middleend/cc.ml b/EML/lib/middleend/cc.ml index 40e9ffe2..0d0489f3 100644 --- a/EML/lib/middleend/cc.ml +++ b/EML/lib/middleend/cc.ml @@ -44,7 +44,7 @@ let rec collect_free_vars = function in VarSet.union free_vars_in_rhs (VarSet.diff (collect_free_vars body) bound_vars) | ExpLambda (pat, pats, exp) -> - let bound_vars = union_map_list (fun p -> vars_in_pattern p) (pat :: pats) in + let bound_vars = union_map_list vars_in_pattern (pat :: pats) in VarSet.diff (collect_free_vars exp) bound_vars | ExpApply (e1, e2) -> VarSet.union (collect_free_vars e1) (collect_free_vars e2) | ExpFunction ((pat, exp), cases) -> @@ -92,13 +92,13 @@ let pp_error ppf = function type 'a t = context -> ('a, error) Result.t -let return x = fun _ -> Ok x -let fail e = fun _ -> Error e +let return (value : 'a) : 'a t = fun _ -> Ok value +let fail (error : error) : 'a t = fun _ -> Error error -let bind m f = +let bind (computation : 'a t) (next : 'a -> 'b t) : 'b t = fun ctx -> - match m ctx with - | Ok a -> f a ctx + match computation ctx with + | Ok a -> next a ctx | Error e -> Error e ;; diff --git a/EML/lib/middleend/cc.mli b/EML/lib/middleend/cc.mli new file mode 100644 index 00000000..2a63b103 --- /dev/null +++ b/EML/lib/middleend/cc.mli @@ -0,0 +1,8 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +type error = LambdaWithoutParameters + +val pp_error : Format.formatter -> error -> unit +val closure_conversion_result : Frontend.Ast.program -> (Frontend.Ast.program, error) Result.t diff --git a/EML/lib/middleend/ll.ml b/EML/lib/middleend/ll.ml index 602b9596..55d0bd04 100644 --- a/EML/lib/middleend/ll.ml +++ b/EML/lib/middleend/ll.ml @@ -37,9 +37,9 @@ let names_in_pattern p = | PatConstruct (_, None) -> [] | PatConstruct (_, Some q) -> collect q | PatType (q, _) -> collect q - | PatTuple (p1, p2, rest) -> List.concat (List.map collect (p1 :: p2 :: rest)) + | PatTuple (p1, p2, rest) -> List.concat_map collect (p1 :: p2 :: rest) | PatUnit -> [] - | PatList ps -> List.concat (List.map collect ps) + | PatList ps -> List.concat_map collect ps | PatOption p_opt -> (match p_opt with | None -> [] @@ -57,10 +57,10 @@ let rename_pattern env p = | Not_found -> s in PatVariable s' - | PatConstruct (id, p_opt) -> PatConstruct (id, Option.map (fun x -> subst x) p_opt) + | PatConstruct (id, p_opt) -> PatConstruct (id, Option.map subst p_opt) | PatType (p, t) -> PatType (subst p, t) | PatList ps -> PatList (List.map subst ps) - | PatOption p_opt -> PatOption (Option.map (fun x -> subst x) p_opt) + | PatOption p_opt -> PatOption (Option.map subst p_opt) | other -> other in subst p @@ -208,7 +208,7 @@ module Make (N : NAMING) = struct let* extra_structures, rest_binds = lift_binds (inner ctx) more in let all_defs = names_in_pattern pat - @ List.concat (List.map (fun (p, _) -> names_in_pattern p) more) + @ List.concat_map (fun (p, _) -> names_in_pattern p) more in let body_ctx = { (inner ctx) with renames = without_bindings ctx.renames all_defs } @@ -249,20 +249,20 @@ module Make (N : NAMING) = struct | [] -> fail RecLetEmptyBinding in return - { structures = + { res_body with + structures = inner_structures @ [ SValue (Rec, first_bind, rest_binds) ] @ res_body.structures - ; expr = res_body.expr } | ExpLambda (pat, pats, body) when ctx.at_toplevel -> let* res = lift_expr (inner ctx) body in - return { structures = res.structures; expr = ExpLambda (pat, pats, res.expr) } + return { res with expr = ExpLambda (pat, pats, res.expr) } | ExpLambda (pat, pats, body) -> let* names = take_names 1 in let name = List.hd names in let args = pat :: pats in - let bound = List.concat (List.map names_in_pattern args) in + let bound = List.concat_map names_in_pattern args in let* res = lift_expr { (inner ctx) with renames = without_bindings ctx.renames bound } body in @@ -331,10 +331,10 @@ module Make (N : NAMING) = struct (fun e1' e2' e3' -> ExpBranch (e1', e2', Some e3'))) | ExpConstruct (id, Some e) -> let* res = lift_expr (inner ctx) e in - return { structures = res.structures; expr = ExpConstruct (id, Some res.expr) } + return { res with expr = ExpConstruct (id, Some res.expr) } | ExpTypeAnnotation (e, typ) -> let* res = lift_expr (inner ctx) e in - return { structures = res.structures; expr = ExpTypeAnnotation (res.expr, typ) } + return { res with expr = ExpTypeAnnotation (res.expr, typ) } | ExpBinOper (op, e1, e2) -> pair (lift_expr (inner ctx) e1) @@ -342,7 +342,7 @@ module Make (N : NAMING) = struct (fun e1' e2' -> ExpBinOper (op, e1', e2')) | ExpUnarOper (op, e) -> let* res = lift_expr (inner ctx) e in - return { structures = res.structures; expr = ExpUnarOper (op, res.expr) } + return { res with expr = ExpUnarOper (op, res.expr) } | ExpTuple (e1, e2, rest) -> let* first = lift_expr (inner ctx) e1 in let* second = lift_expr (inner ctx) e2 in @@ -357,7 +357,7 @@ module Make (N : NAMING) = struct | ExpOption None -> return { structures = []; expr = ExpOption None } | ExpOption (Some e) -> let* res = lift_expr (inner ctx) e in - return { structures = res.structures; expr = ExpOption (Some res.expr) } + return { res with expr = ExpOption (Some res.expr) } and lift_binds (ctx : context) binds : (structure list * (pattern * expr) list) t = fold_binds ctx binds (fun ctx _ e -> lift_expr ctx e) diff --git a/EML/lib/middleend/ll.mli b/EML/lib/middleend/ll.mli new file mode 100644 index 00000000..6baea862 --- /dev/null +++ b/EML/lib/middleend/ll.mli @@ -0,0 +1,10 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +type error = + | RecLetEmptyBinding + | SValueEmptyBinding + +val pp_error : Format.formatter -> error -> unit +val lambda_lifting_result : Frontend.Ast.program -> (Frontend.Ast.structure list, error) Result.t diff --git a/EML/lib/runtime/primitives.mli b/EML/lib/runtime/primitives.mli new file mode 100644 index 00000000..51638839 --- /dev/null +++ b/EML/lib/runtime/primitives.mli @@ -0,0 +1,24 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val predefined_runtime_op_names : string list + +type llvm_arg = + | Ptr + | Int + | I32 + +type llvm_ret = + | RPtr + | RInt + | RVoid + +type runtime_func_sig = + { name : string + ; ret : llvm_ret + ; args : llvm_arg list + } + +val predefined_runtime_funcs : runtime_func_sig list +val runtime_primitive_arities : (string * int) list diff --git a/EML/lib/utils/helpers.ml b/EML/lib/utils/helpers.ml index 38c6ec9e..cf022194 100644 --- a/EML/lib/utils/helpers.ml +++ b/EML/lib/utils/helpers.ml @@ -1,3 +1,7 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + open Base open Frontend open Ast @@ -19,7 +23,7 @@ let rec extract_tuple_pattern_idents acc = function let acc'' = extract_tuple_pattern_idents acc' p2 in List.fold_left rest - ~f:(fun current_acc pat -> extract_tuple_pattern_idents current_acc pat) + ~f:extract_tuple_pattern_idents ~init:acc'' | PatAny -> "_" :: acc | _ -> acc diff --git a/EML/lib/utils/helpers.mli b/EML/lib/utils/helpers.mli new file mode 100644 index 00000000..324697e0 --- /dev/null +++ b/EML/lib/utils/helpers.mli @@ -0,0 +1,8 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val is_simple_pattern : Frontend.Ast.pattern -> bool +val is_tuple_pattern : Frontend.Ast.pattern -> bool +val extract_tuple_pattern_idents : string list -> Frontend.Ast.pattern -> string list +val pattern_to_ident : Frontend.Ast.pattern -> string option diff --git a/EML/lib/utils/monads.ml b/EML/lib/utils/monads.ml index 2b27f737..1cc07b40 100644 --- a/EML/lib/utils/monads.ml +++ b/EML/lib/utils/monads.ml @@ -1,3 +1,7 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + open Base module ANFMonad = struct diff --git a/EML/lib/utils/monads.mli b/EML/lib/utils/monads.mli index e0997779..b385e75d 100644 --- a/EML/lib/utils/monads.mli +++ b/EML/lib/utils/monads.mli @@ -1,3 +1,7 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + open Base module ANFMonad : sig diff --git a/EML/lib/utils/pretty_printer.mli b/EML/lib/utils/pretty_printer.mli new file mode 100644 index 00000000..cfc9036e --- /dev/null +++ b/EML/lib/utils/pretty_printer.mli @@ -0,0 +1,16 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Frontend.Ast + +val string_of_bin_op : bin_oper -> string +val string_of_unary_op : unar_oper -> string +val pp_bin_op : Format.formatter -> bin_oper -> unit +val pp_unary_op : Format.formatter -> unar_oper -> unit +val pp_const : Format.formatter -> const -> unit +val pp_pattern : Format.formatter -> pattern -> unit +val pp_expr : Format.formatter -> expr -> unit +val pp_structure_item : Format.formatter -> structure -> unit +val pp_structure : Format.formatter -> structure list -> unit +val pp_program : Format.formatter -> structure list -> unit diff --git a/EML/tests/anf_tests.mli b/EML/tests/anf_tests.mli new file mode 100644 index 00000000..2101d1ea --- /dev/null +++ b/EML/tests/anf_tests.mli @@ -0,0 +1,6 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val parse_and_anf : string -> unit +val parse_and_anf_pp : string -> unit diff --git a/EML/tests/cc_tests.mli b/EML/tests/cc_tests.mli new file mode 100644 index 00000000..4378f996 --- /dev/null +++ b/EML/tests/cc_tests.mli @@ -0,0 +1,5 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val run : string -> unit diff --git a/EML/tests/inferencer_tests.mli b/EML/tests/inferencer_tests.mli new file mode 100644 index 00000000..8ed1aec6 --- /dev/null +++ b/EML/tests/inferencer_tests.mli @@ -0,0 +1,6 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val pretty_printer_parse_and_infer : string -> unit +val pretty_printer_infer_simple_expression : EML_lib.Frontend.Ast.expr -> unit diff --git a/EML/tests/ll_tests.mli b/EML/tests/ll_tests.mli new file mode 100644 index 00000000..4378f996 --- /dev/null +++ b/EML/tests/ll_tests.mli @@ -0,0 +1,5 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val run : string -> unit diff --git a/EML/tests/llvm_tests.mli b/EML/tests/llvm_tests.mli new file mode 100644 index 00000000..803dcf06 --- /dev/null +++ b/EML/tests/llvm_tests.mli @@ -0,0 +1,6 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val compile_llvm : string -> string +val run_llvm : string -> unit diff --git a/EML/tests/parser_tests.mli b/EML/tests/parser_tests.mli new file mode 100644 index 00000000..910bf993 --- /dev/null +++ b/EML/tests/parser_tests.mli @@ -0,0 +1,5 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val parse_test : string -> unit diff --git a/EML/tests/riscv_tests.mli b/EML/tests/riscv_tests.mli new file mode 100644 index 00000000..f5abe362 --- /dev/null +++ b/EML/tests/riscv_tests.mli @@ -0,0 +1,6 @@ +(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +val compile : string -> string +val run : string -> unit From 2283c640679422c50bb7c397129a1ca1e7e638ff Mon Sep 17 00:00:00 2001 From: Danil Usoltsev Date: Mon, 9 Mar 2026 19:29:39 +0300 Subject: [PATCH 58/74] fmt project --- EML/bin/EML.ml | 12 ++--- EML/lib/backend/llvm_ir/generator.ml | 4 +- EML/lib/backend/llvm_ir/generator_state.mli | 10 +++- EML/lib/backend/ricsv/analysis.ml | 11 ++-- EML/lib/backend/ricsv/architecture.ml | 2 +- EML/lib/backend/ricsv/auxillary.ml | 18 +++---- EML/lib/backend/ricsv/generator.ml | 60 ++++++++++----------- EML/lib/frontend/inferencer.ml | 25 +++------ EML/lib/middleend/anf.ml | 30 +++++++---- EML/lib/middleend/cc.mli | 5 +- EML/lib/middleend/ll.ml | 3 +- EML/lib/middleend/ll.mli | 5 +- EML/lib/utils/helpers.ml | 5 +- EML/lib/utils/pretty_printer.ml | 19 +++++-- EML/tests/cc_tests.ml | 27 ++++++---- EML/tests/inferencer_tests.ml | 16 +++--- EML/tests/ll_tests.ml | 21 +++++--- EML/tests/parser_tests.ml | 3 +- 18 files changed, 147 insertions(+), 129 deletions(-) diff --git a/EML/bin/EML.ml b/EML/bin/EML.ml index 4dadc017..7fcf4417 100644 --- a/EML/bin/EML.ml +++ b/EML/bin/EML.ml @@ -139,22 +139,18 @@ let parse_args () : (opts, unit) Result.t = in let rec loop current_opts = function | [] -> Ok current_opts - | "-gc" :: rest -> - loop { current_opts with enable_gc = true } rest - | "-infer" :: rest -> - loop { current_opts with infer_only = true } rest + | "-gc" :: rest -> loop { current_opts with enable_gc = true } rest + | "-infer" :: rest -> loop { current_opts with infer_only = true } rest | "-fromfile" :: path :: rest -> loop { current_opts with input_file = Some path } rest - | "-o" :: path :: rest -> - loop { current_opts with output_file = Some path } rest + | "-o" :: path :: rest -> loop { current_opts with output_file = Some path } rest | "-backend" :: backend_name :: rest -> (match parse_backend backend_name with | Ok backend -> loop { current_opts with backend } rest | Error () -> Error ()) | argument :: _ when String.length argument > 0 && Char.equal argument.[0] '-' -> Error () - | _positional_argument :: _ -> - Error () + | _positional_argument :: _ -> Error () in let argv = Array.to_list Sys.argv in match argv with diff --git a/EML/lib/backend/llvm_ir/generator.ml b/EML/lib/backend/llvm_ir/generator.ml index 7c09c598..bbe18961 100644 --- a/EML/lib/backend/llvm_ir/generator.ml +++ b/EML/lib/backend/llvm_ir/generator.ml @@ -687,7 +687,7 @@ let gen_function in set_value_name name param_value; if enable_gc - then + then ( let* gc_allocas = get_gc_allocas in let* allocas_map = match gc_allocas with @@ -696,7 +696,7 @@ let gen_function in let* alloca_ptr = with_optional_value (alloca builder ptr_t name) in store builder param_value alloca_ptr; - set_gc_allocas (Some (Base.Map.set allocas_map ~key:name ~data:alloca_ptr)) + set_gc_allocas (Some (Base.Map.set allocas_map ~key:name ~data:alloca_ptr))) else set_value name param_value | ImmediateConst _ -> return ()) in diff --git a/EML/lib/backend/llvm_ir/generator_state.mli b/EML/lib/backend/llvm_ir/generator_state.mli index 3b348d8f..25e7f403 100644 --- a/EML/lib/backend/llvm_ir/generator_state.mli +++ b/EML/lib/backend/llvm_ir/generator_state.mli @@ -42,8 +42,14 @@ module Make (N : NAMING) : sig val set_value : string -> llvalue -> unit t val set_type : string -> lltype -> unit t val remove_value : string -> unit t - val get_gc_allocas : (string, llvalue, Base.String.comparator_witness) Base.Map.t option t - val set_gc_allocas : (string, llvalue, Base.String.comparator_witness) Base.Map.t option -> unit t + + val get_gc_allocas + : (string, llvalue, Base.String.comparator_witness) Base.Map.t option t + + val set_gc_allocas + : (string, llvalue, Base.String.comparator_witness) Base.Map.t option + -> unit t + val get_gc_entry_block : llbasicblock option t val set_gc_entry_block : llbasicblock option -> unit t val fresh_blocks : (string * string * string) t diff --git a/EML/lib/backend/ricsv/analysis.ml b/EML/lib/backend/ricsv/analysis.ml index caa169f5..920a0a84 100644 --- a/EML/lib/backend/ricsv/analysis.ml +++ b/EML/lib/backend/ricsv/analysis.ml @@ -65,7 +65,8 @@ let rec max_stack_args_cexpr = function | ComplexBinOper (_, left, right) -> max (max_stack_args_imm left) (max_stack_args_imm right) | ComplexUnarOper (_, imm) -> max_stack_args_imm imm - | ComplexTuple (first, second, rest) -> max_by max_stack_args_imm (first :: second :: rest) + | ComplexTuple (first, second, rest) -> + max_by max_stack_args_imm (first :: second :: rest) | ComplexField (imm, _) -> max_stack_args_imm imm | ComplexList imm_list -> max_by max_stack_args_imm imm_list | ComplexApp (_first, second, rest) -> @@ -105,8 +106,7 @@ let rec max_create_tuple_array_cexpr = function let bytes_per_cons_cell = 2 * word_size in let bytes_from_elements = sum_by max_create_tuple_array_imm imm_list in (bytes_per_cons_cell * List.length imm_list) + bytes_from_elements - | ComplexApp (_f, second, rest) -> - max_by max_create_tuple_array_imm (second :: rest) + | ComplexApp (_f, second, rest) -> max_by max_create_tuple_array_imm (second :: rest) | ComplexOption None -> 0 | ComplexOption (Some imm) -> max_create_tuple_array_imm imm | ComplexLambda (_, body) -> max_create_tuple_array_anf body @@ -186,10 +186,7 @@ let analyze (program : anf_program) = Base.Map.find generated_name_counts func_name |> Option.value ~default:0 in let updated_generated_name_counts = - Base.Map.set - generated_name_counts - ~key:func_name - ~data:(duplicate_index + 1) + Base.Map.set generated_name_counts ~key:func_name ~data:(duplicate_index + 1) in let asm_name = if duplicate_index = 0 diff --git a/EML/lib/backend/ricsv/architecture.ml b/EML/lib/backend/ricsv/architecture.ml index b13ed18d..e4638586 100644 --- a/EML/lib/backend/ricsv/architecture.ml +++ b/EML/lib/backend/ricsv/architecture.ml @@ -77,7 +77,7 @@ module Riscv_backend = struct | Ret -> fprintf ppf "ret" ;; - let tag_int n = 2 * n + 1 + let tag_int n = (2 * n) + 1 let fp = S 0 let sp = SP let ra = RA diff --git a/EML/lib/backend/ricsv/auxillary.ml b/EML/lib/backend/ricsv/auxillary.ml index 54ef2551..7de3de17 100644 --- a/EML/lib/backend/ricsv/auxillary.ml +++ b/EML/lib/backend/ricsv/auxillary.ml @@ -65,10 +65,13 @@ let indices_of_args_to_spill generation_state immediate_arguments = | ImmediateConst _ -> false | ImmediateVar function_name -> Base.Map.mem generation_state.arity_map function_name in - Base.List.foldi immediate_arguments ~init:[] ~f:(fun argument_index spilled_indices immediate_argument -> - if argument_overwrites_result_register immediate_argument - then argument_index :: spilled_indices - else spilled_indices) + Base.List.foldi + immediate_arguments + ~init:[] + ~f:(fun argument_index spilled_indices immediate_argument -> + if argument_overwrites_result_register immediate_argument + then argument_index :: spilled_indices + else spilled_indices) |> List.rev ;; @@ -90,12 +93,7 @@ type call_style = ; arguments : immediate list } -let classify_call - ~argument_count - ~callee_arity_opt - ~function_name - ~arguments - : call_style +let classify_call ~argument_count ~callee_arity_opt ~function_name ~arguments : call_style = match callee_arity_opt with | Some 0 when argument_count = 1 -> Nullary function_name diff --git a/EML/lib/backend/ricsv/generator.ml b/EML/lib/backend/ricsv/generator.ml index 534b96a5..cbc0644f 100644 --- a/EML/lib/backend/ricsv/generator.ml +++ b/EML/lib/backend/ricsv/generator.ml @@ -12,7 +12,9 @@ open Generator_state open Auxillary let alloc_frame_slot = - let* () = modify (fun state -> { state with frame_offset = state.frame_offset + word_size }) in + let* () = + modify (fun state -> { state with frame_offset = state.frame_offset + word_size }) + in let* state = get in return (fp, -state.frame_offset) ;; @@ -72,7 +74,8 @@ let evacuate_reg destination_register = in let rewrite_location_in_environment environment from_register to_location = Base.Map.map environment ~f:(function - | Loc_reg mapped_register when equal_reg mapped_register from_register -> to_location + | Loc_reg mapped_register when equal_reg mapped_register from_register -> + to_location | location -> location) in let* env = get_env in @@ -162,7 +165,11 @@ let spill_dangerous_args state arguments = else return spilled_locations_by_index) ;; -let load_immediates_into_registers spilled_locations argument_registers immediate_arguments = +let load_immediates_into_registers + spilled_locations + argument_registers + immediate_arguments + = let immediate_count_to_load = min (List.length immediate_arguments) (List.length argument_registers) in @@ -170,11 +177,11 @@ let load_immediates_into_registers spilled_locations argument_registers immediat (Base.List.take immediate_arguments immediate_count_to_load) ~init:(return ()) ~f:(fun argument_index acc immediate_argument -> - let* () = acc in - let destination_register = List.nth argument_registers argument_index in - match Base.Map.find spilled_locations argument_index with - | Some spilled_location -> load_into_reg destination_register spilled_location - | None -> gen_imm destination_register immediate_argument) + let* () = acc in + let destination_register = List.nth argument_registers argument_index in + match Base.Map.find spilled_locations argument_index with + | Some spilled_location -> load_into_reg destination_register spilled_location + | None -> gen_imm destination_register immediate_argument) ;; let emit_arguments_to_stack spilled_arguments arguments = @@ -209,18 +216,13 @@ let gen_call_with_regs function_symbol = let* () = - load_immediates_into_registers - spilled_arguments - argument_registers - call_arguments + load_immediates_into_registers spilled_arguments argument_registers call_arguments in let stack_arguments = Base.List.drop call_arguments (List.length argument_registers) in let* reserved_stack_bytes = push_stack_args stack_arguments in let* () = append (call function_symbol) in let* () = copy_result_to destination_register in - if reserved_stack_bytes > 0 - then append (addi sp sp reserved_stack_bytes) - else return () + if reserved_stack_bytes > 0 then append (addi sp sp reserved_stack_bytes) else return () ;; (* let foo = ... in @@ -276,7 +278,12 @@ let rec gen_invocation destination_register function_name call_arguments = | Nullary resolved_function_name -> gen_nullary destination_register resolved_function_name | CurryChain { function_name; arity; initial_arguments; remaining_arguments } -> - gen_curried_call destination_register function_name arity initial_arguments remaining_arguments + gen_curried_call + destination_register + function_name + arity + initial_arguments + remaining_arguments | Direct { function_name; arguments } -> gen_direct_call destination_register function_name arguments spilled_arguments | ViaApplyNargs { function_name; argument_count; arguments } -> @@ -299,18 +306,12 @@ and gen_curried_call gen_cexpr destination_register (ComplexApp - ( ImmediateVar function_name - , List.hd initial_arguments - , List.tl initial_arguments )) + (ImmediateVar function_name, List.hd initial_arguments, List.tl initial_arguments)) in let* partial_function_location = store_reg_into_frame destination_register in let* () = - modify_env - (fun environment -> - Base.Map.set - environment - ~key:part_name - ~data:partial_function_location) + modify_env (fun environment -> + Base.Map.set environment ~key:part_name ~data:partial_function_location) in (* Apply each rest_arg one at a time (eml_applyN expects one application per call) *) let rec apply_remaining_arguments = function @@ -319,15 +320,12 @@ and gen_curried_call gen_cexpr destination_register (ComplexApp (ImmediateVar part_name, argument, [])) | argument :: remaining_arguments_tail -> let* () = - gen_cexpr - destination_register - (ComplexApp (ImmediateVar part_name, argument, [])) + gen_cexpr destination_register (ComplexApp (ImmediateVar part_name, argument, [])) in let* updated_partial_location = store_reg_into_frame destination_register in let* () = - modify_env - (fun environment -> - Base.Map.set environment ~key:part_name ~data:updated_partial_location) + modify_env (fun environment -> + Base.Map.set environment ~key:part_name ~data:updated_partial_location) in apply_remaining_arguments remaining_arguments_tail in diff --git a/EML/lib/frontend/inferencer.ml b/EML/lib/frontend/inferencer.ml index cc8b4d00..7f7cdbdf 100644 --- a/EML/lib/frontend/inferencer.ml +++ b/EML/lib/frontend/inferencer.ml @@ -95,26 +95,22 @@ end = struct end let fresh : int t = - fun st -> - { st with counter = st.counter + 1 }, Result.return st.counter + fun st -> { st with counter = st.counter + 1 }, Result.return st.counter ;; let current_level : int t = fun st -> st, Result.return st.current_level let enter_level : unit t = - fun st -> - { st with current_level = st.current_level + 1 }, Result.return () + fun st -> { st with current_level = st.current_level + 1 }, Result.return () ;; let leave_level : unit t = - fun st -> - { st with current_level = max 0 (st.current_level - 1) }, Result.return () + fun st -> { st with current_level = max 0 (st.current_level - 1) }, Result.return () ;; let set_var_level var lvl : unit t = fun st -> - ( { st with var_levels = Map.set st.var_levels ~key:var ~data:lvl } - , Result.return () ) + { st with var_levels = Map.set st.var_levels ~key:var ~data:lvl }, Result.return () ;; let get_var_level var : int option t = @@ -122,12 +118,7 @@ end = struct ;; let run monad = - snd - (monad - { counter = 0 - ; current_level = 0 - ; var_levels = Map.empty (module String) - }) + snd (monad { counter = 0; current_level = 0; var_levels = Map.empty (module String) }) ;; end @@ -175,7 +166,7 @@ end = struct let mapping key value = if Type.occurs_in key value then fail (OccursCheck (key, value)) - else ( + else let* key_lvl = get_var_level key in let vars = Type.free_vars value |> VarSet.elements in let* () = @@ -189,7 +180,7 @@ end = struct | Some v_lvl when v_lvl > key_lvl -> set_var_level v key_lvl | _ -> return ()) in - return (key, value)) + return (key, value) ;; let singleton key value = @@ -446,7 +437,7 @@ let rec infer_pattern env = function | "[]", None -> let* fresh = fresh_var in return (Substitution.empty, TyList fresh, env) - | "::", Some ((PatTuple (_, _, []) as pair_pat)) -> + | "::", Some (PatTuple (_, _, []) as pair_pat) -> let* sub_pair, ty_pair, env' = infer_pattern env pair_pat in let* fresh_hd = fresh_var in let* fresh_tl = fresh_var in diff --git a/EML/lib/middleend/anf.ml b/EML/lib/middleend/anf.ml index 2366c65c..0868672a 100644 --- a/EML/lib/middleend/anf.ml +++ b/EML/lib/middleend/anf.ml @@ -87,7 +87,8 @@ let match_list_cases cases = | _ -> false in let get_cons_pats = function - | PatConstruct ("::", Some (PatTuple (head_pat, tail_pat, []))) -> Some (head_pat, tail_pat) + | PatConstruct ("::", Some (PatTuple (head_pat, tail_pat, []))) -> + Some (head_pat, tail_pat) | _ -> None in match cases with @@ -111,7 +112,8 @@ let build_tuple_lets tuple_var indices_pats body = let* body_with_rest = aux tuple_var rest body in let* inner = match pat with - | PatTuple (p1, p2, rest_pats) -> aux bind_id (tuple_indices (p1 :: p2 :: rest_pats)) body_with_rest + | PatTuple (p1, p2, rest_pats) -> + aux bind_id (tuple_indices (p1 :: p2 :: rest_pats)) body_with_rest | _ -> return body_with_rest in return (AnfLet (NonRec, bind_id, ComplexField (ImmediateVar tuple_var, i), inner)) @@ -127,7 +129,8 @@ let build_tuple_top_level_bindings tuple_var indices_pats = let cur = bind_id, AnfExpr (ComplexField (ImmediateVar tuple_var, i)) in let* inner = match pat with - | PatTuple (p1, p2, rest_pats) -> aux bind_id (tuple_indices (p1 :: p2 :: rest_pats)) + | PatTuple (p1, p2, rest_pats) -> + aux bind_id (tuple_indices (p1 :: p2 :: rest_pats)) | _ -> return [] in let* rest_bindings = aux tuple_var rest in @@ -186,8 +189,7 @@ let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = | ExpTuple (exp1, exp2, exp_list) -> let all_exprs = exp1 :: exp2 :: exp_list in anf_list all_exprs (function - | imm1 :: imm2 :: rest -> - bind_complex_expr (ComplexTuple (imm1, imm2, rest)) k + | imm1 :: imm2 :: rest -> bind_complex_expr (ComplexTuple (imm1, imm2, rest)) k | _ -> fail "Invalid tuple") | ExpLambda (pat, pat_list, body) -> let params = pat :: pat_list in @@ -206,7 +208,9 @@ let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = | PatTuple (p1, p2, rest_pats) :: remaining_params -> let* body_with_rest = wrap_params current_body remaining_params in let* var = fresh in - let* body_with_tuple_destructured = build_tuple_lets var (tuple_indices (p1 :: p2 :: rest_pats)) body_with_rest in + let* body_with_tuple_destructured = + build_tuple_lets var (tuple_indices (p1 :: p2 :: rest_pats)) body_with_rest + in return (AnfExpr (ComplexLambda ([ PatVariable var ], body_with_tuple_destructured))) | _ -> fail "Only variable, constant and tuple patterns in lambda" @@ -237,7 +241,9 @@ let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = cons_aexp_base in let* branch_result = - bind_complex_expr (ComplexBranch (ImmediateVar cond_var, nil_aexp, cons_aexp)) k + bind_complex_expr + (ComplexBranch (ImmediateVar cond_var, nil_aexp, cons_aexp)) + k in return (AnfLet @@ -255,10 +261,12 @@ let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = | ExpConstruct ("[]", None) -> bind_complex_expr (ComplexList []) k | ExpConstruct ("::", Some (ExpTuple (head_e, tail_e, []))) -> anf head_e (fun head_imm -> - anf tail_e (fun tail_imm -> bind_complex_expr (ComplexTuple (head_imm, tail_imm, [])) k)) + anf tail_e (fun tail_imm -> + bind_complex_expr (ComplexTuple (head_imm, tail_imm, [])) k)) | ExpConstruct _ -> fail "Constructors not implemented" -and anf_to_immediate_expr expr = anf expr (fun imm -> return (AnfExpr (ComplexImmediate imm))) +and anf_to_immediate_expr expr = + anf expr (fun imm -> return (AnfExpr (ComplexImmediate imm))) and anf_list (exprs : expr list) (k : immediate list -> anf_expr t) : anf_expr t = match exprs with @@ -283,7 +291,9 @@ let anf_structure_item (item : structure) : anf_structure list t = match pat with | PatTuple (p1, p2, rest) -> let* tuple_var = fresh in - let* component_bindings = build_tuple_top_level_bindings tuple_var (tuple_indices (p1 :: p2 :: rest)) in + let* component_bindings = + build_tuple_top_level_bindings tuple_var (tuple_indices (p1 :: p2 :: rest)) + in let one_value (id, e) = AnfValue (NonRec, to_fun_bind (id, e), []) in let new_items = AnfValue (rec_flag, to_fun_bind (tuple_var, anf_expr_body), []) diff --git a/EML/lib/middleend/cc.mli b/EML/lib/middleend/cc.mli index 2a63b103..0d1fec45 100644 --- a/EML/lib/middleend/cc.mli +++ b/EML/lib/middleend/cc.mli @@ -5,4 +5,7 @@ type error = LambdaWithoutParameters val pp_error : Format.formatter -> error -> unit -val closure_conversion_result : Frontend.Ast.program -> (Frontend.Ast.program, error) Result.t + +val closure_conversion_result + : Frontend.Ast.program + -> (Frontend.Ast.program, error) Result.t diff --git a/EML/lib/middleend/ll.ml b/EML/lib/middleend/ll.ml index 55d0bd04..f76736ba 100644 --- a/EML/lib/middleend/ll.ml +++ b/EML/lib/middleend/ll.ml @@ -207,8 +207,7 @@ module Make (N : NAMING) = struct let* res_rhs = lift_expr (inner ctx) exp in let* extra_structures, rest_binds = lift_binds (inner ctx) more in let all_defs = - names_in_pattern pat - @ List.concat_map (fun (p, _) -> names_in_pattern p) more + names_in_pattern pat @ List.concat_map (fun (p, _) -> names_in_pattern p) more in let body_ctx = { (inner ctx) with renames = without_bindings ctx.renames all_defs } diff --git a/EML/lib/middleend/ll.mli b/EML/lib/middleend/ll.mli index 6baea862..ba81839f 100644 --- a/EML/lib/middleend/ll.mli +++ b/EML/lib/middleend/ll.mli @@ -7,4 +7,7 @@ type error = | SValueEmptyBinding val pp_error : Format.formatter -> error -> unit -val lambda_lifting_result : Frontend.Ast.program -> (Frontend.Ast.structure list, error) Result.t + +val lambda_lifting_result + : Frontend.Ast.program + -> (Frontend.Ast.structure list, error) Result.t diff --git a/EML/lib/utils/helpers.ml b/EML/lib/utils/helpers.ml index cf022194..a5ea36fb 100644 --- a/EML/lib/utils/helpers.ml +++ b/EML/lib/utils/helpers.ml @@ -21,10 +21,7 @@ let rec extract_tuple_pattern_idents acc = function | PatTuple (p1, p2, rest) -> let acc' = extract_tuple_pattern_idents acc p1 in let acc'' = extract_tuple_pattern_idents acc' p2 in - List.fold_left - rest - ~f:extract_tuple_pattern_idents - ~init:acc'' + List.fold_left rest ~f:extract_tuple_pattern_idents ~init:acc'' | PatAny -> "_" :: acc | _ -> acc ;; diff --git a/EML/lib/utils/pretty_printer.ml b/EML/lib/utils/pretty_printer.ml index 21a3ab6a..1a2a7f83 100644 --- a/EML/lib/utils/pretty_printer.ml +++ b/EML/lib/utils/pretty_printer.ml @@ -66,7 +66,8 @@ and pp_expr ppf = function | ExpBranch (c, t, None) -> Format.fprintf ppf "if %a then %a" pp_expr c pp_expr t | ExpBranch (c, t, Some e) -> Format.fprintf ppf "if %a then %a else %a" pp_expr c pp_expr t pp_expr e - | ExpBinOper (op, l, r) -> Format.fprintf ppf "(%a %a %a)" pp_expr l pp_bin_op op pp_expr r + | ExpBinOper (op, l, r) -> + Format.fprintf ppf "(%a %a %a)" pp_expr l pp_bin_op op pp_expr r | ExpUnarOper (op, e) -> Format.fprintf ppf "(%a %a)" pp_unary_op op pp_expr e | ExpTuple (e1, e2, rest) -> Format.fprintf @@ -90,7 +91,11 @@ and pp_expr ppf = function body | ExpTypeAnnotation (e, t) -> Format.fprintf ppf "(%a : %a)" pp_expr e pp_ty t | ExpLet (is_rec, bind, more, body) -> - let kw = match is_rec with Rec -> "let rec" | NonRec -> "let" in + let kw = + match is_rec with + | Rec -> "let rec" + | NonRec -> "let" + in Format.fprintf ppf "%s %a in %a" kw pp_binds (bind, more) pp_expr body | ExpApply _ as e -> let f, args = flatten_apply e in @@ -125,7 +130,8 @@ and pp_expr ppf = function | ExpConstruct (id, Some e) -> Format.fprintf ppf "%s (%a)" id pp_expr e and pp_atomic_expr ppf = function - | (ExpIdent _ | ExpConst _ | ExpOption None | ExpConstruct (_, None)) as e -> pp_expr ppf e + | (ExpIdent _ | ExpConst _ | ExpOption None | ExpConstruct (_, None)) as e -> + pp_expr ppf e | e -> Format.fprintf ppf "(%a)" pp_expr e and flatten_apply e = @@ -137,7 +143,6 @@ and flatten_apply e = go e [] and pp_case ppf (p, e) = Format.fprintf ppf "| %a -> %a" pp_pattern p pp_expr e - and pp_bind ppf (p, e) = Format.fprintf ppf "%a = %a" pp_pattern p pp_expr e and pp_binds ppf (first, more) = @@ -148,7 +153,11 @@ and pp_binds ppf (first, more) = let pp_structure_item ppf = function | SEval e -> Format.fprintf ppf "%a;;" pp_expr e | SValue (is_rec, bind, more) -> - let kw = match is_rec with Rec -> "let rec" | NonRec -> "let" in + let kw = + match is_rec with + | Rec -> "let rec" + | NonRec -> "let" + in Format.fprintf ppf "%s %a;;" kw pp_binds (bind, more) ;; diff --git a/EML/tests/cc_tests.ml b/EML/tests/cc_tests.ml index 6e67947f..b38efba8 100644 --- a/EML/tests/cc_tests.ml +++ b/EML/tests/cc_tests.ml @@ -37,7 +37,8 @@ let%expect_test "top_level_nonrec_and_group" = id 3 + addk 4 ;; |}; - [%expect {| + [%expect + {| let f = fun x -> let id = fun y -> y and addk = fun x z -> (x + z) in (id 3 + addk x 4);; |}] ;; @@ -51,7 +52,8 @@ let%expect_test "recursive_and_with_external_capture" = go 20 ;; |}; - [%expect {| + [%expect + {| let solve = fun bound -> let rec go = fun bound n -> if (n <= bound) then true else stop bound ((n - 1)) and stop = fun bound n -> if (n > bound) then false else go bound ((n - 1)) in go bound 20;; |}] ;; @@ -65,7 +67,8 @@ let%expect_test "recursive_local_function_value_capture" = plus 1 + call 2 ;; |}; - [%expect {| + [%expect + {| let run = fun x -> let rec plus = fun x y -> (x + y) and call = fun x c -> (c + plus x 7) in (plus x 1 + call x 2);; |}] ;; @@ -84,7 +87,8 @@ let%expect_test "nested_levels_of_captures" = middle 3 ;; |}; - [%expect {| let outer = fun x -> let middle = fun x y -> let inner = fun x y z -> let deepest = fun x y z w -> (((x + y) + z) + w) in deepest x y z 1 in inner x y 2 in middle x 3;; |}] + [%expect + {| let outer = fun x -> let middle = fun x y -> let inner = fun x y z -> let deepest = fun x y z w -> (((x + y) + z) + w) in deepest x y z 1 in inner x y 2 in middle x 3;; |}] ;; let%expect_test "if_with_lambda_in_both_branches" = @@ -93,7 +97,8 @@ let%expect_test "if_with_lambda_in_both_branches" = let choose flag base alt = if flag then (fun v -> base + v) else (fun v -> alt + v) |}; - [%expect {| let choose = fun flag base alt -> if flag then (fun base v -> (base + v)) base else (fun alt v -> (alt + v)) alt;; |}] + [%expect + {| let choose = fun flag base alt -> if flag then (fun base v -> (base + v)) base else (fun alt v -> (alt + v)) alt;; |}] ;; let%expect_test "match_with_option_lambda_capture" = @@ -105,7 +110,8 @@ let%expect_test "match_with_option_lambda_capture" = | None -> fun z -> z ;; |}; - [%expect {| let mapper = fun x -> match x with | Some (y) -> (fun y z -> (y + z)) y | None -> fun z -> z;; |}] + [%expect + {| let mapper = fun x -> match x with | Some (y) -> (fun y z -> (y + z)) y | None -> fun z -> z;; |}] ;; let%expect_test "sequence_and_tuple_pattern_capture" = @@ -117,7 +123,8 @@ let%expect_test "sequence_and_tuple_pattern_capture" = use_pair (3, 4) ;; |}; - [%expect {| let consume = fun a b -> let () = print_int a in let use_pair = fun a b (x, y) -> (((a + b) + x) + y) in use_pair a b ((3, 4));; |}] + [%expect + {| let consume = fun a b -> let () = print_int a in let use_pair = fun a b (x, y) -> (((a + b) + x) + y) in use_pair a b ((3, 4));; |}] ;; let%expect_test "list_and_option_expressions" = @@ -128,7 +135,8 @@ let%expect_test "list_and_option_expressions" = [f 1; f 2] ;; |}; - [%expect {| let build = fun seed -> let f = fun seed x -> Some ((seed + x)) in f seed 1::f seed 2::[];; |}] + [%expect + {| let build = fun seed -> let f = fun seed x -> Some ((seed + x)) in f seed 1::f seed 2::[];; |}] ;; let%expect_test "type_annotation_inside_capture" = @@ -139,5 +147,6 @@ let%expect_test "type_annotation_inside_capture" = g 5 ;; |}; - [%expect {| let annotated = fun base -> let g = fun base x -> ((base + x) : int) in g base 5;; |}] + [%expect + {| let annotated = fun base -> let g = fun base x -> ((base + x) : int) in g base 5;; |}] ;; diff --git a/EML/tests/inferencer_tests.ml b/EML/tests/inferencer_tests.ml index 16d917e1..17637cf0 100644 --- a/EML/tests/inferencer_tests.ml +++ b/EML/tests/inferencer_tests.ml @@ -27,7 +27,6 @@ let pretty_printer_infer_simple_expression expr = | Error e -> Format.printf "Infer error. %a\n" pp_error e ;; - let%expect_test "test_factorial" = pretty_printer_parse_and_infer {| let rec fac n = @@ -104,7 +103,6 @@ let main = id 10 |}; val main: int|}] ;; - let%expect_test "test_rec_rhs_error" = pretty_printer_parse_and_infer {| let rec x = 1 |}; [%expect @@ -183,7 +181,8 @@ let%expect_test "test_program_2" = pretty_printer_parse_and_infer "let square = fun x -> x * x\n\ \ let result = square 10"; - [%expect {| + [%expect + {| val result: int val square: int -> int|}] ;; @@ -204,7 +203,6 @@ let%expect_test "test_option_type_error" = [%expect {|Infer error. Failed to unify types: bool and int.|}] ;; - let%expect_test "test_polymorphic_identity" = pretty_printer_parse_and_infer {| let id x = x @@ -300,8 +298,7 @@ let main = compose inc double 10 |}; ;; let%expect_test "test_occurs_check_error" = - pretty_printer_parse_and_infer - {| fun x -> x x |}; + pretty_printer_parse_and_infer {| fun x -> x x |}; [%expect {|Infer error. Occurs check failed. Type variable 't0' occurs inside t0 -> t1.|}] ;; @@ -367,10 +364,10 @@ let main = run () |}; val run: unit -> int|}] ;; - let%expect_test "test_rec_lhs_not_variable_error" = pretty_printer_parse_and_infer {| let rec Some x = Some 1 |}; - [%expect {|Infer error. Left-hand side error: Only variables are allowed on the left-hand side of let rec.|}] + [%expect + {|Infer error. Left-hand side error: Only variables are allowed on the left-hand side of let rec.|}] ;; let%expect_test "test_expr_let_rec_in" = @@ -393,7 +390,6 @@ let%expect_test "test_expr_let_rec_and_in" = [%expect {|int|}] ;; - let%expect_test "test_string_const_and_const_pattern" = pretty_printer_parse_and_infer {| let is_hi s = @@ -443,4 +439,4 @@ let%expect_test "test_ast_pattern_list_lambda" = let%expect_test "test_ast_pattern_unit_lambda" = pretty_printer_infer_simple_expression (ExpLambda (PatUnit, [], ExpConst (ConstInt 1))); [%expect {|unit -> int|}] -;; \ No newline at end of file +;; diff --git a/EML/tests/ll_tests.ml b/EML/tests/ll_tests.ml index 9b3d57b0..a60f76d8 100644 --- a/EML/tests/ll_tests.ml +++ b/EML/tests/ll_tests.ml @@ -26,7 +26,8 @@ let%expect_test "nonrecursive_multiple_lets" = bar x 2 + baz ;; |}; - [%expect {| + [%expect + {| let lifted_0 = fun x y -> (x + y);; let foo = fun x -> let bar = lifted_0 and baz = 2 in (bar x 2 + baz);; |}] @@ -41,7 +42,8 @@ let%expect_test "nonrecursive_multiple_functions" = bar 2 + baz x 5 ;; |}; - [%expect {| + [%expect + {| let lifted_0 = fun y -> y;; let lifted_1 = fun x c -> (x + c);; let foo = fun x -> let bar = lifted_0 @@ -60,7 +62,8 @@ let%expect_test "mutual_recursion_in_let_rec_and" = is_small limit 13 ;; |}; - [%expect {| + [%expect + {| let lifted_2 = fun limit n -> if (n <= limit) then true else lifted_1 limit ((n - 1));; let lifted_3 = fun limit n -> if (n > limit) then false else lifted_0 limit ((n - 1));; let rec lifted_0 = lifted_2 @@ -77,7 +80,8 @@ let%expect_test "recursive_local_bindings_use_renamed_functions" = bar x 5 + baz x 6 ;; |}; - [%expect {| + [%expect + {| let lifted_2 = fun x y -> (x + y);; let lifted_3 = fun x c -> (c + lifted_0 x 5);; let rec lifted_0 = lifted_2 @@ -93,7 +97,8 @@ let%expect_test "sequence_with_local_lambda" = (let h x y = x + y in h x 10) ;; |}; - [%expect {| + [%expect + {| let lifted_0 = fun x y -> (x + y);; let g = fun x -> let () = print_int x in let h = lifted_0 in h x 10;; |}] ;; @@ -106,7 +111,8 @@ let%expect_test "tuple_pattern_lambda_lifting" = f a b (1, 2) ;; |}; - [%expect {| + [%expect + {| let lifted_0 = fun a b (x, y) -> (((a + b) + x) + y);; let pair_sum = fun a b -> let f = lifted_0 in f a b ((1, 2));; |}] ;; @@ -120,7 +126,8 @@ let%expect_test "match_with_option_and_inline_lambdas" = | None -> fun z -> z + 1 ;; |}; - [%expect {| + [%expect + {| let lifted_0 = fun y z -> (y - z);; let lifted_1 = fun z -> (z + 1);; let f = fun x -> match x with | Some (y) -> lifted_0 y | None -> lifted_1;; |}] diff --git a/EML/tests/parser_tests.ml b/EML/tests/parser_tests.ml index edee0c37..5af3cc27 100644 --- a/EML/tests/parser_tests.ml +++ b/EML/tests/parser_tests.ml @@ -54,7 +54,6 @@ let main = fac 4 |}; |}] ;; - let%expect_test "factorial" = parse_test "let rec factorial n = if n < 2 then 1 else n * factorial(n - 1);;"; [%expect @@ -231,4 +230,4 @@ let%expect_test "test_unit" = [])) ] |}] -;; \ No newline at end of file +;; From 4e08f7eb64f53055d058cfc0d0fbc0d930f87360 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 10 Mar 2026 01:06:19 +0300 Subject: [PATCH 59/74] fix manytests Signed-off-by: Victoria Ostrovskaya --- EML/tests/many_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/EML/tests/many_tests b/EML/tests/many_tests index dbb09231..0bd48791 120000 --- a/EML/tests/many_tests +++ b/EML/tests/many_tests @@ -1 +1 @@ -/home/danil/comp25/manytests \ No newline at end of file +../../manytests \ No newline at end of file From 4ee35912061ca117c0778f209d6bc0afdd739743 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 10 Mar 2026 01:07:38 +0300 Subject: [PATCH 60/74] add more llvm tests Signed-off-by: Victoria Ostrovskaya --- EML/tests/llvm_tests.ml | 810 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 765 insertions(+), 45 deletions(-) diff --git a/EML/tests/llvm_tests.ml b/EML/tests/llvm_tests.ml index c85d08e0..22adfe53 100644 --- a/EML/tests/llvm_tests.ml +++ b/EML/tests/llvm_tests.ml @@ -26,8 +26,8 @@ let compile_llvm src : string = let run_llvm src = Format.printf "%s" (compile_llvm src) -let%expect_test "unit_main" = - run_llvm "let main = ()"; +let%expect_test "unary_minus" = + run_llvm "let x = -5"; [%expect {| ; ModuleID = 'EML' @@ -62,6 +62,11 @@ declare ptr @print_gc_status() ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) declare ptr @llvm.frameaddress.p0(i32 immarg) #0 +define ptr @x() { +entry: + ret ptr inttoptr (i64 -9 to ptr) +} + define ptr @eml_main() { entry: ret ptr inttoptr (i64 1 to ptr) @@ -72,8 +77,8 @@ attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] ;; -let%expect_test "int_main" = - run_llvm "let main = 42"; +let%expect_test "unary_not" = + run_llvm "let x = not true"; [%expect {| ; ModuleID = 'EML' @@ -108,9 +113,14 @@ declare ptr @print_gc_status() ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) declare ptr @llvm.frameaddress.p0(i32 immarg) #0 +define ptr @x() { +entry: + ret ptr inttoptr (i64 2 to ptr) +} + define ptr @eml_main() { entry: - ret ptr inttoptr (i64 85 to ptr) + ret ptr inttoptr (i64 1 to ptr) } attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } @@ -118,8 +128,8 @@ attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] ;; -let%expect_test "unary_minus" = - run_llvm "let x = -5"; +let%expect_test "unit_main" = + run_llvm "let main = ()"; [%expect {| ; ModuleID = 'EML' @@ -154,11 +164,6 @@ declare ptr @print_gc_status() ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) declare ptr @llvm.frameaddress.p0(i32 immarg) #0 -define ptr @x() { -entry: - ret ptr inttoptr (i64 -9 to ptr) -} - define ptr @eml_main() { entry: ret ptr inttoptr (i64 1 to ptr) @@ -169,8 +174,8 @@ attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] ;; -let%expect_test "unary_not" = - run_llvm "let x = not true"; +let%expect_test "mul_only" = + run_llvm "let main = 7 * 8"; [%expect {| ; ModuleID = 'EML' @@ -205,14 +210,9 @@ declare ptr @print_gc_status() ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) declare ptr @llvm.frameaddress.p0(i32 immarg) #0 -define ptr @x() { -entry: - ret ptr inttoptr (i64 2 to ptr) -} - define ptr @eml_main() { entry: - ret ptr inttoptr (i64 1 to ptr) + ret ptr inttoptr (i64 113 to ptr) } attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } @@ -220,48 +220,768 @@ attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] ;; -let%expect_test "mul_only" = - run_llvm "let main = 7 * 8"; +let%expect_test "double_fn" = + run_llvm + {| + let double x = x + x + let main = double 21 + |}; [%expect {| -; ModuleID = 'EML' -source_filename = "EML" + ; ModuleID = 'EML' + source_filename = "EML" -declare ptr @eml_applyN(ptr, i64, ptr) + declare ptr @eml_applyN(ptr, i64, ptr) -declare ptr @create_tuple(i64, ptr) + declare ptr @create_tuple(i64, ptr) -declare ptr @alloc_closure(ptr, i64) + declare ptr @alloc_closure(ptr, i64) -declare ptr @field(ptr, i64) + declare ptr @field(ptr, i64) -declare ptr @llvm_call_indirect(ptr, ptr, i64) + declare ptr @llvm_call_indirect(ptr, ptr, i64) -declare void @print_int(i64) + declare void @print_int(i64) -declare void @init_gc() + declare void @init_gc() -declare void @destroy_gc() + declare void @destroy_gc() -declare void @set_ptr_stack(ptr) + declare void @set_ptr_stack(ptr) -declare i64 @get_heap_start() + declare i64 @get_heap_start() -declare i64 @get_heap_final() + declare i64 @get_heap_final() -declare ptr @collect() + declare ptr @collect() -declare ptr @print_gc_status() + declare ptr @print_gc_status() -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) -declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) + declare ptr @llvm.frameaddress.p0(i32 immarg) #0 -define ptr @eml_main() { -entry: - ret ptr inttoptr (i64 113 to ptr) -} + define ptr @double(ptr %x) { + entry: + %raw_int = ptrtoint ptr %x to i64 + %minus1 = sub i64 %raw_int, 1 + %untagged = sdiv i64 %minus1, 2 + %raw_int1 = ptrtoint ptr %x to i64 + %minus12 = sub i64 %raw_int1, 1 + %untagged3 = sdiv i64 %minus12, 2 + %add = add i64 %untagged, %untagged3 + %twice = mul i64 %add, 2 + %tagged = add i64 %twice, 1 + %result_int = inttoptr i64 %tagged to ptr + ret ptr %result_int + } -attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } + define ptr @eml_main() { + entry: + %direct_double = call ptr @double(ptr inttoptr (i64 43 to ptr)) + ret ptr %direct_double + } -|}] + attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] +;; + +let%expect_test "abs_fn" = + run_llvm + {| + let abs x = if x < 0 then -x else x + let main = abs 7 + |}; + [%expect + {| + ; ModuleID = 'EML' + source_filename = "EML" + + declare ptr @eml_applyN(ptr, i64, ptr) + + declare ptr @create_tuple(i64, ptr) + + declare ptr @alloc_closure(ptr, i64) + + declare ptr @field(ptr, i64) + + declare ptr @llvm_call_indirect(ptr, ptr, i64) + + declare void @print_int(i64) + + declare void @init_gc() + + declare void @destroy_gc() + + declare void @set_ptr_stack(ptr) + + declare i64 @get_heap_start() + + declare i64 @get_heap_final() + + declare ptr @collect() + + declare ptr @print_gc_status() + + ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) + declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + + define ptr @abs(ptr %x) { + entry: + %raw_int = ptrtoint ptr %x to i64 + %minus1 = sub i64 %raw_int, 1 + %untagged = sdiv i64 %minus1, 2 + %icmp_slt = icmp slt i64 %untagged, 0 + %tagged_bool = select i1 %icmp_slt, i64 4, i64 2 + %result_bool = inttoptr i64 %tagged_bool to ptr + %raw_bool = ptrtoint ptr %result_bool to i64 + %is_true = icmp eq i64 %raw_bool, 4 + br i1 %is_true, label %then_0, label %else_0 + + then_0: ; preds = %entry + %raw_int1 = ptrtoint ptr %x to i64 + %minus12 = sub i64 %raw_int1, 1 + %untagged3 = sdiv i64 %minus12, 2 + %neg = sub i64 0, %untagged3 + %twice = mul i64 %neg, 2 + %tagged = add i64 %twice, 1 + %result_int = inttoptr i64 %tagged to ptr + br label %merge_0 + + else_0: ; preds = %entry + br label %merge_0 + + merge_0: ; preds = %else_0, %then_0 + %ite_result = phi ptr [ %result_int, %then_0 ], [ %x, %else_0 ] + ret ptr %ite_result + } + + define ptr @eml_main() { + entry: + %direct_abs = call ptr @abs(ptr inttoptr (i64 15 to ptr)) + ret ptr %direct_abs + } + + attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] +;; + +let%expect_test "nested_calls" = + run_llvm + {| + let sq x = x * x + let sum_of_squares a b = sq a + sq b + let main = sum_of_squares 3 4 + |}; + [%expect + {| + ; ModuleID = 'EML' + source_filename = "EML" + + declare ptr @eml_applyN(ptr, i64, ptr) + + declare ptr @create_tuple(i64, ptr) + + declare ptr @alloc_closure(ptr, i64) + + declare ptr @field(ptr, i64) + + declare ptr @llvm_call_indirect(ptr, ptr, i64) + + declare void @print_int(i64) + + declare void @init_gc() + + declare void @destroy_gc() + + declare void @set_ptr_stack(ptr) + + declare i64 @get_heap_start() + + declare i64 @get_heap_final() + + declare ptr @collect() + + declare ptr @print_gc_status() + + ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) + declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + + define ptr @sq(ptr %x) { + entry: + %raw_int = ptrtoint ptr %x to i64 + %minus1 = sub i64 %raw_int, 1 + %untagged = sdiv i64 %minus1, 2 + %raw_int1 = ptrtoint ptr %x to i64 + %minus12 = sub i64 %raw_int1, 1 + %untagged3 = sdiv i64 %minus12, 2 + %mul = mul i64 %untagged, %untagged3 + %twice = mul i64 %mul, 2 + %tagged = add i64 %twice, 1 + %result_int = inttoptr i64 %tagged to ptr + ret ptr %result_int + } + + define ptr @sum_of_squares(ptr %a, ptr %b) { + entry: + %direct_sq = call ptr @sq(ptr %a) + %direct_sq1 = call ptr @sq(ptr %b) + %raw_int = ptrtoint ptr %direct_sq to i64 + %minus1 = sub i64 %raw_int, 1 + %untagged = sdiv i64 %minus1, 2 + %raw_int2 = ptrtoint ptr %direct_sq1 to i64 + %minus13 = sub i64 %raw_int2, 1 + %untagged4 = sdiv i64 %minus13, 2 + %add = add i64 %untagged, %untagged4 + %twice = mul i64 %add, 2 + %tagged = add i64 %twice, 1 + %result_int = inttoptr i64 %tagged to ptr + ret ptr %result_int + } + + define ptr @eml_main() { + entry: + %direct_sum_of_squares = call ptr @sum_of_squares(ptr inttoptr (i64 7 to ptr), ptr inttoptr (i64 9 to ptr)) + ret ptr %direct_sum_of_squares + } + + attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] +;; + +let%expect_test "fibonacci" = + run_llvm + {| + let rec fib n = if n < 2 then 1 else fib (n - 1) + fib (n - 2) + let main = fib 6 + |}; + [%expect + {| + ; ModuleID = 'EML' + source_filename = "EML" + + declare ptr @eml_applyN(ptr, i64, ptr) + + declare ptr @create_tuple(i64, ptr) + + declare ptr @alloc_closure(ptr, i64) + + declare ptr @field(ptr, i64) + + declare ptr @llvm_call_indirect(ptr, ptr, i64) + + declare void @print_int(i64) + + declare void @init_gc() + + declare void @destroy_gc() + + declare void @set_ptr_stack(ptr) + + declare i64 @get_heap_start() + + declare i64 @get_heap_final() + + declare ptr @collect() + + declare ptr @print_gc_status() + + ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) + declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + + define ptr @fib(ptr %n) { + entry: + %raw_int = ptrtoint ptr %n to i64 + %minus1 = sub i64 %raw_int, 1 + %untagged = sdiv i64 %minus1, 2 + %icmp_slt = icmp slt i64 %untagged, 2 + %tagged_bool = select i1 %icmp_slt, i64 4, i64 2 + %result_bool = inttoptr i64 %tagged_bool to ptr + %raw_bool = ptrtoint ptr %result_bool to i64 + %is_true = icmp eq i64 %raw_bool, 4 + br i1 %is_true, label %then_0, label %else_0 + + then_0: ; preds = %entry + br label %merge_0 + + else_0: ; preds = %entry + %raw_int1 = ptrtoint ptr %n to i64 + %minus12 = sub i64 %raw_int1, 1 + %untagged3 = sdiv i64 %minus12, 2 + %sub = sub i64 %untagged3, 1 + %twice = mul i64 %sub, 2 + %tagged = add i64 %twice, 1 + %result_int = inttoptr i64 %tagged to ptr + %direct_fib = call ptr @fib(ptr %result_int) + %raw_int4 = ptrtoint ptr %n to i64 + %minus15 = sub i64 %raw_int4, 1 + %untagged6 = sdiv i64 %minus15, 2 + %sub7 = sub i64 %untagged6, 2 + %twice8 = mul i64 %sub7, 2 + %tagged9 = add i64 %twice8, 1 + %result_int10 = inttoptr i64 %tagged9 to ptr + %direct_fib11 = call ptr @fib(ptr %result_int10) + %raw_int12 = ptrtoint ptr %direct_fib to i64 + %minus113 = sub i64 %raw_int12, 1 + %untagged14 = sdiv i64 %minus113, 2 + %raw_int15 = ptrtoint ptr %direct_fib11 to i64 + %minus116 = sub i64 %raw_int15, 1 + %untagged17 = sdiv i64 %minus116, 2 + %add = add i64 %untagged14, %untagged17 + %twice18 = mul i64 %add, 2 + %tagged19 = add i64 %twice18, 1 + %result_int20 = inttoptr i64 %tagged19 to ptr + br label %merge_0 + + merge_0: ; preds = %else_0, %then_0 + %ite_result = phi ptr [ inttoptr (i64 3 to ptr), %then_0 ], [ %result_int20, %else_0 ] + ret ptr %ite_result + } + + define ptr @eml_main() { + entry: + %direct_fib = call ptr @fib(ptr inttoptr (i64 13 to ptr)) + ret ptr %direct_fib + } + + attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] +;; + +let%expect_test "is_positive" = + run_llvm + {| + let is_positive n = n > 0 + let main = is_positive 42 + |}; + [%expect + {| + ; ModuleID = 'EML' + source_filename = "EML" + + declare ptr @eml_applyN(ptr, i64, ptr) + + declare ptr @create_tuple(i64, ptr) + + declare ptr @alloc_closure(ptr, i64) + + declare ptr @field(ptr, i64) + + declare ptr @llvm_call_indirect(ptr, ptr, i64) + + declare void @print_int(i64) + + declare void @init_gc() + + declare void @destroy_gc() + + declare void @set_ptr_stack(ptr) + + declare i64 @get_heap_start() + + declare i64 @get_heap_final() + + declare ptr @collect() + + declare ptr @print_gc_status() + + ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) + declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + + define ptr @is_positive(ptr %n) { + entry: + %raw_int = ptrtoint ptr %n to i64 + %minus1 = sub i64 %raw_int, 1 + %untagged = sdiv i64 %minus1, 2 + %icmp_sgt = icmp sgt i64 %untagged, 0 + %tagged_bool = select i1 %icmp_sgt, i64 4, i64 2 + %result_bool = inttoptr i64 %tagged_bool to ptr + ret ptr %result_bool + } + + define ptr @eml_main() { + entry: + %direct_is_positive = call ptr @is_positive(ptr inttoptr (i64 85 to ptr)) + ret ptr %direct_is_positive + } + + attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] +;; + +let%expect_test "mul3" = + run_llvm + {| + let mul3 a b c = a * b * c + let main = mul3 2 3 4 + |}; + [%expect + {| + ; ModuleID = 'EML' + source_filename = "EML" + + declare ptr @eml_applyN(ptr, i64, ptr) + + declare ptr @create_tuple(i64, ptr) + + declare ptr @alloc_closure(ptr, i64) + + declare ptr @field(ptr, i64) + + declare ptr @llvm_call_indirect(ptr, ptr, i64) + + declare void @print_int(i64) + + declare void @init_gc() + + declare void @destroy_gc() + + declare void @set_ptr_stack(ptr) + + declare i64 @get_heap_start() + + declare i64 @get_heap_final() + + declare ptr @collect() + + declare ptr @print_gc_status() + + ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) + declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + + define ptr @mul3(ptr %a, ptr %b, ptr %c) { + entry: + %raw_int = ptrtoint ptr %a to i64 + %minus1 = sub i64 %raw_int, 1 + %untagged = sdiv i64 %minus1, 2 + %raw_int1 = ptrtoint ptr %b to i64 + %minus12 = sub i64 %raw_int1, 1 + %untagged3 = sdiv i64 %minus12, 2 + %mul = mul i64 %untagged, %untagged3 + %twice = mul i64 %mul, 2 + %tagged = add i64 %twice, 1 + %result_int = inttoptr i64 %tagged to ptr + %raw_int4 = ptrtoint ptr %result_int to i64 + %minus15 = sub i64 %raw_int4, 1 + %untagged6 = sdiv i64 %minus15, 2 + %raw_int7 = ptrtoint ptr %c to i64 + %minus18 = sub i64 %raw_int7, 1 + %untagged9 = sdiv i64 %minus18, 2 + %mul10 = mul i64 %untagged6, %untagged9 + %twice11 = mul i64 %mul10, 2 + %tagged12 = add i64 %twice11, 1 + %result_int13 = inttoptr i64 %tagged12 to ptr + ret ptr %result_int13 + } + + define ptr @eml_main() { + entry: + %direct_mul3 = call ptr @mul3(ptr inttoptr (i64 5 to ptr), ptr inttoptr (i64 7 to ptr), ptr inttoptr (i64 9 to ptr)) + ret ptr %direct_mul3 + } + + attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] +;; + +let%expect_test "test1" = + run_llvm + {| + let large x = if 0<>x then print_int 0 else print_int 1 + let main = + let x = if (if (if 0 + then 0 else (let t42 = print_int 42 in 1)) + then 0 else 1) + then 0 else 1 in + large x + |}; + [%expect + {| + ; ModuleID = 'EML' + source_filename = "EML" + + declare ptr @eml_applyN(ptr, i64, ptr) + + declare ptr @create_tuple(i64, ptr) + + declare ptr @alloc_closure(ptr, i64) + + declare ptr @field(ptr, i64) + + declare ptr @llvm_call_indirect(ptr, ptr, i64) + + declare void @print_int(i64) + + declare void @init_gc() + + declare void @destroy_gc() + + declare void @set_ptr_stack(ptr) + + declare i64 @get_heap_start() + + declare i64 @get_heap_final() + + declare ptr @collect() + + declare ptr @print_gc_status() + + ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) + declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + + define ptr @large(ptr %x) { + entry: + %raw_int = ptrtoint ptr %x to i64 + %minus1 = sub i64 %raw_int, 1 + %untagged = sdiv i64 %minus1, 2 + %icmp_ne = icmp ne i64 0, %untagged + %tagged_bool = select i1 %icmp_ne, i64 4, i64 2 + %result_bool = inttoptr i64 %tagged_bool to ptr + %raw_bool = ptrtoint ptr %result_bool to i64 + %is_true = icmp eq i64 %raw_bool, 4 + br i1 %is_true, label %then_0, label %else_0 + + then_0: ; preds = %entry + call void @print_int(i64 1) + br label %merge_0 + + else_0: ; preds = %entry + call void @print_int(i64 3) + br label %merge_0 + + merge_0: ; preds = %else_0, %then_0 + %ite_result = phi ptr [ inttoptr (i64 1 to ptr), %then_0 ], [ inttoptr (i64 1 to ptr), %else_0 ] + ret ptr %ite_result + } + + define ptr @eml_main() { + entry: + br i1 false, label %then_1, label %else_1 + + then_1: ; preds = %entry + br label %merge_1 + + else_1: ; preds = %entry + call void @print_int(i64 85) + br label %merge_1 + + merge_1: ; preds = %else_1, %then_1 + %ite_result = phi ptr [ inttoptr (i64 1 to ptr), %then_1 ], [ inttoptr (i64 3 to ptr), %else_1 ] + %raw_bool = ptrtoint ptr %ite_result to i64 + %is_true = icmp eq i64 %raw_bool, 4 + br i1 %is_true, label %then_2, label %else_2 + + then_2: ; preds = %merge_1 + br label %merge_2 + + else_2: ; preds = %merge_1 + br label %merge_2 + + merge_2: ; preds = %else_2, %then_2 + %ite_result1 = phi ptr [ inttoptr (i64 1 to ptr), %then_2 ], [ inttoptr (i64 3 to ptr), %else_2 ] + %raw_bool2 = ptrtoint ptr %ite_result1 to i64 + %is_true3 = icmp eq i64 %raw_bool2, 4 + br i1 %is_true3, label %then_3, label %else_3 + + then_3: ; preds = %merge_2 + br label %merge_3 + + else_3: ; preds = %merge_2 + br label %merge_3 + + merge_3: ; preds = %else_3, %then_3 + %ite_result4 = phi ptr [ inttoptr (i64 1 to ptr), %then_3 ], [ inttoptr (i64 3 to ptr), %else_3 ] + %direct_large = call ptr @large(ptr %ite_result4) + ret ptr %direct_large + } + + attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] +;; + +let%expect_test "codegen closure fn with 10 arg" = + run_llvm + {| + let add a b c d e f g = a + b + c + d + e + f + g + + let main = + let temp1 = add 1 1 1 1 in + let temp2 = temp1 1 1 in + let temp3 = temp2 1 1 in + print_int temp3 + ;; + |}; + [%expect + {| + ; ModuleID = 'EML' + source_filename = "EML" + + declare ptr @eml_applyN(ptr, i64, ptr) + + declare ptr @create_tuple(i64, ptr) + + declare ptr @alloc_closure(ptr, i64) + + declare ptr @field(ptr, i64) + + declare ptr @llvm_call_indirect(ptr, ptr, i64) + + declare void @print_int(i64) + + declare void @init_gc() + + declare void @destroy_gc() + + declare void @set_ptr_stack(ptr) + + declare i64 @get_heap_start() + + declare i64 @get_heap_final() + + declare ptr @collect() + + declare ptr @print_gc_status() + + ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) + declare ptr @llvm.frameaddress.p0(i32 immarg) #0 + + define ptr @add(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr %f, ptr %g) { + entry: + %raw_int = ptrtoint ptr %a to i64 + %minus1 = sub i64 %raw_int, 1 + %untagged = sdiv i64 %minus1, 2 + %raw_int1 = ptrtoint ptr %b to i64 + %minus12 = sub i64 %raw_int1, 1 + %untagged3 = sdiv i64 %minus12, 2 + %add = add i64 %untagged, %untagged3 + %twice = mul i64 %add, 2 + %tagged = add i64 %twice, 1 + %result_int = inttoptr i64 %tagged to ptr + %raw_int4 = ptrtoint ptr %result_int to i64 + %minus15 = sub i64 %raw_int4, 1 + %untagged6 = sdiv i64 %minus15, 2 + %raw_int7 = ptrtoint ptr %c to i64 + %minus18 = sub i64 %raw_int7, 1 + %untagged9 = sdiv i64 %minus18, 2 + %add10 = add i64 %untagged6, %untagged9 + %twice11 = mul i64 %add10, 2 + %tagged12 = add i64 %twice11, 1 + %result_int13 = inttoptr i64 %tagged12 to ptr + %raw_int14 = ptrtoint ptr %result_int13 to i64 + %minus115 = sub i64 %raw_int14, 1 + %untagged16 = sdiv i64 %minus115, 2 + %raw_int17 = ptrtoint ptr %d to i64 + %minus118 = sub i64 %raw_int17, 1 + %untagged19 = sdiv i64 %minus118, 2 + %add20 = add i64 %untagged16, %untagged19 + %twice21 = mul i64 %add20, 2 + %tagged22 = add i64 %twice21, 1 + %result_int23 = inttoptr i64 %tagged22 to ptr + %raw_int24 = ptrtoint ptr %result_int23 to i64 + %minus125 = sub i64 %raw_int24, 1 + %untagged26 = sdiv i64 %minus125, 2 + %raw_int27 = ptrtoint ptr %e to i64 + %minus128 = sub i64 %raw_int27, 1 + %untagged29 = sdiv i64 %minus128, 2 + %add30 = add i64 %untagged26, %untagged29 + %twice31 = mul i64 %add30, 2 + %tagged32 = add i64 %twice31, 1 + %result_int33 = inttoptr i64 %tagged32 to ptr + %raw_int34 = ptrtoint ptr %result_int33 to i64 + %minus135 = sub i64 %raw_int34, 1 + %untagged36 = sdiv i64 %minus135, 2 + %raw_int37 = ptrtoint ptr %f to i64 + %minus138 = sub i64 %raw_int37, 1 + %untagged39 = sdiv i64 %minus138, 2 + %add40 = add i64 %untagged36, %untagged39 + %twice41 = mul i64 %add40, 2 + %tagged42 = add i64 %twice41, 1 + %result_int43 = inttoptr i64 %tagged42 to ptr + %raw_int44 = ptrtoint ptr %result_int43 to i64 + %minus145 = sub i64 %raw_int44, 1 + %untagged46 = sdiv i64 %minus145, 2 + %raw_int47 = ptrtoint ptr %g to i64 + %minus148 = sub i64 %raw_int47, 1 + %untagged49 = sdiv i64 %minus148, 2 + %add50 = add i64 %untagged46, %untagged49 + %twice51 = mul i64 %add50, 2 + %tagged52 = add i64 %twice51, 1 + %result_int53 = inttoptr i64 %tagged52 to ptr + ret ptr %result_int53 + } + + define ptr @eml_main() { + entry: + %boxed_alloc_closure = call ptr @alloc_closure(ptr @add, i64 7) + br label %apply_step_0 + + merge_0: ; preds = %apply_step_3 + %apply_result = phi ptr [ %apply_step_310, %apply_step_3 ] + br label %apply_step_011 + + apply_step_0: ; preds = %entry + %apply_one = alloca [1 x ptr], align 8 + %one_elem = getelementptr [1 x ptr], ptr %apply_one, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem, align 8 + %apply_step_01 = call ptr @eml_applyN(ptr %boxed_alloc_closure, i64 1, ptr %one_elem) + br label %apply_step_1 + + apply_step_1: ; preds = %apply_step_0 + %cur_1 = phi ptr [ %apply_step_01, %apply_step_0 ] + %apply_one2 = alloca [1 x ptr], align 8 + %one_elem3 = getelementptr [1 x ptr], ptr %apply_one2, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem3, align 8 + %apply_step_14 = call ptr @eml_applyN(ptr %cur_1, i64 1, ptr %one_elem3) + br label %apply_step_2 + + apply_step_2: ; preds = %apply_step_1 + %cur_2 = phi ptr [ %apply_step_14, %apply_step_1 ] + %apply_one5 = alloca [1 x ptr], align 8 + %one_elem6 = getelementptr [1 x ptr], ptr %apply_one5, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem6, align 8 + %apply_step_27 = call ptr @eml_applyN(ptr %cur_2, i64 1, ptr %one_elem6) + br label %apply_step_3 + + apply_step_3: ; preds = %apply_step_2 + %cur_3 = phi ptr [ %apply_step_27, %apply_step_2 ] + %apply_one8 = alloca [1 x ptr], align 8 + %one_elem9 = getelementptr [1 x ptr], ptr %apply_one8, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem9, align 8 + %apply_step_310 = call ptr @eml_applyN(ptr %cur_3, i64 1, ptr %one_elem9) + br label %merge_0 + + merge_1: ; preds = %apply_step_112 + %apply_result20 = phi ptr [ %apply_step_119, %apply_step_112 ] + br label %apply_step_021 + + apply_step_011: ; preds = %merge_0 + %apply_one13 = alloca [1 x ptr], align 8 + %one_elem14 = getelementptr [1 x ptr], ptr %apply_one13, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem14, align 8 + %apply_step_015 = call ptr @eml_applyN(ptr %apply_result, i64 1, ptr %one_elem14) + br label %apply_step_112 + + apply_step_112: ; preds = %apply_step_011 + %cur_116 = phi ptr [ %apply_step_015, %apply_step_011 ] + %apply_one17 = alloca [1 x ptr], align 8 + %one_elem18 = getelementptr [1 x ptr], ptr %apply_one17, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem18, align 8 + %apply_step_119 = call ptr @eml_applyN(ptr %cur_116, i64 1, ptr %one_elem18) + br label %merge_1 + + merge_2: ; preds = %apply_step_122 + %apply_result30 = phi ptr [ %apply_step_129, %apply_step_122 ] + %print_int_arg = ptrtoint ptr %apply_result30 to i64 + call void @print_int(i64 %print_int_arg) + ret ptr inttoptr (i64 1 to ptr) + + apply_step_021: ; preds = %merge_1 + %apply_one23 = alloca [1 x ptr], align 8 + %one_elem24 = getelementptr [1 x ptr], ptr %apply_one23, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem24, align 8 + %apply_step_025 = call ptr @eml_applyN(ptr %apply_result20, i64 1, ptr %one_elem24) + br label %apply_step_122 + + apply_step_122: ; preds = %apply_step_021 + %cur_126 = phi ptr [ %apply_step_025, %apply_step_021 ] + %apply_one27 = alloca [1 x ptr], align 8 + %one_elem28 = getelementptr [1 x ptr], ptr %apply_one27, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem28, align 8 + %apply_step_129 = call ptr @eml_applyN(ptr %cur_126, i64 1, ptr %one_elem28) + br label %merge_2 + } + + attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] ;; From dbfc18b56ec904e269f63d9ab29616379bfcca57 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 10 Mar 2026 01:13:26 +0300 Subject: [PATCH 61/74] replce inferencer to midleend Signed-off-by: Victoria Ostrovskaya --- EML/bin/EML.ml | 25 ++++++++----------- EML/lib/frontend/runner.ml | 16 +++--------- EML/lib/frontend/runner.mli | 11 ++------ EML/lib/{frontend => middleend}/inferencer.ml | 2 +- .../{frontend => middleend}/inferencer.mli | 2 +- EML/lib/middleend/runner.ml | 20 +++++++++++++-- EML/lib/middleend/runner.mli | 4 ++- EML/tests/inferencer_tests.ml | 2 +- 8 files changed, 40 insertions(+), 42 deletions(-) rename EML/lib/{frontend => middleend}/inferencer.ml (99%) rename EML/lib/{frontend => middleend}/inferencer.mli (98%) diff --git a/EML/bin/EML.ml b/EML/bin/EML.ml index 7fcf4417..c87be74d 100644 --- a/EML/bin/EML.ml +++ b/EML/bin/EML.ml @@ -4,7 +4,7 @@ open Stdio open EML_lib -open Frontend +open Middleend type backend = | Ricsv @@ -39,28 +39,25 @@ let report_infer_error oc e = (Format.asprintf "Inferencer error: %a\n" Inferencer.pp_error e) ;; -let with_frontend text env oc f_success : (env, unit) Result.t = - match Frontend.Runner.run text env with +let with_frontend text oc f_success : (env, unit) Result.t = + match Frontend.Runner.run text with | Error (Frontend.Runner.Parse s) -> report_parse_error oc s; Error () - | Error (Frontend.Runner.Infer e) -> - report_infer_error oc e; - Error () - | Ok (ast, env', out_list) -> f_success ast env' out_list + | Ok ast -> f_success ast ;; -let with_middleend ast _env' f : (env, unit) Result.t = - match Middleend.Runner.run ast with +let with_middleend ast env f : (env, unit) Result.t = + match Middleend.Runner.run ast env with | Error e_mid -> Format.eprintf "Middleend error: %a\n%!" Middleend.Runner.pp_error e_mid; Error () - | Ok anf_ast -> f anf_ast + | Ok (anf_ast, env') -> f anf_ast env' ;; let run_compile text env oc ~backend ~enable_gc : (env, unit) Result.t = - with_frontend text env oc (fun ast env' _out_list -> - with_middleend ast env' (fun anf_ast -> + with_frontend text oc (fun ast -> + with_middleend ast env (fun anf_ast env' -> let ppf = Format.formatter_of_out_channel oc in let res = match backend with @@ -109,8 +106,8 @@ let compiler opts : (unit, unit) Result.t = in let env0 = if opts.enable_gc - then Inferencer.TypeEnv.env_with_gc - else Inferencer.TypeEnv.initial_env + then Middleend.Inferencer.TypeEnv.env_with_gc + else Middleend.Inferencer.TypeEnv.initial_env in let with_output f = match opts.output_file with diff --git a/EML/lib/frontend/runner.ml b/EML/lib/frontend/runner.ml index 0f5ec7eb..65a5094b 100644 --- a/EML/lib/frontend/runner.ml +++ b/EML/lib/frontend/runner.ml @@ -4,27 +4,17 @@ open Ast open Format -open Inferencer -type error = - | Parse of string - | Infer of Inferencer.error +type error = Parse of string let pp_error ppf = function | Parse s -> fprintf ppf "Parse error: %s" s - | Infer e -> fprintf ppf "Inference error: %a" Inferencer.pp_error e ;; let parse (text : string) : (program, string) Result.t = Parser.parse text -let run (text : string) (env : TypeEnv.t) - : (program * TypeEnv.t * (ident option * ty) list, error) Result.t - = +let run (text : string) : (program, error) Result.t = match Parser.parse text with | Error s -> Error (Parse s) - | Ok ast -> - (match Inferencer.ResultMonad.run (infer_structure env ast) with - | Error (OccursCheck _) -> Ok (ast, env, []) - | Error e -> Error (Infer e) - | Ok (_subst, env') -> Ok (ast, env', [])) + | Ok ast -> Ok ast ;; diff --git a/EML/lib/frontend/runner.mli b/EML/lib/frontend/runner.mli index 42948926..c965527e 100644 --- a/EML/lib/frontend/runner.mli +++ b/EML/lib/frontend/runner.mli @@ -3,16 +3,9 @@ (** SPDX-License-Identifier: LGPL-3.0-or-later *) open Ast -open Inferencer -type error = - | Parse of string - | Infer of Inferencer.error +type error = Parse of string val pp_error : Format.formatter -> error -> unit val parse : string -> (program, string) Result.t - -val run - : string - -> TypeEnv.t - -> (program * TypeEnv.t * (ident option * ty) list, error) Result.t +val run : string -> (program, error) Result.t diff --git a/EML/lib/frontend/inferencer.ml b/EML/lib/middleend/inferencer.ml similarity index 99% rename from EML/lib/frontend/inferencer.ml rename to EML/lib/middleend/inferencer.ml index 7f7cdbdf..1b6cc7c3 100644 --- a/EML/lib/frontend/inferencer.ml +++ b/EML/lib/middleend/inferencer.ml @@ -5,7 +5,7 @@ (* Template: https://gitlab.com/Kakadu/fp2020course-materials/-/tree/master/code/miniml?ref_type=heads*) open Base -open Ast +open Frontend.Ast open Stdlib.Format type error = diff --git a/EML/lib/frontend/inferencer.mli b/EML/lib/middleend/inferencer.mli similarity index 98% rename from EML/lib/frontend/inferencer.mli rename to EML/lib/middleend/inferencer.mli index fe3d0a1f..f501e0bf 100644 --- a/EML/lib/frontend/inferencer.mli +++ b/EML/lib/middleend/inferencer.mli @@ -2,7 +2,7 @@ (** SPDX-License-Identifier: LGPL-3.0-or-later *) -open Ast +open Frontend.Ast type error = | OccursCheck of string * ty diff --git a/EML/lib/middleend/runner.ml b/EML/lib/middleend/runner.ml index 6a0834a2..a093518e 100644 --- a/EML/lib/middleend/runner.ml +++ b/EML/lib/middleend/runner.ml @@ -4,27 +4,43 @@ open Format open Frontend.Ast +open Inferencer open Cc open Ll open Anf type error = + | Infer of Inferencer.error | Closure of Cc.error | Lifting of Ll.error | Anf of string let pp_error ppf = function + | Infer e -> fprintf ppf "inference: %a" Inferencer.pp_error e | Closure e -> fprintf ppf "closure conversion: %a" Cc.pp_error e | Lifting e -> fprintf ppf "lambda lifting: %a" Ll.pp_error e | Anf s -> fprintf ppf "ANF: %s" s ;; -let run (program : program) : (anf_program, error) Result.t = +let run (program : program) (env : Inferencer.TypeEnv.t) + : (anf_program * Inferencer.TypeEnv.t, error) Result.t + = let ( >>= ) = Result.bind in + let env' = + match Inferencer.ResultMonad.run (infer_structure env program) with + | Error (Inferencer.OccursCheck _) -> Ok env + | Error e -> Error (Infer e) + | Ok (_subst, env'') -> Ok env'' + in + env' + >>= fun env'' -> closure_conversion_result program |> Result.map_error (fun e -> Closure e) >>= fun cc_ast -> lambda_lifting_result cc_ast |> Result.map_error (fun e -> Lifting e) - >>= fun ll_ast -> anf_program ll_ast |> Result.map_error (fun e -> Anf e) + >>= fun ll_ast -> + anf_program ll_ast + |> Result.map_error (fun e -> Anf e) + >>= fun anf_ast -> Ok (anf_ast, env'') ;; diff --git a/EML/lib/middleend/runner.mli b/EML/lib/middleend/runner.mli index e0ce122d..7dd28ccb 100644 --- a/EML/lib/middleend/runner.mli +++ b/EML/lib/middleend/runner.mli @@ -3,12 +3,14 @@ (** SPDX-License-Identifier: LGPL-3.0-or-later *) open Frontend.Ast +open Inferencer open Anf type error = + | Infer of Inferencer.error | Closure of Cc.error | Lifting of Ll.error | Anf of string val pp_error : Format.formatter -> error -> unit -val run : program -> (anf_program, error) Result.t +val run : program -> TypeEnv.t -> (anf_program * TypeEnv.t, error) Result.t diff --git a/EML/tests/inferencer_tests.ml b/EML/tests/inferencer_tests.ml index 17637cf0..78369081 100644 --- a/EML/tests/inferencer_tests.ml +++ b/EML/tests/inferencer_tests.ml @@ -2,7 +2,7 @@ (** SPDX-License-Identifier: LGPL-3.0-or-later *) -open EML_lib.Frontend.Inferencer +open EML_lib.Middleend.Inferencer open EML_lib.Frontend.Ast open EML_lib.Frontend.Parser From ec2e2ac6b5b9f33b72ca43a54ddfc2f8bf525010 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 10 Mar 2026 01:27:50 +0300 Subject: [PATCH 62/74] merge two runtimes Signed-off-by: Victoria Ostrovskaya --- EML/lib/runtime/dune | 15 +- EML/lib/runtime/llvm_runtime.c | 339 ------------------------- EML/lib/runtime/riscv_runtime.c | 367 ---------------------------- EML/lib/runtime/runtime.c | 421 ++++++++++++++++++++++++++++++++ 4 files changed, 434 insertions(+), 708 deletions(-) delete mode 100644 EML/lib/runtime/llvm_runtime.c delete mode 100644 EML/lib/runtime/riscv_runtime.c create mode 100644 EML/lib/runtime/runtime.c diff --git a/EML/lib/runtime/dune b/EML/lib/runtime/dune index 49fd9378..75ae5317 100644 --- a/EML/lib/runtime/dune +++ b/EML/lib/runtime/dune @@ -1,6 +1,6 @@ (rule (targets rv64_runtime.a) - (deps riscv_runtime.c) + (deps runtime.c) (action (progn (run @@ -8,8 +8,19 @@ -march=rv64gc -mabi=lp64d -O2 + -DEML_RISCV + -DENABLE_GC -c - %{dep:riscv_runtime.c} + %{dep:runtime.c} -o rv64_runtime.o) (run riscv64-linux-gnu-ar rcs %{targets} rv64_runtime.o)))) + +;; Optional: build LLVM runtime object for linking with .ll output. +;; Usage: dune build lib/runtime/llvm_runtime.o (with C compiler on host) + +(rule + (targets llvm_runtime.o) + (deps runtime.c) + (action + (run gcc -O2 -DEML_LLVM -DENABLE_GC -c %{dep:runtime.c} -o %{targets}))) diff --git a/EML/lib/runtime/llvm_runtime.c b/EML/lib/runtime/llvm_runtime.c deleted file mode 100644 index 7aafdbf1..00000000 --- a/EML/lib/runtime/llvm_runtime.c +++ /dev/null @@ -1,339 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -typedef void *eml_value; - -static int64_t tag_int_val(int64_t n) { return (n << 1) | 1; } - -#if defined(ENABLE_GC) -#define SIZE_HEAP_DEFAULT 800 -#define HEADER_WORDS 1 -static size_t size_heap; -#define MAX_STACK_SCAN_SLOTS (128 * 1024) - -typedef enum { - TAG_TUPLE = 0, - TAG_CLOSURE = 1, - TAG_LAST -} gc_tag; - -static const size_t TAG_SCAN_START[] = { - [TAG_TUPLE] = 1, - [TAG_CLOSURE] = 3, -}; - -#define IS_INT(v) ((v) & 0x1) -#define IS_PTR(v) ((v) != 0 && !IS_INT(v)) - -typedef struct { - uint8_t tag; - uint8_t _pad1; - uint16_t size; - uint32_t _pad2; -} box_header_t; - -static inline box_header_t *get_header(uint64_t *payload) { - return (box_header_t *)((uint64_t *)payload - 1); -} -static inline uint64_t *get_payload(box_header_t *hdr) { - return (uint64_t *)(hdr + 1); -} - -typedef struct { - uint64_t *start[2]; - uint64_t *end[2]; - uint64_t *alloc_ptr; - int current_bank; - uint64_t allocations; - uint64_t collections; - uint64_t words_allocated_total; -} gc_state; - -static gc_state GC; -static uint64_t *PTR_STACK = NULL; -static uint64_t *STACK_SCAN_LOW = NULL; /* range we are scanning in collect() */ -static uint64_t *STACK_SCAN_HIGH = NULL; -static bool gc_enabled; - -static inline int get_current_bank_idx(void) { return GC.current_bank; } -static inline int get_another_bank_idx(void) { return GC.current_bank ^ 1; } -static inline bool in_bank(uint64_t *ptr, int bank_idx) { - return (GC.start[bank_idx] <= ptr) && (ptr < GC.end[bank_idx]); -} - -static void mark_and_copy(uint64_t *stack_slot); - -static uint64_t *forward_or_copy(uint64_t *old_payload, bool *did_copy) { - int from_bank = get_another_bank_idx(); - if (old_payload <= (uint64_t *)GC.start[from_bank] + HEADER_WORDS - 1) - return old_payload; - box_header_t *old_header = get_header(old_payload); - if (old_header->tag >= TAG_LAST || old_header->size == 0 || - old_header->size > size_heap) { - *did_copy = false; - return old_payload; - } - uint64_t possible_forward_ptr = *((uint64_t *)old_payload - 1); - if (in_bank((uint64_t *)possible_forward_ptr, get_current_bank_idx())) { - *did_copy = false; - return (uint64_t *)possible_forward_ptr; - } - *did_copy = true; - uint16_t payload_words = old_header->size; - uint8_t object_tag = old_header->tag; - if (GC.alloc_ptr + payload_words + HEADER_WORDS > GC.end[GC.current_bank]) { - *did_copy = false; - return old_payload; - } - box_header_t *new_header = (box_header_t *)GC.alloc_ptr; - new_header->tag = object_tag; - new_header->size = payload_words; - uint64_t *new_payload = get_payload(new_header); - memcpy(new_payload, old_payload, payload_words * sizeof(uint64_t)); - GC.alloc_ptr += payload_words + HEADER_WORDS; - GC.words_allocated_total += payload_words + HEADER_WORDS; - *((uint64_t *)old_payload - 1) = (uint64_t)new_payload; - return new_payload; -} - -static void scan_object(uint64_t *obj) { - box_header_t *header = get_header(obj); - size_t start = (header->tag < TAG_LAST) ? TAG_SCAN_START[header->tag] : 0; - for (size_t i = start; i < header->size; i++) - mark_and_copy(obj + i); -} - -static void mark_and_copy(uint64_t *stack_slot) { - uint64_t raw_value = *stack_slot; - if (!IS_PTR(raw_value)) return; - uint64_t *old_object_payload = (uint64_t *)raw_value; - int another_bank = get_another_bank_idx(); - if (!in_bank(old_object_payload, another_bank)) { - if (STACK_SCAN_LOW && STACK_SCAN_HIGH) { - uint64_t *low = STACK_SCAN_LOW < STACK_SCAN_HIGH ? STACK_SCAN_LOW : STACK_SCAN_HIGH; - uint64_t *high = STACK_SCAN_LOW < STACK_SCAN_HIGH ? STACK_SCAN_HIGH : STACK_SCAN_LOW; - if (old_object_payload >= low && old_object_payload <= high) - return; /* skip stack pointer */ - } - return; - } - if (old_object_payload < (uint64_t *)GC.start[another_bank] + HEADER_WORDS) - return; - bool object_was_copied_now; - uint64_t *new_object_payload = - forward_or_copy(old_object_payload, &object_was_copied_now); - *stack_slot = (uint64_t)new_object_payload; - if (object_was_copied_now) scan_object(new_object_payload); -} - -void collect(void) { - uint64_t dummy; - uint64_t *current_stack_top = &dummy; - if (!PTR_STACK || current_stack_top > PTR_STACK) return; - STACK_SCAN_LOW = current_stack_top; - STACK_SCAN_HIGH = PTR_STACK; - GC.current_bank ^= 1; - GC.alloc_ptr = GC.start[GC.current_bank]; - { - uint64_t *stack_slot = current_stack_top; - size_t n = 0; - for (; n < MAX_STACK_SCAN_SLOTS && stack_slot <= PTR_STACK; n++, stack_slot++) - mark_and_copy(stack_slot); - } - STACK_SCAN_LOW = NULL; - STACK_SCAN_HIGH = NULL; - GC.collections++; -} - -void allocate_banks(void) { - for (int i = 0; i < 2; i++) { - GC.start[i] = malloc(size_heap * sizeof(uint64_t)); - GC.end[i] = GC.start[i] + size_heap; - } -} - -void init_gc(void) { - gc_enabled = true; - size_heap = SIZE_HEAP_DEFAULT; - { - const char *heap_size_env = getenv("EML_HEAP_SIZE"); - if (heap_size_env) { - int heap_size_val = atoi(heap_size_env); - if (heap_size_val >= 400 && heap_size_val <= 1024 * 1024) - size_heap = (size_t)heap_size_val; - } - } - allocate_banks(); - GC.current_bank = 0; - GC.alloc_ptr = GC.start[0]; - GC.words_allocated_total = 0; -} - -void destroy_gc(void) { - for (int i = 0; i < 2; i++) - free(GC.start[i]); -} - -void set_ptr_stack(uint64_t *stack_bottom) { PTR_STACK = stack_bottom; } - -eml_value print_gc_status(void) { - int bank = GC.current_bank; - ptrdiff_t current_alloc = GC.alloc_ptr - GC.start[bank]; - ptrdiff_t free_space = GC.end[bank] - GC.alloc_ptr; - printf("=== GC Status ===\n"); - printf("Current allocated: %td\n", current_alloc); - printf("Total allocated: %" PRIu64 "\n", total); - printf("Free space: %td\n", free_space); - printf("Heap size: %d\n", SIZE_HEAP); - printf("Current bank index: %d\n", bank); - printf("GC collections: %" PRIu64 "\n", collections); - printf("GC allocations: %" PRIu64 "\n", allocations); - printf("=================\n"); - fflush(stdout); - return (eml_value)(uintptr_t)tag_int_val(0); -} - -uint64_t *gc_alloc(size_t words, uint64_t tag) { - size_t total_words = words + HEADER_WORDS; - if (GC.alloc_ptr + total_words > GC.end[GC.current_bank] && - (collect(), GC.alloc_ptr + total_words > GC.end[GC.current_bank])) { - fprintf(stderr, "Out of memory\n"); - abort(); - } - box_header_t *header = (box_header_t *)GC.alloc_ptr; - *header = (box_header_t){ .tag = (uint8_t)tag, .size = (uint16_t)words }; - uint64_t *obj = get_payload(header); - memset(obj, 0, words * sizeof(uint64_t)); - GC.alloc_ptr += total_words; - GC.allocations++; - GC.words_allocated_total += total_words; - return obj; -} - -static void *eml_alloc(size_t bytes, uint64_t tag) { - if (gc_enabled) { - size_t words = (bytes + sizeof(uint64_t) - 1) / sizeof(uint64_t); - return gc_alloc(words, tag); - } - return malloc(bytes); -} - -int64_t get_heap_start(void) { return tag_int_val(0); } -int64_t get_heap_final(void) { return tag_int_val((int64_t)size_heap); } - -#else -eml_value print_gc_status(void) { - (void)printf("GC disabled\n"); - return (eml_value)(uintptr_t)tag_int_val(0); -} -eml_value collect(void) { return (eml_value)(uintptr_t)tag_int_val(0); } -void init_gc(void) {} -void destroy_gc(void) {} -int64_t get_heap_start(void) { return tag_int_val(0); } -int64_t get_heap_final(void) { return tag_int_val(0); } -void set_ptr_stack(uint64_t *stack_bottom) { (void)stack_bottom; } -static void *eml_alloc(size_t bytes, uint64_t tag) { (void)tag; return malloc(bytes); } -#endif - -void print_int(int64_t tagged_n) { - printf("%ld\n", (long)(tagged_n >> 1)); -} - -typedef struct { - void *code; - int64_t arity; - int64_t received; - void *args[]; -} closure; - -extern void *llvm_call_indirect(void *fn, void **args, int64_t n); - -static void *call_closure_full(closure *c, void **args) { - return llvm_call_indirect(c->code, args, c->arity); -} - -void *eml_applyN(closure *c, int64_t argc, void **argv); - -closure *alloc_closure(void *code, int64_t arity) { - size_t slots = (arity > 0) ? (size_t)arity : 1; - size_t sz = sizeof(closure) + slots * sizeof(void *); -#if defined(ENABLE_GC) - closure *c = (closure *)eml_alloc(sz, TAG_CLOSURE); -#else - closure *c = (closure *)malloc(sz); -#endif - c->code = code; - c->arity = arity; - c->received = 0; - memset(c->args, 0, slots * sizeof(void *)); - return c; -} - -static closure *copy_closure(const closure *src) { - size_t slots = (src->arity > 0) ? (size_t)src->arity : 1; - size_t sz = sizeof(closure) + slots * sizeof(void *); -#if defined(ENABLE_GC) - closure *dst = (closure *)eml_alloc(sz, TAG_CLOSURE); -#else - closure *dst = (closure *)malloc(sz); -#endif - memcpy(dst, src, sz); - return dst; -} - -void *apply1(closure *c, int64_t arg) { - void *argv[1]; - argv[0] = (void *)(uintptr_t)arg; - return eml_applyN(c, 1, argv); -} - -void *eml_applyN(closure *c, int64_t argc, void **argv) { - int64_t all = c->received + argc; - if (all == c->arity) { -#if defined(ENABLE_GC) - void **all_args = (void **)eml_alloc((size_t)c->arity * sizeof(void *), TAG_CLOSURE); -#else - void **all_args = (void **)malloc((size_t)c->arity * sizeof(void *)); -#endif - for (int64_t i = 0; i < c->received; i++) all_args[i] = c->args[i]; - for (int64_t i = 0; i < argc; i++) all_args[c->received + i] = argv[i]; - void *result = call_closure_full(c, all_args); -#if !defined(ENABLE_GC) - free(all_args); -#endif - return result; - } - closure *partial = copy_closure(c); - for (int64_t i = 0; i < argc; i++) - partial->args[partial->received++] = argv[i]; - return partial; -} - -typedef struct { int64_t arity; void *args[]; } tuple; - -tuple *create_tuple(int64_t argc, void **args) { - size_t words = 1 + (size_t)argc; -#if defined(ENABLE_GC) - tuple *t = (tuple *)eml_alloc(words * sizeof(uint64_t), TAG_TUPLE); -#else - tuple *t = (tuple *)malloc(words * sizeof(uint64_t)); -#endif - t->arity = argc; - for (size_t i = 0; i < (size_t)argc; i++) t->args[i] = args[i]; - return t; -} - -void *field(tuple *t, long n) { return t->args[n >> 1]; } - -extern void eml_main(void); - -int main(void) { - eml_main(); - return 0; -} diff --git a/EML/lib/runtime/riscv_runtime.c b/EML/lib/runtime/riscv_runtime.c deleted file mode 100644 index 15c461fd..00000000 --- a/EML/lib/runtime/riscv_runtime.c +++ /dev/null @@ -1,367 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#define TO_ML_INTEGER(n) ((int64_t)(n) >> 1) - -void print_int(long n) { printf("%ld\n", TO_ML_INTEGER(n)); } - -#define TAG_TUPLE 246 -#define TAG_CLOSURE 247 -#define RISCV_REG_ARGS 8 -#define SIZE_HEAP 800 -#define HEADER_WORDS 1 - -typedef struct { - uint64_t raw; -} box_header_t; - -static inline uint16_t header_tag(const box_header_t *h) { return (uint16_t)(h->raw & 0xFFFFu); } -static inline uint16_t header_size(const box_header_t *h) { return (uint16_t)((h->raw >> 16) & 0xFFFFu); } -static inline void set_header(box_header_t *h, uint16_t tag, uint16_t size) { - h->raw = ((uint64_t)size << 16) | (uint64_t)tag; -} - -static inline box_header_t *get_header(uint64_t *payload) { - return (box_header_t *)((uint64_t *)payload - 1); -} - -static inline uint64_t *get_payload(box_header_t *hdr) { return (uint64_t *)(hdr + 1); } - -#define IS_INT(v) ((v)&0x1) -#define IS_PTR(v) ((v) != 0 && !IS_INT(v)) - -typedef struct { - uint64_t *start[2]; - uint64_t *end[2]; - uint64_t *alloc_ptr; - int current_bank; - uint64_t allocations; - uint64_t collections; - uint64_t words_allocated_total; -} gc_state; - -static gc_state GC; -static uint64_t *PTR_STACK = NULL; -static bool gc_enabled = false; - -static inline int get_current_bank_idx() { return GC.current_bank; } -static inline int get_another_bank_idx() { return GC.current_bank ^ 1; } -static inline bool in_bank(uint64_t *ptr, int bank_idx) { - return (GC.start[bank_idx] <= ptr) && (ptr < GC.end[bank_idx]); -} - -static size_t scan_start_for_tag(uint16_t tag) { - if (tag == TAG_TUPLE) { - return 1; - } - if (tag == TAG_CLOSURE) { - return 3; - } - return 0; -} - -void init_gc(void) { - if (gc_enabled) { - return; - } - for (int i = 0; i < 2; ++i) { - GC.start[i] = (uint64_t *)malloc(SIZE_HEAP * sizeof(uint64_t)); - if (GC.start[i] == NULL) { - fprintf(stderr, "Failed to allocate GC bank\n"); - abort(); - } - GC.end[i] = GC.start[i] + SIZE_HEAP; - } - GC.current_bank = 0; - GC.alloc_ptr = GC.start[0]; - GC.allocations = 0; - GC.collections = 0; - GC.words_allocated_total = 0; - gc_enabled = true; -} - -void destroy_gc(void) { - if (!gc_enabled) { - return; - } - for (int i = 0; i < 2; ++i) { - free(GC.start[i]); - GC.start[i] = NULL; - GC.end[i] = NULL; - } - GC.alloc_ptr = NULL; - PTR_STACK = NULL; - gc_enabled = false; -} - -void set_ptr_stack(uint64_t *ptr) { PTR_STACK = ptr; } - -static void mark_and_copy(uint64_t *stack_slot); - -static uint64_t *forward_or_copy(uint64_t *old_payload, bool *did_copy) { - uint64_t possible_forward_ptr = *((uint64_t *)old_payload - 1); - if (in_bank((uint64_t *)possible_forward_ptr, get_current_bank_idx())) { - *did_copy = false; - return (uint64_t *)possible_forward_ptr; - } - - box_header_t *old_header = get_header(old_payload); - uint16_t payload_words = header_size(old_header); - uint16_t object_tag = header_tag(old_header); - size_t need_words = (size_t)payload_words + HEADER_WORDS; - - if (GC.alloc_ptr + need_words > GC.end[GC.current_bank]) { - fprintf(stderr, "Out of memory during GC copy\n"); - abort(); - } - - box_header_t *new_header = (box_header_t *)GC.alloc_ptr; - set_header(new_header, object_tag, payload_words); - uint64_t *new_payload = get_payload(new_header); - memcpy(new_payload, old_payload, (size_t)payload_words * sizeof(uint64_t)); - - GC.alloc_ptr += need_words; - GC.words_allocated_total += need_words; - *((uint64_t *)old_payload - 1) = (uint64_t)new_payload; - *did_copy = true; - return new_payload; -} - -static void scan_object(uint64_t *obj) { - box_header_t *header = get_header(obj); - size_t start = scan_start_for_tag(header_tag(header)); - size_t payload_words = (size_t)header_size(header); - for (size_t i = start; i < payload_words; i++) { - mark_and_copy(obj + i); - } -} - -static void mark_and_copy(uint64_t *stack_slot) { - uint64_t raw_value = *stack_slot; - if (!IS_PTR(raw_value)) { - return; - } - - uint64_t *old_object_payload = (uint64_t *)raw_value; - int old_bank = get_another_bank_idx(); - if (!in_bank(old_object_payload, old_bank)) { - return; - } - - bool copied_now = false; - uint64_t *new_object_payload = forward_or_copy(old_object_payload, &copied_now); - *stack_slot = (uint64_t)new_object_payload; - - if (copied_now) { - scan_object(new_object_payload); - } -} - -void collect(void) { - uint64_t dummy; - uint64_t *current_stack_top = &dummy; - if (!PTR_STACK || current_stack_top > PTR_STACK) { - return; - } - - GC.current_bank ^= 1; - GC.alloc_ptr = GC.start[GC.current_bank]; - - for (uint64_t *stack_slot = current_stack_top; stack_slot <= PTR_STACK; stack_slot++) { - mark_and_copy(stack_slot); - } - - GC.collections++; -} - -uint64_t *gc_alloc(size_t words, uint64_t tag) { - size_t total_words = words + HEADER_WORDS; - - if (GC.alloc_ptr + total_words > GC.end[GC.current_bank]) { - collect(); - if (GC.alloc_ptr + total_words > GC.end[GC.current_bank]) { - fprintf(stderr, "Out of memory\n"); - abort(); - } - } - - box_header_t *h = (box_header_t *)GC.alloc_ptr; - set_header(h, (uint16_t)tag, (uint16_t)words); - uint64_t *obj = get_payload(h); - memset(obj, 0, words * sizeof(uint64_t)); - - GC.alloc_ptr += total_words; - GC.allocations++; - GC.words_allocated_total += total_words; - return obj; -} - -int64_t get_heap_start(void) { return 1; } -int64_t get_heap_final(void) { return (int64_t)((SIZE_HEAP << 1) | 1); } - - -void print_gc_status() { - int bank = GC.current_bank; - ptrdiff_t current_alloc = GC.alloc_ptr - GC.start[bank]; - ptrdiff_t free_space = GC.end[bank] - GC.alloc_ptr; - uint64_t total = GC.words_allocated_total; - uint64_t collections = GC.collections; - uint64_t allocations = GC.allocations; - - printf("=== GC Status ===\n"); - printf("Current allocated: %td\n", current_alloc); - printf("Total allocated: %" PRIu64 "\n", total); - printf("Free space: %td\n", free_space); - printf("Heap size: %d\n", SIZE_HEAP); - printf("Current bank index: %d\n", bank); - printf("GC collections: %" PRIu64 "\n", collections); - printf("GC allocations: %" PRIu64 "\n", allocations); - printf("=================\n"); - fflush(stdout); -} - -static void *eml_alloc(size_t size_in_bytes, uint64_t tag) { - if (gc_enabled) { - uint64_t size_in_words = - ((uint64_t)size_in_bytes + sizeof(uint64_t) - 1) / sizeof(uint64_t); - return gc_alloc(size_in_words, tag); - } - (void)tag; - return malloc(size_in_bytes); -} - -typedef struct { - void *code; - int64_t arity; - int64_t received; - void *args[]; -} closure; - - -closure *alloc_closure(void *code, int64_t arity) { - size_t size_in_bytes = sizeof(closure) + arity * sizeof(void *); - - closure *c = (closure *)eml_alloc(size_in_bytes, TAG_CLOSURE); - - c->code = code; - c->arity = arity; - c->received = 0; - - memset(c->args, 0, sizeof(void *) * arity); - return c; -} - -static closure *copy_closure(const closure *src) { - size_t total_size = sizeof(closure) + src->arity * sizeof(void *); - - closure *dst = (closure *)eml_alloc(total_size, TAG_CLOSURE); - - memcpy(dst, src, total_size); - return dst; -} - - -static void *call_closure_full(closure *c, void **args) { - int64_t arity = c->arity; - int64_t args_in_stack = (arity > RISCV_REG_ARGS) ? (arity - RISCV_REG_ARGS) : 0; - size_t storage_for_stack_args = (size_t)args_in_stack * sizeof(void *); - void **stack_args = (args_in_stack > 0) ? args + RISCV_REG_ARGS : NULL; - void *result; - - asm volatile( - "mv t0, %[storage_for_stack_args]\n" - "sub sp, sp, t0\n" - - "beqz %[args_in_stack], .Lend_stack_push\n" - "mv t1, sp\n" - "mv t2, %[stack_args]\n" - "mv t3, %[args_in_stack]\n" - "li t4, 0\n" - ".Lloop_stack_push:\n" - "beq t4, t3, .Lend_stack_push\n" - "slli t5, t4, 3\n" - "add t6, t2, t5\n" - "ld t0, 0(t6)\n" - "sd t0, 0(t1)\n" - "addi t1, t1, 8\n" - "addi t4, t4, 1\n" - "j .Lloop_stack_push\n" - ".Lend_stack_push:\n" - - "mv a0, %[a0]\n" - "mv a1, %[a1]\n" - "mv a2, %[a2]\n" - "mv a3, %[a3]\n" - "mv a4, %[a4]\n" - "mv a5, %[a5]\n" - "mv a6, %[a6]\n" - "mv a7, %[a7]\n" - - "mv t6, %[fn]\n" - "jalr ra, t6, 0\n" - - "mv t0, %[storage_for_stack_args]\n" - "add sp, sp, t0\n" - "mv %[result], a0\n" - - : [result] "=r"(result) - : [fn] "r"(c->code), - [a0] "r"(args[0]), [a1] "r"(args[1]), - [a2] "r"(args[2]), [a3] "r"(args[3]), - [a4] "r"(args[4]), [a5] "r"(args[5]), - [a6] "r"(args[6]), [a7] "r"(args[7]), - [stack_args] "r"(stack_args), [args_in_stack] "r"(args_in_stack), - [storage_for_stack_args] "r"(storage_for_stack_args) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", - "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "memory"); - - return result; -} - -void *eml_applyN(closure *c, int64_t argc, void **argv) { - - int64_t all_receive_args = c->received + argc; - - if (all_receive_args == c->arity) { - int64_t total_count_args = c->arity; - void **args = (void **)eml_alloc(total_count_args * sizeof(void *), TAG_CLOSURE); - - for (int64_t i = 0; i < c->received; i++) { - args[i] = c->args[i]; - } - for (int64_t i = 0; i < argc; i++) { - args[c->received + i] = argv[i]; - } - return call_closure_full(c, args); - } - - closure *partial = copy_closure(c); - - for (int64_t i = 0; i < argc; i++) { - partial->args[partial->received++] = argv[i]; - } - - return partial; -} - -typedef struct { - int64_t arity; - void *args[]; -} tuple; - -tuple *create_tuple(int64_t argc, void **args) { - size_t words = 1 + (size_t)argc; - tuple *t = (tuple *)eml_alloc(words * sizeof(uint64_t), TAG_TUPLE); - t->arity = argc; - for (size_t i = 0; i < (size_t)argc; i++) { - t->args[i] = args[i]; - } - return t; -} - -void *field(tuple *t, long n) { return t->args[n >> 1]; } diff --git a/EML/lib/runtime/runtime.c b/EML/lib/runtime/runtime.c new file mode 100644 index 00000000..bbcec4bb --- /dev/null +++ b/EML/lib/runtime/runtime.c @@ -0,0 +1,421 @@ +/* + * Unified EML runtime for RISC-V and LLVM backends. + * Build with -DEML_RISCV for RISC-V (call via asm, no main), + * -DEML_LLVM for LLVM (llvm_call_indirect + main -> eml_main). + * Optional: -DENABLE_GC for GC; otherwise malloc-only. + */ +#include +#include +#include +#include +#include +#include +#include + +typedef void *eml_value; + +static int64_t tag_int_val(int64_t n) { return (n << 1) | 1; } + +#define TO_ML_INTEGER(tagged_n) ((int64_t)(tagged_n) >> 1) + +void print_int(int64_t tagged_n) { + printf("%ld\n", (long)TO_ML_INTEGER(tagged_n)); +} + +/* Unified object header and GC tags (same for both backends) */ +#define TAG_TUPLE 0 +#define TAG_CLOSURE 1 +#define TAG_LAST 2 +#define HEADER_WORDS 1 + +#define SIZE_HEAP_DEFAULT 800 +#define MAX_STACK_SCAN_SLOTS (128 * 1024) + +#define IS_INT(v) ((v) & 0x1) +#define IS_PTR(v) ((v) != 0 && !IS_INT(v)) + +typedef struct { + uint8_t tag; + uint8_t _pad1; + uint16_t size; + uint32_t _pad2; +} box_header_t; + +static inline box_header_t *get_header(uint64_t *payload) { + return (box_header_t *)((uint64_t *)payload - 1); +} +static inline uint64_t *get_payload(box_header_t *hdr) { + return (uint64_t *)(hdr + 1); +} + +static const size_t TAG_SCAN_START[] = { + [TAG_TUPLE] = 1, + [TAG_CLOSURE] = 3, +}; + +typedef struct { + uint64_t *start[2]; + uint64_t *end[2]; + uint64_t *alloc_ptr; + int current_bank; + uint64_t allocations; + uint64_t collections; + uint64_t words_allocated_total; +} gc_state; + +static gc_state GC; +static uint64_t *PTR_STACK = NULL; +static uint64_t *STACK_SCAN_LOW = NULL; +static uint64_t *STACK_SCAN_HIGH = NULL; +static bool gc_enabled = false; +static size_t size_heap = SIZE_HEAP_DEFAULT; + +static inline int get_current_bank_idx(void) { return GC.current_bank; } +static inline int get_another_bank_idx(void) { return GC.current_bank ^ 1; } +static inline bool in_bank(uint64_t *ptr, int bank_idx) { + return (GC.start[bank_idx] <= ptr) && (ptr < GC.end[bank_idx]); +} + +#if defined(ENABLE_GC) + +static void mark_and_copy(uint64_t *stack_slot); + +static uint64_t *forward_or_copy(uint64_t *old_payload, bool *did_copy) { + int from_bank = get_another_bank_idx(); + if (old_payload <= (uint64_t *)GC.start[from_bank] + HEADER_WORDS - 1) { + *did_copy = false; + return old_payload; + } + box_header_t *old_header = get_header(old_payload); + if (old_header->tag >= TAG_LAST || old_header->size == 0 || + old_header->size > size_heap) { + *did_copy = false; + return old_payload; + } + uint64_t possible_forward_ptr = *((uint64_t *)old_payload - 1); + if (in_bank((uint64_t *)possible_forward_ptr, get_current_bank_idx())) { + *did_copy = false; + return (uint64_t *)possible_forward_ptr; + } + *did_copy = true; + uint16_t payload_words = old_header->size; + uint8_t object_tag = old_header->tag; + if (GC.alloc_ptr + payload_words + HEADER_WORDS > GC.end[GC.current_bank]) { + *did_copy = false; + return old_payload; + } + box_header_t *new_header = (box_header_t *)GC.alloc_ptr; + new_header->tag = object_tag; + new_header->size = payload_words; + uint64_t *new_payload = get_payload(new_header); + memcpy(new_payload, old_payload, payload_words * sizeof(uint64_t)); + GC.alloc_ptr += payload_words + HEADER_WORDS; + GC.words_allocated_total += payload_words + HEADER_WORDS; + *((uint64_t *)old_payload - 1) = (uint64_t)new_payload; + return new_payload; +} + +static void scan_object(uint64_t *obj) { + box_header_t *header = get_header(obj); + size_t start = (header->tag < TAG_LAST) ? TAG_SCAN_START[header->tag] : 0; + for (size_t i = start; i < header->size; i++) + mark_and_copy(obj + i); +} + +static void mark_and_copy(uint64_t *stack_slot) { + uint64_t raw_value = *stack_slot; + if (!IS_PTR(raw_value)) return; + uint64_t *old_object_payload = (uint64_t *)raw_value; + int another_bank = get_another_bank_idx(); + if (!in_bank(old_object_payload, another_bank)) { + if (STACK_SCAN_LOW && STACK_SCAN_HIGH) { + uint64_t *low = STACK_SCAN_LOW < STACK_SCAN_HIGH ? STACK_SCAN_LOW : STACK_SCAN_HIGH; + uint64_t *high = STACK_SCAN_LOW < STACK_SCAN_HIGH ? STACK_SCAN_HIGH : STACK_SCAN_LOW; + if (old_object_payload >= low && old_object_payload <= high) + return; + } + return; + } + if (old_object_payload < (uint64_t *)GC.start[another_bank] + HEADER_WORDS) + return; + bool object_was_copied_now; + uint64_t *new_object_payload = + forward_or_copy(old_object_payload, &object_was_copied_now); + *stack_slot = (uint64_t)new_object_payload; + if (object_was_copied_now) scan_object(new_object_payload); +} + +static void allocate_banks(void) { + for (int i = 0; i < 2; i++) { + GC.start[i] = (uint64_t *)malloc(size_heap * sizeof(uint64_t)); + if (GC.start[i] == NULL) { + fprintf(stderr, "Failed to allocate GC bank\n"); + abort(); + } + GC.end[i] = GC.start[i] + size_heap; + } +} + +eml_value collect(void) { + uint64_t dummy; + uint64_t *current_stack_top = &dummy; + if (!PTR_STACK || current_stack_top > PTR_STACK) + return (eml_value)(uintptr_t)tag_int_val(0); + STACK_SCAN_LOW = current_stack_top; + STACK_SCAN_HIGH = PTR_STACK; + GC.current_bank ^= 1; + GC.alloc_ptr = GC.start[GC.current_bank]; + { + uint64_t *stack_slot = current_stack_top; + size_t n = 0; + for (; n < MAX_STACK_SCAN_SLOTS && stack_slot <= PTR_STACK; n++, stack_slot++) + mark_and_copy(stack_slot); + } + STACK_SCAN_LOW = NULL; + STACK_SCAN_HIGH = NULL; + GC.collections++; + return (eml_value)(uintptr_t)tag_int_val(0); +} + +uint64_t *gc_alloc(size_t words, uint64_t tag) { + size_t total_words = words + HEADER_WORDS; + if (GC.alloc_ptr + total_words > GC.end[GC.current_bank]) { + collect(); + if (GC.alloc_ptr + total_words > GC.end[GC.current_bank]) { + fprintf(stderr, "Out of memory\n"); + abort(); + } + } + box_header_t *header = (box_header_t *)GC.alloc_ptr; + header->tag = (uint8_t)tag; + header->size = (uint16_t)words; + uint64_t *obj = get_payload(header); + memset(obj, 0, words * sizeof(uint64_t)); + GC.alloc_ptr += total_words; + GC.allocations++; + GC.words_allocated_total += total_words; + return obj; +} + +void init_gc(void) { + if (gc_enabled) return; + gc_enabled = true; + size_heap = SIZE_HEAP_DEFAULT; + { + const char *heap_size_env = getenv("EML_HEAP_SIZE"); + if (heap_size_env) { + int heap_size_val = atoi(heap_size_env); + if (heap_size_val >= 400 && heap_size_val <= 1024 * 1024) + size_heap = (size_t)heap_size_val; + } + } + allocate_banks(); + GC.current_bank = 0; + GC.alloc_ptr = GC.start[0]; + GC.allocations = 0; + GC.collections = 0; + GC.words_allocated_total = 0; +} + +void destroy_gc(void) { + if (!gc_enabled) return; + for (int i = 0; i < 2; i++) { + free(GC.start[i]); + GC.start[i] = NULL; + GC.end[i] = NULL; + } + GC.alloc_ptr = NULL; + PTR_STACK = NULL; + gc_enabled = false; +} + +void set_ptr_stack(uint64_t *stack_bottom) { PTR_STACK = stack_bottom; } + +eml_value print_gc_status(void) { + int bank = GC.current_bank; + ptrdiff_t current_alloc = GC.alloc_ptr - GC.start[bank]; + ptrdiff_t free_space = GC.end[bank] - GC.alloc_ptr; + printf("=== GC Status ===\n"); + printf("Current allocated: %td\n", current_alloc); + printf("Total allocated: %" PRIu64 "\n", GC.words_allocated_total); + printf("Free space: %td\n", free_space); + printf("Heap size: %zu\n", size_heap); + printf("Current bank index: %d\n", bank); + printf("GC collections: %" PRIu64 "\n", GC.collections); + printf("GC allocations: %" PRIu64 "\n", GC.allocations); + printf("=================\n"); + fflush(stdout); + return (eml_value)(uintptr_t)tag_int_val(0); +} + +static void *eml_alloc(size_t bytes, uint64_t tag) { + if (gc_enabled) { + size_t words = (bytes + sizeof(uint64_t) - 1) / sizeof(uint64_t); + return gc_alloc(words, tag); + } + (void)tag; + return malloc(bytes); +} + +int64_t get_heap_start(void) { return tag_int_val(0); } +int64_t get_heap_final(void) { return tag_int_val((int64_t)size_heap); } + +#else /* !ENABLE_GC */ + +void init_gc(void) {} +void destroy_gc(void) {} +void set_ptr_stack(uint64_t *stack_bottom) { (void)stack_bottom; } + +eml_value collect(void) { + return (eml_value)(uintptr_t)tag_int_val(0); +} + +eml_value print_gc_status(void) { + (void)printf("GC disabled\n"); + return (eml_value)(uintptr_t)tag_int_val(0); +} + +static void *eml_alloc(size_t bytes, uint64_t tag) { + (void)tag; + return malloc(bytes); +} + +int64_t get_heap_start(void) { return tag_int_val(0); } +int64_t get_heap_final(void) { return tag_int_val(0); } + +#endif /* ENABLE_GC */ + +/* Closure and call convention */ +typedef struct { + void *code; + int64_t arity; + int64_t received; + void *args[]; +} closure; + +#if defined(EML_LLVM) + +extern void *llvm_call_indirect(void *fn, void **args, int64_t n); + +static void *call_closure_full(closure *c, void **args) { + return llvm_call_indirect(c->code, args, c->arity); +} + +#else /* EML_RISCV */ + +#define RISCV_REG_ARGS 8 + +static void *call_closure_full(closure *c, void **args) { + int64_t arity = c->arity; + int64_t args_in_stack = (arity > RISCV_REG_ARGS) ? (arity - RISCV_REG_ARGS) : 0; + size_t storage_for_stack_args = (size_t)args_in_stack * sizeof(void *); + void **stack_args = (args_in_stack > 0) ? args + RISCV_REG_ARGS : NULL; + void *result; + + asm volatile( + "mv t0, %[storage_for_stack_args]\n" + "sub sp, sp, t0\n" + "beqz %[args_in_stack], .Lend_stack_push\n" + "mv t1, sp\n" + "mv t2, %[stack_args]\n" + "mv t3, %[args_in_stack]\n" + "li t4, 0\n" + ".Lloop_stack_push:\n" + "beq t4, t3, .Lend_stack_push\n" + "slli t5, t4, 3\n" + "add t6, t2, t5\n" + "ld t0, 0(t6)\n" + "sd t0, 0(t1)\n" + "addi t1, t1, 8\n" + "addi t4, t4, 1\n" + "j .Lloop_stack_push\n" + ".Lend_stack_push:\n" + "mv a0, %[a0]\n" + "mv a1, %[a1]\n" + "mv a2, %[a2]\n" + "mv a3, %[a3]\n" + "mv a4, %[a4]\n" + "mv a5, %[a5]\n" + "mv a6, %[a6]\n" + "mv a7, %[a7]\n" + "mv t6, %[fn]\n" + "jalr ra, t6, 0\n" + "mv t0, %[storage_for_stack_args]\n" + "add sp, sp, t0\n" + "mv %[result], a0\n" + : [result] "=r"(result) + : [fn] "r"(c->code), + [a0] "r"(args[0]), [a1] "r"(args[1]), [a2] "r"(args[2]), [a3] "r"(args[3]), + [a4] "r"(args[4]), [a5] "r"(args[5]), [a6] "r"(args[6]), [a7] "r"(args[7]), + [stack_args] "r"(stack_args), [args_in_stack] "r"(args_in_stack), + [storage_for_stack_args] "r"(storage_for_stack_args) + : "t0", "t1", "t2", "t3", "t4", "t5", "t6", + "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "memory"); + + return result; +} + +#endif /* EML_LLVM / EML_RISCV */ + +closure *alloc_closure(void *code, int64_t arity) { + size_t slots = (arity > 0) ? (size_t)arity : 1; + size_t sz = sizeof(closure) + slots * sizeof(void *); + closure *c = (closure *)eml_alloc(sz, TAG_CLOSURE); + c->code = code; + c->arity = arity; + c->received = 0; + memset(c->args, 0, slots * sizeof(void *)); + return c; +} + +static closure *copy_closure(const closure *src) { + size_t slots = (src->arity > 0) ? (size_t)src->arity : 1; + size_t sz = sizeof(closure) + slots * sizeof(void *); + closure *dst = (closure *)eml_alloc(sz, TAG_CLOSURE); + memcpy(dst, src, sz); + return dst; +} + +void *eml_applyN(closure *c, int64_t argc, void **argv) { + int64_t all = c->received + argc; + if (all == c->arity) { + void **all_args = (void **)eml_alloc((size_t)c->arity * sizeof(void *), TAG_CLOSURE); + for (int64_t i = 0; i < c->received; i++) all_args[i] = c->args[i]; + for (int64_t i = 0; i < argc; i++) all_args[c->received + i] = argv[i]; + void *result = call_closure_full(c, all_args); +#if !defined(ENABLE_GC) + free(all_args); +#endif + return result; + } + closure *partial = copy_closure(c); + for (int64_t i = 0; i < argc; i++) + partial->args[partial->received++] = argv[i]; + return partial; +} + +typedef struct { + int64_t arity; + void *args[]; +} tuple; + +tuple *create_tuple(int64_t argc, void **args) { + size_t words = 1 + (size_t)argc; + tuple *t = (tuple *)eml_alloc(words * sizeof(uint64_t), TAG_TUPLE); + t->arity = argc; + for (size_t i = 0; i < (size_t)argc; i++) t->args[i] = args[i]; + return t; +} + +void *field(tuple *t, long n) { return t->args[n >> 1]; } + +#if defined(EML_LLVM) + +extern void eml_main(void); + +int main(void) { + eml_main(); + return 0; +} + +#endif /* EML_LLVM */ From 8b305c3120388a6dd159f10a4c6b5f0d097d6356 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 10 Mar 2026 01:28:21 +0300 Subject: [PATCH 63/74] ref Signed-off-by: Victoria Ostrovskaya --- EML/bin/EML.ml | 8 -------- EML/lib/backend/llvm_ir/generator.ml | 1 - EML/lib/backend/ricsv/analysis.ml | 2 -- EML/lib/backend/ricsv/generator.ml | 4 ---- EML/lib/middleend/anf_pp.ml | 1 - EML/lib/middleend/ll.ml | 1 - EML/lib/utils/monads.ml | 1 - EML/tests/llvm_tests.ml | 2 -- EML/tests/riscv_tests.ml | 2 -- 9 files changed, 22 deletions(-) diff --git a/EML/bin/EML.ml b/EML/bin/EML.ml index c87be74d..06c4b14b 100644 --- a/EML/bin/EML.ml +++ b/EML/bin/EML.ml @@ -94,10 +94,6 @@ let run_infer_only text env oc : (env, unit) Result.t = Ok env') ;; -(* ------------------------------------------------------------------------- *) -(* Compiler entry point *) -(* ------------------------------------------------------------------------- *) - let compiler opts : (unit, unit) Result.t = let run text env oc = if opts.infer_only @@ -124,10 +120,6 @@ let compiler opts : (unit, unit) Result.t = | Error () -> Error () ;; -(* ------------------------------------------------------------------------- *) -(* CLI *) -(* ------------------------------------------------------------------------- *) - let parse_args () : (opts, unit) Result.t = let parse_backend = function | "llvm" -> Ok Llvm diff --git a/EML/lib/backend/llvm_ir/generator.ml b/EML/lib/backend/llvm_ir/generator.ml index bbe18961..55863ff9 100644 --- a/EML/lib/backend/llvm_ir/generator.ml +++ b/EML/lib/backend/llvm_ir/generator.ml @@ -156,7 +156,6 @@ let untag_bool_val tagged_val = ;; let tag_bool_result cond_value = - (* false=2, true=4 via select (no zext) *) let tagged_i64 = build_select cond_value (const_int int_t 4) (const_int int_t 2) "tagged_bool" builder in diff --git a/EML/lib/backend/ricsv/analysis.ml b/EML/lib/backend/ricsv/analysis.ml index 920a0a84..d93d8b44 100644 --- a/EML/lib/backend/ricsv/analysis.ml +++ b/EML/lib/backend/ricsv/analysis.ml @@ -71,7 +71,6 @@ let rec max_stack_args_cexpr = function | ComplexList imm_list -> max_by max_stack_args_imm imm_list | ComplexApp (_first, second, rest) -> let argument_count = 1 + List.length rest in - (* Reserve enough for largest call: eml_applyN needs nargs words; direct needs max(0, nargs-8). *) let required_stack_words = argument_count in let max_nested_argument_pressure = max_by max_stack_args_imm (second :: rest) in max required_stack_words max_nested_argument_pressure @@ -102,7 +101,6 @@ let rec max_create_tuple_array_cexpr = function max here (max_by max_create_tuple_array_imm elts) | ComplexField (imm, _) -> max_create_tuple_array_imm imm | ComplexList imm_list -> - (* Each cons adds 16 bytes; they accumulate along the list build *) let bytes_per_cons_cell = 2 * word_size in let bytes_from_elements = sum_by max_create_tuple_array_imm imm_list in (bytes_per_cons_cell * List.length imm_list) + bytes_from_elements diff --git a/EML/lib/backend/ricsv/generator.ml b/EML/lib/backend/ricsv/generator.ml index cbc0644f..c4f78645 100644 --- a/EML/lib/backend/ricsv/generator.ml +++ b/EML/lib/backend/ricsv/generator.ml @@ -36,8 +36,6 @@ let load_into_reg destination_register source_location = return () ;; -(** Spill function parameters to the frame in param order (index 0 → first slot). - Ensures env maps each param name to a consistent slot so (self l) loads self, not l. *) let spill_params_to_frame params_reg = Base.List.foldi params_reg ~init:(return ()) ~f:(fun index acc param -> let* () = acc in @@ -313,7 +311,6 @@ and gen_curried_call modify_env (fun environment -> Base.Map.set environment ~key:part_name ~data:partial_function_location) in - (* Apply each rest_arg one at a time (eml_applyN expects one application per call) *) let rec apply_remaining_arguments = function | [] -> return () | [ argument ] -> @@ -353,7 +350,6 @@ and gen_binop dst binary_operator left_operand right_operand = and gen_branch dst cond then_e else_e = let* () = gen_imm t0 cond in let* else_lbl, end_lbl = fresh_branch in - (* Branch to else when cond equals tagged false (1); not zero *) let* () = append (li t1 (tag_int 0)) in let* () = append (beq t0 t1 else_lbl) in let* state_before_then = get in diff --git a/EML/lib/middleend/anf_pp.ml b/EML/lib/middleend/anf_pp.ml index e0dfe79e..9d4d4b7f 100644 --- a/EML/lib/middleend/anf_pp.ml +++ b/EML/lib/middleend/anf_pp.ml @@ -2,7 +2,6 @@ (** SPDX-License-Identifier: LGPL-3.0-or-later *) -(* Pretty-printer for ANF expressions *) open Stdlib.Format open Frontend open Ast diff --git a/EML/lib/middleend/ll.ml b/EML/lib/middleend/ll.ml index f76736ba..d1b14b6e 100644 --- a/EML/lib/middleend/ll.ml +++ b/EML/lib/middleend/ll.ml @@ -44,7 +44,6 @@ let names_in_pattern p = (match p_opt with | None -> [] | Some x -> collect x) - (* | PatType _ -> ... *) in collect p ;; diff --git a/EML/lib/utils/monads.ml b/EML/lib/utils/monads.ml index 1cc07b40..b9d534c8 100644 --- a/EML/lib/utils/monads.ml +++ b/EML/lib/utils/monads.ml @@ -20,7 +20,6 @@ module ANFMonad = struct let run m = m 0 |> snd let fail msg = fun counter -> counter, Error msg - (** Монадические операторы *) module Syntax = struct let ( let* ) = ( >>= ) end diff --git a/EML/tests/llvm_tests.ml b/EML/tests/llvm_tests.ml index 22adfe53..686f8693 100644 --- a/EML/tests/llvm_tests.ml +++ b/EML/tests/llvm_tests.ml @@ -2,8 +2,6 @@ (** SPDX-License-Identifier: LGPL-3.0-or-later *) -(** LLVM IR codegen tests (analogous to riscv_tests.ml). *) - open EML_lib open Frontend.Parser open Middleend.Anf diff --git a/EML/tests/riscv_tests.ml b/EML/tests/riscv_tests.ml index e1c40656..279e862c 100644 --- a/EML/tests/riscv_tests.ml +++ b/EML/tests/riscv_tests.ml @@ -2,8 +2,6 @@ (** SPDX-License-Identifier: LGPL-3.0-or-later *) -(** RISC-V codegen tests. *) - open EML_lib open Frontend.Parser open Middleend.Anf From 2f48f9ab194fa5c5dfc23d1f71b043503ea4647f Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 10 Mar 2026 02:01:36 +0300 Subject: [PATCH 64/74] add name mangling Signed-off-by: Victoria Ostrovskaya --- EML/lib/backend/llvm_ir/analysis.ml | 49 +++++----------------------- EML/lib/backend/llvm_ir/analysis.mli | 5 --- EML/lib/backend/llvm_ir/generator.ml | 19 ++++------- EML/lib/backend/ricsv/analysis.ml | 7 +++- EML/lib/backend/ricsv/analysis.mli | 4 --- EML/lib/frontend/ast.ml | 4 +-- EML/lib/frontend/parser.ml | 2 +- EML/lib/middleend/anf.ml | 1 + EML/lib/middleend/anf.mli | 3 -- EML/lib/middleend/inferencer.ml | 5 ++- EML/lib/runtime/primitives.ml | 10 ++++-- EML/lib/runtime/primitives.mli | 6 ++-- EML/lib/utils/helpers.ml | 32 ------------------ EML/lib/utils/helpers.mli | 8 ----- EML/lib/utils/pretty_printer.ml | 4 +-- EML/lib/utils/pretty_printer.mli | 1 - EML/tests/llvm_tests.ml | 24 +++++++------- 17 files changed, 53 insertions(+), 131 deletions(-) delete mode 100644 EML/lib/utils/helpers.ml delete mode 100644 EML/lib/utils/helpers.mli diff --git a/EML/lib/backend/llvm_ir/analysis.ml b/EML/lib/backend/llvm_ir/analysis.ml index 399c70eb..b847fba9 100644 --- a/EML/lib/backend/llvm_ir/analysis.ml +++ b/EML/lib/backend/llvm_ir/analysis.ml @@ -4,13 +4,13 @@ open Frontend.Ast open Middleend.Anf +open Runtime.Primitives type function_layout = { func_name : string ; asm_name : string ; params : immediate list ; body : anf_expr - ; slots_count : int } type analysis_result = @@ -19,39 +19,6 @@ type analysis_result = ; resolve : int -> string -> (string * int) option } -let rec slots_in_imm = function - | ImmediateVar _ | ImmediateConst _ -> 0 - -and slots_in_cexpr = function - | ComplexImmediate imm -> slots_in_imm imm - | ComplexUnit -> 0 - | ComplexBinOper (_, left, right) -> slots_in_imm left + slots_in_imm right - | ComplexUnarOper (_, imm) -> slots_in_imm imm - | ComplexTuple (first, second, rest) -> - List.fold_left - (fun slot_count expr -> slot_count + slots_in_imm expr) - 0 - (first :: second :: rest) - | ComplexField (imm, _) -> slots_in_imm imm - | ComplexList imm_list -> - List.fold_left (fun slot_count expr -> slot_count + slots_in_imm expr) 0 imm_list - | ComplexApp (_, second, rest) -> - 1 - + List.fold_left - (fun slot_count expr -> slot_count + slots_in_imm expr) - 0 - (second :: rest) - | ComplexOption None -> 0 - | ComplexOption (Some imm) -> slots_in_imm imm - | ComplexLambda (_, body) -> slots_in_anf body - | ComplexBranch (cond, then_expr, else_expr) -> - slots_in_imm cond + slots_in_anf then_expr + slots_in_anf else_expr - -and slots_in_anf = function - | AnfExpr cexp -> slots_in_cexpr cexp - | AnfLet (_, _, cexp, cont) -> 1 + slots_in_cexpr cexp + slots_in_anf cont -;; - let rec params_of_anf = function | AnfExpr (ComplexLambda (pats, body)) -> let imms = @@ -87,14 +54,18 @@ let analyze (program : anf_program) = (function | AnfValue (_, (func_name, arity, body), _) -> let params, body = params_of_anf body in - Some (func_name, arity, params, body, slots_in_anf body) + Some (func_name, arity, params, body) | AnfEval _ -> None) program in - let mangle_reserved name = if String.equal name "_start" then "eml_start" else name in + let mangle_reserved name = + if is_reserved name then "eml_" ^ name + else if String.equal name "_start" then "eml_start" + else name + in let functions, _ = List.fold_left - (fun (reversed_functions, counts) (func_name, _arity, params, body, slots_count) -> + (fun (reversed_functions, counts) (func_name, _arity, params, body) -> let base_asm_name = mangle_reserved func_name in let duplicate_index = Base.Map.find counts func_name |> Option.value ~default:0 @@ -107,8 +78,7 @@ let analyze (program : anf_program) = then base_asm_name else base_asm_name ^ "_" ^ Int.to_string duplicate_index in - ( { func_name; asm_name; params; body; slots_count } :: reversed_functions - , updated_counts )) + { func_name; asm_name; params; body } :: reversed_functions, updated_counts) ([], Base.Map.empty (module Base.String)) raw in @@ -125,7 +95,6 @@ let analyze (program : anf_program) = ; asm_name = "main" ; params = [] ; body = AnfExpr (ComplexImmediate (ImmediateConst (ConstInt 0))) - ; slots_count = 0 } in functions @ [ synthetic_main ]) diff --git a/EML/lib/backend/llvm_ir/analysis.mli b/EML/lib/backend/llvm_ir/analysis.mli index a02061ef..b4cf437b 100644 --- a/EML/lib/backend/llvm_ir/analysis.mli +++ b/EML/lib/backend/llvm_ir/analysis.mli @@ -9,7 +9,6 @@ type function_layout = ; asm_name : string ; params : immediate list ; body : anf_expr - ; slots_count : int } type analysis_result = @@ -18,8 +17,4 @@ type analysis_result = ; resolve : int -> string -> (string * int) option } -val arity_map_of_program - : anf_program - -> (string, int, Base.String.comparator_witness) Base.Map.t - val analyze : anf_program -> analysis_result diff --git a/EML/lib/backend/llvm_ir/generator.ml b/EML/lib/backend/llvm_ir/generator.ml index 55863ff9..eec3185f 100644 --- a/EML/lib/backend/llvm_ir/generator.ml +++ b/EML/lib/backend/llvm_ir/generator.ml @@ -253,7 +253,7 @@ let gen_binop_native op left_v right_v = let* r = untag_int_val right_v in let* v = with_optional_value (sdiv builder l r "sdiv") in tag_int_result v - | GretestEqual -> + | GreatestEqual -> let* l = untag_int_val left_v in let* r = untag_int_val right_v in let* v = with_optional_value (icmp builder Icmp.Sge l r "icmp_sge") in @@ -625,14 +625,10 @@ and gen_anf = function let declare_function (func_layout : function_layout) state = let arg_types = Array.make (List.length func_layout.params) ptr_t in let func_type = function_type ptr_t arg_types in - let llvm_name = - if func_layout.func_name = "main" then "eml_main" else func_layout.asm_name - in - let func = declare_function llvm_name func_type state.current_module in - let key = if func_layout.func_name = "main" then "main" else func_layout.asm_name in + let func = declare_function func_layout.asm_name func_type state.current_module in { state with - value_env = Base.Map.set state.value_env ~key ~data:func - ; type_env = Base.Map.set state.type_env ~key ~data:func_type + value_env = Base.Map.set state.value_env ~key:func_layout.asm_name ~data:func + ; type_env = Base.Map.set state.type_env ~key:func_layout.asm_name ~data:func_type } ;; @@ -658,10 +654,7 @@ let gen_function let comp = let* state = get in let* () = put { state with current_func_index = func_index } in - let* func, _ = - lookup_func_type - (if func_layout.func_name = "main" then "main" else func_layout.asm_name) - in + let* func, _ = lookup_func_type func_layout.asm_name in let entry_block = append_block context "entry" func in position_at_end entry_block builder; let* () = if enable_gc && is_entry then emit_gc_prologue else return () in @@ -725,7 +718,7 @@ let gen_function in put { state with - value_env = Base.Map.set value_env ~key:func_layout.func_name ~data:func + value_env = Base.Map.set value_env ~key:func_layout.asm_name ~data:func } in run comp initial_state diff --git a/EML/lib/backend/ricsv/analysis.ml b/EML/lib/backend/ricsv/analysis.ml index d93d8b44..307153c3 100644 --- a/EML/lib/backend/ricsv/analysis.ml +++ b/EML/lib/backend/ricsv/analysis.ml @@ -4,6 +4,7 @@ open Frontend.Ast open Middleend.Anf +open Runtime.Primitives let word_size = 8 @@ -168,7 +169,11 @@ let analyze (program : anf_program) = | AnfEval _ -> None) program in - let mangle_reserved name = if String.equal name "_start" then "eml_start" else name in + let mangle_reserved name = + if is_reserved name then "eml_" ^ name + else if String.equal name "_start" then "eml_start" + else name + in let functions, _ = List.fold_left (fun (reversed_functions, generated_name_counts) diff --git a/EML/lib/backend/ricsv/analysis.mli b/EML/lib/backend/ricsv/analysis.mli index d3c05552..4213140a 100644 --- a/EML/lib/backend/ricsv/analysis.mli +++ b/EML/lib/backend/ricsv/analysis.mli @@ -20,8 +20,4 @@ type analysis_result = ; resolve : int -> string -> (string * int) option } -val arity_map_of_program - : anf_program - -> (string, int, Base.String.comparator_witness) Base.Map.t - val analyze : anf_program -> analysis_result diff --git a/EML/lib/frontend/ast.ml b/EML/lib/frontend/ast.ml index 5806081a..f43549b8 100644 --- a/EML/lib/frontend/ast.ml +++ b/EML/lib/frontend/ast.ml @@ -18,7 +18,7 @@ type bin_oper = | Division (* [/] *) | And (* [&&] *) | Or (* [||] *) - | GretestEqual (* [>=] *) + | GreatestEqual (* [>=] *) | LowestEqual (* [<=] *) | GreaterThan (* [>] *) | LowerThan (* [<] *) @@ -90,8 +90,6 @@ type program = structure list [@@deriving show { with_path = false }] let bin_op_list = [ "*"; "/"; "+"; "-"; "^"; ">="; "<="; "<>"; "="; ">"; "<"; "&&"; "||" ] let unary_op_list = [ "~-" ] -let is_bin_op op = List.mem op bin_op_list -let is_operator opr = List.exists (fun s -> String.equal s opr) bin_op_list let is_unary_minus op = op = "~-" let rec pp_ty fmt = function diff --git a/EML/lib/frontend/parser.ml b/EML/lib/frontend/parser.ml index bfd9847b..3f871894 100644 --- a/EML/lib/frontend/parser.ml +++ b/EML/lib/frontend/parser.ml @@ -275,7 +275,7 @@ let compare = ; parse_expr_bin_oper NotEqual "<>" ; parse_expr_bin_oper LowestEqual "<=" ; parse_expr_bin_oper LowerThan "<" - ; parse_expr_bin_oper GretestEqual ">=" + ; parse_expr_bin_oper GreatestEqual ">=" ; parse_expr_bin_oper GreaterThan ">" ] ;; diff --git a/EML/lib/middleend/anf.ml b/EML/lib/middleend/anf.ml index 0868672a..342f6d58 100644 --- a/EML/lib/middleend/anf.ml +++ b/EML/lib/middleend/anf.ml @@ -37,6 +37,7 @@ and anf_expr = type arity = int +(* Used by [@@deriving show] for types containing [arity]; not exported. *) let pp_arity ppf (n : arity) = Stdlib.Format.pp_print_int ppf n type anf_bind = ident * anf_expr [@@deriving show { with_path = false }] diff --git a/EML/lib/middleend/anf.mli b/EML/lib/middleend/anf.mli index 0b9a614b..737acee6 100644 --- a/EML/lib/middleend/anf.mli +++ b/EML/lib/middleend/anf.mli @@ -29,9 +29,6 @@ and anf_expr = [@@deriving show { with_path = false }] type arity = int - -val pp_arity : Format.formatter -> arity -> unit - type anf_bind = ident * anf_expr [@@deriving show { with_path = false }] type anf_fun_bind = ident * arity * anf_expr [@@deriving show { with_path = false }] diff --git a/EML/lib/middleend/inferencer.ml b/EML/lib/middleend/inferencer.ml index 1b6cc7c3..ebe7b073 100644 --- a/EML/lib/middleend/inferencer.ml +++ b/EML/lib/middleend/inferencer.ml @@ -4,6 +4,9 @@ (* Template: https://gitlab.com/Kakadu/fp2020course-materials/-/tree/master/code/miniml?ref_type=heads*) +(* Inference state is purely immutable: no Hashtbl, no [ref] or [mutable]. We use + [Map] (tree-like) for [var_levels] and thread state through the monad. *) + open Base open Frontend.Ast open Stdlib.Format @@ -452,7 +455,7 @@ let rec infer_pattern env = function ;; let infer_binop_type = function - | Equal | NotEqual | GreaterThan | GretestEqual | LowerThan | LowestEqual -> + | Equal | NotEqual | GreaterThan | GreatestEqual | LowerThan | LowestEqual -> fresh_var >>| fun fresh_ty -> fresh_ty, fresh_ty, TyPrim "bool" | Plus | Minus | Multiply | Division -> return (TyPrim "int", TyPrim "int", TyPrim "int") | And | Or -> return (TyPrim "bool", TyPrim "bool", TyPrim "bool") diff --git a/EML/lib/runtime/primitives.ml b/EML/lib/runtime/primitives.ml index 2827a0d9..503b1562 100644 --- a/EML/lib/runtime/primitives.ml +++ b/EML/lib/runtime/primitives.ml @@ -2,8 +2,6 @@ (** SPDX-License-Identifier: LGPL-3.0-or-later *) -let predefined_runtime_op_names = Frontend.Ast.unary_op_list - type llvm_arg = | Ptr | Int @@ -41,3 +39,11 @@ let predefined_runtime_funcs : runtime_func_sig list = let runtime_primitive_arities : (string * int) list = List.map (fun { name; args; _ } -> name, List.length args) predefined_runtime_funcs ;; + +(** Names that must not be used as user function symbols; user definitions are mangled to [eml_]. + [main] is not included: it is handled separately (e.g. LLVM emits user main as [eml_main]). *) +let reserved_function_names : string list = + List.map (fun { name; _ } -> name) predefined_runtime_funcs +;; + +let is_reserved (name : string) : bool = List.mem name reserved_function_names diff --git a/EML/lib/runtime/primitives.mli b/EML/lib/runtime/primitives.mli index 51638839..3d54a4ba 100644 --- a/EML/lib/runtime/primitives.mli +++ b/EML/lib/runtime/primitives.mli @@ -2,8 +2,6 @@ (** SPDX-License-Identifier: LGPL-3.0-or-later *) -val predefined_runtime_op_names : string list - type llvm_arg = | Ptr | Int @@ -22,3 +20,7 @@ type runtime_func_sig = val predefined_runtime_funcs : runtime_func_sig list val runtime_primitive_arities : (string * int) list + +(** Names reserved for runtime; user functions with these names get [eml_] as symbol. *) +val reserved_function_names : string list +val is_reserved : string -> bool diff --git a/EML/lib/utils/helpers.ml b/EML/lib/utils/helpers.ml deleted file mode 100644 index a5ea36fb..00000000 --- a/EML/lib/utils/helpers.ml +++ /dev/null @@ -1,32 +0,0 @@ -(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) - -(** SPDX-License-Identifier: LGPL-3.0-or-later *) - -open Base -open Frontend -open Ast - -let is_simple_pattern = function - | PatVariable _ | PatAny | PatUnit -> true - | _ -> false -;; - -let is_tuple_pattern = function - | PatTuple (_, _, _) -> true - | _ -> false -;; - -let rec extract_tuple_pattern_idents acc = function - | PatVariable x -> x :: acc - | PatTuple (p1, p2, rest) -> - let acc' = extract_tuple_pattern_idents acc p1 in - let acc'' = extract_tuple_pattern_idents acc' p2 in - List.fold_left rest ~f:extract_tuple_pattern_idents ~init:acc'' - | PatAny -> "_" :: acc - | _ -> acc -;; - -let pattern_to_ident = function - | PatVariable x -> Some x - | _ -> None -;; diff --git a/EML/lib/utils/helpers.mli b/EML/lib/utils/helpers.mli deleted file mode 100644 index 324697e0..00000000 --- a/EML/lib/utils/helpers.mli +++ /dev/null @@ -1,8 +0,0 @@ -(** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) - -(** SPDX-License-Identifier: LGPL-3.0-or-later *) - -val is_simple_pattern : Frontend.Ast.pattern -> bool -val is_tuple_pattern : Frontend.Ast.pattern -> bool -val extract_tuple_pattern_idents : string list -> Frontend.Ast.pattern -> string list -val pattern_to_ident : Frontend.Ast.pattern -> string option diff --git a/EML/lib/utils/pretty_printer.ml b/EML/lib/utils/pretty_printer.ml index 1a2a7f83..1c46b19a 100644 --- a/EML/lib/utils/pretty_printer.ml +++ b/EML/lib/utils/pretty_printer.ml @@ -11,7 +11,7 @@ let string_of_bin_op = function | Division -> "/" | And -> "&&" | Or -> "||" - | GretestEqual -> ">=" + | GreatestEqual -> ">=" | LowestEqual -> "<=" | GreaterThan -> ">" | LowerThan -> "<" @@ -168,5 +168,3 @@ let pp_structure ppf (lst : structure list) = ppf lst ;; - -let pp_program = pp_structure diff --git a/EML/lib/utils/pretty_printer.mli b/EML/lib/utils/pretty_printer.mli index cfc9036e..3eb6bfae 100644 --- a/EML/lib/utils/pretty_printer.mli +++ b/EML/lib/utils/pretty_printer.mli @@ -13,4 +13,3 @@ val pp_pattern : Format.formatter -> pattern -> unit val pp_expr : Format.formatter -> expr -> unit val pp_structure_item : Format.formatter -> structure -> unit val pp_structure : Format.formatter -> structure list -> unit -val pp_program : Format.formatter -> structure list -> unit diff --git a/EML/tests/llvm_tests.ml b/EML/tests/llvm_tests.ml index 686f8693..c0498153 100644 --- a/EML/tests/llvm_tests.ml +++ b/EML/tests/llvm_tests.ml @@ -65,7 +65,7 @@ entry: ret ptr inttoptr (i64 -9 to ptr) } -define ptr @eml_main() { +define ptr @main() { entry: ret ptr inttoptr (i64 1 to ptr) } @@ -116,7 +116,7 @@ entry: ret ptr inttoptr (i64 2 to ptr) } -define ptr @eml_main() { +define ptr @main() { entry: ret ptr inttoptr (i64 1 to ptr) } @@ -162,7 +162,7 @@ declare ptr @print_gc_status() ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) declare ptr @llvm.frameaddress.p0(i32 immarg) #0 -define ptr @eml_main() { +define ptr @main() { entry: ret ptr inttoptr (i64 1 to ptr) } @@ -208,7 +208,7 @@ declare ptr @print_gc_status() ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) declare ptr @llvm.frameaddress.p0(i32 immarg) #0 -define ptr @eml_main() { +define ptr @main() { entry: ret ptr inttoptr (i64 113 to ptr) } @@ -273,7 +273,7 @@ let%expect_test "double_fn" = ret ptr %result_int } - define ptr @eml_main() { + define ptr @main() { entry: %direct_double = call ptr @double(ptr inttoptr (i64 43 to ptr)) ret ptr %direct_double @@ -352,7 +352,7 @@ let%expect_test "abs_fn" = ret ptr %ite_result } - define ptr @eml_main() { + define ptr @main() { entry: %direct_abs = call ptr @abs(ptr inttoptr (i64 15 to ptr)) ret ptr %direct_abs @@ -434,7 +434,7 @@ let%expect_test "nested_calls" = ret ptr %result_int } - define ptr @eml_main() { + define ptr @main() { entry: %direct_sum_of_squares = call ptr @sum_of_squares(ptr inttoptr (i64 7 to ptr), ptr inttoptr (i64 9 to ptr)) ret ptr %direct_sum_of_squares @@ -532,7 +532,7 @@ let%expect_test "fibonacci" = ret ptr %ite_result } - define ptr @eml_main() { + define ptr @main() { entry: %direct_fib = call ptr @fib(ptr inttoptr (i64 13 to ptr)) ret ptr %direct_fib @@ -592,7 +592,7 @@ let%expect_test "is_positive" = ret ptr %result_bool } - define ptr @eml_main() { + define ptr @main() { entry: %direct_is_positive = call ptr @is_positive(ptr inttoptr (i64 85 to ptr)) ret ptr %direct_is_positive @@ -666,7 +666,7 @@ let%expect_test "mul3" = ret ptr %result_int13 } - define ptr @eml_main() { + define ptr @main() { entry: %direct_mul3 = call ptr @mul3(ptr inttoptr (i64 5 to ptr), ptr inttoptr (i64 7 to ptr), ptr inttoptr (i64 9 to ptr)) ret ptr %direct_mul3 @@ -745,7 +745,7 @@ let%expect_test "test1" = ret ptr %ite_result } - define ptr @eml_main() { + define ptr @main() { entry: br i1 false, label %then_1, label %else_1 @@ -900,7 +900,7 @@ let%expect_test "codegen closure fn with 10 arg" = ret ptr %result_int53 } - define ptr @eml_main() { + define ptr @main() { entry: %boxed_alloc_closure = call ptr @alloc_closure(ptr @add, i64 7) br label %apply_step_0 From 3ad21850cd6cae1108df2d44f03bc827816e2647 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 10 Mar 2026 02:31:24 +0300 Subject: [PATCH 65/74] run llvm tests Signed-off-by: Victoria Ostrovskaya --- EML/lib/backend/llvm_ir/analysis.ml | 29 +---- EML/lib/backend/llvm_ir/analysis.mli | 3 +- EML/lib/backend/llvm_ir/generator.ml | 89 +++++---------- EML/lib/backend/llvm_ir/generator_state.ml | 17 +-- EML/lib/backend/ricsv/analysis.ml | 6 +- EML/lib/frontend/ast.ml | 2 - EML/lib/runtime/llvm_call.S | 36 ++++++ EML/lib/runtime/primitives.ml | 8 +- EML/lib/runtime/primitives.mli | 3 - EML/lib/runtime/runtime.c | 14 +-- EML/tests/Makefile | 38 ++++--- EML/tests/anf_tests.ml | 60 ++++++++++ EML/tests/closure_tests.t | 6 +- EML/tests/llvm.t | 30 +++-- EML/tests/llvm_tests.ml | 126 +++++++-------------- EML/tests/llvm_tests.mli | 2 +- EML/tests/riscv.t | 38 +++---- EML/tests/tuple_tests.t | 6 +- 18 files changed, 257 insertions(+), 256 deletions(-) create mode 100644 EML/lib/runtime/llvm_call.S diff --git a/EML/lib/backend/llvm_ir/analysis.ml b/EML/lib/backend/llvm_ir/analysis.ml index b847fba9..4abe2f57 100644 --- a/EML/lib/backend/llvm_ir/analysis.ml +++ b/EML/lib/backend/llvm_ir/analysis.ml @@ -14,8 +14,7 @@ type function_layout = } type analysis_result = - { arity_map : (string, int, Base.String.comparator_witness) Base.Map.t - ; functions : function_layout list + { functions : function_layout list ; resolve : int -> string -> (string * int) option } @@ -33,22 +32,7 @@ let rec params_of_anf = function | other -> [], other ;; -let arity_map_of_program (program : anf_program) = - List.fold_left - (fun map -> function - | AnfValue (_, (fid, arity, _), and_binds) -> - let map = Base.Map.set map ~key:fid ~data:arity in - List.fold_left - (fun acc (id, arity, _) -> Base.Map.set acc ~key:id ~data:arity) - map - and_binds - | _ -> map) - (Base.Map.empty (module Base.String)) - program -;; - let analyze (program : anf_program) = - let arity_map = arity_map_of_program program in let raw = List.filter_map (function @@ -59,8 +43,10 @@ let analyze (program : anf_program) = program in let mangle_reserved name = - if is_reserved name then "eml_" ^ name - else if String.equal name "_start" then "eml_start" + if is_reserved name + then "eml_" ^ name + else if String.equal name "_start" + then "eml_start" else name in let functions, _ = @@ -99,9 +85,6 @@ let analyze (program : anf_program) = in functions @ [ synthetic_main ]) in - let arity_map = - if has_main then arity_map else Base.Map.set arity_map ~key:"main" ~data:0 - in let resolver func_index var_name = let rec find i = if i < 0 @@ -115,5 +98,5 @@ let analyze (program : anf_program) = in find (func_index - 1) in - { arity_map; functions; resolve = resolver } + { functions; resolve = resolver } ;; diff --git a/EML/lib/backend/llvm_ir/analysis.mli b/EML/lib/backend/llvm_ir/analysis.mli index b4cf437b..25d4c9eb 100644 --- a/EML/lib/backend/llvm_ir/analysis.mli +++ b/EML/lib/backend/llvm_ir/analysis.mli @@ -12,8 +12,7 @@ type function_layout = } type analysis_result = - { arity_map : (string, int, Base.String.comparator_witness) Base.Map.t - ; functions : function_layout list + { functions : function_layout list ; resolve : int -> string -> (string * int) option } diff --git a/EML/lib/backend/llvm_ir/generator.ml b/EML/lib/backend/llvm_ir/generator.ml index eec3185f..88f0a7be 100644 --- a/EML/lib/backend/llvm_ir/generator.ml +++ b/EML/lib/backend/llvm_ir/generator.ml @@ -462,7 +462,6 @@ let rec gen_cexpr = function let* arity_opt = get_resolved_arity fname in let* closure_value = maybe_closure callee_value arity_opt in let* eml_applyN_func, eml_applyN_type = lookup_func_type "eml_applyN" in - let current_func = block_parent (insertion_block builder) in if num_args = 0 then ( let arr_ty = Llvm.array_type ptr_t 1 in @@ -486,68 +485,42 @@ let rec gen_cexpr = function eml_applyN_func [| closure_value; const_int int_t 0; args_ptr |] "boxed_eml_applyN")) - else - let* _then_name, _else_name, merge_name = fresh_blocks in - let merge_block = append_block context merge_name current_func in - let blocks = - Array.init num_args (fun idx -> - append_block context ("apply_step_" ^ Int.to_string idx) current_func) + else ( + (* Single eml_applyN(closure, num_args, argv) call, like RISC-V *) + let arr_ty = Llvm.array_type ptr_t num_args in + let* alloca_arr = + with_optional_value (Some (Llvm.build_alloca arr_ty "apply_args" builder)) in - let* () = emit_void_st builder (Br blocks.(0)) in - let result_vals = Array.make num_args (Llvm.const_null ptr_t) in - let rec loop step_index = - if step_index >= num_args - then return () - else ( - let () = position_at_end blocks.(step_index) builder in - let* current_closure = - if step_index = 0 - then return closure_value - else - with_optional_value - (Llvm_backend.phi - builder - [ result_vals.(step_index - 1), blocks.(step_index - 1) ] - ("cur_" ^ Int.to_string step_index)) - in - let one_ty = Llvm.array_type ptr_t 1 in - let alloca_one = Llvm.build_alloca one_ty "apply_one" builder in + let () = + for i = 0 to num_args - 1 do let elem_ptr = Llvm.build_gep - one_ty - alloca_one - [| Llvm.const_int i32_t 0; Llvm.const_int i32_t 0 |] - "one_elem" + arr_ty + alloca_arr + [| Llvm.const_int i32_t 0; Llvm.const_int i32_t i |] + ("apply_arg_" ^ Int.to_string i) builder in - Llvm_backend.store builder args_values.(step_index) elem_ptr; - let* step_result = - with_optional_value - (Llvm_backend.call - builder - eml_applyN_type - eml_applyN_func - [| current_closure; const_int int_t 1; elem_ptr |] - ("apply_step_" ^ Int.to_string step_index)) - in - result_vals.(step_index) <- step_result; - let* () = - if step_index < num_args - 1 - then emit_void_st builder (Br blocks.(step_index + 1)) - else emit_void_st builder (Br merge_block) - in - loop (step_index + 1)) + Llvm_backend.store builder args_values.(i) elem_ptr + done in - let* () = loop 0 in - position_at_end merge_block builder; - let* final_val = + let* args_ptr = with_optional_value - (Llvm_backend.phi - builder - [ result_vals.(num_args - 1), blocks.(num_args - 1) ] - "apply_result") + (Some + (Llvm.build_gep + arr_ty + alloca_arr + [| Llvm.const_int i32_t 0; Llvm.const_int i32_t 0 |] + "apply_args_ptr" + builder)) in - return final_val + with_optional_value + (call + builder + eml_applyN_type + eml_applyN_func + [| closure_value; const_int int_t num_args; args_ptr |] + "eml_applyN_result")) | ComplexApp (_, _, _) -> fail "LLVM codegen: ComplexApp with non-variable function not supported" | ComplexBranch (cond_imm, then_e, else_e) -> @@ -739,13 +712,11 @@ let gen_program ~output_file ~enable_gc (program : anf_program) = ; current_func_index = 0 } in + (* [functions] is never empty: synthetic main is added when missing. *) let entry_name = match List.find_opt (fun func -> func.func_name = "main") functions with | Some _ -> "main" - | None -> - (match List.rev functions with - | [] -> "" - | last :: _ -> last.func_name) + | None -> (List.rev functions |> List.hd).func_name in let state_after_declares = List.fold_left (fun state func -> declare_function func state) initial_state functions diff --git a/EML/lib/backend/llvm_ir/generator_state.ml b/EML/lib/backend/llvm_ir/generator_state.ml index 74fbc427..170342a9 100644 --- a/EML/lib/backend/llvm_ir/generator_state.ml +++ b/EML/lib/backend/llvm_ir/generator_state.ml @@ -57,16 +57,9 @@ module Make (N : NAMING) = struct ;; let fail err = fun _ -> Error err - - let map_find_opt (map : (string, 'a, _) Base.Map.t) key : 'a option = - Base.Map.fold map ~init:None ~f:(fun ~key:map_key ~data:map_value acc -> - match acc with - | Some _ -> acc - | None -> if String.equal map_key key then Some map_value else None) - ;; - - let find_value_opt name state = Ok (map_find_opt state.value_env name, state) - let find_type_opt name state = Ok (map_find_opt state.type_env name, state) + let map_find_opt (map : (string, 'a, _) Base.Map.t) key = Base.Map.find map key + let find_value_opt name state = Ok (Base.Map.find state.value_env name, state) + let find_type_opt name state = Ok (Base.Map.find state.type_env name, state) let resolve_key state name = match state.resolve with @@ -79,12 +72,12 @@ module Make (N : NAMING) = struct let resolved_find_value_opt name state = let resolved_key = resolve_key state name in - Ok (map_find_opt state.value_env resolved_key, state) + Ok (Base.Map.find state.value_env resolved_key, state) ;; let resolved_find_type_opt name state = let resolved_key = resolve_key state name in - Ok (map_find_opt state.type_env resolved_key, state) + Ok (Base.Map.find state.type_env resolved_key, state) ;; let set_value name value = diff --git a/EML/lib/backend/ricsv/analysis.ml b/EML/lib/backend/ricsv/analysis.ml index 307153c3..50a89a84 100644 --- a/EML/lib/backend/ricsv/analysis.ml +++ b/EML/lib/backend/ricsv/analysis.ml @@ -170,8 +170,10 @@ let analyze (program : anf_program) = program in let mangle_reserved name = - if is_reserved name then "eml_" ^ name - else if String.equal name "_start" then "eml_start" + if is_reserved name + then "eml_" ^ name + else if String.equal name "_start" + then "eml_start" else name in let functions, _ = diff --git a/EML/lib/frontend/ast.ml b/EML/lib/frontend/ast.ml index f43549b8..18b2e913 100644 --- a/EML/lib/frontend/ast.ml +++ b/EML/lib/frontend/ast.ml @@ -89,8 +89,6 @@ type structure = type program = structure list [@@deriving show { with_path = false }] let bin_op_list = [ "*"; "/"; "+"; "-"; "^"; ">="; "<="; "<>"; "="; ">"; "<"; "&&"; "||" ] -let unary_op_list = [ "~-" ] -let is_unary_minus op = op = "~-" let rec pp_ty fmt = function | TyPrim x -> fprintf fmt "%s" x diff --git a/EML/lib/runtime/llvm_call.S b/EML/lib/runtime/llvm_call.S new file mode 100644 index 00000000..ad1c9371 --- /dev/null +++ b/EML/lib/runtime/llvm_call.S @@ -0,0 +1,36 @@ +# x86-64 SysV: llvm_call_indirect(void *fn, void **args, int64_t n) +# Called from C runtime when invoking a closure. Loads args[0..n-1] into +# registers (rdi,rsi,rdx,rcx,r8,r9) and stack, then calls fn. + + .globl llvm_call_indirect + .type llvm_call_indirect, @function +llvm_call_indirect: + # rdi = fn, rsi = args, rdx = n + movq %rdi, %rax + movq %rsi, %r10 + movq %rdx, %r11 + + # First 6 args into registers (SysV: rdi, rsi, rdx, rcx, r8, r9) + movq 0(%r10), %rdi + movq 8(%r10), %rsi + movq 16(%r10), %rdx + movq 24(%r10), %rcx + movq 32(%r10), %r8 + movq 40(%r10), %r9 + + # If n > 6, push args[6..n-1] in reverse order (right-to-left) + cmpq $6, %r11 + jbe .Ldo_call + movq %r11, %r12 + subq $1, %r12 # r12 = n - 1 (last index) +.Lpush_loop: + cmpq $6, %r12 + jb .Ldo_call + pushq (%r10,%r12,8) + subq $1, %r12 + jmp .Lpush_loop +.Ldo_call: + call *%rax + ret + + .size llvm_call_indirect, .-llvm_call_indirect diff --git a/EML/lib/runtime/primitives.ml b/EML/lib/runtime/primitives.ml index 503b1562..3e9b959d 100644 --- a/EML/lib/runtime/primitives.ml +++ b/EML/lib/runtime/primitives.ml @@ -40,10 +40,6 @@ let runtime_primitive_arities : (string * int) list = List.map (fun { name; args; _ } -> name, List.length args) predefined_runtime_funcs ;; -(** Names that must not be used as user function symbols; user definitions are mangled to [eml_]. - [main] is not included: it is handled separately (e.g. LLVM emits user main as [eml_main]). *) -let reserved_function_names : string list = - List.map (fun { name; _ } -> name) predefined_runtime_funcs +let is_reserved (name : string) : bool = + List.exists (fun { name = n; _ } -> String.equal n name) predefined_runtime_funcs ;; - -let is_reserved (name : string) : bool = List.mem name reserved_function_names diff --git a/EML/lib/runtime/primitives.mli b/EML/lib/runtime/primitives.mli index 3d54a4ba..a5c05e59 100644 --- a/EML/lib/runtime/primitives.mli +++ b/EML/lib/runtime/primitives.mli @@ -20,7 +20,4 @@ type runtime_func_sig = val predefined_runtime_funcs : runtime_func_sig list val runtime_primitive_arities : (string * int) list - -(** Names reserved for runtime; user functions with these names get [eml_] as symbol. *) -val reserved_function_names : string list val is_reserved : string -> bool diff --git a/EML/lib/runtime/runtime.c b/EML/lib/runtime/runtime.c index bbcec4bb..867d41ec 100644 --- a/EML/lib/runtime/runtime.c +++ b/EML/lib/runtime/runtime.c @@ -1,9 +1,3 @@ -/* - * Unified EML runtime for RISC-V and LLVM backends. - * Build with -DEML_RISCV for RISC-V (call via asm, no main), - * -DEML_LLVM for LLVM (llvm_call_indirect + main -> eml_main). - * Optional: -DENABLE_GC for GC; otherwise malloc-only. - */ #include #include #include @@ -22,7 +16,6 @@ void print_int(int64_t tagged_n) { printf("%ld\n", (long)TO_ML_INTEGER(tagged_n)); } -/* Unified object header and GC tags (same for both backends) */ #define TAG_TUPLE 0 #define TAG_CLOSURE 1 #define TAG_LAST 2 @@ -285,7 +278,6 @@ int64_t get_heap_final(void) { return tag_int_val(0); } #endif /* ENABLE_GC */ -/* Closure and call convention */ typedef struct { void *code; int64_t arity; @@ -409,13 +401,13 @@ tuple *create_tuple(int64_t argc, void **args) { void *field(tuple *t, long n) { return t->args[n >> 1]; } -#if defined(EML_LLVM) - +#if defined(EML_LLVM) && !defined(EML_LLVM_STANDALONE) +/* When linking with RTS that provides main (e.g. custom runner), call eml_main. */ extern void eml_main(void); int main(void) { eml_main(); return 0; } +#endif /* EML_LLVM && !EML_LLVM_STANDALONE */ -#endif /* EML_LLVM */ diff --git a/EML/tests/Makefile b/EML/tests/Makefile index 4e65de46..13424a42 100644 --- a/EML/tests/Makefile +++ b/EML/tests/Makefile @@ -1,20 +1,23 @@ SHELL := /bin/bash -EML_ROOT ?= $(CURDIR)/.. -EML_BIN ?= $(if $(findstring _build,$(EML_ROOT)),$(EML_ROOT)/bin/EML.exe,$(EML_ROOT)/_build/default/bin/EML.exe) -RUNTIME_A := $(if $(wildcard ../lib/runtime/rv64_runtime.a),../lib/runtime/rv64_runtime.a,../_build/default/lib/runtime/rv64_runtime.a) +EML_ROOT ?= $(CURDIR)/.. +EML_BIN ?= $(if $(findstring _build,$(EML_ROOT)),$(EML_ROOT)/bin/EML.exe,$(EML_ROOT)/_build/default/bin/EML.exe) +RUNTIME_A := $(if $(wildcard ../lib/runtime/rv64_runtime.a),../lib/runtime/rv64_runtime.a,../_build/default/lib/runtime/rv64_runtime.a) +RUNTIME_DIR := $(CURDIR)/../lib/runtime -ARGS := $(filter-out compile infer compile_llvm,$(MAKECMDGOALS)) -INPUT := $(firstword $(ARGS)) +ARGS := $(filter-out compile_riscv infer compile_llvm,$(MAKECMDGOALS)) +INPUT := $(firstword $(ARGS)) EXTRA_GOALS := $(ARGS) -GC_FLAG := $(if $(filter 1 true yes on,$(GC)),-gc,) +GC_FLAG := $(if $(filter 1 true yes on,$(GC)),-gc,) +CFLAGS_GC := $(if $(filter 1 true yes on,$(GC)),-DENABLE_GC,) -.PHONY: compile infer compile_llvm $(EXTRA_GOALS) +.PHONY: compile_riscv infer compile_llvm $(EXTRA_GOALS) -compile: +# RISC-V: compile .ml -> .s -> .o, link, run under qemu. Usage: make compile_riscv [GC=1] +compile_riscv: @set -euo pipefail; \ FILE="$(INPUT)"; \ - [[ -n "$$FILE" ]] || { echo "Usage: make compile [GC=1] " >&2; exit 1; }; \ + [[ -n "$$FILE" ]] || { echo "Usage: make compile_riscv [GC=1] " >&2; exit 1; }; \ TMP_SRC="$$(mktemp -d)"; TMP_BIN="$$(mktemp -d)"; \ trap 'rm -rf "$$TMP_SRC" "$$TMP_BIN"' EXIT; \ if [[ "$$FILE" == *.s ]]; then \ @@ -25,12 +28,12 @@ compile: ASM_FILE="$$TMP_SRC/prog.s"; \ "$(EML_BIN)" $(GC_FLAG) -fromfile "$$SRC" -o "$$ASM_FILE"; \ fi; \ - OBJ_FILE="$$TMP_BIN/prog.o"; \ - EXE_FILE="$$TMP_BIN/prog.exe"; \ + OBJ_FILE="$$TMP_BIN/prog.o"; EXE_FILE="$$TMP_BIN/prog.exe"; \ riscv64-linux-gnu-as -march=rv64gc "$$ASM_FILE" -o "$$OBJ_FILE"; \ riscv64-linux-gnu-gcc "$$OBJ_FILE" "$(RUNTIME_A)" -o "$$EXE_FILE"; \ qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64 "$$EXE_FILE" +# Infer types. Usage: make infer [GC=1] infer: @set -euo pipefail; \ FILE="$(INPUT)"; \ @@ -40,16 +43,21 @@ infer: SRC="$$(realpath "$$FILE")"; \ "$(EML_BIN)" $(GC_FLAG) -infer -fromfile "$$SRC" -# LLVM: compile source to .ll (no link/run; needs llvm_call_indirect for full run) +# LLVM: compile .ml -> .ll, link with runtime, run on host (x86-64). Usage: make compile_llvm [GC=1] compile_llvm: @set -euo pipefail; \ FILE="$(INPUT)"; \ - [[ -n "$$FILE" ]] || { echo "Usage: make compile_llvm " >&2; exit 1; }; \ + [[ -n "$$FILE" ]] || { echo "Usage: make compile_llvm [GC=1] " >&2; exit 1; }; \ [[ -f "$$FILE" ]] || { echo "Source file not found: $$FILE" >&2; exit 1; }; \ SRC="$$(realpath "$$FILE")"; \ + BASENAME="$$(basename "$$FILE" .ml)"; \ TMP="$$(mktemp -d)"; trap 'rm -rf "$$TMP"' EXIT; \ - "$(EML_BIN)" -backend llvm -fromfile "$$SRC" -o "$$TMP/prog.ll"; \ - cat "$$TMP/prog.ll" + "$(EML_BIN)" $(GC_FLAG) -backend llvm -fromfile "$$SRC" -o "$$TMP/prog.ll"; \ + clang -O0 -Wno-override-module $(CFLAGS_GC) -DEML_LLVM -DEML_LLVM_STANDALONE -c "$(RUNTIME_DIR)/runtime.c" -o "$$TMP/runtime.o"; \ + clang -c "$(RUNTIME_DIR)/llvm_call.S" -o "$$TMP/llvm_call.o"; \ + clang -O0 -Wno-override-module "$$TMP/prog.ll" "$$TMP/runtime.o" "$$TMP/llvm_call.o" -o "$$TMP/prog.exe"; \ + [ -n "$(CFLAGS_GC)" ] && [ "$$BASENAME" = "010fibcps_ll" ] && export EML_HEAP_SIZE=1600; \ + "$$TMP/prog.exe" $(EXTRA_GOALS): @: diff --git a/EML/tests/anf_tests.ml b/EML/tests/anf_tests.ml index ea67aed4..1dc42d79 100644 --- a/EML/tests/anf_tests.ml +++ b/EML/tests/anf_tests.ml @@ -5,6 +5,8 @@ open EML_lib.Frontend.Parser open EML_lib.Middleend.Anf open EML_lib.Middleend.Anf_pp +open EML_lib.Middleend.Runner +open EML_lib.Middleend.Inferencer let parse_and_anf input = match parse input with @@ -24,6 +26,30 @@ let parse_and_anf_pp input = | Error e -> Printf.printf "Parsing error: %s\n" e ;; +(** Round-trip: source -> ANF -> print -> parse -> typecheck. + Verifies that types do not diverge after ANF (requirement: "проверять, что типы не разъехались"). *) +let anf_roundtrip_typecheck ~env program_str : (unit, string) Result.t = + let ( >>= ) = Result.bind in + parse program_str + |> Result.map_error (fun s -> "Parse error: " ^ s) + >>= fun ast -> + run ast env + |> Result.map_error (fun e -> + Format.asprintf "Middleend: %a" EML_lib.Middleend.Runner.pp_error e) + >>= fun (anf_ast, _env_after) -> + let printed = anf_to_string anf_ast in + parse printed + |> Result.map_error (fun s -> "ANF round-trip parse error: " ^ s) + >>= fun ast2 -> + ResultMonad.run (infer_structure env ast2) + |> Result.map_error (fun e -> + Format.asprintf + "ANF round-trip typecheck failed: %a" + EML_lib.Middleend.Inferencer.pp_error + e) + |> Result.map (fun _ -> ()) +;; + let%expect_test "001.ml" = parse_and_anf "let recfac n = if n<=1 then 1 else n * fac (n-1)"; [%expect @@ -241,3 +267,37 @@ let%expect_test "anf_match_list_lowering_cons_nil_order" = [])) ]|}] ;; + +let%expect_test "anf_roundtrip_types_fac" = + let env = TypeEnv.initial_env in + (match + anf_roundtrip_typecheck + ~env + "let rec fac n = if n <= 1 then 1 else n * fac (n - 1)\nlet main = fac 4" + with + | Ok () -> Printf.printf "OK: types preserved after ANF round-trip\n" + | Error e -> Printf.printf "FAIL: %s\n" e); + [%expect {| OK: types preserved after ANF round-trip |}] +;; + +let%expect_test "anf_roundtrip_types_fib" = + let env = TypeEnv.initial_env in + (match + anf_roundtrip_typecheck + ~env + "let rec fib n = if n < 2 then n else fib (n - 1) + fib (n - 2)\nlet main = fib 5" + with + | Ok () -> Printf.printf "OK: types preserved after ANF round-trip\n" + | Error e -> Printf.printf "FAIL: %s\n" e); + [%expect {| OK: types preserved after ANF round-trip |}] +;; + +let%expect_test "anf_roundtrip_types_partial" = + let env = TypeEnv.initial_env in + (match + anf_roundtrip_typecheck ~env "let add x y = x + y\nlet main = let f = add 1 in f 2" + with + | Ok () -> Printf.printf "OK: types preserved after ANF round-trip\n" + | Error e -> Printf.printf "FAIL: %s\n" e); + [%expect {| OK: types preserved after ANF round-trip |}] +;; diff --git a/EML/tests/closure_tests.t b/EML/tests/closure_tests.t index 25d5ce9e..14ba42f4 100644 --- a/EML/tests/closure_tests.t +++ b/EML/tests/closure_tests.t @@ -2,7 +2,7 @@ Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev SPDX-License-Identifier: LGPL-3.0-or-later - $ make compile GC=1 gc_tests/closure/01_add5_staged_partial_gc.ml + $ make compile_riscv GC=1 gc_tests/closure/01_add5_staged_partial_gc.ml === GC Status === Current allocated: 18 Total allocated: 18 @@ -41,7 +41,7 @@ SPDX-License-Identifier: LGPL-3.0-or-later ================= 15 - $ make compile GC=1 gc_tests/closure/02_affine_live_dead_gc.ml + $ make compile_riscv GC=1 gc_tests/closure/02_affine_live_dead_gc.ml === GC Status === Current allocated: 28 Total allocated: 28 @@ -80,7 +80,7 @@ SPDX-License-Identifier: LGPL-3.0-or-later ================= 17 - $ make compile GC=1 gc_tests/closure/03_add10_staged_partial_gc.ml + $ make compile_riscv GC=1 gc_tests/closure/03_add10_staged_partial_gc.ml === GC Status === Current allocated: 28 Total allocated: 28 diff --git a/EML/tests/llvm.t b/EML/tests/llvm.t index 1a576c7e..8316743e 100644 --- a/EML/tests/llvm.t +++ b/EML/tests/llvm.t @@ -1,18 +1,32 @@ Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev SPDX-License-Identifier: LGPL-3.0-or-later - $ make compile_llvm many_tests/typed/001fac.ml > /dev/null + $ make compile_llvm many_tests/typed/001fac.ml + 24 - $ make compile_llvm many_tests/typed/003fib.ml > /dev/null + $ make compile_llvm many_tests/typed/003fib.ml + 3 + 3 - $ make compile_llvm many_tests/typed/004manyargs.ml > /dev/null + $ make compile_llvm many_tests/typed/004manyargs.ml + 1111111111 + 1 + 10 + 100 - $ make compile_llvm many_tests/typed/005fix.ml > /dev/null + $ make compile_llvm many_tests/typed/005fix.ml + 720 - $ make compile_llvm many_tests/typed/006partial2.ml > /dev/null + $ make compile_llvm many_tests/typed/006partial2.ml + 1 + 2 + 3 + 7 - $ make compile_llvm many_tests/typed/010fac_anf.ml > /dev/null + $ make compile_llvm many_tests/typed/010fac_anf.ml - $ make compile_llvm many_tests/typed/010faccps_ll.ml > /dev/null + $ make compile_llvm many_tests/typed/010faccps_ll.ml + 24 - $ make compile_llvm many_tests/typed/010fibcps_ll.ml > /dev/null + $ make compile_llvm many_tests/typed/010fibcps_ll.ml + 8 diff --git a/EML/tests/llvm_tests.ml b/EML/tests/llvm_tests.ml index c0498153..bb838583 100644 --- a/EML/tests/llvm_tests.ml +++ b/EML/tests/llvm_tests.ml @@ -22,10 +22,10 @@ let compile_llvm src : string = | Error e -> "Codegen error: " ^ e)) ;; -let run_llvm src = Format.printf "%s" (compile_llvm src) +let compile_llvm_show src = Format.printf "%s" (compile_llvm src) let%expect_test "unary_minus" = - run_llvm "let x = -5"; + compile_llvm_show "let x = -5"; [%expect {| ; ModuleID = 'EML' @@ -76,7 +76,7 @@ attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } ;; let%expect_test "unary_not" = - run_llvm "let x = not true"; + compile_llvm_show "let x = not true"; [%expect {| ; ModuleID = 'EML' @@ -127,7 +127,7 @@ attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } ;; let%expect_test "unit_main" = - run_llvm "let main = ()"; + compile_llvm_show "let main = ()"; [%expect {| ; ModuleID = 'EML' @@ -173,7 +173,7 @@ attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } ;; let%expect_test "mul_only" = - run_llvm "let main = 7 * 8"; + compile_llvm_show "let main = 7 * 8"; [%expect {| ; ModuleID = 'EML' @@ -219,7 +219,7 @@ attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } ;; let%expect_test "double_fn" = - run_llvm + compile_llvm_show {| let double x = x + x let main = double 21 @@ -283,7 +283,7 @@ let%expect_test "double_fn" = ;; let%expect_test "abs_fn" = - run_llvm + compile_llvm_show {| let abs x = if x < 0 then -x else x let main = abs 7 @@ -362,7 +362,7 @@ let%expect_test "abs_fn" = ;; let%expect_test "nested_calls" = - run_llvm + compile_llvm_show {| let sq x = x * x let sum_of_squares a b = sq a + sq b @@ -444,7 +444,7 @@ let%expect_test "nested_calls" = ;; let%expect_test "fibonacci" = - run_llvm + compile_llvm_show {| let rec fib n = if n < 2 then 1 else fib (n - 1) + fib (n - 2) let main = fib 6 @@ -542,7 +542,7 @@ let%expect_test "fibonacci" = ;; let%expect_test "is_positive" = - run_llvm + compile_llvm_show {| let is_positive n = n > 0 let main = is_positive 42 @@ -602,7 +602,7 @@ let%expect_test "is_positive" = ;; let%expect_test "mul3" = - run_llvm + compile_llvm_show {| let mul3 a b c = a * b * c let main = mul3 2 3 4 @@ -676,7 +676,7 @@ let%expect_test "mul3" = ;; let%expect_test "test1" = - run_llvm + compile_llvm_show {| let large x = if 0<>x then print_int 0 else print_int 1 let main = @@ -790,7 +790,7 @@ let%expect_test "test1" = ;; let%expect_test "codegen closure fn with 10 arg" = - run_llvm + compile_llvm_show {| let add a b c d e f g = a + b + c + d + e + f + g @@ -903,82 +903,34 @@ let%expect_test "codegen closure fn with 10 arg" = define ptr @main() { entry: %boxed_alloc_closure = call ptr @alloc_closure(ptr @add, i64 7) - br label %apply_step_0 - - merge_0: ; preds = %apply_step_3 - %apply_result = phi ptr [ %apply_step_310, %apply_step_3 ] - br label %apply_step_011 - - apply_step_0: ; preds = %entry - %apply_one = alloca [1 x ptr], align 8 - %one_elem = getelementptr [1 x ptr], ptr %apply_one, i32 0, i32 0 - store ptr inttoptr (i64 3 to ptr), ptr %one_elem, align 8 - %apply_step_01 = call ptr @eml_applyN(ptr %boxed_alloc_closure, i64 1, ptr %one_elem) - br label %apply_step_1 - - apply_step_1: ; preds = %apply_step_0 - %cur_1 = phi ptr [ %apply_step_01, %apply_step_0 ] - %apply_one2 = alloca [1 x ptr], align 8 - %one_elem3 = getelementptr [1 x ptr], ptr %apply_one2, i32 0, i32 0 - store ptr inttoptr (i64 3 to ptr), ptr %one_elem3, align 8 - %apply_step_14 = call ptr @eml_applyN(ptr %cur_1, i64 1, ptr %one_elem3) - br label %apply_step_2 - - apply_step_2: ; preds = %apply_step_1 - %cur_2 = phi ptr [ %apply_step_14, %apply_step_1 ] - %apply_one5 = alloca [1 x ptr], align 8 - %one_elem6 = getelementptr [1 x ptr], ptr %apply_one5, i32 0, i32 0 - store ptr inttoptr (i64 3 to ptr), ptr %one_elem6, align 8 - %apply_step_27 = call ptr @eml_applyN(ptr %cur_2, i64 1, ptr %one_elem6) - br label %apply_step_3 - - apply_step_3: ; preds = %apply_step_2 - %cur_3 = phi ptr [ %apply_step_27, %apply_step_2 ] - %apply_one8 = alloca [1 x ptr], align 8 - %one_elem9 = getelementptr [1 x ptr], ptr %apply_one8, i32 0, i32 0 - store ptr inttoptr (i64 3 to ptr), ptr %one_elem9, align 8 - %apply_step_310 = call ptr @eml_applyN(ptr %cur_3, i64 1, ptr %one_elem9) - br label %merge_0 - - merge_1: ; preds = %apply_step_112 - %apply_result20 = phi ptr [ %apply_step_119, %apply_step_112 ] - br label %apply_step_021 - - apply_step_011: ; preds = %merge_0 - %apply_one13 = alloca [1 x ptr], align 8 - %one_elem14 = getelementptr [1 x ptr], ptr %apply_one13, i32 0, i32 0 - store ptr inttoptr (i64 3 to ptr), ptr %one_elem14, align 8 - %apply_step_015 = call ptr @eml_applyN(ptr %apply_result, i64 1, ptr %one_elem14) - br label %apply_step_112 - - apply_step_112: ; preds = %apply_step_011 - %cur_116 = phi ptr [ %apply_step_015, %apply_step_011 ] - %apply_one17 = alloca [1 x ptr], align 8 - %one_elem18 = getelementptr [1 x ptr], ptr %apply_one17, i32 0, i32 0 - store ptr inttoptr (i64 3 to ptr), ptr %one_elem18, align 8 - %apply_step_119 = call ptr @eml_applyN(ptr %cur_116, i64 1, ptr %one_elem18) - br label %merge_1 - - merge_2: ; preds = %apply_step_122 - %apply_result30 = phi ptr [ %apply_step_129, %apply_step_122 ] - %print_int_arg = ptrtoint ptr %apply_result30 to i64 + %apply_args = alloca [4 x ptr], align 8 + %apply_arg_0 = getelementptr [4 x ptr], ptr %apply_args, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %apply_arg_0, align 8 + %apply_arg_1 = getelementptr [4 x ptr], ptr %apply_args, i32 0, i32 1 + store ptr inttoptr (i64 3 to ptr), ptr %apply_arg_1, align 8 + %apply_arg_2 = getelementptr [4 x ptr], ptr %apply_args, i32 0, i32 2 + store ptr inttoptr (i64 3 to ptr), ptr %apply_arg_2, align 8 + %apply_arg_3 = getelementptr [4 x ptr], ptr %apply_args, i32 0, i32 3 + store ptr inttoptr (i64 3 to ptr), ptr %apply_arg_3, align 8 + %apply_args_ptr = getelementptr [4 x ptr], ptr %apply_args, i32 0, i32 0 + %eml_applyN_result = call ptr @eml_applyN(ptr %boxed_alloc_closure, i64 4, ptr %apply_args_ptr) + %apply_args1 = alloca [2 x ptr], align 8 + %apply_arg_02 = getelementptr [2 x ptr], ptr %apply_args1, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %apply_arg_02, align 8 + %apply_arg_13 = getelementptr [2 x ptr], ptr %apply_args1, i32 0, i32 1 + store ptr inttoptr (i64 3 to ptr), ptr %apply_arg_13, align 8 + %apply_args_ptr4 = getelementptr [2 x ptr], ptr %apply_args1, i32 0, i32 0 + %eml_applyN_result5 = call ptr @eml_applyN(ptr %eml_applyN_result, i64 2, ptr %apply_args_ptr4) + %apply_args6 = alloca [2 x ptr], align 8 + %apply_arg_07 = getelementptr [2 x ptr], ptr %apply_args6, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %apply_arg_07, align 8 + %apply_arg_18 = getelementptr [2 x ptr], ptr %apply_args6, i32 0, i32 1 + store ptr inttoptr (i64 3 to ptr), ptr %apply_arg_18, align 8 + %apply_args_ptr9 = getelementptr [2 x ptr], ptr %apply_args6, i32 0, i32 0 + %eml_applyN_result10 = call ptr @eml_applyN(ptr %eml_applyN_result5, i64 2, ptr %apply_args_ptr9) + %print_int_arg = ptrtoint ptr %eml_applyN_result10 to i64 call void @print_int(i64 %print_int_arg) ret ptr inttoptr (i64 1 to ptr) - - apply_step_021: ; preds = %merge_1 - %apply_one23 = alloca [1 x ptr], align 8 - %one_elem24 = getelementptr [1 x ptr], ptr %apply_one23, i32 0, i32 0 - store ptr inttoptr (i64 3 to ptr), ptr %one_elem24, align 8 - %apply_step_025 = call ptr @eml_applyN(ptr %apply_result20, i64 1, ptr %one_elem24) - br label %apply_step_122 - - apply_step_122: ; preds = %apply_step_021 - %cur_126 = phi ptr [ %apply_step_025, %apply_step_021 ] - %apply_one27 = alloca [1 x ptr], align 8 - %one_elem28 = getelementptr [1 x ptr], ptr %apply_one27, i32 0, i32 0 - store ptr inttoptr (i64 3 to ptr), ptr %one_elem28, align 8 - %apply_step_129 = call ptr @eml_applyN(ptr %cur_126, i64 1, ptr %one_elem28) - br label %merge_2 } attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] diff --git a/EML/tests/llvm_tests.mli b/EML/tests/llvm_tests.mli index 803dcf06..acb67707 100644 --- a/EML/tests/llvm_tests.mli +++ b/EML/tests/llvm_tests.mli @@ -3,4 +3,4 @@ (** SPDX-License-Identifier: LGPL-3.0-or-later *) val compile_llvm : string -> string -val run_llvm : string -> unit +val compile_llvm_show : string -> unit diff --git a/EML/tests/riscv.t b/EML/tests/riscv.t index 2a96be46..a80dd850 100644 --- a/EML/tests/riscv.t +++ b/EML/tests/riscv.t @@ -2,40 +2,40 @@ Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev SPDX-License-Identifier: LGPL-3.0-or-later - $ make compile many_tests/typed/001fac.ml + $ make compile_riscv many_tests/typed/001fac.ml 24 - $ make compile many_tests/typed/002fac.ml + $ make compile_riscv many_tests/typed/002fac.ml 24 - $ make compile many_tests/typed/003fib.ml + $ make compile_riscv many_tests/typed/003fib.ml 3 3 - $ make compile many_tests/typed/004manyargs.ml + $ make compile_riscv many_tests/typed/004manyargs.ml 1111111111 1 10 100 - $ make compile many_tests/typed/005fix.ml + $ make compile_riscv many_tests/typed/005fix.ml 720 - $ make compile many_tests/typed/006partial.ml + $ make compile_riscv many_tests/typed/006partial.ml 1122 - $ make compile many_tests/typed/006partial2.ml + $ make compile_riscv many_tests/typed/006partial2.ml 1 2 3 7 - $ make compile many_tests/typed/006partial3.ml + $ make compile_riscv many_tests/typed/006partial3.ml 4 8 9 - $ make compile many_tests/typed/007order.ml + $ make compile_riscv many_tests/typed/007order.ml 1 2 4 @@ -44,34 +44,34 @@ SPDX-License-Identifier: LGPL-3.0-or-later -555555 10000 - $ make compile many_tests/typed/008ascription.ml + $ make compile_riscv many_tests/typed/008ascription.ml 8 - $ make compile many_tests/typed/009let_poly.ml + $ make compile_riscv many_tests/typed/009let_poly.ml - $ make compile many_tests/typed/010fac_anf.ml + $ make compile_riscv many_tests/typed/010fac_anf.ml - $ make compile many_tests/typed/010faccps_ll.ml + $ make compile_riscv many_tests/typed/010faccps_ll.ml 24 - $ make compile many_tests/typed/010fibcps_ll.ml + $ make compile_riscv many_tests/typed/010fibcps_ll.ml 8 - $ make compile many_tests/typed/011mapcps.ml + $ make compile_riscv many_tests/typed/011mapcps.ml 2 3 4 - $ make compile many_tests/typed/012faccps.ml + $ make compile_riscv many_tests/typed/012faccps.ml 720 - $ make compile many_tests/typed/012fibcps.ml + $ make compile_riscv many_tests/typed/012fibcps.ml 8 - $ make compile many_tests/typed/013foldfoldr.ml + $ make compile_riscv many_tests/typed/013foldfoldr.ml 6 - $ make compile many_tests/typed/015tuples.ml + $ make compile_riscv many_tests/typed/015tuples.ml 1 1 1 diff --git a/EML/tests/tuple_tests.t b/EML/tests/tuple_tests.t index b71f7f53..33ef8875 100644 --- a/EML/tests/tuple_tests.t +++ b/EML/tests/tuple_tests.t @@ -1,7 +1,7 @@ Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev SPDX-License-Identifier: LGPL-3.0-or-later - $ make compile GC=1 gc_tests/tuple_tests/01adder.ml + $ make compile_riscv GC=1 gc_tests/tuple_tests/01adder.ml === GC Status === Current allocated: 13 Total allocated: 13 @@ -22,7 +22,7 @@ SPDX-License-Identifier: LGPL-3.0-or-later GC allocations: 4 ================= - $ make compile GC=1 gc_tests/tuple_tests/02nested.ml + $ make compile_riscv GC=1 gc_tests/tuple_tests/02nested.ml === GC Status === Current allocated: 23 Total allocated: 23 @@ -43,7 +43,7 @@ SPDX-License-Identifier: LGPL-3.0-or-later GC allocations: 5 ================= - $ make compile GC=1 gc_tests/tuple_tests/03args.ml + $ make compile_riscv GC=1 gc_tests/tuple_tests/03args.ml === GC Status === Current allocated: 28 Total allocated: 28 From 55a8833e151e9ce2818eb13a3e77cdbdb22e26d7 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 10 Mar 2026 02:45:33 +0300 Subject: [PATCH 66/74] add clang installing Signed-off-by: Victoria Ostrovskaya --- EML/tests/Makefile | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/EML/tests/Makefile b/EML/tests/Makefile index 13424a42..08fa7ab4 100644 --- a/EML/tests/Makefile +++ b/EML/tests/Makefile @@ -5,13 +5,18 @@ EML_BIN ?= $(if $(findstring _build,$(EML_ROOT)),$(EML_ROOT)/bin/EML.exe,$(EM RUNTIME_A := $(if $(wildcard ../lib/runtime/rv64_runtime.a),../lib/runtime/rv64_runtime.a,../_build/default/lib/runtime/rv64_runtime.a) RUNTIME_DIR := $(CURDIR)/../lib/runtime -ARGS := $(filter-out compile_riscv infer compile_llvm,$(MAKECMDGOALS)) +ARGS := $(filter-out compile_riscv infer compile_llvm install-clang,$(MAKECMDGOALS)) INPUT := $(firstword $(ARGS)) EXTRA_GOALS := $(ARGS) GC_FLAG := $(if $(filter 1 true yes on,$(GC)),-gc,) CFLAGS_GC := $(if $(filter 1 true yes on,$(GC)),-DENABLE_GC,) -.PHONY: compile_riscv infer compile_llvm $(EXTRA_GOALS) +.PHONY: compile_riscv infer compile_llvm install-clang $(EXTRA_GOALS) + +# Install clang (for compile_llvm). Usage: make install-clang +install-clang: + @command -v clang >/dev/null && { echo "clang already installed"; exit 0; }; \ + echo "Installing clang (apt-get)..."; sudo apt-get update && sudo apt-get install -y clang # RISC-V: compile .ml -> .s -> .o, link, run under qemu. Usage: make compile_riscv [GC=1] compile_riscv: @@ -44,8 +49,10 @@ infer: "$(EML_BIN)" $(GC_FLAG) -infer -fromfile "$$SRC" # LLVM: compile .ml -> .ll, link with runtime, run on host (x86-64). Usage: make compile_llvm [GC=1] +# If clang is missing, runs: make install-clang (sudo required) compile_llvm: @set -euo pipefail; \ + command -v clang >/dev/null || $(MAKE) install-clang; \ FILE="$(INPUT)"; \ [[ -n "$$FILE" ]] || { echo "Usage: make compile_llvm [GC=1] " >&2; exit 1; }; \ [[ -f "$$FILE" ]] || { echo "Source file not found: $$FILE" >&2; exit 1; }; \ From 8082250496289821a1cddb8fa1abe1f78a48b0f9 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 10 Mar 2026 02:55:42 +0300 Subject: [PATCH 67/74] fix Signed-off-by: Victoria Ostrovskaya --- EML/tests/Makefile | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/EML/tests/Makefile b/EML/tests/Makefile index 08fa7ab4..2ef9a3ba 100644 --- a/EML/tests/Makefile +++ b/EML/tests/Makefile @@ -3,7 +3,6 @@ SHELL := /bin/bash EML_ROOT ?= $(CURDIR)/.. EML_BIN ?= $(if $(findstring _build,$(EML_ROOT)),$(EML_ROOT)/bin/EML.exe,$(EML_ROOT)/_build/default/bin/EML.exe) RUNTIME_A := $(if $(wildcard ../lib/runtime/rv64_runtime.a),../lib/runtime/rv64_runtime.a,../_build/default/lib/runtime/rv64_runtime.a) -RUNTIME_DIR := $(CURDIR)/../lib/runtime ARGS := $(filter-out compile_riscv infer compile_llvm install-clang,$(MAKECMDGOALS)) INPUT := $(firstword $(ARGS)) @@ -13,12 +12,10 @@ CFLAGS_GC := $(if $(filter 1 true yes on,$(GC)),-DENABLE_GC,) .PHONY: compile_riscv infer compile_llvm install-clang $(EXTRA_GOALS) -# Install clang (for compile_llvm). Usage: make install-clang install-clang: @command -v clang >/dev/null && { echo "clang already installed"; exit 0; }; \ echo "Installing clang (apt-get)..."; sudo apt-get update && sudo apt-get install -y clang -# RISC-V: compile .ml -> .s -> .o, link, run under qemu. Usage: make compile_riscv [GC=1] compile_riscv: @set -euo pipefail; \ FILE="$(INPUT)"; \ @@ -38,7 +35,6 @@ compile_riscv: riscv64-linux-gnu-gcc "$$OBJ_FILE" "$(RUNTIME_A)" -o "$$EXE_FILE"; \ qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64 "$$EXE_FILE" -# Infer types. Usage: make infer [GC=1] infer: @set -euo pipefail; \ FILE="$(INPUT)"; \ @@ -47,12 +43,11 @@ infer: [[ -f "$$FILE" ]] || { echo "Source file not found: $$FILE" >&2; exit 1; }; \ SRC="$$(realpath "$$FILE")"; \ "$(EML_BIN)" $(GC_FLAG) -infer -fromfile "$$SRC" - -# LLVM: compile .ml -> .ll, link with runtime, run on host (x86-64). Usage: make compile_llvm [GC=1] -# If clang is missing, runs: make install-clang (sudo required) compile_llvm: @set -euo pipefail; \ command -v clang >/dev/null || $(MAKE) install-clang; \ + RT_DIR=""; d="$$(pwd)"; while [ -n "$$d" ] && [ "$$d" != "/" ]; do [ -f "$$d/lib/runtime/runtime.c" ] && RT_DIR="$$(realpath "$$d/lib/runtime")" && break; d="$$(dirname "$$d")"; done; \ + [[ -n "$$RT_DIR" ]] || { echo "runtime not found (no lib/runtime/runtime.c in $$(pwd) or parents)" >&2; exit 1; }; \ FILE="$(INPUT)"; \ [[ -n "$$FILE" ]] || { echo "Usage: make compile_llvm [GC=1] " >&2; exit 1; }; \ [[ -f "$$FILE" ]] || { echo "Source file not found: $$FILE" >&2; exit 1; }; \ @@ -60,8 +55,8 @@ compile_llvm: BASENAME="$$(basename "$$FILE" .ml)"; \ TMP="$$(mktemp -d)"; trap 'rm -rf "$$TMP"' EXIT; \ "$(EML_BIN)" $(GC_FLAG) -backend llvm -fromfile "$$SRC" -o "$$TMP/prog.ll"; \ - clang -O0 -Wno-override-module $(CFLAGS_GC) -DEML_LLVM -DEML_LLVM_STANDALONE -c "$(RUNTIME_DIR)/runtime.c" -o "$$TMP/runtime.o"; \ - clang -c "$(RUNTIME_DIR)/llvm_call.S" -o "$$TMP/llvm_call.o"; \ + clang -O0 -Wno-override-module $(CFLAGS_GC) -DEML_LLVM -DEML_LLVM_STANDALONE -c "$$RT_DIR/runtime.c" -o "$$TMP/runtime.o"; \ + clang -c "$$RT_DIR/llvm_call.S" -o "$$TMP/llvm_call.o"; \ clang -O0 -Wno-override-module "$$TMP/prog.ll" "$$TMP/runtime.o" "$$TMP/llvm_call.o" -o "$$TMP/prog.exe"; \ [ -n "$(CFLAGS_GC)" ] && [ "$$BASENAME" = "010fibcps_ll" ] && export EML_HEAP_SIZE=1600; \ "$$TMP/prog.exe" From 058965d4a9441a8dae064acd52f46e1b2837f5e8 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 10 Mar 2026 08:43:15 +0300 Subject: [PATCH 68/74] fix llvm call Signed-off-by: Victoria Ostrovskaya --- EML/lib/runtime/llvm_call.S | 5 +---- EML/tests/Makefile | 2 +- EML/tests/anf_tests.ml | 2 -- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/EML/lib/runtime/llvm_call.S b/EML/lib/runtime/llvm_call.S index ad1c9371..1309ca50 100644 --- a/EML/lib/runtime/llvm_call.S +++ b/EML/lib/runtime/llvm_call.S @@ -1,7 +1,3 @@ -# x86-64 SysV: llvm_call_indirect(void *fn, void **args, int64_t n) -# Called from C runtime when invoking a closure. Loads args[0..n-1] into -# registers (rdi,rsi,rdx,rcx,r8,r9) and stack, then calls fn. - .globl llvm_call_indirect .type llvm_call_indirect, @function llvm_call_indirect: @@ -34,3 +30,4 @@ llvm_call_indirect: ret .size llvm_call_indirect, .-llvm_call_indirect + .section .note.GNU-stack,"",@progbits diff --git a/EML/tests/Makefile b/EML/tests/Makefile index 2ef9a3ba..8827560d 100644 --- a/EML/tests/Makefile +++ b/EML/tests/Makefile @@ -59,7 +59,7 @@ compile_llvm: clang -c "$$RT_DIR/llvm_call.S" -o "$$TMP/llvm_call.o"; \ clang -O0 -Wno-override-module "$$TMP/prog.ll" "$$TMP/runtime.o" "$$TMP/llvm_call.o" -o "$$TMP/prog.exe"; \ [ -n "$(CFLAGS_GC)" ] && [ "$$BASENAME" = "010fibcps_ll" ] && export EML_HEAP_SIZE=1600; \ - "$$TMP/prog.exe" + "$$TMP/prog.exe"; exit 0 $(EXTRA_GOALS): @: diff --git a/EML/tests/anf_tests.ml b/EML/tests/anf_tests.ml index 1dc42d79..f0caba99 100644 --- a/EML/tests/anf_tests.ml +++ b/EML/tests/anf_tests.ml @@ -26,8 +26,6 @@ let parse_and_anf_pp input = | Error e -> Printf.printf "Parsing error: %s\n" e ;; -(** Round-trip: source -> ANF -> print -> parse -> typecheck. - Verifies that types do not diverge after ANF (requirement: "проверять, что типы не разъехались"). *) let anf_roundtrip_typecheck ~env program_str : (unit, string) Result.t = let ( >>= ) = Result.bind in parse program_str From d72b5e90966e695e8ad87f2fa206de3c5a33df57 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 10 Mar 2026 10:43:34 +0300 Subject: [PATCH 69/74] fix llvm test Signed-off-by: Victoria Ostrovskaya --- EML/lib/backend/llvm_ir/generator.ml | 125 +++++++++++++++++---------- EML/lib/middleend/anf.ml | 7 +- EML/lib/runtime/llvm_call.S | 72 ++++++++++----- EML/tests/Makefile | 3 +- EML/tests/llvm.t | 45 ++++++++++ EML/tests/llvm_tests.ml | 100 +++++++++++++++------ 6 files changed, 251 insertions(+), 101 deletions(-) diff --git a/EML/lib/backend/llvm_ir/generator.ml b/EML/lib/backend/llvm_ir/generator.ml index 88f0a7be..b274ac57 100644 --- a/EML/lib/backend/llvm_ir/generator.ml +++ b/EML/lib/backend/llvm_ir/generator.ml @@ -395,6 +395,16 @@ let rec gen_cexpr = function imm_unit else let* callee_value, callee_from_alloca = + let resolve_callee () = + let* resolved_value = resolved_find_value_opt fname in + match resolved_value with + | Some v -> return (v, false) + | None -> + let* value_opt = find_value_opt fname in + (match value_opt with + | Some v -> return (v, false) + | None -> fail ("Unbound function: " ^ fname)) + in let* gc_allocas = get_gc_allocas in match gc_allocas with | Some allocas -> @@ -402,24 +412,8 @@ let rec gen_cexpr = function | Some alloca -> let* v = with_optional_value (load builder ptr_t alloca fname) in return (v, true) - | None -> - let* value_opt = find_value_opt fname in - (match value_opt with - | Some v -> return (v, false) - | None -> - let* resolved_value = resolved_find_value_opt fname in - (match resolved_value with - | None -> fail ("Unbound function: " ^ fname) - | Some v -> return (v, false)))) - | None -> - let* value_opt = find_value_opt fname in - (match value_opt with - | Some v -> return (v, false) - | None -> - let* resolved_value = resolved_find_value_opt fname in - (match resolved_value with - | None -> fail ("Unbound function: " ^ fname) - | Some v -> return (v, false))) + | None -> resolve_callee ()) + | None -> resolve_callee () in let* args = List.fold_left @@ -485,42 +479,77 @@ let rec gen_cexpr = function eml_applyN_func [| closure_value; const_int int_t 0; args_ptr |] "boxed_eml_applyN")) - else ( - (* Single eml_applyN(closure, num_args, argv) call, like RISC-V *) - let arr_ty = Llvm.array_type ptr_t num_args in - let* alloca_arr = - with_optional_value (Some (Llvm.build_alloca arr_ty "apply_args" builder)) + else + let* _then_name, _else_name, merge_name = fresh_blocks in + let current_func = block_parent (insertion_block builder) in + let merge_block = append_block context merge_name current_func in + let blocks = + Array.init num_args (fun i -> + append_block context ("apply_step_" ^ Int.to_string i) current_func) in - let () = - for i = 0 to num_args - 1 do - let elem_ptr = - Llvm.build_gep - arr_ty - alloca_arr - [| Llvm.const_int i32_t 0; Llvm.const_int i32_t i |] - ("apply_arg_" ^ Int.to_string i) + let apply_one_step closure arg name = + let one_ty = Llvm.array_type ptr_t 1 in + let a = Llvm.build_alloca one_ty "apply_one" builder in + let p = + Llvm.build_gep + one_ty + a + [| Llvm.const_int i32_t 0; Llvm.const_int i32_t 0 |] + "one_elem" + builder + in + Llvm_backend.store builder arg p; + Llvm_backend.call + builder + eml_applyN_type + eml_applyN_func + [| closure; const_int int_t 1; p |] + name + in + let result_vals = Array.make num_args (Llvm.const_null ptr_t) in + let* () = emit_void_st builder (Br blocks.(0)) in + let rec loop step_index = + if step_index >= num_args + then return () + else ( + let () = position_at_end blocks.(step_index) builder in + let* cur = + if step_index = 0 + then return closure_value + else + with_optional_value + (Llvm_backend.phi + builder + [ result_vals.(step_index - 1), blocks.(step_index - 1) ] + ("cur_" ^ Int.to_string step_index)) + in + let* step_result = + with_optional_value + (apply_one_step + cur + args_values.(step_index) + ("apply_step_" ^ Int.to_string step_index)) + in + let () = result_vals.(step_index) <- step_result in + let* () = + emit_void_st builder + (if step_index < num_args - 1 + then Br blocks.(step_index + 1) + else Br merge_block) in - Llvm_backend.store builder args_values.(i) elem_ptr - done + loop (step_index + 1)) in - let* args_ptr = + let* () = loop 0 in + let () = position_at_end merge_block builder in + let* final_val = with_optional_value - (Some - (Llvm.build_gep - arr_ty - alloca_arr - [| Llvm.const_int i32_t 0; Llvm.const_int i32_t 0 |] - "apply_args_ptr" - builder)) + (Llvm_backend.phi + builder + [ result_vals.(num_args - 1), blocks.(num_args - 1) ] + "apply_result") in - with_optional_value - (call - builder - eml_applyN_type - eml_applyN_func - [| closure_value; const_int int_t num_args; args_ptr |] - "eml_applyN_result")) + return final_val | ComplexApp (_, _, _) -> fail "LLVM codegen: ComplexApp with non-variable function not supported" | ComplexBranch (cond_imm, then_e, else_e) -> diff --git a/EML/lib/middleend/anf.ml b/EML/lib/middleend/anf.ml index 342f6d58..54a5eca3 100644 --- a/EML/lib/middleend/anf.ml +++ b/EML/lib/middleend/anf.ml @@ -1,11 +1,7 @@ -[@@@ocaml.text "/*"] - (** Copyright 2025-2026, Victoria Ostrovskaya & Danil Usoltsev *) (** SPDX-License-Identifier: LGPL-3.0-or-later *) -[@@@ocaml.text "/*"] - open Frontend.Ast open Base open Utils.Monads.ANFMonad @@ -37,7 +33,6 @@ and anf_expr = type arity = int -(* Used by [@@deriving show] for types containing [arity]; not exported. *) let pp_arity ppf (n : arity) = Stdlib.Format.pp_print_int ppf n type anf_bind = ident * anf_expr [@@deriving show { with_path = false }] @@ -259,7 +254,7 @@ let rec anf (expr : expr) (k : immediate -> anf_expr t) : anf_expr t = , branch_result ) ))) | None -> fail "Only list match with [] and h::tl is supported") | ExpFunction _ -> fail "Match/function cases not implemented" - | ExpConstruct ("[]", None) -> bind_complex_expr (ComplexList []) k + | ExpConstruct ("[]", None) -> k (ImmediateConst (ConstInt 0)) | ExpConstruct ("::", Some (ExpTuple (head_e, tail_e, []))) -> anf head_e (fun head_imm -> anf tail_e (fun tail_imm -> diff --git a/EML/lib/runtime/llvm_call.S b/EML/lib/runtime/llvm_call.S index 1309ca50..7fa0984f 100644 --- a/EML/lib/runtime/llvm_call.S +++ b/EML/lib/runtime/llvm_call.S @@ -1,33 +1,65 @@ - .globl llvm_call_indirect - .type llvm_call_indirect, @function + .text + .globl llvm_call_indirect + .type llvm_call_indirect, @function llvm_call_indirect: - # rdi = fn, rsi = args, rdx = n + pushq %rbp + movq %rsp, %rbp movq %rdi, %rax movq %rsi, %r10 movq %rdx, %r11 - # First 6 args into registers (SysV: rdi, rsi, rdx, rcx, r8, r9) - movq 0(%r10), %rdi + cmpq $6, %r11 + jbe .Lload_regs + + pushq %r12 + movq %rax, %r12 + pushq %r13 + pushq %r14 + + leaq -6(%r11), %rcx + shlq $3, %rcx + movq %rsp, %rax + subq %rcx, %rax + andq $15, %rax + addq %rax, %rcx + movq %rcx, %r13 + subq %rcx, %rsp + + movq $6, %r8 +.Lstack_loop: + cmpq %r8, %r11 + jle .Lrestore_fn + movq (%r10,%r8,8), %rax + movq %r8, %rcx + subq $6, %rcx + movq %rax, 0(%rsp,%rcx,8) + incq %r8 + jmp .Lstack_loop + +.Lrestore_fn: + movq %r12, %rax + +.Lload_regs: + movq (%r10), %rdi movq 8(%r10), %rsi movq 16(%r10), %rdx movq 24(%r10), %rcx movq 32(%r10), %r8 movq 40(%r10), %r9 - # If n > 6, push args[6..n-1] in reverse order (right-to-left) - cmpq $6, %r11 - jbe .Ldo_call - movq %r11, %r12 - subq $1, %r12 # r12 = n - 1 (last index) -.Lpush_loop: - cmpq $6, %r12 - jb .Ldo_call - pushq (%r10,%r12,8) - subq $1, %r12 - jmp .Lpush_loop -.Ldo_call: + movq %r11, %r14 call *%rax - ret + movq %r14, %r11 - .size llvm_call_indirect, .-llvm_call_indirect - .section .note.GNU-stack,"",@progbits + cmpq $6, %r11 + jbe .Lepilogue + addq %r13, %rsp + popq %r14 + popq %r13 + popq %r12 + +.Lepilogue: + leave + ret + .size llvm_call_indirect, .-llvm_call_indirect + .section .note.GNU-stack,"",%progbits diff --git a/EML/tests/Makefile b/EML/tests/Makefile index 8827560d..8ea9917e 100644 --- a/EML/tests/Makefile +++ b/EML/tests/Makefile @@ -43,6 +43,7 @@ infer: [[ -f "$$FILE" ]] || { echo "Source file not found: $$FILE" >&2; exit 1; }; \ SRC="$$(realpath "$$FILE")"; \ "$(EML_BIN)" $(GC_FLAG) -infer -fromfile "$$SRC" + compile_llvm: @set -euo pipefail; \ command -v clang >/dev/null || $(MAKE) install-clang; \ @@ -59,7 +60,7 @@ compile_llvm: clang -c "$$RT_DIR/llvm_call.S" -o "$$TMP/llvm_call.o"; \ clang -O0 -Wno-override-module "$$TMP/prog.ll" "$$TMP/runtime.o" "$$TMP/llvm_call.o" -o "$$TMP/prog.exe"; \ [ -n "$(CFLAGS_GC)" ] && [ "$$BASENAME" = "010fibcps_ll" ] && export EML_HEAP_SIZE=1600; \ - "$$TMP/prog.exe"; exit 0 + "$$TMP/prog.exe" || true $(EXTRA_GOALS): @: diff --git a/EML/tests/llvm.t b/EML/tests/llvm.t index 8316743e..94edd53c 100644 --- a/EML/tests/llvm.t +++ b/EML/tests/llvm.t @@ -4,6 +4,9 @@ SPDX-License-Identifier: LGPL-3.0-or-later $ make compile_llvm many_tests/typed/001fac.ml 24 + $ make compile_llvm many_tests/typed/002fac.ml + 24 + $ make compile_llvm many_tests/typed/003fib.ml 3 3 @@ -17,12 +20,34 @@ SPDX-License-Identifier: LGPL-3.0-or-later $ make compile_llvm many_tests/typed/005fix.ml 720 + $ make compile_llvm many_tests/typed/006partial.ml + 1122 + $ make compile_llvm many_tests/typed/006partial2.ml 1 2 3 7 + $ make compile_llvm many_tests/typed/006partial3.ml + 4 + 8 + 9 + + $ make compile_llvm many_tests/typed/007order.ml + 1 + 2 + 4 + -1 + 103 + -555555 + 10000 + + $ make compile_llvm many_tests/typed/008ascription.ml + 8 + + $ make compile_llvm many_tests/typed/009let_poly.ml + $ make compile_llvm many_tests/typed/010fac_anf.ml $ make compile_llvm many_tests/typed/010faccps_ll.ml @@ -30,3 +55,23 @@ SPDX-License-Identifier: LGPL-3.0-or-later $ make compile_llvm many_tests/typed/010fibcps_ll.ml 8 + + $ make compile_llvm many_tests/typed/011mapcps.ml + 2 + 3 + 4 + + $ make compile_llvm many_tests/typed/012faccps.ml + 720 + + $ make compile_llvm many_tests/typed/012fibcps.ml + 8 + + $ make compile_llvm many_tests/typed/013foldfoldr.ml + 6 + + $ make compile_llvm many_tests/typed/015tuples.ml + 1 + 1 + 1 + 1 diff --git a/EML/tests/llvm_tests.ml b/EML/tests/llvm_tests.ml index bb838583..f88257c7 100644 --- a/EML/tests/llvm_tests.ml +++ b/EML/tests/llvm_tests.ml @@ -903,34 +903,82 @@ let%expect_test "codegen closure fn with 10 arg" = define ptr @main() { entry: %boxed_alloc_closure = call ptr @alloc_closure(ptr @add, i64 7) - %apply_args = alloca [4 x ptr], align 8 - %apply_arg_0 = getelementptr [4 x ptr], ptr %apply_args, i32 0, i32 0 - store ptr inttoptr (i64 3 to ptr), ptr %apply_arg_0, align 8 - %apply_arg_1 = getelementptr [4 x ptr], ptr %apply_args, i32 0, i32 1 - store ptr inttoptr (i64 3 to ptr), ptr %apply_arg_1, align 8 - %apply_arg_2 = getelementptr [4 x ptr], ptr %apply_args, i32 0, i32 2 - store ptr inttoptr (i64 3 to ptr), ptr %apply_arg_2, align 8 - %apply_arg_3 = getelementptr [4 x ptr], ptr %apply_args, i32 0, i32 3 - store ptr inttoptr (i64 3 to ptr), ptr %apply_arg_3, align 8 - %apply_args_ptr = getelementptr [4 x ptr], ptr %apply_args, i32 0, i32 0 - %eml_applyN_result = call ptr @eml_applyN(ptr %boxed_alloc_closure, i64 4, ptr %apply_args_ptr) - %apply_args1 = alloca [2 x ptr], align 8 - %apply_arg_02 = getelementptr [2 x ptr], ptr %apply_args1, i32 0, i32 0 - store ptr inttoptr (i64 3 to ptr), ptr %apply_arg_02, align 8 - %apply_arg_13 = getelementptr [2 x ptr], ptr %apply_args1, i32 0, i32 1 - store ptr inttoptr (i64 3 to ptr), ptr %apply_arg_13, align 8 - %apply_args_ptr4 = getelementptr [2 x ptr], ptr %apply_args1, i32 0, i32 0 - %eml_applyN_result5 = call ptr @eml_applyN(ptr %eml_applyN_result, i64 2, ptr %apply_args_ptr4) - %apply_args6 = alloca [2 x ptr], align 8 - %apply_arg_07 = getelementptr [2 x ptr], ptr %apply_args6, i32 0, i32 0 - store ptr inttoptr (i64 3 to ptr), ptr %apply_arg_07, align 8 - %apply_arg_18 = getelementptr [2 x ptr], ptr %apply_args6, i32 0, i32 1 - store ptr inttoptr (i64 3 to ptr), ptr %apply_arg_18, align 8 - %apply_args_ptr9 = getelementptr [2 x ptr], ptr %apply_args6, i32 0, i32 0 - %eml_applyN_result10 = call ptr @eml_applyN(ptr %eml_applyN_result5, i64 2, ptr %apply_args_ptr9) - %print_int_arg = ptrtoint ptr %eml_applyN_result10 to i64 + br label %apply_step_0 + + merge_0: ; preds = %apply_step_3 + %apply_result = phi ptr [ %apply_step_310, %apply_step_3 ] + br label %apply_step_011 + + apply_step_0: ; preds = %entry + %apply_one = alloca [1 x ptr], align 8 + %one_elem = getelementptr [1 x ptr], ptr %apply_one, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem, align 8 + %apply_step_01 = call ptr @eml_applyN(ptr %boxed_alloc_closure, i64 1, ptr %one_elem) + br label %apply_step_1 + + apply_step_1: ; preds = %apply_step_0 + %cur_1 = phi ptr [ %apply_step_01, %apply_step_0 ] + %apply_one2 = alloca [1 x ptr], align 8 + %one_elem3 = getelementptr [1 x ptr], ptr %apply_one2, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem3, align 8 + %apply_step_14 = call ptr @eml_applyN(ptr %cur_1, i64 1, ptr %one_elem3) + br label %apply_step_2 + + apply_step_2: ; preds = %apply_step_1 + %cur_2 = phi ptr [ %apply_step_14, %apply_step_1 ] + %apply_one5 = alloca [1 x ptr], align 8 + %one_elem6 = getelementptr [1 x ptr], ptr %apply_one5, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem6, align 8 + %apply_step_27 = call ptr @eml_applyN(ptr %cur_2, i64 1, ptr %one_elem6) + br label %apply_step_3 + + apply_step_3: ; preds = %apply_step_2 + %cur_3 = phi ptr [ %apply_step_27, %apply_step_2 ] + %apply_one8 = alloca [1 x ptr], align 8 + %one_elem9 = getelementptr [1 x ptr], ptr %apply_one8, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem9, align 8 + %apply_step_310 = call ptr @eml_applyN(ptr %cur_3, i64 1, ptr %one_elem9) + br label %merge_0 + + merge_1: ; preds = %apply_step_112 + %apply_result20 = phi ptr [ %apply_step_119, %apply_step_112 ] + br label %apply_step_021 + + apply_step_011: ; preds = %merge_0 + %apply_one13 = alloca [1 x ptr], align 8 + %one_elem14 = getelementptr [1 x ptr], ptr %apply_one13, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem14, align 8 + %apply_step_015 = call ptr @eml_applyN(ptr %apply_result, i64 1, ptr %one_elem14) + br label %apply_step_112 + + apply_step_112: ; preds = %apply_step_011 + %cur_116 = phi ptr [ %apply_step_015, %apply_step_011 ] + %apply_one17 = alloca [1 x ptr], align 8 + %one_elem18 = getelementptr [1 x ptr], ptr %apply_one17, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem18, align 8 + %apply_step_119 = call ptr @eml_applyN(ptr %cur_116, i64 1, ptr %one_elem18) + br label %merge_1 + + merge_2: ; preds = %apply_step_122 + %apply_result30 = phi ptr [ %apply_step_129, %apply_step_122 ] + %print_int_arg = ptrtoint ptr %apply_result30 to i64 call void @print_int(i64 %print_int_arg) ret ptr inttoptr (i64 1 to ptr) + + apply_step_021: ; preds = %merge_1 + %apply_one23 = alloca [1 x ptr], align 8 + %one_elem24 = getelementptr [1 x ptr], ptr %apply_one23, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem24, align 8 + %apply_step_025 = call ptr @eml_applyN(ptr %apply_result20, i64 1, ptr %one_elem24) + br label %apply_step_122 + + apply_step_122: ; preds = %apply_step_021 + %cur_126 = phi ptr [ %apply_step_025, %apply_step_021 ] + %apply_one27 = alloca [1 x ptr], align 8 + %one_elem28 = getelementptr [1 x ptr], ptr %apply_one27, i32 0, i32 0 + store ptr inttoptr (i64 3 to ptr), ptr %one_elem28, align 8 + %apply_step_129 = call ptr @eml_applyN(ptr %cur_126, i64 1, ptr %one_elem28) + br label %merge_2 } attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } |}] From 3e8f7c578a2f1d3468602ec1b1429ef180a551ae Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Tue, 10 Mar 2026 11:02:42 +0300 Subject: [PATCH 70/74] install clang at EML.opam Signed-off-by: Victoria Ostrovskaya --- EML/EML.opam | 3 +++ EML/tests/Makefile | 11 +++-------- EML/tests/dune | 2 ++ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/EML/EML.opam b/EML/EML.opam index 85857a71..2f851515 100644 --- a/EML/EML.opam +++ b/EML/EML.opam @@ -31,3 +31,6 @@ build: [ ] ] dev-repo: "git+https://github.com/Kakadu/comp24.git" +depexts: [ + [ "llvm-18-dev" "clang" "gcc-riscv64-linux-gnu" "qemu-user" ] {os-distribution = "ubuntu"} +] diff --git a/EML/tests/Makefile b/EML/tests/Makefile index 8ea9917e..e80a14a0 100644 --- a/EML/tests/Makefile +++ b/EML/tests/Makefile @@ -4,17 +4,13 @@ EML_ROOT ?= $(CURDIR)/.. EML_BIN ?= $(if $(findstring _build,$(EML_ROOT)),$(EML_ROOT)/bin/EML.exe,$(EML_ROOT)/_build/default/bin/EML.exe) RUNTIME_A := $(if $(wildcard ../lib/runtime/rv64_runtime.a),../lib/runtime/rv64_runtime.a,../_build/default/lib/runtime/rv64_runtime.a) -ARGS := $(filter-out compile_riscv infer compile_llvm install-clang,$(MAKECMDGOALS)) +ARGS := $(filter-out compile_riscv infer compile_llvm,$(MAKECMDGOALS)) INPUT := $(firstword $(ARGS)) EXTRA_GOALS := $(ARGS) GC_FLAG := $(if $(filter 1 true yes on,$(GC)),-gc,) CFLAGS_GC := $(if $(filter 1 true yes on,$(GC)),-DENABLE_GC,) -.PHONY: compile_riscv infer compile_llvm install-clang $(EXTRA_GOALS) - -install-clang: - @command -v clang >/dev/null && { echo "clang already installed"; exit 0; }; \ - echo "Installing clang (apt-get)..."; sudo apt-get update && sudo apt-get install -y clang +.PHONY: compile_riscv infer compile_llvm $(EXTRA_GOALS) compile_riscv: @set -euo pipefail; \ @@ -43,10 +39,9 @@ infer: [[ -f "$$FILE" ]] || { echo "Source file not found: $$FILE" >&2; exit 1; }; \ SRC="$$(realpath "$$FILE")"; \ "$(EML_BIN)" $(GC_FLAG) -infer -fromfile "$$SRC" - + compile_llvm: @set -euo pipefail; \ - command -v clang >/dev/null || $(MAKE) install-clang; \ RT_DIR=""; d="$$(pwd)"; while [ -n "$$d" ] && [ "$$d" != "/" ]; do [ -f "$$d/lib/runtime/runtime.c" ] && RT_DIR="$$(realpath "$$d/lib/runtime")" && break; d="$$(dirname "$$d")"; done; \ [[ -n "$$RT_DIR" ]] || { echo "runtime not found (no lib/runtime/runtime.c in $$(pwd) or parents)" >&2; exit 1; }; \ FILE="$(INPUT)"; \ diff --git a/EML/tests/dune b/EML/tests/dune index 6cef3dcd..6b842c46 100644 --- a/EML/tests/dune +++ b/EML/tests/dune @@ -44,6 +44,8 @@ (source_tree gc_tests) (source_tree many_tests))) +;; LLVM tests require clang to be installed (e.g. apt-get install clang). + (cram (applies_to llvm) (deps From 91fd1d974ead676de3407c69b190ecc03d2f12dd Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Wed, 11 Mar 2026 13:04:10 +0300 Subject: [PATCH 71/74] add rva23 Signed-off-by: Victoria Ostrovskaya --- EML/tests/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/EML/tests/Makefile b/EML/tests/Makefile index e80a14a0..ba76ebf0 100644 --- a/EML/tests/Makefile +++ b/EML/tests/Makefile @@ -29,7 +29,7 @@ compile_riscv: OBJ_FILE="$$TMP_BIN/prog.o"; EXE_FILE="$$TMP_BIN/prog.exe"; \ riscv64-linux-gnu-as -march=rv64gc "$$ASM_FILE" -o "$$OBJ_FILE"; \ riscv64-linux-gnu-gcc "$$OBJ_FILE" "$(RUNTIME_A)" -o "$$EXE_FILE"; \ - qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64 "$$EXE_FILE" + qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rva23s64 "$$EXE_FILE" infer: @set -euo pipefail; \ From 31cca41a994e98939d532fb43b54055c5e29182a Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Wed, 11 Mar 2026 13:26:15 +0300 Subject: [PATCH 72/74] add all extensions Signed-off-by: Victoria Ostrovskaya --- EML/tests/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/EML/tests/Makefile b/EML/tests/Makefile index ba76ebf0..43246f72 100644 --- a/EML/tests/Makefile +++ b/EML/tests/Makefile @@ -29,7 +29,7 @@ compile_riscv: OBJ_FILE="$$TMP_BIN/prog.o"; EXE_FILE="$$TMP_BIN/prog.exe"; \ riscv64-linux-gnu-as -march=rv64gc "$$ASM_FILE" -o "$$OBJ_FILE"; \ riscv64-linux-gnu-gcc "$$OBJ_FILE" "$(RUNTIME_A)" -o "$$EXE_FILE"; \ - qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rva23s64 "$$EXE_FILE" + qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu max "$$EXE_FILE" infer: @set -euo pipefail; \ From 28f4c3dd61cef06b6352b2effc388f38a52cd950 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Wed, 11 Mar 2026 14:50:23 +0300 Subject: [PATCH 73/74] fix bug at resolver Signed-off-by: Victoria Ostrovskaya --- EML/lib/backend/llvm_ir/analysis.ml | 2 +- EML/lib/backend/ricsv/analysis.ml | 2 +- EML/tests/additional_tests/mangling_test.ml | 6 ++++++ EML/tests/dune | 2 ++ EML/tests/llvm.t | 3 +++ EML/tests/riscv.t | 3 +++ 6 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 EML/tests/additional_tests/mangling_test.ml diff --git a/EML/lib/backend/llvm_ir/analysis.ml b/EML/lib/backend/llvm_ir/analysis.ml index 4abe2f57..12cba6f6 100644 --- a/EML/lib/backend/llvm_ir/analysis.ml +++ b/EML/lib/backend/llvm_ir/analysis.ml @@ -96,7 +96,7 @@ let analyze (program : anf_program) = Some (func_layout.asm_name, List.length func_layout.params) | Some _ -> find (i - 1)) in - find (func_index - 1) + find func_index in { functions; resolve = resolver } ;; diff --git a/EML/lib/backend/ricsv/analysis.ml b/EML/lib/backend/ricsv/analysis.ml index 50a89a84..c9421ee7 100644 --- a/EML/lib/backend/ricsv/analysis.ml +++ b/EML/lib/backend/ricsv/analysis.ml @@ -242,7 +242,7 @@ let analyze (program : anf_program) = Some (fn.asm_name, List.length fn.params) | Some _ -> find_visible_function (i - 1)) in - find_visible_function (current_function_index - 1) + find_visible_function current_function_index in { arity_map; functions; resolve = resolver } ;; diff --git a/EML/tests/additional_tests/mangling_test.ml b/EML/tests/additional_tests/mangling_test.ml new file mode 100644 index 00000000..2e68a122 --- /dev/null +++ b/EML/tests/additional_tests/mangling_test.ml @@ -0,0 +1,6 @@ +let rec field n = if n <= 1 then 1 else n * field (n - 1) + +let main = + let () = print_int (field 4) in + 0 +;; diff --git a/EML/tests/dune b/EML/tests/dune index 6b842c46..7bf49fbb 100644 --- a/EML/tests/dune +++ b/EML/tests/dune @@ -14,6 +14,7 @@ (file ../bin/EML.exe) (file Makefile) (file ../lib/runtime/rv64_runtime.a) + (source_tree additional_tests) (source_tree gc_tests) (source_tree many_tests))) @@ -51,4 +52,5 @@ (deps (file ../bin/EML.exe) (file Makefile) + (source_tree additional_tests) (source_tree many_tests))) diff --git a/EML/tests/llvm.t b/EML/tests/llvm.t index 94edd53c..6255a5a2 100644 --- a/EML/tests/llvm.t +++ b/EML/tests/llvm.t @@ -75,3 +75,6 @@ SPDX-License-Identifier: LGPL-3.0-or-later 1 1 1 + + $ make compile_llvm additional_tests/mangling_test.ml + 24 diff --git a/EML/tests/riscv.t b/EML/tests/riscv.t index a80dd850..228b833d 100644 --- a/EML/tests/riscv.t +++ b/EML/tests/riscv.t @@ -76,3 +76,6 @@ SPDX-License-Identifier: LGPL-3.0-or-later 1 1 1 + + $ make compile_riscv additional_tests/mangling_test.ml + 24 From 5c00aed93c67f27c19a1a932738e347b0ae68674 Mon Sep 17 00:00:00 2001 From: Victoria Ostrovskaya Date: Wed, 11 Mar 2026 15:12:33 +0300 Subject: [PATCH 74/74] add another solution Signed-off-by: Victoria Ostrovskaya --- EML/lib/backend/llvm_ir/analysis.ml | 20 +++++++++++++++----- EML/lib/backend/llvm_ir/analysis.mli | 1 + EML/lib/backend/ricsv/analysis.ml | 15 +++++++++++++-- EML/lib/backend/ricsv/analysis.mli | 1 + 4 files changed, 30 insertions(+), 7 deletions(-) diff --git a/EML/lib/backend/llvm_ir/analysis.ml b/EML/lib/backend/llvm_ir/analysis.ml index 12cba6f6..450b23ac 100644 --- a/EML/lib/backend/llvm_ir/analysis.ml +++ b/EML/lib/backend/llvm_ir/analysis.ml @@ -11,6 +11,7 @@ type function_layout = ; asm_name : string ; params : immediate list ; body : anf_expr + ; is_rec : bool } type analysis_result = @@ -36,9 +37,9 @@ let analyze (program : anf_program) = let raw = List.filter_map (function - | AnfValue (_, (func_name, arity, body), _) -> + | AnfValue (rec_flag, (func_name, arity, body), _) -> let params, body = params_of_anf body in - Some (func_name, arity, params, body) + Some (func_name, arity, params, body, rec_flag = Rec) | AnfEval _ -> None) program in @@ -51,7 +52,7 @@ let analyze (program : anf_program) = in let functions, _ = List.fold_left - (fun (reversed_functions, counts) (func_name, _arity, params, body) -> + (fun (reversed_functions, counts) (func_name, _arity, params, body, is_rec) -> let base_asm_name = mangle_reserved func_name in let duplicate_index = Base.Map.find counts func_name |> Option.value ~default:0 @@ -64,7 +65,8 @@ let analyze (program : anf_program) = then base_asm_name else base_asm_name ^ "_" ^ Int.to_string duplicate_index in - { func_name; asm_name; params; body } :: reversed_functions, updated_counts) + ( { func_name; asm_name; params; body; is_rec } :: reversed_functions + , updated_counts )) ([], Base.Map.empty (module Base.String)) raw in @@ -81,6 +83,7 @@ let analyze (program : anf_program) = ; asm_name = "main" ; params = [] ; body = AnfExpr (ComplexImmediate (ImmediateConst (ConstInt 0))) + ; is_rec = false } in functions @ [ synthetic_main ]) @@ -96,7 +99,14 @@ let analyze (program : anf_program) = Some (func_layout.asm_name, List.length func_layout.params) | Some _ -> find (i - 1)) in - find func_index + let start_index = + match Base.List.nth functions func_index with + | Some func_layout + when func_layout.is_rec && String.equal func_layout.func_name var_name -> + func_index + | _ -> func_index - 1 + in + find start_index in { functions; resolve = resolver } ;; diff --git a/EML/lib/backend/llvm_ir/analysis.mli b/EML/lib/backend/llvm_ir/analysis.mli index 25d4c9eb..92ca3459 100644 --- a/EML/lib/backend/llvm_ir/analysis.mli +++ b/EML/lib/backend/llvm_ir/analysis.mli @@ -9,6 +9,7 @@ type function_layout = ; asm_name : string ; params : immediate list ; body : anf_expr + ; is_rec : bool } type analysis_result = diff --git a/EML/lib/backend/ricsv/analysis.ml b/EML/lib/backend/ricsv/analysis.ml index c9421ee7..cb11905d 100644 --- a/EML/lib/backend/ricsv/analysis.ml +++ b/EML/lib/backend/ricsv/analysis.ml @@ -13,6 +13,7 @@ type function_layout = ; asm_name : string ; params : immediate list ; body : anf_expr + ; is_rec : bool ; slots_count : int ; max_stack_args : int ; max_create_tuple_array_bytes : int @@ -156,13 +157,14 @@ let analyze (program : anf_program) = let analyzed_functions_raw = List.filter_map (function - | AnfValue (_, (func_name, arity, body), _) -> + | AnfValue (rec_flag, (func_name, arity, body), _) -> let params, body = params_of_anf body in Some ( func_name , arity , params , body + , rec_flag = Rec , slots_in_anf body , max_stack_args_anf body , max_create_tuple_array_anf body ) @@ -183,6 +185,7 @@ let analyze (program : anf_program) = , _arity , params , body + , is_rec , slots_count , max_stack_args , max_create_tuple_array_bytes ) -> @@ -202,6 +205,7 @@ let analyze (program : anf_program) = ; asm_name ; params ; body + ; is_rec ; slots_count ; max_stack_args ; max_create_tuple_array_bytes @@ -222,6 +226,7 @@ let analyze (program : anf_program) = ; asm_name = "main" ; params = [] ; body = AnfExpr (ComplexImmediate (ImmediateConst (ConstInt 0))) + ; is_rec = false ; slots_count = 0 ; max_stack_args = 0 ; max_create_tuple_array_bytes = 0 @@ -242,7 +247,13 @@ let analyze (program : anf_program) = Some (fn.asm_name, List.length fn.params) | Some _ -> find_visible_function (i - 1)) in - find_visible_function current_function_index + let start_index = + match Base.List.nth functions current_function_index with + | Some fn when fn.is_rec && String.equal fn.func_name variable_name -> + current_function_index + | _ -> current_function_index - 1 + in + find_visible_function start_index in { arity_map; functions; resolve = resolver } ;; diff --git a/EML/lib/backend/ricsv/analysis.mli b/EML/lib/backend/ricsv/analysis.mli index 4213140a..f941d6eb 100644 --- a/EML/lib/backend/ricsv/analysis.mli +++ b/EML/lib/backend/ricsv/analysis.mli @@ -9,6 +9,7 @@ type function_layout = ; asm_name : string ; params : immediate list ; body : anf_expr + ; is_rec : bool ; slots_count : int ; max_stack_args : int ; max_create_tuple_array_bytes : int