From d7250f437a31877802f000fc8aef0431bf6b9a2b Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 16 Nov 2025 21:08:25 -0500 Subject: [PATCH 01/62] Just use `Int` for `Hashcons` --- src/lib/hashmap.ml | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/lib/hashmap.ml b/src/lib/hashmap.ml index 297f6267..b069c846 100644 --- a/src/lib/hashmap.ml +++ b/src/lib/hashmap.ml @@ -4,8 +4,8 @@ open Regular.Std module Make(K : Hashtbl.Key) = struct module M = Map.Make(K) module P = Patricia_tree.Make(struct - include Int63 - let size = 63 + include Int + let size = Sys.int_size_in_bits end) type +'a t = 'a M.t P.t @@ -21,10 +21,8 @@ module Make(K : Hashtbl.Key) = struct let empty = P.empty let is_empty = P.is_empty - let hash' k = Int63.of_int @@ K.hash k - let find_exn t k = - hash' k |> P.find_exn t |> + K.hash k |> P.find_exn t |> Fn.flip Map.find k |> function | None -> raise Not_found | Some v -> v @@ -35,17 +33,17 @@ module Make(K : Hashtbl.Key) = struct let mem t k = try ignore (find_exn t k); true with | Not_found -> false - let singleton k v = P.singleton (hash' k) (M.singleton k v) + let singleton k v = P.singleton (K.hash k) (M.singleton k v) - let set t ~key ~data = P.update t (hash' key) ~f:(function + let set t ~key ~data = P.update t (K.hash key) ~f:(function | Some m -> Map.set m ~key ~data | None -> M.singleton key data) - let add_multi t ~key ~data = P.update t (hash' key) ~f:(function + let add_multi t ~key ~data = P.update t (K.hash key) ~f:(function | Some m -> Map.add_multi m ~key ~data | None -> M.singleton key [data]) - let add_exn t ~key ~data = P.update t (hash' key) ~f:(function + let add_exn t ~key ~data = P.update t (K.hash key) ~f:(function | None -> M.singleton key data | Some m -> match Map.add m ~key ~data with | `Duplicate -> raise Duplicate @@ -54,23 +52,23 @@ module Make(K : Hashtbl.Key) = struct let add t ~key ~data = try `Ok (add_exn t ~key ~data) with | Duplicate -> `Duplicate - let remove t k = P.change t (hash' k) + let remove t k = P.change t (K.hash k) ~f:(Option.bind ~f:(fun m -> let m' = Map.remove m k in Option.some_if (not @@ Map.is_empty m') m')) - let update t k ~f = P.update t (hash' k) ~f:(function + let update t k ~f = P.update t (K.hash k) ~f:(function | None -> M.singleton k @@ f None | Some m -> Map.update m k ~f) let update_with t k ~has ~nil = - P.update_with t (hash' k) + P.update_with t (K.hash k) ~nil:(fun () -> M.singleton k @@ nil ()) ~has:(fun m -> Map.update m k ~f:(function | Some v -> has v | None -> nil ())) - let change t k ~f = P.change t (hash' k) ~f:(function + let change t k ~f = P.change t (K.hash k) ~f:(function | None -> f None |> Option.map ~f:(fun v -> M.singleton k v) | Some m -> let m' = Map.change m k ~f in From 18321c6f0b0bcd9bda6241786a3c3f3c27012e2e Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Tue, 4 Nov 2025 04:29:43 -0500 Subject: [PATCH 02/62] sroa --- src/lib/passes/passes.ml | 3 + src/lib/passes/sroa/sroa.ml | 47 +++++++ src/lib/passes/sroa/sroa.mli | 18 +++ src/lib/passes/sroa/sroa_impl.ml | 203 +++++++++++++++++++++++++++++++ 4 files changed, 271 insertions(+) create mode 100644 src/lib/passes/sroa/sroa.ml create mode 100644 src/lib/passes/sroa/sroa.mli create mode 100644 src/lib/passes/sroa/sroa_impl.ml diff --git a/src/lib/passes/passes.ml b/src/lib/passes/passes.ml index 3ff18ea9..6e0c2ab4 100644 --- a/src/lib/passes/passes.ml +++ b/src/lib/passes/passes.ml @@ -26,6 +26,9 @@ let retype tenv m = let optimize tenv m = let module Cv = Context.Virtual in + (* let* m = Context.Virtual.Module.map_funs m ~f:Sroa.run in *) + (* let*? m = Module.map_funs_err m ~f:Remove_dead_vars.run in *) + (* let*? tenv = retype tenv m in *) let*? m = Module.map_funs_err m ~f:Promote_slots.run in let*? tenv = retype tenv m in let*? m = Module.map_funs_err m ~f:(Sccp.run tenv) in diff --git a/src/lib/passes/sroa/sroa.ml b/src/lib/passes/sroa/sroa.ml new file mode 100644 index 00000000..888a6e8c --- /dev/null +++ b/src/lib/passes/sroa/sroa.ml @@ -0,0 +1,47 @@ +open Core +open Virtual +open Sroa_impl + +module V = Make(struct + module Insn = struct + type t = Insn.t + type op = Insn.op + + let op = Insn.op + let label = Insn.label + + let load_or_store_to (o : op) = match o with + | `load (_, (#Type.basic as ty), `var x) -> Some (x, ty) + | `store ((#Type.basic as ty), _, `var x) -> Some (x, ty) + | _ -> None + + let subst_load_or_store (o : op) ~f = match o with + | `load (x, ty, `var a) -> `load (x, ty, `var (f a)) + | `store (ty, v, `var a) -> `store (ty, v, `var (f a)) + | op -> op + + let offset (o : op) = match o with + | `bop (_, `add _, `var x, `int (i, _)) + | `bop (_, `add _, `int (i, _), `var x) -> + Some (x, Bv.to_int64 i) + | `bop (_, `sub _, `var x, `int (i, _)) -> + Some (x, Int64.neg @@ Bv.to_int64 i) + | _ -> None + + let lhs = Insn.lhs_of_op + let free_vars = Insn.free_vars_of_op + end + + module Ctrl = Ctrl + module Blk = Blk + module Func = Func + module Cfg = Cfg + end) + +open Context.Syntax + +let run fn = + let* () = Context.unless (Dict.mem (Func.dict fn) Tags.ssa) @@ fun () -> + Context.failf "In SROA: function $%s is not in SSA form" + (Func.name fn) () in + V.run fn diff --git a/src/lib/passes/sroa/sroa.mli b/src/lib/passes/sroa/sroa.mli new file mode 100644 index 00000000..f8f40cb6 --- /dev/null +++ b/src/lib/passes/sroa/sroa.mli @@ -0,0 +1,18 @@ +(** Performs SROA (Scalar Replacement of Aggregates). + + This pass aims to analyze the usage of slots in a function, where + individual fields of the slot can be separated into their own + unique slots themselves. + + This is typical when a slot is allocated to hold a structure and + pointer arithmetic is used to store to and load from individual + fields of this structure. If these fields can become their own slots, + then they can possibly promoted to SSA variables, enabling more + optimization opportunities. + + The function must be in SSA form for this pass. +*) + +open Virtual + +val run : func -> func Context.t diff --git a/src/lib/passes/sroa/sroa_impl.ml b/src/lib/passes/sroa/sroa_impl.ml new file mode 100644 index 00000000..01101215 --- /dev/null +++ b/src/lib/passes/sroa/sroa_impl.ml @@ -0,0 +1,203 @@ +open Core +open Regular.Std +open Graphlib.Std + +module Scalar = struct + module T = struct + type t = Var.t * int64 [@@deriving equal, compare, sexp] + end + include T + module Map = Map.Make(T) +end + +type scalar = Scalar.t [@@deriving equal, sexp] + +module Scalars = struct + type t = Virtual.slot Scalar.Map.t + let empty : t = Scalar.Map.empty + let find (m : t) slot offset = Map.find m (slot, offset) + let mem (m : t) slot offset = Map.mem m (slot, offset) + let add (m : t) slot offset data = Map.set m ~key:(slot, offset) ~data +end + +type scalars = Scalars.t + +type value = + | Top + | Bottom + | Offset of scalar +[@@deriving equal, sexp] + +module Value = struct + type t = value [@@deriving equal, sexp] + + let merge a b = match a, b with + | Offset s1, Offset s2 when equal_scalar s1 s2 -> a + | Offset _, Offset _ -> Top + | Top, _ | _, Top -> Top + | Bottom, _ -> b + | _, Bottom -> a +end + +module State = struct + type t = value Var.Map.t [@@deriving equal, sexp] + + let merge (a : t) (b : t) = + Map.merge_skewed a b ~combine:(fun ~key:_ a b -> Value.merge a b) + + let derive (s : t) ptr offset = match Map.find s ptr with + | Some Offset (ptr', offset') -> Offset (ptr', Int64.(offset + offset')) + | Some (Top | Bottom as v) -> v + | None -> Bottom +end + +type state = State.t [@@deriving equal, sexp] + +(* XXX: do we need this? *) +(* let escaping fv x s = *) +(* Set.fold (fv x) ~init:s ~f:(fun s v -> *) +(* match Map.find s v with *) +(* | Some Offset (ptr, _) -> Map.set s ~key:ptr ~data:Top *) +(* | Some _ | None -> s) *) + +module type L = sig + module Insn : sig + type t + type op + val op : t -> op + val label : t -> Label.t + val load_or_store_to : op -> (Var.t * Type.basic) option + val subst_load_or_store : op -> f:(Var.t -> Var.t) -> op + val offset : op -> scalar option + val lhs : op -> Var.t option + val free_vars : op -> Var.Set.t + + (* TODO: should we just do this? *) + (* val transfer : state -> op -> state *) + end + + module Ctrl : sig + type t + val free_vars : t -> Var.Set.t + end + + module Blk : sig + type t + val label : t -> Label.t + val insns : ?rev:bool -> t -> Insn.t seq + val ctrl : t -> Ctrl.t + val map_insns : t -> f:(Label.t -> Insn.op -> Insn.op) -> t + end + + module Func : sig + type t + val slots : ?rev:bool -> t -> Virtual.slot seq + val blks : ?rev:bool -> t -> Blk.t seq + val map_of_blks : t -> Blk.t Label.Tree.t + val map_blks : t -> f:(Blk.t -> Blk.t) -> t + val insert_slot : t -> Virtual.slot -> t + end + + module Cfg : sig + include Label.Graph_s + val create : Func.t -> t + end +end + +module Make(M : L) : sig + val run : M.Func.t -> M.Func.t Context.t +end = struct + open M + + let transfer_op s op = + let value = match Insn.offset op with + | Some (ptr, offset) -> State.derive s ptr offset + | None -> Bottom in + let s = match Insn.lhs op with + | Some lhs -> Map.set s ~key:lhs ~data:value + | None -> s in + match value with + | Bottom | Top -> + (* escaping Insn.free_vars op s *) + s + | Offset _ -> s + + let transfer_insn s i = transfer_op s @@ Insn.op i + + let transfer blks l s = + Label.Tree.find blks l |> + Option.value_map ~default:s ~f:(fun b -> + let s = Blk.insns b |> Seq.fold ~init:s ~f:transfer_insn in + (* escaping Ctrl.free_vars (Blk.ctrl b) s *) + s + ) + + let initialize fn = + let slots = + Func.slots fn |> Seq.fold ~init:Var.Map.empty ~f:(fun acc s -> + let key = Virtual.Slot.var s in + let data = Offset (key, 0L) in + Map.set acc ~key ~data) in + let nodes = Label.Map.singleton Label.pseudoentry slots in + Solution.create nodes Var.Map.empty + + let analyze fn = + let cfg = Cfg.create fn in + let blks = Func.map_of_blks fn in + Graphlib.fixpoint (module Cfg) cfg + ~init:(initialize fn) + ~start:Label.pseudoentry + ~equal:State.equal + ~merge:State.merge + ~f:(transfer blks) + + let build_scalar_map fn s : scalars Context.t = + let open Context.Syntax in + Func.blks fn |> Context.Seq.fold ~init:Scalars.empty ~f:(fun acc b -> + let s = Solution.get s @@ Blk.label b in + Format.eprintf "%a: %a\n%!" Label.pp (Blk.label b) Sexp.pp_hum (sexp_of_state s); + Blk.insns b |> Context.Seq.fold ~init:(acc, s) ~f:(fun (acc, s) i -> + let op = Insn.op i in + let s' = transfer_insn s i in + let+ acc = + match Insn.load_or_store_to op with + | None -> !!acc + | Some (ptr, ty) -> match Map.find s ptr with + | Some Offset (slot, offset) + when Scalars.mem acc slot offset -> !!acc + | Some Offset (slot, offset) -> + let* x = Context.Var.fresh in + let size = Type.sizeof_basic ty / 8 in + let*? slot' = Virtual.Slot.create x ~size ~align:size in + !!(Scalars.add acc slot offset slot') + | _ -> !!acc in + acc, s') >>| fst) + + let insert_new_slots fn (m : scalars) = + Map.fold m ~init:fn ~f:(fun ~key:_ ~data:s fn -> + Func.insert_slot fn s) + + let rewrite fn s m = + Func.map_blks fn ~f:(fun b -> + let s = ref @@ Solution.get s @@ Blk.label b in + Blk.map_insns b ~f:(fun _ op -> + let op' = match Insn.load_or_store_to op with + | None -> op + | Some (ptr, _) -> match Map.find !s ptr with + | Some Offset (slot, offset) -> + Scalars.find m slot offset |> + Option.value_map ~default:op ~f:(fun s -> + Insn.subst_load_or_store op ~f:(fun x -> + if Var.(x <> ptr) then x + else Virtual.Slot.var s)) + | _ -> op in + s := transfer_op !s op; + op')) + + let run fn = + let open Context.Syntax in + let s = analyze fn in + let+ m = build_scalar_map fn s in + let fn = insert_new_slots fn m in + rewrite fn s m +end From b1558bb0b340fb40200d877ffdc6256527fe525b Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sat, 8 Nov 2025 17:11:46 -0500 Subject: [PATCH 03/62] Relaxed guard on zext for `move_ri` callback --- src/lib/machine/x86/x86_amd64_isel.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib/machine/x86/x86_amd64_isel.ml b/src/lib/machine/x86/x86_amd64_isel.ml index 898abb80..1b117ba1 100644 --- a/src/lib/machine/x86/x86_amd64_isel.ml +++ b/src/lib/machine/x86/x86_amd64_isel.ml @@ -128,10 +128,10 @@ end = struct !!![I.movsd (Oreg (x, xt)) (Oreg (y, yt))] | _ -> !!None - let move_ri_x_y env = + let move_ri_x_y ?(zx = false) env = let*! x, xt = S.regvar env "x" in let*! y, yt = S.imm env "y" in - let*! () = guard @@ Type.equal_basic xt (bty yt) in + let*! () = guard (zx || Type.equal_basic xt (bty yt)) in if Bv.(y = zero) then !!![xor_gpr_self x xt] else @@ -2217,7 +2217,7 @@ end = struct let zext = [ move_rr_x_y ~zx:true; - move_ri_x_y; + move_ri_x_y ~zx:true; ] let fext ty = [ From 9814437f5f0b780af987d26783b59f9dc993e2db Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sat, 8 Nov 2025 17:55:01 -0500 Subject: [PATCH 04/62] progress --- src/lib/passes/passes.ml | 10 +- .../promote_slots/promote_slots_impl.ml | 15 +- src/lib/passes/sroa/sroa.ml | 127 ++++- src/lib/passes/sroa/sroa.mli | 1 + src/lib/passes/sroa/sroa_impl.ml | 455 ++++++++++++++---- src/test/data/opt/gcdext.vir.opt.sysv | 70 ++- src/test/data/opt/retmem.vir.opt.sysv | 26 +- src/test/data/opt/storetoload1.vir.opt | 9 +- src/test/data/opt/vaarg1.vir.opt.sysv | 138 +++--- src/test/data/opt/vaarg1.vir.opt.sysv.amd64 | 69 ++- .../opt/vaarg1.vir.opt.sysv.amd64.regalloc | 72 ++- src/test/data/opt/vaarg2.driver.sysv.amd64.c | 16 + src/test/data/opt/vaarg2.vir.opt.sysv | 119 +++-- src/test/test_opt.ml | 1 + 14 files changed, 727 insertions(+), 401 deletions(-) create mode 100644 src/test/data/opt/vaarg2.driver.sysv.amd64.c diff --git a/src/lib/passes/passes.ml b/src/lib/passes/passes.ml index 6e0c2ab4..526b24c5 100644 --- a/src/lib/passes/passes.ml +++ b/src/lib/passes/passes.ml @@ -26,9 +26,9 @@ let retype tenv m = let optimize tenv m = let module Cv = Context.Virtual in - (* let* m = Context.Virtual.Module.map_funs m ~f:Sroa.run in *) - (* let*? m = Module.map_funs_err m ~f:Remove_dead_vars.run in *) - (* let*? tenv = retype tenv m in *) + let* m = Context.Virtual.Module.map_funs m ~f:Sroa.run in + let*? m = Module.map_funs_err m ~f:Remove_dead_vars.run in + let*? tenv = retype tenv m in let*? m = Module.map_funs_err m ~f:Promote_slots.run in let*? tenv = retype tenv m in let*? m = Module.map_funs_err m ~f:(Sccp.run tenv) in @@ -60,6 +60,10 @@ let to_abi tenv m = ~data:(Seq.to_list @@ Module.data m) let optimize_abi m = + let* m = + Abi.Module.funs m |> + Context.Seq.map ~f:Sroa.run_abi >>| + Fun.compose (Abi.Module.with_funs m) Seq.to_list in let*? m = Abi.Module.map_funs_err m ~f:Promote_slots.run_abi in let*? m = Abi.Module.map_funs_err m ~f:Abi_loadopt.run in let m = Abi.Module.map_funs m ~f:Remove_disjoint_blks.run_abi in diff --git a/src/lib/passes/promote_slots/promote_slots_impl.ml b/src/lib/passes/promote_slots/promote_slots_impl.ml index 880c9dcd..213048c4 100644 --- a/src/lib/passes/promote_slots/promote_slots_impl.ml +++ b/src/lib/passes/promote_slots/promote_slots_impl.ml @@ -5,6 +5,8 @@ open Virtual module E = Monad.Result.Error +let debug = false + open E.Let module type L = sig @@ -116,9 +118,18 @@ module Make(M : L) = struct Func.slots env.fn |> Seq.fold ~init:Var.Map.empty ~f:(fun acc s -> match Qualify.go env s with - | Bad -> acc - | Write (_, t) -> Map.set acc ~key:(Slot.var s) ~data:t + | Bad -> + if debug then + Format.eprintf "slot %a is bad\n%!" Var.pp (Slot.var s); + acc + | Write (_, t) -> + if debug then + Format.eprintf "promoting %a\n%!" Var.pp (Slot.var s); + Map.set acc ~key:(Slot.var s) ~data:t | Read _ -> + if debug then + Format.eprintf "slot %a is read, but never written to\n%!" + Var.pp (Slot.var s); (* In this case, we read from the slot but never stored anything to it. It's undefined behavior, but it's also what the programmer intended, so we should cancel this promotion. *) diff --git a/src/lib/passes/sroa/sroa.ml b/src/lib/passes/sroa/sroa.ml index 888a6e8c..a0953f17 100644 --- a/src/lib/passes/sroa/sroa.ml +++ b/src/lib/passes/sroa/sroa.ml @@ -2,37 +2,122 @@ open Core open Virtual open Sroa_impl +let var_set_of_option = function + | Some x -> Var.Set.singleton x + | None -> Var.Set.empty + +let offset = function + | `bop (_, `add _, `var x, `int (i, _)) + | `bop (_, `add _, `int (i, _), `var x) -> + Some (x, Bv.to_int64 i) + | `bop (_, `sub _, `var x, `int (i, _)) -> + Some (x, Int64.neg @@ Bv.to_int64 i) + | _ -> None + +(* TODO: more cases? *) +let copy_of = function + | `uop (_, `copy _, `var x) -> Some x + | `bop (_, `add _, `var x, `int (i, _)) + | `bop (_, `add _, `int (i, _), `var x) + | `bop (_, `sub _, `var x, `int (i, _)) + when Bv.(i = zero) -> Some x + | _ -> None + +let escapes mty fv = function + | `store (ty, `var x, `var y) when mty ty -> Var.Set.of_list [x; y] + | `store (_, `var x, _) -> Var.Set.singleton x + | `load (_, ty, `var x) when mty ty -> Var.Set.singleton x + | `load _ -> Var.Set.empty + | o when Option.is_some (offset o) -> Var.Set.empty + | o when Option.is_some (copy_of o) -> Var.Set.empty + | o -> fv o +[@@specialise] + +let load_or_store_to o = match o with + | `load (_, (#Type.basic as b), `var x) -> Some (x, b, Load) + | `store ((#Type.basic as b), _, `var x) -> Some (x, b, Store) + | _ -> None + +let subst_load_or_store o ~f = match o with + | `load (x, (#Type.basic as b), `var a) -> `load (x, b, `var (f a)) + | `store ((#Type.basic as b), v, `var a) -> `store (b, v, `var (f a)) + | op -> op + +let add x ty b o = + assert Int64.(o > 0L); + let i = Bv.(int64 o mod modulus (Type.sizeof_imm_base ty)) in + `bop (x, `add (ty :> Type.basic), `var b, `int (i, (ty :> Type.imm))) + +let is_named = function + | `name _ -> true + | _ -> false + module V = Make(struct module Insn = struct type t = Insn.t type op = Insn.op + let create ~label op = Insn.create op ~label + let with_op = Insn.with_op + let op = Insn.op let label = Insn.label + let lhs = Insn.lhs_of_op + let offset = (offset :> op -> _) + let copy_of = (copy_of :> op -> _) + let fv = Insn.free_vars_of_op + let escapes = (escapes is_named fv :> op -> _) + let load_or_store_to = (load_or_store_to :> op -> _) + let subst_load_or_store ~f = (subst_load_or_store ~f :> op -> _) + let add x ty b o = (add x ty b o :> op) + end - let load_or_store_to (o : op) = match o with - | `load (_, (#Type.basic as ty), `var x) -> Some (x, ty) - | `store ((#Type.basic as ty), _, `var x) -> Some (x, ty) - | _ -> None + module Ctrl = struct + type t = ctrl + let escapes = Ctrl.free_vars + end - let subst_load_or_store (o : op) ~f = match o with - | `load (x, ty, `var a) -> `load (x, ty, `var (f a)) - | `store (ty, v, `var a) -> `store (ty, v, `var (f a)) - | op -> op - - let offset (o : op) = match o with - | `bop (_, `add _, `var x, `int (i, _)) - | `bop (_, `add _, `int (i, _), `var x) -> - Some (x, Bv.to_int64 i) - | `bop (_, `sub _, `var x, `int (i, _)) -> - Some (x, Int64.neg @@ Bv.to_int64 i) + module Blk = Blk + module Func = Func + module Cfg = Cfg + end) + +module A = Make(struct + open Abi + + module Insn = struct + type t = Insn.t + type op = Insn.op + + let create ~label op = Insn.create op ~label + let with_op = Insn.with_op + + let op = Insn.op + let label = Insn.label + + let lhs = function + | `bop (x, _, _, _) + | `uop (x, _, _) + | `sel (x, _, _, _, _) + | `load (x, _, _) + -> Some x | _ -> None - let lhs = Insn.lhs_of_op - let free_vars = Insn.free_vars_of_op + let offset = (offset :> op -> _) + let copy_of = (copy_of :> op -> _) + let fv = Insn.free_vars_of_op + let escapes = (escapes (const false) fv :> op -> _) + let load_or_store_to = (load_or_store_to :> op -> _) + let subst_load_or_store ~f = (subst_load_or_store ~f :> op -> _) + let add x ty b o = (add x ty b o :> op) + end + + module Ctrl = struct + type t = ctrl + (* TODO: see if we can relax passing of block params *) + let escapes = Ctrl.free_vars end - module Ctrl = Ctrl module Blk = Blk module Func = Func module Cfg = Cfg @@ -45,3 +130,9 @@ let run fn = Context.failf "In SROA: function $%s is not in SSA form" (Func.name fn) () in V.run fn + +let run_abi fn = + let* () = Context.unless (Dict.mem (Abi.Func.dict fn) Tags.ssa) @@ fun () -> + Context.failf "In SROA (ABI): function $%s is not in SSA form" + (Abi.Func.name fn) () in + A.run fn diff --git a/src/lib/passes/sroa/sroa.mli b/src/lib/passes/sroa/sroa.mli index f8f40cb6..0f02dd46 100644 --- a/src/lib/passes/sroa/sroa.mli +++ b/src/lib/passes/sroa/sroa.mli @@ -16,3 +16,4 @@ open Virtual val run : func -> func Context.t +val run_abi : Abi.func -> Abi.func Context.t diff --git a/src/lib/passes/sroa/sroa_impl.ml b/src/lib/passes/sroa/sroa_impl.ml index 01101215..1ef4c0bc 100644 --- a/src/lib/passes/sroa/sroa_impl.ml +++ b/src/lib/passes/sroa/sroa_impl.ml @@ -1,84 +1,136 @@ +(* A conservative implementation of SROA (Scalar Replacement of Aggregates). + + Most notably, we don't consider compound/aggregate types at all. Why? + Because they are second-class citizens in our IR, and they are highly + dependent on the implementation details of the target ABI. + + After ABI lowering, they are desugared, after which this pass becomes + more useful. +*) + open Core open Regular.Std open Graphlib.Std +let debug = false + +(* A scalar access. *) module Scalar = struct module T = struct - type t = Var.t * int64 [@@deriving equal, compare, sexp] + type t = Var.t * int64 [@@deriving compare, equal, hash, sexp] end include T module Map = Map.Make(T) + module Table = Hashtbl.Make(T) end -type scalar = Scalar.t [@@deriving equal, sexp] +type scalar = Scalar.t [@@deriving compare, equal, hash, sexp] +type scalars = Virtual.slot Scalar.Map.t -module Scalars = struct - type t = Virtual.slot Scalar.Map.t - let empty : t = Scalar.Map.empty - let find (m : t) slot offset = Map.find m (slot, offset) - let mem (m : t) slot offset = Map.mem m (slot, offset) - let add (m : t) slot offset data = Map.set m ~key:(slot, offset) ~data -end +(* Lattice of scalar accesses. -type scalars = Scalars.t + [Top]: the access is inconsistent or escapes + [Offset (s, o)]: access to slot [s] at offset [o] +*) type value = | Top - | Bottom | Offset of scalar [@@deriving equal, sexp] +let pp_value ppf = function + | Top -> Format.fprintf ppf "\u{22a4}" + | Offset (slot, offset) -> Format.fprintf ppf "%a+0x%Lx" Var.pp slot offset + +let pp_bot ppf () = Format.fprintf ppf "\u{22a5}" [@@ocaml.warning "-32"] + module Value = struct type t = value [@@deriving equal, sexp] + (* Join two abstract values. *) let merge a b = match a, b with | Offset s1, Offset s2 when equal_scalar s1 s2 -> a | Offset _, Offset _ -> Top | Top, _ | _, Top -> Top - | Bottom, _ -> b - | _, Bottom -> a end -module State = struct +type slots = Virtual.slot Var.Map.t + +module State : sig type t = value Var.Map.t [@@deriving equal, sexp] + val merge : t -> t -> t + val derive : slots -> t -> Var.t -> int64 -> value option +end = struct + (* NB: the keys are the LHS of a given instruction *) + type t = value Var.Map.t [@@deriving equal, sexp] + + let merge a b = Map.merge_skewed a b + ~combine:(fun ~key:_ a b -> Value.merge a b) - let merge (a : t) (b : t) = - Map.merge_skewed a b ~combine:(fun ~key:_ a b -> Value.merge a b) + let is_bad slots ptr offset = + Int64.(offset < 0L) || match Map.find slots ptr with + | Some s -> + let size = Int64.of_int @@ Virtual.Slot.size s in + Int64.(offset >= size) + | None -> false - let derive (s : t) ptr offset = match Map.find s ptr with - | Some Offset (ptr', offset') -> Offset (ptr', Int64.(offset + offset')) - | Some (Top | Bottom as v) -> v - | None -> Bottom + let derive slots s ptr offset = match Map.find s ptr with + | (Some Top | None) as v -> v + | Some Offset (ptr', offset') -> + let offset'' = Int64.(offset + offset') in + (* Out of bounds offset to a slot should be undefined. *) + let value = + if is_bad slots ptr' offset'' then Top + else Offset (ptr', offset'') in + Some value end type state = State.t [@@deriving equal, sexp] -(* XXX: do we need this? *) -(* let escaping fv x s = *) -(* Set.fold (fv x) ~init:s ~f:(fun s v -> *) -(* match Map.find s v with *) -(* | Some Offset (ptr, _) -> Map.set s ~key:ptr ~data:Top *) -(* | Some _ | None -> s) *) +let pp_state ppf s = + let pp_sep ppf () = Format.fprintf ppf "@ " in + let pp_elt ppf (x, v) = Format.fprintf ppf "%a[%a]" Var.pp x pp_value v in + let pp_elts = Format.pp_print_list ~pp_sep pp_elt in + Format.fprintf ppf "@[%a@]" pp_elts @@ Map.to_alist s +[@@ocaml.warning "-32"] + +type load_or_store = Load | Store + +let pp_load_or_store ppf = function + | Load -> Format.fprintf ppf "load" + | Store -> Format.fprintf ppf "store" + +let is_store = function + | Load -> false + | Store -> true module type L = sig module Insn : sig type t type op + + val create : label:Label.t -> op -> t + + (* General accessors. *) val op : t -> op val label : t -> Label.t - val load_or_store_to : op -> (Var.t * Type.basic) option - val subst_load_or_store : op -> f:(Var.t -> Var.t) -> op - val offset : op -> scalar option + + (* Used during analysis. *) val lhs : op -> Var.t option - val free_vars : op -> Var.Set.t + val offset : op -> scalar option + val copy_of : op -> Var.t option + val escapes : op -> Var.Set.t - (* TODO: should we just do this? *) - (* val transfer : state -> op -> state *) + (* Used during replacement. *) + val load_or_store_to : op -> (Var.t * Type.basic * load_or_store) option + val subst_load_or_store : f:(Var.t -> Var.t) -> op -> op + val with_op : t -> op -> t + val add : Var.t -> Type.imm_base -> Var.t -> int64 -> op end module Ctrl : sig type t - val free_vars : t -> Var.Set.t + val escapes : t -> Var.Set.t end module Blk : sig @@ -87,6 +139,7 @@ module type L = sig val insns : ?rev:bool -> t -> Insn.t seq val ctrl : t -> Ctrl.t val map_insns : t -> f:(Label.t -> Insn.op -> Insn.op) -> t + val with_insns : t -> Insn.t list -> t end module Func : sig @@ -95,6 +148,7 @@ module type L = sig val blks : ?rev:bool -> t -> Blk.t seq val map_of_blks : t -> Blk.t Label.Tree.t val map_blks : t -> f:(Blk.t -> Blk.t) -> t + val with_blks : t -> Blk.t list -> t Or_error.t val insert_slot : t -> Virtual.slot -> t end @@ -109,95 +163,288 @@ module Make(M : L) : sig end = struct open M - let transfer_op s op = + let escaping f x s = + Set.fold (f x) ~init:s ~f:(fun s v -> + match Map.find s v with + | Some Offset (ptr, _) -> + Map.set s ~key:ptr ~data:Top + | Some _ | None -> s) + + let transfer_op slots s op = let value = match Insn.offset op with - | Some (ptr, offset) -> State.derive s ptr offset - | None -> Bottom in - let s = match Insn.lhs op with - | Some lhs -> Map.set s ~key:lhs ~data:value - | None -> s in - match value with - | Bottom | Top -> - (* escaping Insn.free_vars op s *) - s - | Offset _ -> s - - let transfer_insn s i = transfer_op s @@ Insn.op i - - let transfer blks l s = + | Some (ptr, offset) -> State.derive slots s ptr offset + | None -> Insn.copy_of op |> Option.bind ~f:(Map.find s) in + let s = match value, Insn.lhs op with + | Some v, Some lhs -> Map.set s ~key:lhs ~data:v + | None, _ | _, None -> s in + escaping Insn.escapes op s + + let transfer slots blks l s = Label.Tree.find blks l |> Option.value_map ~default:s ~f:(fun b -> - let s = Blk.insns b |> Seq.fold ~init:s ~f:transfer_insn in - (* escaping Ctrl.free_vars (Blk.ctrl b) s *) - s - ) + Blk.insns b |> Seq.map ~f:Insn.op |> + Seq.fold ~init:s ~f:(transfer_op slots) |> + escaping Ctrl.escapes (Blk.ctrl b)) - let initialize fn = + let initialize slots blks = + (* Set all slots to point to their own base address. *) + let slots = Map.mapi slots ~f:(fun ~key ~data:_ -> Offset (key, 0L)) in + (* Any slot that escapes should immediately be set to `Top`. *) let slots = - Func.slots fn |> Seq.fold ~init:Var.Map.empty ~f:(fun acc s -> - let key = Virtual.Slot.var s in - let data = Offset (key, 0L) in - Map.set acc ~key ~data) in + Label.Tree.fold blks ~init:slots ~f:(fun ~key:_ ~data init -> + Blk.insns data |> Seq.fold ~init ~f:(fun s i -> + escaping Insn.escapes (Insn.op i) s) |> + escaping Ctrl.escapes (Blk.ctrl data)) in + (* Start at pseudoentry. *) let nodes = Label.Map.singleton Label.pseudoentry slots in Solution.create nodes Var.Map.empty - let analyze fn = + let analyze slots fn = let cfg = Cfg.create fn in let blks = Func.map_of_blks fn in Graphlib.fixpoint (module Cfg) cfg - ~init:(initialize fn) + ~init:(initialize slots blks) ~start:Label.pseudoentry ~equal:State.equal ~merge:State.merge - ~f:(transfer blks) + ~f:(transfer slots blks) - let build_scalar_map fn s : scalars Context.t = - let open Context.Syntax in - Func.blks fn |> Context.Seq.fold ~init:Scalars.empty ~f:(fun acc b -> - let s = Solution.get s @@ Blk.label b in - Format.eprintf "%a: %a\n%!" Label.pp (Blk.label b) Sexp.pp_hum (sexp_of_state s); - Blk.insns b |> Context.Seq.fold ~init:(acc, s) ~f:(fun (acc, s) i -> - let op = Insn.op i in - let s' = transfer_insn s i in - let+ acc = - match Insn.load_or_store_to op with - | None -> !!acc - | Some (ptr, ty) -> match Map.find s ptr with - | Some Offset (slot, offset) - when Scalars.mem acc slot offset -> !!acc - | Some Offset (slot, offset) -> - let* x = Context.Var.fresh in - let size = Type.sizeof_basic ty / 8 in - let*? slot' = Virtual.Slot.create x ~size ~align:size in - !!(Scalars.add acc slot offset slot') - | _ -> !!acc in - acc, s') >>| fst) - - let insert_new_slots fn (m : scalars) = - Map.fold m ~init:fn ~f:(fun ~key:_ ~data:s fn -> - Func.insert_slot fn s) - - let rewrite fn s m = - Func.map_blks fn ~f:(fun b -> + let collect_slots fn = + Func.slots fn |> Seq.fold ~init:Var.Map.empty ~f:(fun acc s -> + Map.set acc ~key:(Virtual.Slot.var s) ~data:s) + + let is_base s offset size = match offset with + | 0L -> Virtual.Slot.size s = size + | _ -> false + + (* A memory access for a slot. *) + type access = { + insn : Insn.t; + off : int64; + ty : Type.basic; + ldst : load_or_store; + } + + let cmp_access a b = Int64.compare a.off b.off + + let pp_access ppf a = + Format.fprintf ppf "%a[%a.%a +0x%Lx]" + Label.pp (Insn.label a.insn) + pp_load_or_store a.ldst + Type.pp_basic a.ty + a.off + + let collect_accesses slots fn s : access list Var.Map.t = + (* Group all memory accesses by their corresponding slot. *) + Func.blks fn |> Seq.fold ~init:Var.Map.empty ~f:(fun init b -> let s = ref @@ Solution.get s @@ Blk.label b in - Blk.map_insns b ~f:(fun _ op -> - let op' = match Insn.load_or_store_to op with - | None -> op - | Some (ptr, _) -> match Map.find !s ptr with - | Some Offset (slot, offset) -> - Scalars.find m slot offset |> - Option.value_map ~default:op ~f:(fun s -> - Insn.subst_load_or_store op ~f:(fun x -> - if Var.(x <> ptr) then x - else Virtual.Slot.var s)) - | _ -> op in - s := transfer_op !s op; - op')) + Blk.insns b |> Seq.fold ~init ~f:(fun acc i -> + let op = Insn.op i in + let acc = match Insn.load_or_store_to op with + | None -> acc + | Some (ptr, ty, ldst) -> match Map.find !s ptr with + | Some Offset (base, off) -> + Map.add_multi acc ~key:base ~data:{insn = i; off; ty; ldst} + | _ -> acc in + s := transfer_op slots !s op; + acc)) |> + (* Filter out slots that are not splittable. *) + Map.map ~f:(List.sort ~compare:cmp_access) |> + Map.filteri ~f:(fun ~key ~data -> + let rec ok = function + | [] | [_] -> true + | x :: y :: rest -> + let sz = Type.sizeof_basic x.ty / 8 in + ((* No partial overlaps. *) + Int64.(x.off + of_int sz <= y.off) || + (* Allow exact re-use of the same region. *) + Int64.(x.off = y.off) && Type.equal_basic x.ty y.ty + ) && ok (y :: rest) in + let res = ok data in + if debug && not res then + Format.eprintf "filtering out accesses for %a\n%!" Var.pp key; + res) + + let overlaps oa sa ob sb = + Int64.(oa < ob + of_int sb && ob < oa + of_int sa) + + type partition = { + off : int64; + size : int; + mems : access list; + } + + let cmp_partition a b = Int64.compare a.off b.off + + let pp_partition ppf p = + Format.fprintf ppf "0x%Lx:%d: (%a)" + p.off p.size + (Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") + pp_access) p.mems + + let partition_acesses m : partition list Var.Map.t = + let rec merge acc c = function + | [] -> List.sort (c :: acc) ~compare:cmp_partition + | x :: xs -> + let sx = Type.sizeof_basic x.ty / 8 in + if overlaps c.off c.size x.off sx then + let o' = Int64.min c.off x.off in + let ec = Int64.(c.off + of_int c.size) in + let ex = Int64.(x.off + of_int sx) in + let e' = Int64.max ec ex in + let s' = Int64.(to_int_exn (e' - o')) in + merge acc { + off = o'; + size = s'; + mems = x :: c.mems; + } xs + else + merge (c :: acc) { + off = x.off; + size = sx; + mems = [x]; + } xs in + Map.map m ~f:(fun (accesses : access list) -> + List.sort accesses ~compare:(fun a b -> + Int64.compare a.off b.off) |> function + | [] -> [] + | (x : access) :: xs -> + merge [] { + off = x.off; + size = Type.sizeof_basic x.ty / 8; + mems = [x]; + } xs) + + (* Turn each partition into a concrete slot. *) + let materialize_partitions slots m = + let open Context.Syntax in + Map.to_sequence m |> Context.Seq.fold + ~init:Scalar.Map.empty ~f:(fun init (base, ps) -> + let s = Map.find_exn slots base in + List.filter ps ~f:(fun p -> + not @@ is_base s p.off p.size) |> + Context.List.fold ~init ~f:(fun acc p -> + let* x = Context.Var.fresh in + let*? s = Virtual.Slot.create x ~size:p.size ~align:p.size in + if debug then + Format.eprintf "new slot %a, base=%a, off=0x%Lx, size=%d\n%!" + Var.pp x Var.pp base p.off p.size; + !!(Map.set acc ~key:(base, p.off) ~data:s))) + + let cover_exact m base off size = + Map.find m base |> Option.bind ~f:(fun ps -> + let rec go o r acc = function + | _ when r <= 0 -> Some (List.rev acc) + | [] -> None + | p :: ps -> + let pe = Int64.(p.off + of_int p.size) in + let re = Int64.(o + of_int r) in + if Int64.(p.off <= o && re <= pe) then + (* Request satisfies entire partition. *) + Some (List.rev (p.off :: acc)) + else if Int64.(p.off = o) && p.size <= r then + (* Request is partly covered by the partition. *) + let o' = Int64.(o + of_int p.size) in + go o' (r - p.size) (p.off :: acc) ps + else if Int64.(p.off < o) then + (* Partition starts before this request. *) + go o r acc ps + else None in + go off size [] ps) + + (* pre: op is a store *) + let split_into_parts op ptr base covers m = + List.filter_map covers ~f:(fun o -> + Map.find m (base, o) |> Option.map ~f:(fun s -> + Insn.subst_load_or_store op + ~f:(const @@ Virtual.Slot.var s))) + + let insert_new_slots fn m = Map.fold m ~init:fn + ~f:(fun ~key:_ ~data fn -> Func.insert_slot fn data) + + let rewrite_one parts m s i = + let open Context.Syntax in + let op = Insn.op i in + let* word = Context.target >>| Target.word in + match Insn.load_or_store_to op with + | None -> !![i] + | Some (ptr, ty, load_or_store) -> + if debug then + Format.eprintf "%a: looking at %a.%a to %a\n%!" + Label.pp (Insn.label i) + pp_load_or_store load_or_store + Type.pp_basic ty + Var.pp ptr; + let sz = Type.sizeof_basic ty / 8 in + match Map.find !s ptr with + | Some Top | None -> !![i] + | Some Offset (base, off) -> + match cover_exact parts base off sz with + | Some [o] -> + if debug then + Format.eprintf "exact=0x%Lx, off=0x%Lx, base=%a\n%!" o off Var.pp base; + let o' = Int64.(off - o) in + begin match Map.find m (base, o) with + | None -> !![i] + | Some s when Int64.(o' = 0L) -> + if debug then + Format.eprintf "found slot %a\n%!" Var.pp (Virtual.Slot.var s); + let op' = Insn.subst_load_or_store op ~f:(const @@ Virtual.Slot.var s) in + !![Insn.with_op i op'] + | Some s -> + if debug then + Format.eprintf "found slot %a\n%!" Var.pp (Virtual.Slot.var s); + let* l = Context.Label.fresh in + let* y = Context.Var.fresh in + let a = Insn.add y word (Virtual.Slot.var s) o' in + let op' = Insn.subst_load_or_store op ~f:(const y) in !![ + Insn.create ~label:l a; + Insn.with_op i op'; + ] + end + | Some covers when is_store load_or_store -> + if debug then + Format.eprintf "%a: splitting\n%!" Label.pp @@ Insn.label i; + split_into_parts op ptr base covers m |> + Context.List.map ~f:(fun op -> + let+ l = Context.Label.fresh in + Insn.create ~label:l op) + | Some covers -> + if debug then + Format.eprintf "multi or no-part load: %d\n%!" (List.length covers); + !![i] + | None -> + if debug then + Format.eprintf "no parts found\n%!"; + !![i] + + let rewrite_with_partitions slots fn s parts m = + let open Context.Syntax in + let* blks = + Func.blks fn |> Context.Seq.map ~f:(fun b -> + let s = ref @@ Solution.get s @@ Blk.label b in + let+ insns = + Blk.insns b |> Context.Seq.map ~f:(fun i -> + let+ is = rewrite_one parts m s i in + s := transfer_op slots !s @@ Insn.op i; + is) + >>| Fn.compose List.concat Seq.to_list in + Blk.with_insns b insns) >>| Seq.to_list in + Context.lift_err @@ Func.with_blks fn blks let run fn = let open Context.Syntax in - let s = analyze fn in - let+ m = build_scalar_map fn s in + let slots = collect_slots fn in + let s = analyze slots fn in + let accs = collect_accesses slots fn s in + let parts = partition_acesses accs in + if debug then + Map.iteri parts ~f:(fun ~key ~data -> + Format.eprintf "partitions for %a:\n%!" Var.pp key; + List.iter data ~f:(fun p -> + Format.eprintf " %a\n%!" pp_partition p)); + let* m = materialize_partitions slots parts in let fn = insert_new_slots fn m in - rewrite fn s m + rewrite_with_partitions slots fn s parts m end diff --git a/src/test/data/opt/gcdext.vir.opt.sysv b/src/test/data/opt/gcdext.vir.opt.sysv index d13e608d..bf24853a 100644 --- a/src/test/data/opt/gcdext.vir.opt.sysv +++ b/src/test/data/opt/gcdext.vir.opt.sysv @@ -3,54 +3,48 @@ module gcdext export function $gcd(w %a/rdi, w %b/rsi) { %res = slot 16, align 8 %r = slot 16, align 8 - %8 = slot 16, align 8 %13 = slot 16, align 8 %18 = slot 16, align 8 @2: - %1 = eq.w %a, 0x0_w ; @30 - %2 = add.l %res, 0x8_l ; @31 - %3 = add.l %res, 0x4_l ; @32 - br %1, @3, @4 + %1.1 = eq.w %a, 0x0_w ; @30 + %2.1 = add.l %res, 0x8_l ; @31 + %3.1 = add.l %res, 0x4_l ; @32 + br %1.1, @3, @4 @3: st.w %b, %res ; @6 - st.w 0x0_w, %3 ; @8 - st.w 0x1_w, %2 ; @10 - %19 = ld.l %res ; @49 - st.l %19, %18 ; @50 - %21 = ld.l %2 ; @52 - %22 = add.l %18, 0x8_l ; @53 - st.l %21, %22 ; @54 + st.w 0x0_w, %3.1 ; @8 + st.w 0x1_w, %2.1 ; @10 + %19.1 = ld.l %res ; @49 + st.l %19.1, %18 ; @50 + %21.1 = ld.l %2.1 ; @52 + %22.1 = add.l %18, 0x8_l ; @53 + st.l %21.1, %22.1 ; @54 jmp @29(%18) @4: %m.1 = rem.w %b, %a ; @12 - %27/l/rax, %28/l/rdx = call $gcd(%m.1/rdi, %a/rsi) ; @13 - %26 = add.l %8, 0x8_l ; @58 - st.l %27, %8 ; @59 - st.l %28, %26 ; @60 - %9 = ld.l %8 ; @37 - st.l %9, %r ; @38 - %11 = ld.l %26 ; @40 - %12 = add.l %r, 0x8_l ; @41 - st.l %11, %12 ; @42 + %27.1/l/rax, %28.1/l/rdx = call $gcd(%m.1/rdi, %a/rsi) ; @13 + st.l %27.1, %r ; @38 + %12.1 = add.l %r, 0x8_l ; @41 + st.l %28.1, %12.1 ; @42 %rg.1 = ld.w %r ; @15 - %4 = add.l %r, 0x4_l ; @33 - %rx.1 = ld.w %4 ; @17 - %ry.1 = ld.w %12 ; @19 + %4.1 = add.l %r, 0x4_l ; @33 + %rx.1 = ld.w %4.1 ; @17 + %ry.1 = ld.w %12.1 ; @19 st.w %rg.1, %res ; @20 %nx.1 = div.w %b, %a ; @21 - %6 = mul.w %nx.1, %rx.1 ; @35 - %7 = sub.w %ry.1, %6 ; @36 - st.w %7, %3 ; @25 - st.w %rx.1, %2 ; @27 - %14 = ld.l %res ; @43 - st.l %14, %13 ; @44 - %16 = ld.l %2 ; @46 - %17 = add.l %13, 0x8_l ; @47 - st.l %16, %17 ; @48 + %6.1 = mul.w %nx.1, %rx.1 ; @35 + %7.1 = sub.w %ry.1, %6.1 ; @36 + st.w %7.1, %3.1 ; @25 + st.w %rx.1, %2.1 ; @27 + %14.1 = ld.l %res ; @43 + st.l %14.1, %13 ; @44 + %16.1 = ld.l %2.1 ; @46 + %17.1 = add.l %13, 0x8_l ; @47 + st.l %16.1, %17.1 ; @48 jmp @29(%13) -@29(%0): - %23 = ld.l %0 ; @55 - %24 = add.l %0, 0x8_l ; @56 - %25 = ld.l %24 ; @57 - ret rax/%23, rdx/%25 +@29(%0.1): + %23.1 = ld.l %0.1 ; @55 + %24.1 = add.l %0.1, 0x8_l ; @56 + %25.1 = ld.l %24.1 ; @57 + ret rax/%23.1, rdx/%25.1 } diff --git a/src/test/data/opt/retmem.vir.opt.sysv b/src/test/data/opt/retmem.vir.opt.sysv index 5514c598..096623cd 100644 --- a/src/test/data/opt/retmem.vir.opt.sysv +++ b/src/test/data/opt/retmem.vir.opt.sysv @@ -1,25 +1,13 @@ module retmem export function $foo(l %3/rdi, l %a/rsi, l %b/rdx, l %c/rcx, l %d/r8) { - %x = slot 32, align 8 @2: - st.l %a, %x ; @3 - %0 = add.l %x, 0x8_l ; @11 - st.l %b, %0 ; @5 - %1 = add.l %x, 0x10_l ; @12 - st.l %c, %1 ; @7 - %2 = add.l %x, 0x18_l ; @13 - st.l %d, %2 ; @9 - %4 = ld.l %x ; @14 - st.l %4, %3 ; @15 - %6 = ld.l %0 ; @17 - %7 = add.l %3, 0x8_l ; @18 - st.l %6, %7 ; @19 - %9 = ld.l %1 ; @21 - %10 = add.l %3, 0x10_l ; @22 - st.l %9, %10 ; @23 - %12 = ld.l %2 ; @25 - %13 = add.l %3, 0x18_l ; @26 - st.l %12, %13 ; @27 + st.l %a, %3 ; @15 + %7.1 = add.l %3, 0x8_l ; @18 + st.l %b, %7.1 ; @19 + %10.1 = add.l %3, 0x10_l ; @22 + st.l %c, %10.1 ; @23 + %13.1 = add.l %3, 0x18_l ; @26 + st.l %d, %13.1 ; @27 ret rax/%3 } diff --git a/src/test/data/opt/storetoload1.vir.opt b/src/test/data/opt/storetoload1.vir.opt index a86e4b11..5ab4aaf0 100644 --- a/src/test/data/opt/storetoload1.vir.opt +++ b/src/test/data/opt/storetoload1.vir.opt @@ -3,10 +3,9 @@ module foo export function $foo() { %x = slot 16, align 16 @2: - %0 = add.l %x, 0x8_l ; @9 - %1 = and.l %0, 0xf_l ; @10 - %2 = itrunc.w %1 ; @11 - st.w %2, %0 ; @6 - st.w %2, $a ; @8 + %1 = add.l %x, 0x8_l ; @9 + %2 = and.l %1, 0xf_l ; @10 + %3 = itrunc.w %2 ; @11 + st.w %3, $a ; @8 ret } diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv b/src/test/data/opt/vaarg1.vir.opt.sysv index 8c6f33d9..00c3a0e6 100644 --- a/src/test/data/opt/vaarg1.vir.opt.sysv +++ b/src/test/data/opt/vaarg1.vir.opt.sysv @@ -4,89 +4,83 @@ export function $foo(b %11/rax, l %i/rdi, ...) { %ap = slot 24, align 8 %r = slot 16, align 8 %5 = slot 176, align 16 - %21 = slot 16, align 8 @19: - %6 = add.l %5, 0x8_l ; @20 - regstore rsi, %6 ; @21 - %7 = add.l %5, 0x10_l ; @22 - regstore rdx, %7 ; @23 - %8 = add.l %5, 0x18_l ; @24 - regstore rcx, %8 ; @25 - %9 = add.l %5, 0x20_l ; @26 - regstore r8, %9 ; @27 - %10 = add.l %5, 0x28_l ; @28 - regstore r9, %10 ; @29 - %12 = eq.b %11, 0x0_b ; @30 - br %12, @2, @18 + %6.1 = add.l %5, 0x8_l ; @20 + regstore rsi, %6.1 ; @21 + %7.1 = add.l %5, 0x10_l ; @22 + regstore rdx, %7.1 ; @23 + %8.1 = add.l %5, 0x18_l ; @24 + regstore rcx, %8.1 ; @25 + %9.1 = add.l %5, 0x20_l ; @26 + regstore r8, %9.1 ; @27 + %10.1 = add.l %5, 0x28_l ; @28 + regstore r9, %10.1 ; @29 + %12.1 = eq.b %11, 0x0_b ; @30 + br %12.1, @2, @18 @18: - %13 = add.l %5, 0x30_l ; @31 - regstore xmm0, %13 ; @32 - %14 = add.l %5, 0x40_l ; @33 - regstore xmm1, %14 ; @34 - %15 = add.l %5, 0x50_l ; @35 - regstore xmm2, %15 ; @36 - %16 = add.l %5, 0x60_l ; @37 - regstore xmm3, %16 ; @38 - %17 = add.l %5, 0x70_l ; @39 - regstore xmm4, %17 ; @40 - %18 = add.l %5, 0x80_l ; @41 - regstore xmm5, %18 ; @42 - %19 = add.l %5, 0x90_l ; @43 - regstore xmm6, %19 ; @44 - %20 = add.l %5, 0xa0_l ; @45 - regstore xmm7, %20 ; @46 + %13.1 = add.l %5, 0x30_l ; @31 + regstore xmm0, %13.1 ; @32 + %14.1 = add.l %5, 0x40_l ; @33 + regstore xmm1, %14.1 ; @34 + %15.1 = add.l %5, 0x50_l ; @35 + regstore xmm2, %15.1 ; @36 + %16.1 = add.l %5, 0x60_l ; @37 + regstore xmm3, %16.1 ; @38 + %17.1 = add.l %5, 0x70_l ; @39 + regstore xmm4, %17.1 ; @40 + %18.1 = add.l %5, 0x80_l ; @41 + regstore xmm5, %18.1 ; @42 + %19.1 = add.l %5, 0x90_l ; @43 + regstore xmm6, %19.1 ; @44 + %20.1 = add.l %5, 0xa0_l ; @45 + regstore xmm7, %20.1 ; @46 jmp @2 @2: st.w 0x8_w, %ap ; @53 - %26 = add.l %ap, 0x4_l ; @54 - st.w 0x30_w, %26 ; @55 - %27 = stkargs ; @56 - %28 = add.l %ap, 0x8_l ; @57 - st.l %27, %28 ; @58 - %29 = add.l %ap, 0x10_l ; @59 - st.l %5, %29 ; @60 + %26.1 = add.l %ap, 0x4_l ; @54 + st.w 0x30_w, %26.1 ; @55 + %27.1 = stkargs ; @56 + %28.1 = add.l %ap, 0x8_l ; @57 + st.l %27.1, %28.1 ; @58 + %29.1 = add.l %ap, 0x10_l ; @59 + st.l %5, %29.1 ; @60 jmp @62 @62: - %31 = ld.w %26 ; @68 - %32 = le.w %31, 0xa0_w ; @69 - br %32, @63, @65 + %31.1 = ld.w %26.1 ; @68 + %32.1 = le.w %31.1, 0xa0_w ; @69 + br %32.1, @63, @65 @63: - %33 = ld.w %ap ; @70 - %34 = le.w %33, 0x28_w ; @71 - br %34, @64, @65 + %33.1 = ld.w %ap ; @70 + %34.1 = le.w %33.1, 0x28_w ; @71 + br %34.1, @64, @65 @64: - %37 = zext.l %31 ; @74 - %38 = add.l %5, %37 ; @75 - %39 = zext.l %33 ; @76 - %40 = add.l %5, %39 ; @77 - %41 = add.w %31, 0x10_l ; @78 - %42 = add.w %33, 0x8_l ; @79 - st.w %41, %26 ; @80 - st.w %42, %ap ; @81 - jmp @66(%38, %40) + %37.1 = zext.l %31.1 ; @74 + %38.1 = add.l %5, %37.1 ; @75 + %39.1 = zext.l %33.1 ; @76 + %40.1 = add.l %5, %39.1 ; @77 + %41.1 = add.w %31.1, 0x10_l ; @78 + %42.1 = add.w %33.1, 0x8_l ; @79 + st.w %41.1, %26.1 ; @80 + st.w %42.1, %ap ; @81 + jmp @66(%38.1, %40.1) @65: - %44 = ld.l %28 ; @83 - %45 = add.l %44, 0x8_l ; @84 - %46 = add.l %44, 0x10_l ; @85 - st.l %46, %28 ; @86 - jmp @66(%44, %45) -@66(%47, %48): - %49 = ld.l %47 ; @87 - st.l %49, %21 ; @88 - %50 = ld.l %48 ; @89 - %51 = add.l %21, 0x8_l ; @90 - st.l %50, %51 ; @91 + %44.1 = ld.l %28.1 ; @83 + %45.1 = add.l %44.1, 0x8_l ; @84 + %46.1 = add.l %44.1, 0x10_l ; @85 + st.l %46.1, %28.1 ; @86 + jmp @66(%44.1, %45.1) +@66(%47.1, %48.1): + %49.1 = ld.l %47.1 ; @87 + %50.1 = ld.l %48.1 ; @89 jmp @61 @61: - %22 = ld.l %21 ; @47 - st.l %22, %r ; @48 - %24 = ld.l %51 ; @50 - %25 = add.l %r, 0x8_l ; @51 - st.l %24, %25 ; @52 + st.l %49.1, %r ; @48 + %25.1 = add.l %r, 0x8_l ; @51 + st.l %50.1, %25.1 ; @52 %f1.1 = ld.d %r ; @6 - %0 = add.d %f1.1, 1.234_d ; @13 - %2 = ftosi.d.l %0 ; @15 - %3 = add.l %24, %2 ; @16 - %4 = add.l %3, %i ; @17 - ret rax/%4 + %0.1 = add.d %f1.1, 1.234_d ; @13 + %2.1 = ftosi.d.l %0.1 ; @15 + %3.1 = add.l %50.1, %2.1 ; @16 + %4.1 = add.l %3.1, %i ; @17 + ret rax/%4.1 } diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 index 6320d075..0f74a027 100644 --- a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 +++ b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 @@ -4,7 +4,6 @@ export function $foo { ; returns: rax %ap = slot 24, align 8 %r = slot 16, align 8 %5 = slot 176, align 16 - %21 = slot 16, align 8 @19: mov %11:b, al ; @20 mov %i:l, rdi ; @121 @@ -29,58 +28,54 @@ export function $foo { ; returns: rax @2: mov dword ptr [%ap], 0x8_w ; @53 mov dword ptr [%ap + 0x4], 0x30_w ; @55 - lea %27:l, qword ptr [rbp + 0x10] ; @56 - mov qword ptr [%ap + 0x8], %27:l ; @58 + lea %27.1:l, qword ptr [rbp + 0x10] ; @56 + mov qword ptr [%ap + 0x8], %27.1:l ; @58 mov qword ptr [%ap + 0x10], %5:l ; @60 jmp @62 ; @114 @62: - mov %31:w, dword ptr [%ap + 0x4] ; @68 - cmp %31:w, 0xa0_w ; @109 + mov %31.1:w, dword ptr [%ap + 0x4] ; @68 + cmp %31.1:w, 0xa0_w ; @109 jbe @63 ; @110 jmp @65 ; @111 @63: - mov %33:w, dword ptr [%ap] ; @70 - cmp %33:w, 0x28_w ; @104 + mov %33.1:w, dword ptr [%ap] ; @70 + cmp %33.1:w, 0x28_w ; @104 jbe @64 ; @105 jmp @65 ; @106 @65: - mov %44:l, qword ptr [%ap + 0x8] ; @83 - lea %45:l, qword ptr [%44 + 0x8] ; @84 - lea %46:l, qword ptr [%44 + 0x10] ; @85 - mov qword ptr [%ap + 0x8], %46:l ; @86 - mov %47:l, %44:l ; @101 - mov %48:l, %45:l ; @102 + mov %44.1:l, qword ptr [%ap + 0x8] ; @83 + lea %45.1:l, qword ptr [%44.1 + 0x8] ; @84 + lea %46.1:l, qword ptr [%44.1 + 0x10] ; @85 + mov qword ptr [%ap + 0x8], %46.1:l ; @86 + mov %47.1:l, %44.1:l ; @101 + mov %48.1:l, %45.1:l ; @102 jmp @66 ; @103 @64: - mov %37:w, %31:w ; @74 - lea %38:l, qword ptr [%5 + %37*1] ; @75 - mov %39:w, %33:w ; @76 - lea %40:l, qword ptr [%5 + %39*1] ; @77 - lea %41:w, qword ptr [%31 + 0x10] ; @78 - lea %42:w, qword ptr [%33 + 0x8] ; @79 - mov dword ptr [%ap + 0x4], %41:w ; @80 - mov dword ptr [%ap], %42:w ; @81 - mov %47:l, %38:l ; @98 - mov %48:l, %40:l ; @99 + mov %37.1:w, %31.1:w ; @74 + lea %38.1:l, qword ptr [%5 + %37.1*1] ; @75 + mov %39.1:w, %33.1:w ; @76 + lea %40.1:l, qword ptr [%5 + %39.1*1] ; @77 + lea %41.1:w, qword ptr [%31.1 + 0x10] ; @78 + lea %42.1:w, qword ptr [%33.1 + 0x8] ; @79 + mov dword ptr [%ap + 0x4], %41.1:w ; @80 + mov dword ptr [%ap], %42.1:w ; @81 + mov %47.1:l, %38.1:l ; @98 + mov %48.1:l, %40.1:l ; @99 jmp @66 ; @100 @66: - mov %49:l, qword ptr [%47] ; @87 - mov qword ptr [%21], %49:l ; @88 - mov %50:l, qword ptr [%48] ; @89 - mov qword ptr [%21 + 0x8], %50:l ; @91 + mov %49.1:l, qword ptr [%47.1] ; @87 + mov %50.1:l, qword ptr [%48.1] ; @89 jmp @61 ; @97 @61: - mov %22:l, qword ptr [%21] ; @47 - mov qword ptr [%r], %22:l ; @48 - mov %24:l, qword ptr [%21 + 0x8] ; @50 - mov qword ptr [%r + 0x8], %24:l ; @52 + mov qword ptr [%r], %49.1:l ; @48 + mov qword ptr [%r + 0x8], %50.1:l ; @52 movsd %f1.1:d, qword ptr [%r] ; @6 - movsd %0:d, %f1.1:d ; @13 - addsd %0:d, qword ptr [rip + @94] ; @95 + movsd %0.1:d, %f1.1:d ; @13 + addsd %0.1:d, qword ptr [rip + @94] ; @95 .fp64 @94, 1.234 ; @96 - cvtsd2si %2:l, %0:d ; @15 - lea %3:l, qword ptr [%24 + %2*1] ; @16 - lea %4:l, qword ptr [%3 + %i*1] ; @17 - mov rax, %4:l ; @92 + cvtsd2si %2.1:l, %0.1:d ; @15 + lea %3.1:l, qword ptr [%50.1 + %2.1*1] ; @16 + lea %4.1:l, qword ptr [%3.1 + %i*1] ; @17 + mov rax, %4.1:l ; @92 ret ; @93 } diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc index 0815c0cb..0fcdf52b 100644 --- a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc @@ -4,62 +4,58 @@ export function $foo { ; returns: rax @19: push rbp ; @124 mov rbp, rsp ; @125 - sub rsp, 0xf0_l ; @126 - mov qword ptr [rbp - 0xe8], rsi ; @21 - mov qword ptr [rbp - 0xe0], rdx ; @23 - mov qword ptr [rbp - 0xd8], rcx ; @25 - mov qword ptr [rbp - 0xd0], r8 ; @27 - mov qword ptr [rbp - 0xc8], r9 ; @29 + sub rsp, 0xe0_l ; @126 + mov qword ptr [rbp - 0xd8], rsi ; @21 + mov qword ptr [rbp - 0xd0], rdx ; @23 + mov qword ptr [rbp - 0xc8], rcx ; @25 + mov qword ptr [rbp - 0xc0], r8 ; @27 + mov qword ptr [rbp - 0xb8], r9 ; @29 test al, al ; @116 je @2 ; @117 @18: - movdqa xmmword ptr [rbp - 0xc0], xmm0 ; @32 - movdqa xmmword ptr [rbp - 0xb0], xmm1 ; @34 - movdqa xmmword ptr [rbp - 0xa0], xmm2 ; @36 - movdqa xmmword ptr [rbp - 0x90], xmm3 ; @38 - movdqa xmmword ptr [rbp - 0x80], xmm4 ; @40 - movdqa xmmword ptr [rbp - 0x70], xmm5 ; @42 - movdqa xmmword ptr [rbp - 0x60], xmm6 ; @44 - movdqa xmmword ptr [rbp - 0x50], xmm7 ; @46 + movdqa xmmword ptr [rbp - 0xb0], xmm0 ; @32 + movdqa xmmword ptr [rbp - 0xa0], xmm1 ; @34 + movdqa xmmword ptr [rbp - 0x90], xmm2 ; @36 + movdqa xmmword ptr [rbp - 0x80], xmm3 ; @38 + movdqa xmmword ptr [rbp - 0x70], xmm4 ; @40 + movdqa xmmword ptr [rbp - 0x60], xmm5 ; @42 + movdqa xmmword ptr [rbp - 0x50], xmm6 ; @44 + movdqa xmmword ptr [rbp - 0x40], xmm7 ; @46 @2: - mov dword ptr [rbp - 0x40], 0x8_w ; @53 - mov dword ptr [rbp - 0x3c], 0x30_w ; @55 + mov dword ptr [rbp - 0x30], 0x8_w ; @53 + mov dword ptr [rbp - 0x2c], 0x30_w ; @55 lea rax, qword ptr [rbp + 0x10] ; @56 - mov qword ptr [rbp - 0x38], rax ; @58 - lea rax, qword ptr [rbp - 0xf0] ; @60 - mov qword ptr [rbp - 0x30], rax ; @123 + mov qword ptr [rbp - 0x28], rax ; @58 + lea rax, qword ptr [rbp - 0xe0] ; @60 + mov qword ptr [rbp - 0x20], rax ; @123 @62: - mov esi, dword ptr [rbp - 0x3c] ; @68 + mov esi, dword ptr [rbp - 0x2c] ; @68 cmp esi, 0xa0_w ; @109 ja @65 ; @110 @63: - mov edx, dword ptr [rbp - 0x40] ; @70 + mov edx, dword ptr [rbp - 0x30] ; @70 cmp edx, 0x28_w ; @104 jbe @64 ; @105 @65: - mov rcx, qword ptr [rbp - 0x38] ; @83 - lea rax, qword ptr [rcx + 0x8] ; @84 - lea rdx, qword ptr [rcx + 0x10] ; @85 - mov qword ptr [rbp - 0x38], rdx ; @86 + mov rax, qword ptr [rbp - 0x28] ; @83 + lea rcx, qword ptr [rax + 0x8] ; @84 + lea rdx, qword ptr [rax + 0x10] ; @85 + mov qword ptr [rbp - 0x28], rdx ; @86 jmp @66 ; @103 @64: - lea rcx, qword ptr [rbp + rsi*1 - 0xf0] ; @75 - lea rax, qword ptr [rbp + rdx*1 - 0xf0] ; @77 + lea rax, qword ptr [rbp + rsi*1 - 0xe0] ; @75 + lea rcx, qword ptr [rbp + rdx*1 - 0xe0] ; @77 add esi, 0x10_w ; @78 add edx, 0x8_w ; @79 - mov dword ptr [rbp - 0x3c], esi ; @80 - mov dword ptr [rbp - 0x40], edx ; @81 + mov dword ptr [rbp - 0x2c], esi ; @80 + mov dword ptr [rbp - 0x30], edx ; @81 @66: - mov rcx, qword ptr [rcx] ; @87 - mov qword ptr [rbp - 0x18], rcx ; @88 - mov rax, qword ptr [rax] ; @89 - mov qword ptr [rbp - 0x10], rax ; @91 + mov rax, qword ptr [rax] ; @87 + mov rcx, qword ptr [rcx] ; @89 @61: - mov rax, qword ptr [rbp - 0x18] ; @47 - mov qword ptr [rbp - 0x28], rax ; @48 - mov rcx, qword ptr [rbp - 0x10] ; @50 - mov qword ptr [rbp - 0x20], rcx ; @52 - movsd xmm0, qword ptr [rbp - 0x28] ; @6 + mov qword ptr [rbp - 0x18], rax ; @48 + mov qword ptr [rbp - 0x10], rcx ; @52 + movsd xmm0, qword ptr [rbp - 0x18] ; @6 addsd xmm0, qword ptr [rip + @94] ; @95 .fp64 @94, 1.234 ; @96 cvtsd2si rax, xmm0 ; @15 diff --git a/src/test/data/opt/vaarg2.driver.sysv.amd64.c b/src/test/data/opt/vaarg2.driver.sysv.amd64.c new file mode 100644 index 00000000..4abf0674 --- /dev/null +++ b/src/test/data/opt/vaarg2.driver.sysv.amd64.c @@ -0,0 +1,16 @@ +#include + +extern long foo(long i, ...); +extern char bar(char b, ...); /* NB: bar actually has no named arguments */ + +struct t { + long l1; + long l2; +}; + +int main() { + struct t s = {10, 32}; + assert(foo(5, s) == 47); + assert(!bar(0)); + assert(bar(1)); +} diff --git a/src/test/data/opt/vaarg2.vir.opt.sysv b/src/test/data/opt/vaarg2.vir.opt.sysv index c6f36e3d..d860794a 100644 --- a/src/test/data/opt/vaarg2.vir.opt.sysv +++ b/src/test/data/opt/vaarg2.vir.opt.sysv @@ -2,83 +2,72 @@ module vaarg2 export function $foo(b %9/rax, l %i/rdi, ...) { %ap = slot 24, align 8 - %r = slot 16, align 8 %3 = slot 176, align 16 - %19 = slot 16, align 8 @18: - %4 = add.l %3, 0x8_l ; @19 - regstore rsi, %4 ; @20 - %5 = add.l %3, 0x10_l ; @21 - regstore rdx, %5 ; @22 - %6 = add.l %3, 0x18_l ; @23 - regstore rcx, %6 ; @24 - %7 = add.l %3, 0x20_l ; @25 - regstore r8, %7 ; @26 - %8 = add.l %3, 0x28_l ; @27 - regstore r9, %8 ; @28 - %10 = eq.b %9, 0x0_b ; @29 - br %10, @2, @17 + %4.1 = add.l %3, 0x8_l ; @19 + regstore rsi, %4.1 ; @20 + %5.1 = add.l %3, 0x10_l ; @21 + regstore rdx, %5.1 ; @22 + %6.1 = add.l %3, 0x18_l ; @23 + regstore rcx, %6.1 ; @24 + %7.1 = add.l %3, 0x20_l ; @25 + regstore r8, %7.1 ; @26 + %8.1 = add.l %3, 0x28_l ; @27 + regstore r9, %8.1 ; @28 + %10.1 = eq.b %9, 0x0_b ; @29 + br %10.1, @2, @17 @17: - %11 = add.l %3, 0x30_l ; @30 - regstore xmm0, %11 ; @31 - %12 = add.l %3, 0x40_l ; @32 - regstore xmm1, %12 ; @33 - %13 = add.l %3, 0x50_l ; @34 - regstore xmm2, %13 ; @35 - %14 = add.l %3, 0x60_l ; @36 - regstore xmm3, %14 ; @37 - %15 = add.l %3, 0x70_l ; @38 - regstore xmm4, %15 ; @39 - %16 = add.l %3, 0x80_l ; @40 - regstore xmm5, %16 ; @41 - %17 = add.l %3, 0x90_l ; @42 - regstore xmm6, %17 ; @43 - %18 = add.l %3, 0xa0_l ; @44 - regstore xmm7, %18 ; @45 + %11.1 = add.l %3, 0x30_l ; @30 + regstore xmm0, %11.1 ; @31 + %12.1 = add.l %3, 0x40_l ; @32 + regstore xmm1, %12.1 ; @33 + %13.1 = add.l %3, 0x50_l ; @34 + regstore xmm2, %13.1 ; @35 + %14.1 = add.l %3, 0x60_l ; @36 + regstore xmm3, %14.1 ; @37 + %15.1 = add.l %3, 0x70_l ; @38 + regstore xmm4, %15.1 ; @39 + %16.1 = add.l %3, 0x80_l ; @40 + regstore xmm5, %16.1 ; @41 + %17.1 = add.l %3, 0x90_l ; @42 + regstore xmm6, %17.1 ; @43 + %18.1 = add.l %3, 0xa0_l ; @44 + regstore xmm7, %18.1 ; @45 jmp @2 @2: st.w 0x8_w, %ap ; @52 - %24 = add.l %ap, 0x4_l ; @53 - st.w 0x30_w, %24 ; @54 - %25 = stkargs ; @55 - %26 = add.l %ap, 0x8_l ; @56 - st.l %25, %26 ; @57 - %27 = add.l %ap, 0x10_l ; @58 - st.l %3, %27 ; @59 + %24.1 = add.l %ap, 0x4_l ; @53 + st.w 0x30_w, %24.1 ; @54 + %25.1 = stkargs ; @55 + %26.1 = add.l %ap, 0x8_l ; @56 + st.l %25.1, %26.1 ; @57 + %27.1 = add.l %ap, 0x10_l ; @58 + st.l %3, %27.1 ; @59 jmp @61 @61: - %28 = ld.w %ap ; @65 - %29 = le.w %28, 0x20_w ; @66 - br %29, @62, @63 + %28.1 = ld.w %ap ; @65 + %29.1 = le.w %28.1, 0x20_w ; @66 + br %29.1, @62, @63 @62: - %32 = zext.l %28 ; @69 - %33 = add.l %3, %32 ; @70 - %34 = add.w %28, 0x10_w ; @71 - st.w %34, %ap ; @72 - jmp @64(%33) + %32.1 = zext.l %28.1 ; @69 + %33.1 = add.l %3, %32.1 ; @70 + %34.1 = add.w %28.1, 0x10_w ; @71 + st.w %34.1, %ap ; @72 + jmp @64(%33.1) @63: - %36 = ld.l %26 ; @74 - %37 = add.l %36, 0x10_l ; @75 - st.l %37, %26 ; @76 - jmp @64(%36) -@64(%38): - %39 = ld.l %38 ; @77 - st.l %39, %19 ; @78 - %40 = add.l %38, 0x8_l ; @79 - %41 = ld.l %40 ; @80 - %42 = add.l %19, 0x8_l ; @81 - st.l %41, %42 ; @82 + %36.1 = ld.l %26.1 ; @74 + %37.1 = add.l %36.1, 0x10_l ; @75 + st.l %37.1, %26.1 ; @76 + jmp @64(%36.1) +@64(%38.1): + %39.1 = ld.l %38.1 ; @77 + %40.1 = add.l %38.1, 0x8_l ; @79 + %41.1 = ld.l %40.1 ; @80 jmp @60 @60: - %20 = ld.l %19 ; @46 - st.l %20, %r ; @47 - %22 = ld.l %42 ; @49 - %23 = add.l %r, 0x8_l ; @50 - st.l %22, %23 ; @51 - %f1.1 = ld.l %r ; @6 - %1 = add.l %f1.1, %22 ; @15 - %2 = add.l %1, %i ; @16 - ret rax/%2 + %1.1 = add.l %39.1, %41.1 ; @15 + %2.1 = add.l %1.1, %i ; @16 + ret rax/%2.1 } export function $bar(b %49/rax, ...) { diff --git a/src/test/test_opt.ml b/src/test/test_opt.ml index 08b24787..cfb97c78 100644 --- a/src/test/test_opt.ml +++ b/src/test/test_opt.ml @@ -430,6 +430,7 @@ let native_suite = "Test native code" >::: [ "Quicksort, swap inlined (SysV AMD64)" >:: test_sysv_amd64_native "qsort_inline_swap"; "Spill test 1 (SysV AMD64)" >:: test_sysv_amd64_native "spill1"; "Variadic function arguments 1 (SysV AMD64)" >:: test_sysv_amd64_native "vaarg1"; + "Variadic function arguments 2 (SysV AMD64)" >:: test_sysv_amd64_native "vaarg2"; "Palindrome (SysV AMD64)" >:: test_sysv_amd64_native "palindrome"; "Integer pow (SysV AMD64)" >:: test_sysv_amd64_native "int_pow"; "AND test (SysV AMD64)" >:: test_sysv_amd64_native "and_test"; From 62bf90e34528eca1fa85eb6201b70038fd5fcc6d Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 9 Nov 2025 15:25:17 -0500 Subject: [PATCH 05/62] `def_of_op` for `Abi.Insn` --- src/lib/virtual/abi/abi_insn.ml | 4 +++- src/lib/virtual/virtual.mli | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/lib/virtual/abi/abi_insn.ml b/src/lib/virtual/abi/abi_insn.ml index b4bdf4b2..c2f3d18e 100644 --- a/src/lib/virtual/abi/abi_insn.ml +++ b/src/lib/virtual/abi/abi_insn.ml @@ -171,7 +171,7 @@ module Tag = struct "non-tail" (module Unit) end -let def i = match i.op with +let def_of_op = function | `bop (x, _, _, _) | `uop (x, _, _) | `sel (x, _, _, _, _) @@ -180,3 +180,5 @@ let def i = match i.op with | `stkargs x -> Var.Set.singleton x | `store _ | `regstore _ | `regassign _ -> Var.Set.empty | `call (xs, _, _) -> Var.Set.of_list @@ List.map xs ~f:fst3 + +let def i = def_of_op i.op diff --git a/src/lib/virtual/virtual.mli b/src/lib/virtual/virtual.mli index 23a3aee7..49a00e3a 100644 --- a/src/lib/virtual/virtual.mli +++ b/src/lib/virtual/virtual.mli @@ -693,7 +693,10 @@ module Abi : sig (** Transforms the underlying operation. *) val map : t -> f:(op -> op) -> t - (** Returns the set of defined variables of the instruction. *) + (** Returns the set of defined variables of the underlying op. *) + val def_of_op : op -> Var.Set.t + + (** Equivalent to [def_of_op (op i)] *) val def : t -> Var.Set.t (** Same as [pp_op]. *) From 6470f005ae9254e1890d20e8a484b6c58b514439 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 9 Nov 2025 15:25:31 -0500 Subject: [PATCH 06/62] style --- src/lib/passes/passes.ml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lib/passes/passes.ml b/src/lib/passes/passes.ml index 526b24c5..db6b1ff1 100644 --- a/src/lib/passes/passes.ml +++ b/src/lib/passes/passes.ml @@ -21,8 +21,7 @@ let initialize m = !!(tenv, m) let retype tenv m = - Module.funs m |> - Seq.to_list |> Typecheck.update_fns tenv + Module.funs m |> Seq.to_list |> Typecheck.update_fns tenv let optimize tenv m = let module Cv = Context.Virtual in From b8a1add0fe5f2b81fde62ed9019896b9d19a0e2c Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 9 Nov 2025 15:26:09 -0500 Subject: [PATCH 07/62] Some cleanup --- src/lib/passes/sroa/sroa.ml | 90 +++++--- src/lib/passes/sroa/sroa_impl.ml | 359 +++++++++++++++++-------------- 2 files changed, 259 insertions(+), 190 deletions(-) diff --git a/src/lib/passes/sroa/sroa.ml b/src/lib/passes/sroa/sroa.ml index a0953f17..45d11dec 100644 --- a/src/lib/passes/sroa/sroa.ml +++ b/src/lib/passes/sroa/sroa.ml @@ -1,4 +1,5 @@ open Core +open Regular.Std open Virtual open Sroa_impl @@ -6,6 +7,7 @@ let var_set_of_option = function | Some x -> Var.Set.singleton x | None -> Var.Set.empty +(* Instructions that can produce a scalar `Offset` value. *) let offset = function | `bop (_, `add _, `var x, `int (i, _)) | `bop (_, `add _, `int (i, _), `var x) -> @@ -14,7 +16,11 @@ let offset = function Some (x, Int64.neg @@ Bv.to_int64 i) | _ -> None -(* TODO: more cases? *) +(* Instructions that behave like `copy` instead of causing + a variable to "escape". + + TODO: more cases? +*) let copy_of = function | `uop (_, `copy _, `var x) -> Some x | `bop (_, `add _, `var x, `int (i, _)) @@ -33,14 +39,14 @@ let escapes mty fv = function | o -> fv o [@@specialise] -let load_or_store_to o = match o with +let load_or_store_to = function | `load (_, (#Type.basic as b), `var x) -> Some (x, b, Load) | `store ((#Type.basic as b), _, `var x) -> Some (x, b, Store) | _ -> None -let subst_load_or_store o ~f = match o with - | `load (x, (#Type.basic as b), `var a) -> `load (x, b, `var (f a)) - | `store ((#Type.basic as b), v, `var a) -> `store (b, v, `var (f a)) +let replace_load_or_store_addr a = function + | `load (x, (#Type.basic as b), _) -> `load (x, b, `var a) + | `store ((#Type.basic as b), v, _) -> `store (b, v, `var a) | op -> op let add x ty b o = @@ -52,31 +58,68 @@ let is_named = function | `name _ -> true | _ -> false +let local l args = l, List.filter_map args ~f:var_of_operand + +let table enum d ds tbl = + enum tbl |> Seq.map ~f:snd |> + Seq.map ~f:(fun (`label (l, args)) -> local l args) |> + Seq.to_list |> List.cons (local d ds) + +let locals enum = function + | `hlt -> [] + | `jmp #global -> [] + | `jmp `label (l, args) -> + [local l args] + | `br (_, #global, #global) -> [] + | `br (_, `label (y, ys), #global) -> + [local y ys] + | `br (_, #global, `label (n, ns)) -> + [local n ns] + | `br (_, `label (y, ys), `label (n, ns)) -> + [local y ys; local n ns] + | `ret _ -> [] + | `sw (_, _, `label (d, ds), tbl) -> + table enum d ds tbl + +(* XXX: we don't yet have a story for how to handle block + parameters. *) +let escapes_ctrl fv = function + | `hlt -> Var.Set.empty + | `jmp `var x -> Var.Set.singleton x + | `jmp _ -> Var.Set.empty + | `br (c, `var y, `var n) -> Var.Set.of_list [c; y; n] + | `br (c, _, `var n) -> Var.Set.of_list [c; n] + | `br (c, `var y, _) -> Var.Set.of_list [c; y] + | `br (c, _, _) -> Var.Set.singleton c + | `ret _ as c -> fv c + | `sw (_, `var i, _, _) -> Var.Set.singleton i + | `sw _ -> Var.Set.empty +[@@ocaml.warning "-32"] + module V = Make(struct module Insn = struct type t = Insn.t type op = Insn.op - let create ~label op = Insn.create op ~label let with_op = Insn.with_op - let op = Insn.op let label = Insn.label - let lhs = Insn.lhs_of_op + let lhs = var_set_of_option @. Insn.lhs_of_op let offset = (offset :> op -> _) let copy_of = (copy_of :> op -> _) let fv = Insn.free_vars_of_op let escapes = (escapes is_named fv :> op -> _) let load_or_store_to = (load_or_store_to :> op -> _) - let subst_load_or_store ~f = (subst_load_or_store ~f :> op -> _) + let replace_load_or_store_addr a = (replace_load_or_store_addr a :> op -> _) let add x ty b o = (add x ty b o :> op) end - module Ctrl = struct type t = ctrl - let escapes = Ctrl.free_vars + let fv = Ctrl.free_vars + let escapes = fv + (* let escapes = (escapes_ctrl fv :> t -> _) *) + let locals = (locals Ctrl.Table.enum :> t -> _) end - module Blk = Blk module Func = Func module Cfg = Cfg @@ -84,40 +127,29 @@ module V = Make(struct module A = Make(struct open Abi - module Insn = struct type t = Insn.t type op = Insn.op - let create ~label op = Insn.create op ~label let with_op = Insn.with_op - let op = Insn.op let label = Insn.label - - let lhs = function - | `bop (x, _, _, _) - | `uop (x, _, _) - | `sel (x, _, _, _, _) - | `load (x, _, _) - -> Some x - | _ -> None - + let lhs = Insn.def_of_op let offset = (offset :> op -> _) let copy_of = (copy_of :> op -> _) let fv = Insn.free_vars_of_op let escapes = (escapes (const false) fv :> op -> _) let load_or_store_to = (load_or_store_to :> op -> _) - let subst_load_or_store ~f = (subst_load_or_store ~f :> op -> _) + let replace_load_or_store_addr a = (replace_load_or_store_addr a :> op -> _) let add x ty b o = (add x ty b o :> op) end - module Ctrl = struct type t = ctrl - (* TODO: see if we can relax passing of block params *) - let escapes = Ctrl.free_vars + let fv = Ctrl.free_vars + let escapes = fv + (* let escapes = (escapes_ctrl fv :> t -> _) *) + let locals = (locals Ctrl.Table.enum :> t -> _) end - module Blk = Blk module Func = Func module Cfg = Cfg diff --git a/src/lib/passes/sroa/sroa_impl.ml b/src/lib/passes/sroa/sroa_impl.ml index 1ef4c0bc..f21420f7 100644 --- a/src/lib/passes/sroa/sroa_impl.ml +++ b/src/lib/passes/sroa/sroa_impl.ml @@ -14,6 +14,9 @@ open Graphlib.Std let debug = false +let (@.) = Fn.compose +let (@<) = Fn.flip + (* A scalar access. *) module Scalar = struct module T = struct @@ -40,30 +43,33 @@ type value = let pp_value ppf = function | Top -> Format.fprintf ppf "\u{22a4}" - | Offset (slot, offset) -> Format.fprintf ppf "%a+0x%Lx" Var.pp slot offset + | Offset (slot, offset) -> + let neg = Int64.is_negative offset in + let pre, off = if neg then '-', Int64.neg offset else '+', offset in + Format.fprintf ppf "%a%c0x%Lx" Var.pp slot pre off let pp_bot ppf () = Format.fprintf ppf "\u{22a5}" [@@ocaml.warning "-32"] module Value = struct type t = value [@@deriving equal, sexp] - - (* Join two abstract values. *) let merge a b = match a, b with | Offset s1, Offset s2 when equal_scalar s1 s2 -> a - | Offset _, Offset _ -> Top - | Top, _ | _, Top -> Top + | _ -> Top end type slots = Virtual.slot Var.Map.t module State : sig type t = value Var.Map.t [@@deriving equal, sexp] + val empty : t val merge : t -> t -> t val derive : slots -> t -> Var.t -> int64 -> value option end = struct (* NB: the keys are the LHS of a given instruction *) type t = value Var.Map.t [@@deriving equal, sexp] + let empty = Var.Map.empty + let merge a b = Map.merge_skewed a b ~combine:(fun ~key:_ a b -> Value.merge a b) @@ -74,6 +80,7 @@ end = struct Int64.(offset >= size) | None -> false + (* Normalize the scalar referred to by `ptr` and `offset`. *) let derive slots s ptr offset = match Map.find s ptr with | (Some Top | None) as v -> v | Some Offset (ptr', offset') -> @@ -89,7 +96,7 @@ type state = State.t [@@deriving equal, sexp] let pp_state ppf s = let pp_sep ppf () = Format.fprintf ppf "@ " in - let pp_elt ppf (x, v) = Format.fprintf ppf "%a[%a]" Var.pp x pp_value v in + let pp_elt ppf (x, v) = Format.fprintf ppf "(%a@ %a)" Var.pp x pp_value v in let pp_elts = Format.pp_print_list ~pp_sep pp_elt in Format.fprintf ppf "@[%a@]" pp_elts @@ Map.to_alist s [@@ocaml.warning "-32"] @@ -116,14 +123,14 @@ module type L = sig val label : t -> Label.t (* Used during analysis. *) - val lhs : op -> Var.t option + val lhs : op -> Var.Set.t val offset : op -> scalar option val copy_of : op -> Var.t option val escapes : op -> Var.Set.t (* Used during replacement. *) val load_or_store_to : op -> (Var.t * Type.basic * load_or_store) option - val subst_load_or_store : f:(Var.t -> Var.t) -> op -> op + val replace_load_or_store_addr : Var.t -> op -> op val with_op : t -> op -> t val add : Var.t -> Type.imm_base -> Var.t -> int64 -> op end @@ -131,14 +138,15 @@ module type L = sig module Ctrl : sig type t val escapes : t -> Var.Set.t + val locals : t -> (Label.t * Var.t list) list end module Blk : sig type t val label : t -> Label.t + val args : ?rev:bool -> t -> Var.t seq val insns : ?rev:bool -> t -> Insn.t seq val ctrl : t -> Ctrl.t - val map_insns : t -> f:(Label.t -> Insn.op -> Insn.op) -> t val with_insns : t -> Insn.t list -> t end @@ -147,7 +155,6 @@ module type L = sig val slots : ?rev:bool -> t -> Virtual.slot seq val blks : ?rev:bool -> t -> Blk.t seq val map_of_blks : t -> Blk.t Label.Tree.t - val map_blks : t -> f:(Blk.t -> Blk.t) -> t val with_blks : t -> Blk.t list -> t Or_error.t val insert_slot : t -> Virtual.slot -> t end @@ -163,6 +170,8 @@ module Make(M : L) : sig end = struct open M + (* Set all known scalars to `Top` according to `f`, which is the + set of variables that escape. *) let escaping f x s = Set.fold (f x) ~init:s ~f:(fun s v -> match Map.find s v with @@ -170,36 +179,61 @@ end = struct Map.set s ~key:ptr ~data:Top | Some _ | None -> s) + (* Transfer function for a single instruction. *) let transfer_op slots s op = let value = match Insn.offset op with | Some (ptr, offset) -> State.derive slots s ptr offset | None -> Insn.copy_of op |> Option.bind ~f:(Map.find s) in - let s = match value, Insn.lhs op with - | Some v, Some lhs -> Map.set s ~key:lhs ~data:v - | None, _ | _, None -> s in + let s = match value with + | None -> s + | Some v -> + Insn.lhs op |> Set.fold ~init:s + ~f:(fun s key -> Map.set s ~key ~data:v) in escaping Insn.escapes op s + let blkargs blks (l, xs) = + Label.Tree.find blks l |> + Option.value_map ~default:[] ~f:(fun b -> + let args = Seq.to_list @@ Blk.args b in + match List.zip xs args with + | Unequal_lengths -> [] + | Ok args' -> args') + + (* Transfer for control-flow instruction. *) + let transfer_ctrl blks s c = + let init = escaping Ctrl.escapes c s in + (* Propagate the block parameters we are passing. *) + Ctrl.locals c |> List.bind ~f:(blkargs blks) |> + List.fold ~init ~f:(fun acc (src, dst) -> + if Var.(src = dst) then acc + else match Map.find acc src with + | Some v -> Map.set acc ~key:dst ~data:v + | None -> acc) + + (* Transfer function for a block. *) let transfer slots blks l s = Label.Tree.find blks l |> Option.value_map ~default:s ~f:(fun b -> Blk.insns b |> Seq.map ~f:Insn.op |> Seq.fold ~init:s ~f:(transfer_op slots) |> - escaping Ctrl.escapes (Blk.ctrl b)) + transfer_ctrl blks @< Blk.ctrl b) + (* Initial constraints. *) let initialize slots blks = (* Set all slots to point to their own base address. *) - let slots = Map.mapi slots ~f:(fun ~key ~data:_ -> Offset (key, 0L)) in - (* Any slot that escapes should immediately be set to `Top`. *) - let slots = - Label.Tree.fold blks ~init:slots ~f:(fun ~key:_ ~data init -> - Blk.insns data |> Seq.fold ~init ~f:(fun s i -> - escaping Insn.escapes (Insn.op i) s) |> - escaping Ctrl.escapes (Blk.ctrl data)) in - (* Start at pseudoentry. *) - let nodes = Label.Map.singleton Label.pseudoentry slots in - Solution.create nodes Var.Map.empty - - let analyze slots fn = + let init = Map.mapi slots ~f:(fun ~key ~data:_ -> Offset (key, 0L)) in + (* Any slot that directly escapes should immediately be set to `Top`. *) + Label.Tree.fold blks ~init ~f:(fun ~key:_ ~data init -> + Blk.insns data |> Seq.fold ~init ~f:(fun s i -> + escaping Insn.escapes (Insn.op i) s) |> + escaping Ctrl.escapes (Blk.ctrl data)) |> + Label.Map.singleton Label.pseudoentry |> + Solution.create @< State.empty + + type solution = (Label.t, state) Solution.t + + (* Run the dataflow analysis. *) + let analyze slots fn : solution = let cfg = Cfg.create fn in let blks = Func.map_of_blks fn in Graphlib.fixpoint (module Cfg) cfg @@ -209,14 +243,11 @@ end = struct ~merge:State.merge ~f:(transfer slots blks) + (* All slots mapped to their names. *) let collect_slots fn = Func.slots fn |> Seq.fold ~init:Var.Map.empty ~f:(fun acc s -> Map.set acc ~key:(Virtual.Slot.var s) ~data:s) - let is_base s offset size = match offset with - | 0L -> Virtual.Slot.size s = size - | _ -> false - (* A memory access for a slot. *) type access = { insn : Insn.t; @@ -225,16 +256,26 @@ end = struct ldst : load_or_store; } - let cmp_access a b = Int64.compare a.off b.off + type accesses = access list Var.Map.t + + let basic_size ty = Type.sizeof_basic ty / 8 + let sizeof_access a = basic_size a.ty + + let cmp_access a b = + match Int64.compare a.off b.off with + | 0 -> Int.compare (sizeof_access a) (sizeof_access b) + | c -> c let pp_access ppf a = - Format.fprintf ppf "%a[%a.%a +0x%Lx]" + let neg = Int64.is_negative a.off in + let pre, off = if neg then '-', Int64.neg a.off else '+', a.off in + Format.fprintf ppf "(%a %a.%a %c0x%Lx)" Label.pp (Insn.label a.insn) pp_load_or_store a.ldst Type.pp_basic a.ty - a.off + pre off - let collect_accesses slots fn s : access list Var.Map.t = + let collect_accesses slots fn (s : solution) : accesses = (* Group all memory accesses by their corresponding slot. *) Func.blks fn |> Seq.fold ~init:Var.Map.empty ~f:(fun init b -> let s = ref @@ Solution.get s @@ Blk.label b in @@ -251,15 +292,15 @@ end = struct (* Filter out slots that are not splittable. *) Map.map ~f:(List.sort ~compare:cmp_access) |> Map.filteri ~f:(fun ~key ~data -> + let check x y= + let sx = sizeof_access x in + (* No partial overlaps. *) + Int64.(x.off + of_int sx <= y.off) || + (* Allow exact re-use of the same region. *) + cmp_access x y = 0 in let rec ok = function - | [] | [_] -> true - | x :: y :: rest -> - let sz = Type.sizeof_basic x.ty / 8 in - ((* No partial overlaps. *) - Int64.(x.off + of_int sz <= y.off) || - (* Allow exact re-use of the same region. *) - Int64.(x.off = y.off) && Type.equal_basic x.ty y.ty - ) && ok (y :: rest) in + | x :: ((y :: _) as xs) -> check x y && ok xs + | [] | [_] -> true in let res = ok data in if debug && not res then Format.eprintf "filtering out accesses for %a\n%!" Var.pp key; @@ -268,31 +309,48 @@ end = struct let overlaps oa sa ob sb = Int64.(oa < ob + of_int sb && ob < oa + of_int sa) + let within oa sa ob sb = + Int64.(oa >= ob && oa + of_int sa <= ob + of_int sb) + + (* A partition of memory accesses at a particular offset+size range. *) type partition = { off : int64; size : int; mems : access list; } + type partitions = partition list Var.Map.t + let cmp_partition a b = Int64.compare a.off b.off + (* Check if a partition covers the entire slot `s`. *) + let is_entire_slot s p = match p.off with + | 0L -> Virtual.Slot.size s = p.size + | _ -> false + let pp_partition ppf p = - Format.fprintf ppf "0x%Lx:%d: (%a)" + Format.fprintf ppf "0x%Lx:%d: @[%a@]" p.off p.size (Format.pp_print_list - ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") + ~pp_sep:(fun ppf () -> Format.fprintf ppf " ") pp_access) p.mems - let partition_acesses m : partition list Var.Map.t = + (* Sort the memory accesses into self-contained, non-overlapping + partitions, which are the fully-or-partially scalarized sub-objects + of the aggregate. *) + let partition_acesses m : partitions = let rec merge acc c = function | [] -> List.sort (c :: acc) ~compare:cmp_partition | x :: xs -> - let sx = Type.sizeof_basic x.ty / 8 in - if overlaps c.off c.size x.off sx then + let sx = sizeof_access x in + if Int64.(c.off = x.off) && c.size = sx then + (* Access exactly matches the current partition. *) + merge acc {c with mems = x :: c.mems} xs + else if overlaps c.off c.size x.off sx then + (* Access overlaps with current partition, so the partition + must increase in size. *) let o' = Int64.min c.off x.off in - let ec = Int64.(c.off + of_int c.size) in - let ex = Int64.(x.off + of_int sx) in - let e' = Int64.max ec ex in + let e' = Int64.(max (c.off + of_int c.size) (x.off + of_int sx)) in let s' = Int64.(to_int_exn (e' - o')) in merge acc { off = o'; @@ -300,151 +358,130 @@ end = struct mems = x :: c.mems; } xs else + (* No overlap, so we start a new partition. *) merge (c :: acc) { off = x.off; size = sx; mems = [x]; } xs in - Map.map m ~f:(fun (accesses : access list) -> - List.sort accesses ~compare:(fun a b -> - Int64.compare a.off b.off) |> function + (* pre: each access list is sorted *) + Map.map m ~f:(function | [] -> [] | (x : access) :: xs -> merge [] { off = x.off; - size = Type.sizeof_basic x.ty / 8; + size = sizeof_access x; mems = [x]; } xs) (* Turn each partition into a concrete slot. *) - let materialize_partitions slots m = + let materialize_partitions slots parts : scalars Context.t = + Map.to_sequence parts |> Seq.filter_map ~f:(fun (base, ps) -> + Map.find slots base |> Option.map ~f:(fun s -> base, ps, s)) |> + Context.Seq.fold ~init:Scalar.Map.empty ~f:(fun init (base, ps, s) -> + Seq.of_list ps |> Seq.filter ~f:(not @. is_entire_slot s) |> + Context.Seq.fold ~init ~f:(fun acc p -> + let open Context.Syntax in + (* TODO: look through `p.mems` and see if there is a store + that is larger than other acesses (i.e. `st.l` followed + by one or more `ld.w`). If so, this partition could be + broken down further if we modify the store instruction(s). *) + let* x = Context.Var.fresh in + let*? s = Virtual.Slot.create x ~size:p.size ~align:p.size in + if debug then + Format.eprintf "new slot %a, base=%a, off=0x%Lx, size=%d\n%!" + Var.pp x Var.pp base p.off p.size; + !!(Map.set acc ~key:(base, p.off) ~data:s))) + + (* Find the corresponding partition for [base+off, base+off+size). *) + let find_partition (parts : partitions) base off size = + Map.find parts base |> + Option.bind ~f:(List.find ~f:(fun p -> + within off size p.off p.size)) + + (* Exact cover for a scalar at `base + off`. *) + let rewrite_insn_exact (m : scalars) i ~exact ~base ~off = let open Context.Syntax in - Map.to_sequence m |> Context.Seq.fold - ~init:Scalar.Map.empty ~f:(fun init (base, ps) -> - let s = Map.find_exn slots base in - List.filter ps ~f:(fun p -> - not @@ is_base s p.off p.size) |> - Context.List.fold ~init ~f:(fun acc p -> - let* x = Context.Var.fresh in - let*? s = Virtual.Slot.create x ~size:p.size ~align:p.size in - if debug then - Format.eprintf "new slot %a, base=%a, off=0x%Lx, size=%d\n%!" - Var.pp x Var.pp base p.off p.size; - !!(Map.set acc ~key:(base, p.off) ~data:s))) - - let cover_exact m base off size = - Map.find m base |> Option.bind ~f:(fun ps -> - let rec go o r acc = function - | _ when r <= 0 -> Some (List.rev acc) - | [] -> None - | p :: ps -> - let pe = Int64.(p.off + of_int p.size) in - let re = Int64.(o + of_int r) in - if Int64.(p.off <= o && re <= pe) then - (* Request satisfies entire partition. *) - Some (List.rev (p.off :: acc)) - else if Int64.(p.off = o) && p.size <= r then - (* Request is partly covered by the partition. *) - let o' = Int64.(o + of_int p.size) in - go o' (r - p.size) (p.off :: acc) ps - else if Int64.(p.off < o) then - (* Partition starts before this request. *) - go o r acc ps - else None in - go off size [] ps) - - (* pre: op is a store *) - let split_into_parts op ptr base covers m = - List.filter_map covers ~f:(fun o -> - Map.find m (base, o) |> Option.map ~f:(fun s -> - Insn.subst_load_or_store op - ~f:(const @@ Virtual.Slot.var s))) - - let insert_new_slots fn m = Map.fold m ~init:fn - ~f:(fun ~key:_ ~data fn -> Func.insert_slot fn data) - - let rewrite_one parts m s i = + if debug then + Format.eprintf "exact=0x%Lx, off=0x%Lx, base=%a\n%!" + exact off Var.pp base; + let op = Insn.op i in + let delta = Int64.(off - exact) in + match Map.find m (base, exact) with + | None -> + if debug then + Format.eprintf "no slot found\n%!"; + !![i] + | Some s when Int64.(delta = 0L) -> + if debug then + Format.eprintf "found slot %a (base)\n%!" + Var.pp (Virtual.Slot.var s); + (* Store to base of new slot. *) + let addr = Virtual.Slot.var s in + let op' = Insn.replace_load_or_store_addr addr op in + !![Insn.with_op i op'] + | Some s -> + if debug then + Format.eprintf "found slot %a (delta 0x%Lx)\n%!" + Var.pp (Virtual.Slot.var s) delta; + (* Compute offset of new slot and store to it. *) + let* l = Context.Label.fresh in + let* y = Context.Var.fresh in + let+ word = Context.target >>| Target.word in + let a = Insn.add y word (Virtual.Slot.var s) delta in + let op' = Insn.replace_load_or_store_addr y op in + [Insn.create ~label:l a; Insn.with_op i op'] + + (* Rewrite an instruction. *) + let rewrite_insn parts (m : scalars) (s : state) i = let open Context.Syntax in let op = Insn.op i in - let* word = Context.target >>| Target.word in match Insn.load_or_store_to op with | None -> !![i] - | Some (ptr, ty, load_or_store) -> + | Some (ptr, ty, ldst) -> if debug then Format.eprintf "%a: looking at %a.%a to %a\n%!" Label.pp (Insn.label i) - pp_load_or_store load_or_store + pp_load_or_store ldst Type.pp_basic ty Var.pp ptr; - let sz = Type.sizeof_basic ty / 8 in - match Map.find !s ptr with + match Map.find s ptr with | Some Top | None -> !![i] | Some Offset (base, off) -> - match cover_exact parts base off sz with - | Some [o] -> - if debug then - Format.eprintf "exact=0x%Lx, off=0x%Lx, base=%a\n%!" o off Var.pp base; - let o' = Int64.(off - o) in - begin match Map.find m (base, o) with - | None -> !![i] - | Some s when Int64.(o' = 0L) -> - if debug then - Format.eprintf "found slot %a\n%!" Var.pp (Virtual.Slot.var s); - let op' = Insn.subst_load_or_store op ~f:(const @@ Virtual.Slot.var s) in - !![Insn.with_op i op'] - | Some s -> - if debug then - Format.eprintf "found slot %a\n%!" Var.pp (Virtual.Slot.var s); - let* l = Context.Label.fresh in - let* y = Context.Var.fresh in - let a = Insn.add y word (Virtual.Slot.var s) o' in - let op' = Insn.subst_load_or_store op ~f:(const y) in !![ - Insn.create ~label:l a; - Insn.with_op i op'; - ] - end - | Some covers when is_store load_or_store -> - if debug then - Format.eprintf "%a: splitting\n%!" Label.pp @@ Insn.label i; - split_into_parts op ptr base covers m |> - Context.List.map ~f:(fun op -> - let+ l = Context.Label.fresh in - Insn.create ~label:l op) - | Some covers -> - if debug then - Format.eprintf "multi or no-part load: %d\n%!" (List.length covers); - !![i] + match find_partition parts base off @@ basic_size ty with + | Some p -> rewrite_insn_exact m i ~exact:p.off ~base ~off | None -> if debug then Format.eprintf "no parts found\n%!"; !![i] - let rewrite_with_partitions slots fn s parts m = + let rewrite_with_partitions slots fn (s : solution) parts m = let open Context.Syntax in - let* blks = - Func.blks fn |> Context.Seq.map ~f:(fun b -> - let s = ref @@ Solution.get s @@ Blk.label b in - let+ insns = - Blk.insns b |> Context.Seq.map ~f:(fun i -> - let+ is = rewrite_one parts m s i in - s := transfer_op slots !s @@ Insn.op i; - is) - >>| Fn.compose List.concat Seq.to_list in - Blk.with_insns b insns) >>| Seq.to_list in + let* blks = Func.blks fn |> Context.Seq.map ~f:(fun b -> + let s = ref @@ Solution.get s @@ Blk.label b in + let+ insns = Blk.insns b |> Context.Seq.map ~f:(fun i -> + let+ is = rewrite_insn parts m !s i in + s := transfer_op slots !s @@ Insn.op i; is) + >>| List.concat @. Seq.to_list in + Blk.with_insns b insns) >>| Seq.to_list in Context.lift_err @@ Func.with_blks fn blks + let insert_new_slots fn m = Map.fold m ~init:fn + ~f:(fun ~key:_ ~data fn -> Func.insert_slot fn data) + let run fn = let open Context.Syntax in let slots = collect_slots fn in - let s = analyze slots fn in - let accs = collect_accesses slots fn s in - let parts = partition_acesses accs in - if debug then - Map.iteri parts ~f:(fun ~key ~data -> - Format.eprintf "partitions for %a:\n%!" Var.pp key; - List.iter data ~f:(fun p -> - Format.eprintf " %a\n%!" pp_partition p)); - let* m = materialize_partitions slots parts in - let fn = insert_new_slots fn m in - rewrite_with_partitions slots fn s parts m + if Map.is_empty slots then !!fn else + let s = analyze slots fn in + let accs = collect_accesses slots fn s in + let parts = partition_acesses accs in + if debug then + Map.iteri parts ~f:(fun ~key ~data -> + Format.eprintf "partitions for %a:\n%!" Var.pp key; + List.iter data ~f:(fun p -> + Format.eprintf " %a\n%!" pp_partition p)); + let* m = materialize_partitions slots parts in + let fn = insert_new_slots fn m in + rewrite_with_partitions slots fn s parts m end From 745caf288953da33e5fb68cb34b574201e43a6db Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 9 Nov 2025 16:09:01 -0500 Subject: [PATCH 08/62] `Resolve_constant_blk_args` for ABI --- src/lib/dune | 1 + src/lib/egraph/egraph_input.ml | 38 ++++++++-- src/lib/passes/passes.ml | 6 +- src/lib/passes/passes.mli | 1 + src/lib/passes/resolve_constant_blk_args.ml | 81 ++++++++++++++++++--- src/lib/phi_values.ml | 60 ++++++++------- src/lib/subst_mapper_abi.ml | 74 +++++++++++++++++++ 7 files changed, 214 insertions(+), 47 deletions(-) create mode 100644 src/lib/subst_mapper_abi.ml diff --git a/src/lib/dune b/src/lib/dune index 539cebce..f7b26f42 100644 --- a/src/lib/dune +++ b/src/lib/dune @@ -41,6 +41,7 @@ resolver_intf sm subst_mapper + subst_mapper_abi tags var_internal virtual_lexer diff --git a/src/lib/egraph/egraph_input.ml b/src/lib/egraph/egraph_input.ml index 6c479930..7ccf2040 100644 --- a/src/lib/egraph/egraph_input.ml +++ b/src/lib/egraph/egraph_input.ml @@ -12,11 +12,39 @@ module Operands = Set.Make(struct type t = operand [@@deriving compare, sexp] end) -module Phis = Phi_values.Make(struct - type t = Operands.t [@@deriving equal] - let one = Operands.singleton - let join = Set.union - end) +module Phis_lang = struct + module Ctrl = struct + type t = ctrl + + let table d ds tbl = + Ctrl.Table.enum tbl |> Seq.map ~f:snd |> + Seq.map ~f:(fun (`label (l, args)) -> l, args) |> + Seq.to_list |> List.cons (d, ds) + + let locals = function + | `hlt -> [] + | `jmp #global -> [] + | `jmp `label (l, args) -> [l, args] + | `br (_, #global, #global) -> [] + | `br (_, `label (y, ys), #global) -> [y, ys] + | `br (_, #global, `label (n, ns)) -> [n, ns] + | `br (_, `label (y, ys), `label (n, ns)) -> [y, ys; n, ns] + | `ret _ -> [] + | `sw (_, _, `label (d, ds), tbl) -> table d ds tbl + end + + module Blk = Blk + module Func = Func + module Cfg = Cfg +end + +module Phis_domain = struct + type t = Operands.t [@@deriving equal] + let one = Operands.singleton + let join = Set.union +end + +module Phis = Phi_values.Make(Phis_lang)(Phis_domain) (* General information about the function we're translating. *) type t = { diff --git a/src/lib/passes/passes.ml b/src/lib/passes/passes.ml index db6b1ff1..eaf56e7c 100644 --- a/src/lib/passes/passes.ml +++ b/src/lib/passes/passes.ml @@ -59,10 +59,8 @@ let to_abi tenv m = ~data:(Seq.to_list @@ Module.data m) let optimize_abi m = - let* m = - Abi.Module.funs m |> - Context.Seq.map ~f:Sroa.run_abi >>| - Fun.compose (Abi.Module.with_funs m) Seq.to_list in + let*? m = Abi.Module.map_funs_err m ~f:Resolve_constant_blk_args.run_abi in + let* m = Context.Virtual.Module.map_funs_abi m ~f:Sroa.run_abi in let*? m = Abi.Module.map_funs_err m ~f:Promote_slots.run_abi in let*? m = Abi.Module.map_funs_err m ~f:Abi_loadopt.run in let m = Abi.Module.map_funs m ~f:Remove_disjoint_blks.run_abi in diff --git a/src/lib/passes/passes.mli b/src/lib/passes/passes.mli index be95b1f8..2b2d1b5f 100644 --- a/src/lib/passes/passes.mli +++ b/src/lib/passes/passes.mli @@ -121,6 +121,7 @@ end *) module Resolve_constant_blk_args : sig val run : func -> func Or_error.t + val run_abi : Abi.func -> Abi.func Or_error.t end (** Performs the classic Sparse Conditional Constant diff --git a/src/lib/passes/resolve_constant_blk_args.ml b/src/lib/passes/resolve_constant_blk_args.ml index 2c72dcac..d4b426a8 100644 --- a/src/lib/passes/resolve_constant_blk_args.ml +++ b/src/lib/passes/resolve_constant_blk_args.ml @@ -1,19 +1,57 @@ open Core +open Regular.Std open Virtual -module Phis = Phi_values.Make(struct - (* `None` indicates that there may be many values for the phi, - while `Some v` indicates that `v` is its singular value. +let table enum d ds tbl = + enum tbl |> Seq.map ~f:snd |> + Seq.map ~f:(fun (`label (l, args)) -> l, args) |> + Seq.to_list |> List.cons (d, ds) - We choose `option` because filtering the substitution produced - by the analysis can use the identity function via `Map.filter_map`, - thus avoiding extra allocations. - *) - type t = operand option [@@deriving equal] +let locals enum = function + | `hlt -> [] + | `jmp #global -> [] + | `jmp `label (l, args) -> [l, args] + | `br (_, #global, #global) -> [] + | `br (_, `label (y, ys), #global) -> [y, ys] + | `br (_, #global, `label (n, ns)) -> [n, ns] + | `br (_, `label (y, ys), `label (n, ns)) -> [y, ys; n, ns] + | `ret _ -> [] + | `sw (_, _, `label (d, ds), tbl) -> table enum d ds tbl - let one v = Some v - let join x y = if equal x y then x else None - end) +module D = struct + (* `None` indicates that there may be many values for the phi, + while `Some v` indicates that `v` is its singular value. + + We choose `option` because filtering the substitution produced + by the analysis can use the identity function via `Map.filter_map`, + thus avoiding extra allocations. + *) + type t = operand option [@@deriving equal] + + let one v = Some v + let join x y = if equal x y then x else None +end + +module Phis_v = Phi_values.Make(struct + module Ctrl = struct + type t = ctrl + let locals = (locals Ctrl.Table.enum :> t -> _) + end + module Blk = Blk + module Func = Func + module Cfg = Cfg + end)(D) + +module Phis_a = Phi_values.Make(struct + open Abi + module Ctrl = struct + type t = ctrl + let locals = (locals Ctrl.Table.enum :> t -> _) + end + module Blk = Blk + module Func = Func + module Cfg = Cfg + end)(D) let run fn = if Dict.mem (Func.dict fn) Tags.ssa then @@ -21,7 +59,7 @@ let run fn = let blks = Func.map_of_blks fn in let s = Map.filter_map ~f:Fn.id @@ - Phis.analyze ~blk:(Label.Tree.find blks) cfg in + Phis_v.analyze ~blk:(Label.Tree.find blks) cfg in let fn = if not @@ Map.is_empty s then Func.map_blks fn ~f:(fun b -> let is, k = Subst_mapper.map_blk s b in @@ -32,3 +70,22 @@ let run fn = Or_error.errorf "In Resolve_constant_blk_args: function $%s is \ not in SSA form" (Func.name fn) + +let run_abi fn = + let open Abi in + if Dict.mem (Func.dict fn) Tags.ssa then + let cfg = Cfg.create fn in + let blks = Func.map_of_blks fn in + let s = + Map.filter_map ~f:Fn.id @@ + Phis_a.analyze ~blk:(Label.Tree.find blks) cfg in + let fn = + if not @@ Map.is_empty s then Func.map_blks fn ~f:(fun b -> + let is, k = Subst_mapper_abi.map_blk s b in + Blk.(with_ctrl (with_insns b is) k)) + else fn in + Ok fn + else + Or_error.errorf + "In Resolve_constant_blk_args (ABI): function $%s is \ + not in SSA form" (Func.name fn) diff --git a/src/lib/phi_values.ml b/src/lib/phi_values.ml index 282bc772..3a5137b3 100644 --- a/src/lib/phi_values.ml +++ b/src/lib/phi_values.ml @@ -3,43 +3,51 @@ open Core open Regular.Std open Graphlib.Std -open Virtual module type Domain = sig type t [@@deriving equal] - val one : operand -> t + val one : Virtual.operand -> t val join : t -> t -> t end -module Make(D : Domain) = struct +module type L = sig + module Ctrl : sig + type t + val locals : t -> (Label.t * Virtual.operand list) list + end + module Blk : sig + type t + val args : ?rev:bool -> t -> Var.t seq + val ctrl : t -> Ctrl.t + end + module Func : sig + type t + end + module Cfg : sig + include Label.Graph_s + val create : Func.t -> t + end +end + +module Make(M : L)(D : Domain) = struct + open M + type state = D.t Var.Map.t [@@deriving equal] - let local ~blk s : local -> state = function - | `label (l, vs) -> - blk l |> Option.value_map ~default:s ~f:(fun b -> - let args = Seq.to_list @@ Blk.args b in - match List.zip args vs with - | Unequal_lengths -> assert false - | Ok xs -> List.fold xs ~init:s ~f:(fun s (x, v) -> - Map.update s x ~f:(function - | Some vs -> D.(join vs @@ one v) - | None -> D.one v))) - - let dst ~blk s : dst -> state = function - | #local as l -> local ~blk s l - | #global -> s + let local ~blk s (l, vs) : state = + blk l |> Option.value_map ~default:s ~f:(fun b -> + let args = Seq.to_list @@ Blk.args b in + match List.zip args vs with + | Unequal_lengths -> assert false + | Ok xs -> List.fold xs ~init:s ~f:(fun s (x, v) -> + Map.update s x ~f:(function + | Some vs -> D.(join vs @@ one v) + | None -> D.one v))) let transfer ~blk l s = blk l |> Option.value_map ~default:s ~f:(fun b -> - match Blk.ctrl b with - | `hlt | `ret _ -> s - | `jmp d -> dst ~blk s d - | `br (_, y, n) -> - dst ~blk (dst ~blk s y) n - | `sw (_, _, d, tbl) -> - let init = local ~blk s d in - Ctrl.Table.enum tbl |> Seq.fold ~init - ~f:(fun s (_, l') -> local ~blk s l')) + Blk.ctrl b |> Ctrl.locals |> + List.fold ~init:s ~f:(local ~blk)) let merge = Map.merge_skewed ~combine:(fun ~key:_ -> D.join) diff --git a/src/lib/subst_mapper_abi.ml b/src/lib/subst_mapper_abi.ml new file mode 100644 index 00000000..f43f81ec --- /dev/null +++ b/src/lib/subst_mapper_abi.ml @@ -0,0 +1,74 @@ +(* Same as `Subst_mapper` with as much re-use as possible. *) + +open Core +open Regular.Std +open Virtual +open Abi +open Subst_mapper + +type t = operand Var.Map.t + +let map_callarg subst : Insn.callarg -> Insn.callarg = function + | `reg (a, r) -> `reg (map_arg subst a, r) + | `stk (a, o) -> `stk (map_arg subst a, o) + +let map_op subst (op : Insn.op) = + let arg = map_arg subst in + match op with + | `bop (x, b, l, r) -> `bop (x, b, arg l, arg r) + | `uop (x, u, a) -> `uop (x, u, arg a) + | `sel (x, t, c, l, r) -> map_sel subst x t c l r + | `call (x, f, args) -> + let f = map_global subst f in + let args = List.map args ~f:(map_callarg subst) in + `call (x, f, args) + | `load (x, t, a) -> `load (x, t, arg a) + | `store (t, v, a) -> `store (t, arg v, arg a) + | `regcopy _ -> op + | `regstore (r, a) -> `regstore (r, arg a) + | `regassign (r, a) -> `regassign (r, arg a) + | `stkargs _ -> op + +let map_insn subst i = + Insn.with_op i @@ map_op subst @@ Insn.op i + +let map_tbl_entry subst i l = i, map_local subst l + +let map_sw subst t i d tbl = + let d = map_local subst d in + let tbl = Ctrl.Table.map_exn tbl ~f:(map_tbl_entry subst) in + match i with + | `sym _ -> `sw (t, i, d, tbl) + | `var x -> match Map.find subst x with + | Some (#Ctrl.swindex as i) -> `sw (t, i, d, tbl) + | Some `int (i, _) -> + let d = Ctrl.Table.find tbl i |> Option.value ~default:d in + `jmp (d :> dst) + | Some o -> invalid "sw" o + | None -> `sw (t, i, d, tbl) + +let map_ctrl subst : ctrl -> ctrl = function + | `hlt -> `hlt + | `jmp d -> `jmp (map_dst subst d) + | `br (c, y, n) -> map_br subst c y n + | `ret xs -> `ret (List.map xs ~f:(fun (r, a) -> r, map_arg subst a)) + | `sw (t, i, d, tbl) -> map_sw subst t i d tbl + +let map_blk subst b = + let insns = Blk.insns b |> Seq.map ~f:(map_insn subst) in + Seq.to_list insns, map_ctrl subst (Blk.ctrl b) + +(* TODO: should we enable more than just the `jmp` case? With + `br` and `switch` we can have different applications of the + substitution for the same destination. *) +let map_blk_args subst b l = match Blk.ctrl b with + | `jmp `label (l', args) when Label.(l = l') -> + Some (List.map args ~f:(map_arg subst)) + | _ -> None + +let blk_extend subst b b' = + Blk.label b' |> map_blk_args subst b |> Option.bind ~f:(fun args -> + Blk.args b' |> Seq.to_list |> List.zip args |> function + | Ok l -> Option.some @@ List.fold l ~init:subst + ~f:(fun subst (o, x) -> Map.set subst ~key:x ~data:o) + | Unequal_lengths -> None) From d74c23f4b24050e9ef07b4e3a689891163d91af8 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 9 Nov 2025 16:09:14 -0500 Subject: [PATCH 09/62] Expose SROA pass --- src/lib/passes/passes.ml | 1 + src/lib/passes/passes.mli | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/src/lib/passes/passes.ml b/src/lib/passes/passes.ml index eaf56e7c..6d5279d4 100644 --- a/src/lib/passes/passes.ml +++ b/src/lib/passes/passes.ml @@ -11,6 +11,7 @@ module Remove_disjoint_blks = Remove_disjoint_blks module Resolve_constant_blk_args = Resolve_constant_blk_args module Sccp = Sccp module Simplify_cfg = Simplify_cfg +module Sroa = Sroa module Ssa = Ssa let initialize m = diff --git a/src/lib/passes/passes.mli b/src/lib/passes/passes.mli index 2b2d1b5f..b81867a7 100644 --- a/src/lib/passes/passes.mli +++ b/src/lib/passes/passes.mli @@ -142,6 +142,25 @@ module Simplify_cfg : sig val run : Typecheck.env -> func -> func Context.t end +(** Performs Scalar Replacement of Aggregates (SROA). + + This aims to analyze access patterns of stack slots + and break them up into individual (scalar) components. + + An important detail to note is that in the non-ABI pass, + slots being referenced in terms of user-defined compound + (aggregate) types are treated as opaque, and therefore + not optimized, as these are often ABI-dependent. + + When ABI lowering is performed, these references should + be desugared into access patterns on primitive types, + at which point the algorithm can attempt to process them. +*) +module Sroa : sig + val run : func -> func Context.t + val run_abi : Abi.func -> Abi.func Context.t +end + (** Transforms a function into semi-pruned SSA form. *) module Ssa : sig val run : func -> func Or_error.t From b4ad93af87508aeb6a0d7ebc62735dfd5322d6da Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 9 Nov 2025 21:28:54 -0500 Subject: [PATCH 10/62] Native test for `sumphi` --- src/test/data/opt/sumphi.driver.sysv.amd64.c | 25 ++++++++++++++ .../opt/sumphi.vir.opt.sysv.amd64.regalloc | 34 +++++++++++++++++++ src/test/test_opt.ml | 2 ++ 3 files changed, 61 insertions(+) create mode 100644 src/test/data/opt/sumphi.driver.sysv.amd64.c create mode 100644 src/test/data/opt/sumphi.vir.opt.sysv.amd64.regalloc diff --git a/src/test/data/opt/sumphi.driver.sysv.amd64.c b/src/test/data/opt/sumphi.driver.sysv.amd64.c new file mode 100644 index 00000000..89c0499e --- /dev/null +++ b/src/test/data/opt/sumphi.driver.sysv.amd64.c @@ -0,0 +1,25 @@ +#include + +struct t { + int a; + int b; +}; + +int sumphi(struct t a, struct t b, int x); + +int main() { + struct t s1 = {3, 4}; + struct t s2 = {10, -2}; + int res1 = sumphi(s1, s2, -1); + assert(res1 == 7); + int res2 = sumphi(s1, s2, 0); + assert(res2 == 8); + + struct t s3 = {5, 6}; + struct t s4 = {100, 200}; + int res3 = sumphi(s3, s4, -5); + int res4 = sumphi(s3, s4, 42); + assert(res3 == 11); + assert(res4 == 300); + return 0; +} diff --git a/src/test/data/opt/sumphi.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/sumphi.vir.opt.sysv.amd64.regalloc new file mode 100644 index 00000000..2ca3471b --- /dev/null +++ b/src/test/data/opt/sumphi.vir.opt.sysv.amd64.regalloc @@ -0,0 +1,34 @@ +module sumphi + +function $sum { ; returns: rax +@2: + sub rsp, 0x8_l ; @55 + mov qword ptr [rsp], rdi ; @38 + mov eax, dword ptr [rsp + 0x4] ; @6 + add eax, dword ptr [rsp] ; @20 + add rsp, 0x8_l ; @56 + ret ; @37 +} + +export function $sumphi { ; returns: rax +@8: + push rbp ; @59 + mov rbp, rsp ; @60 + sub rsp, 0x20_l ; @61 + mov qword ptr [rbp - 0x20], rdi ; @54 + test edx, edx ; @47 + js @9 ; @48 +@10: + mov qword ptr [rbp - 0x18], rsi ; @32 + lea rax, qword ptr [rbp - 0x18] ; @45 + jmp @14 ; @46 +@9: + mov qword ptr [rbp - 0x10], rdi ; @34 + lea rax, qword ptr [rbp - 0x10] ; @43 +@14: + mov dword ptr [rbp - 0x20], 0x5_w ; @17 + mov rdi, qword ptr [rax] ; @35 + call $sum ; rdi ; @41 + leave ; @62 + ret ; @40 +} diff --git a/src/test/test_opt.ml b/src/test/test_opt.ml index cfb97c78..975d134d 100644 --- a/src/test/test_opt.ml +++ b/src/test/test_opt.ml @@ -413,6 +413,7 @@ let regalloc_suite = "Test register allocation" >::: [ "Analyze array (SysV AMD64)" >:: test_sysv_amd64_regalloc "analyze_array"; "Slot promotion 2 (GCD, partial) (SysV AMD64)" >:: test_sysv_amd64_regalloc "promote2-partial"; "Parallel moves (SysV AMD64)" >:: test_sysv_amd64_regalloc "parallel"; + "Struct in a block argument (SysV AMD64)" >:: test_sysv_amd64_regalloc "sumphi"; ] let native_suite = "Test native code" >::: [ @@ -439,6 +440,7 @@ let native_suite = "Test native code" >::: [ "Analyze array (SysV AMD64)" >:: test_sysv_amd64_native "analyze_array"; "Unsigned remainder by 7 (SysV AMD64)" >:: test_sysv_amd64_native "uremby7"; "Slot promotion 2 (GCD, partial) (SysV AMD64)" >:: test_sysv_amd64_native "promote2-partial"; + "Struct in a block argument (SysV AMD64)" >:: test_sysv_amd64_native "sumphi"; ] let () = run_test_tt_main @@ test_list [ From ae12c1e84bb72107c988499c6402c2ba5d780bf7 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 9 Nov 2025 22:56:01 -0500 Subject: [PATCH 11/62] Move scalars analysis to its own module --- src/lib/dune | 1 + src/lib/passes/sroa/sroa.ml | 1 + src/lib/passes/sroa/sroa_impl.ml | 240 +------------------------------ src/lib/scalars.ml | 238 ++++++++++++++++++++++++++++++ 4 files changed, 247 insertions(+), 233 deletions(-) create mode 100644 src/lib/scalars.ml diff --git a/src/lib/dune b/src/lib/dune index f7b26f42..5b5a17e9 100644 --- a/src/lib/dune +++ b/src/lib/dune @@ -39,6 +39,7 @@ patricia_tree_intf phi_values resolver_intf + scalars sm subst_mapper subst_mapper_abi diff --git a/src/lib/passes/sroa/sroa.ml b/src/lib/passes/sroa/sroa.ml index 45d11dec..957056c6 100644 --- a/src/lib/passes/sroa/sroa.ml +++ b/src/lib/passes/sroa/sroa.ml @@ -1,6 +1,7 @@ open Core open Regular.Std open Virtual +open Scalars open Sroa_impl let var_set_of_option = function diff --git a/src/lib/passes/sroa/sroa_impl.ml b/src/lib/passes/sroa/sroa_impl.ml index f21420f7..ae3738ee 100644 --- a/src/lib/passes/sroa/sroa_impl.ml +++ b/src/lib/passes/sroa/sroa_impl.ml @@ -11,242 +11,16 @@ open Core open Regular.Std open Graphlib.Std +open Scalars let debug = false -let (@.) = Fn.compose -let (@<) = Fn.flip - -(* A scalar access. *) -module Scalar = struct - module T = struct - type t = Var.t * int64 [@@deriving compare, equal, hash, sexp] - end - include T - module Map = Map.Make(T) - module Table = Hashtbl.Make(T) -end - -type scalar = Scalar.t [@@deriving compare, equal, hash, sexp] -type scalars = Virtual.slot Scalar.Map.t - -(* Lattice of scalar accesses. - - [Top]: the access is inconsistent or escapes - - [Offset (s, o)]: access to slot [s] at offset [o] -*) -type value = - | Top - | Offset of scalar -[@@deriving equal, sexp] - -let pp_value ppf = function - | Top -> Format.fprintf ppf "\u{22a4}" - | Offset (slot, offset) -> - let neg = Int64.is_negative offset in - let pre, off = if neg then '-', Int64.neg offset else '+', offset in - Format.fprintf ppf "%a%c0x%Lx" Var.pp slot pre off - -let pp_bot ppf () = Format.fprintf ppf "\u{22a5}" [@@ocaml.warning "-32"] - -module Value = struct - type t = value [@@deriving equal, sexp] - let merge a b = match a, b with - | Offset s1, Offset s2 when equal_scalar s1 s2 -> a - | _ -> Top -end - -type slots = Virtual.slot Var.Map.t - -module State : sig - type t = value Var.Map.t [@@deriving equal, sexp] - val empty : t - val merge : t -> t -> t - val derive : slots -> t -> Var.t -> int64 -> value option -end = struct - (* NB: the keys are the LHS of a given instruction *) - type t = value Var.Map.t [@@deriving equal, sexp] - - let empty = Var.Map.empty - - let merge a b = Map.merge_skewed a b - ~combine:(fun ~key:_ a b -> Value.merge a b) - - let is_bad slots ptr offset = - Int64.(offset < 0L) || match Map.find slots ptr with - | Some s -> - let size = Int64.of_int @@ Virtual.Slot.size s in - Int64.(offset >= size) - | None -> false - - (* Normalize the scalar referred to by `ptr` and `offset`. *) - let derive slots s ptr offset = match Map.find s ptr with - | (Some Top | None) as v -> v - | Some Offset (ptr', offset') -> - let offset'' = Int64.(offset + offset') in - (* Out of bounds offset to a slot should be undefined. *) - let value = - if is_bad slots ptr' offset'' then Top - else Offset (ptr', offset'') in - Some value -end - -type state = State.t [@@deriving equal, sexp] - -let pp_state ppf s = - let pp_sep ppf () = Format.fprintf ppf "@ " in - let pp_elt ppf (x, v) = Format.fprintf ppf "(%a@ %a)" Var.pp x pp_value v in - let pp_elts = Format.pp_print_list ~pp_sep pp_elt in - Format.fprintf ppf "@[%a@]" pp_elts @@ Map.to_alist s -[@@ocaml.warning "-32"] - -type load_or_store = Load | Store - -let pp_load_or_store ppf = function - | Load -> Format.fprintf ppf "load" - | Store -> Format.fprintf ppf "store" - -let is_store = function - | Load -> false - | Store -> true - -module type L = sig - module Insn : sig - type t - type op - - val create : label:Label.t -> op -> t - - (* General accessors. *) - val op : t -> op - val label : t -> Label.t - - (* Used during analysis. *) - val lhs : op -> Var.Set.t - val offset : op -> scalar option - val copy_of : op -> Var.t option - val escapes : op -> Var.Set.t - - (* Used during replacement. *) - val load_or_store_to : op -> (Var.t * Type.basic * load_or_store) option - val replace_load_or_store_addr : Var.t -> op -> op - val with_op : t -> op -> t - val add : Var.t -> Type.imm_base -> Var.t -> int64 -> op - end - - module Ctrl : sig - type t - val escapes : t -> Var.Set.t - val locals : t -> (Label.t * Var.t list) list - end - - module Blk : sig - type t - val label : t -> Label.t - val args : ?rev:bool -> t -> Var.t seq - val insns : ?rev:bool -> t -> Insn.t seq - val ctrl : t -> Ctrl.t - val with_insns : t -> Insn.t list -> t - end - - module Func : sig - type t - val slots : ?rev:bool -> t -> Virtual.slot seq - val blks : ?rev:bool -> t -> Blk.t seq - val map_of_blks : t -> Blk.t Label.Tree.t - val with_blks : t -> Blk.t list -> t Or_error.t - val insert_slot : t -> Virtual.slot -> t - end - - module Cfg : sig - include Label.Graph_s - val create : Func.t -> t - end -end - -module Make(M : L) : sig +module Make(M : Scalars.L) : sig val run : M.Func.t -> M.Func.t Context.t end = struct open M - (* Set all known scalars to `Top` according to `f`, which is the - set of variables that escape. *) - let escaping f x s = - Set.fold (f x) ~init:s ~f:(fun s v -> - match Map.find s v with - | Some Offset (ptr, _) -> - Map.set s ~key:ptr ~data:Top - | Some _ | None -> s) - - (* Transfer function for a single instruction. *) - let transfer_op slots s op = - let value = match Insn.offset op with - | Some (ptr, offset) -> State.derive slots s ptr offset - | None -> Insn.copy_of op |> Option.bind ~f:(Map.find s) in - let s = match value with - | None -> s - | Some v -> - Insn.lhs op |> Set.fold ~init:s - ~f:(fun s key -> Map.set s ~key ~data:v) in - escaping Insn.escapes op s - - let blkargs blks (l, xs) = - Label.Tree.find blks l |> - Option.value_map ~default:[] ~f:(fun b -> - let args = Seq.to_list @@ Blk.args b in - match List.zip xs args with - | Unequal_lengths -> [] - | Ok args' -> args') - - (* Transfer for control-flow instruction. *) - let transfer_ctrl blks s c = - let init = escaping Ctrl.escapes c s in - (* Propagate the block parameters we are passing. *) - Ctrl.locals c |> List.bind ~f:(blkargs blks) |> - List.fold ~init ~f:(fun acc (src, dst) -> - if Var.(src = dst) then acc - else match Map.find acc src with - | Some v -> Map.set acc ~key:dst ~data:v - | None -> acc) - - (* Transfer function for a block. *) - let transfer slots blks l s = - Label.Tree.find blks l |> - Option.value_map ~default:s ~f:(fun b -> - Blk.insns b |> Seq.map ~f:Insn.op |> - Seq.fold ~init:s ~f:(transfer_op slots) |> - transfer_ctrl blks @< Blk.ctrl b) - - (* Initial constraints. *) - let initialize slots blks = - (* Set all slots to point to their own base address. *) - let init = Map.mapi slots ~f:(fun ~key ~data:_ -> Offset (key, 0L)) in - (* Any slot that directly escapes should immediately be set to `Top`. *) - Label.Tree.fold blks ~init ~f:(fun ~key:_ ~data init -> - Blk.insns data |> Seq.fold ~init ~f:(fun s i -> - escaping Insn.escapes (Insn.op i) s) |> - escaping Ctrl.escapes (Blk.ctrl data)) |> - Label.Map.singleton Label.pseudoentry |> - Solution.create @< State.empty - - type solution = (Label.t, state) Solution.t - - (* Run the dataflow analysis. *) - let analyze slots fn : solution = - let cfg = Cfg.create fn in - let blks = Func.map_of_blks fn in - Graphlib.fixpoint (module Cfg) cfg - ~init:(initialize slots blks) - ~start:Label.pseudoentry - ~equal:State.equal - ~merge:State.merge - ~f:(transfer slots blks) - - (* All slots mapped to their names. *) - let collect_slots fn = - Func.slots fn |> Seq.fold ~init:Var.Map.empty ~f:(fun acc s -> - Map.set acc ~key:(Virtual.Slot.var s) ~data:s) + module Analysis = Scalars.Make(M) (* A memory access for a slot. *) type access = { @@ -287,7 +61,7 @@ end = struct | Some Offset (base, off) -> Map.add_multi acc ~key:base ~data:{insn = i; off; ty; ldst} | _ -> acc in - s := transfer_op slots !s op; + s := Analysis.transfer_op slots !s op; acc)) |> (* Filter out slots that are not splittable. *) Map.map ~f:(List.sort ~compare:cmp_access) |> @@ -461,7 +235,7 @@ end = struct let s = ref @@ Solution.get s @@ Blk.label b in let+ insns = Blk.insns b |> Context.Seq.map ~f:(fun i -> let+ is = rewrite_insn parts m !s i in - s := transfer_op slots !s @@ Insn.op i; is) + s := Analysis.transfer_op slots !s @@ Insn.op i; is) >>| List.concat @. Seq.to_list in Blk.with_insns b insns) >>| Seq.to_list in Context.lift_err @@ Func.with_blks fn blks @@ -471,9 +245,9 @@ end = struct let run fn = let open Context.Syntax in - let slots = collect_slots fn in + let slots = Analysis.collect_slots fn in if Map.is_empty slots then !!fn else - let s = analyze slots fn in + let s = Analysis.analyze slots fn in let accs = collect_accesses slots fn s in let parts = partition_acesses accs in if debug then diff --git a/src/lib/scalars.ml b/src/lib/scalars.ml new file mode 100644 index 00000000..38ac8bf5 --- /dev/null +++ b/src/lib/scalars.ml @@ -0,0 +1,238 @@ +open Core +open Regular.Std +open Graphlib.Std + +let (@.) = Fn.compose +let (@<) = Fn.flip + +(* A scalar access. *) +module Scalar = struct + module T = struct + type t = Var.t * int64 [@@deriving compare, equal, hash, sexp] + end + include T + module Map = Map.Make(T) + module Table = Hashtbl.Make(T) +end + +type scalar = Scalar.t [@@deriving compare, equal, hash, sexp] +type scalars = Virtual.slot Scalar.Map.t + +(* Lattice of scalar accesses. + + [Top]: the access is inconsistent or escapes + + [Offset (s, o)]: access to slot [s] at offset [o] +*) +type value = + | Top + | Offset of scalar +[@@deriving equal, sexp] + +let pp_value ppf = function + | Top -> Format.fprintf ppf "\u{22a4}" + | Offset (slot, offset) -> + let neg = Int64.is_negative offset in + let pre, off = if neg then '-', Int64.neg offset else '+', offset in + Format.fprintf ppf "%a%c0x%Lx" Var.pp slot pre off + +let pp_bot ppf () = Format.fprintf ppf "\u{22a5}" [@@ocaml.warning "-32"] + +module Value = struct + type t = value [@@deriving equal, sexp] + let merge a b = match a, b with + | Offset s1, Offset s2 when equal_scalar s1 s2 -> a + | _ -> Top +end + +type slots = Virtual.slot Var.Map.t + +module State : sig + type t = value Var.Map.t [@@deriving equal, sexp] + val empty : t + val merge : t -> t -> t + val derive : slots -> t -> Var.t -> int64 -> value option +end = struct + (* NB: the keys are the LHS of a given instruction *) + type t = value Var.Map.t [@@deriving equal, sexp] + + let empty = Var.Map.empty + + let merge a b = Map.merge_skewed a b + ~combine:(fun ~key:_ a b -> Value.merge a b) + + let is_bad slots ptr offset = + Int64.(offset < 0L) || match Map.find slots ptr with + | Some s -> + let size = Int64.of_int @@ Virtual.Slot.size s in + Int64.(offset >= size) + | None -> false + + (* Normalize the scalar referred to by `ptr` and `offset`. *) + let derive slots s ptr offset = match Map.find s ptr with + | (Some Top | None) as v -> v + | Some Offset (ptr', offset') -> + let offset'' = Int64.(offset + offset') in + (* Out of bounds offset to a slot should be undefined. *) + let value = + if is_bad slots ptr' offset'' then Top + else Offset (ptr', offset'') in + Some value +end + +type state = State.t [@@deriving equal, sexp] + +let pp_state ppf s = + let pp_sep ppf () = Format.fprintf ppf "@ " in + let pp_elt ppf (x, v) = Format.fprintf ppf "(%a@ %a)" Var.pp x pp_value v in + let pp_elts = Format.pp_print_list ~pp_sep pp_elt in + Format.fprintf ppf "@[%a@]" pp_elts @@ Map.to_alist s +[@@ocaml.warning "-32"] + +type solution = (Label.t, state) Solution.t + +let empty_solution = Solution.create Label.Map.empty State.empty + +type load_or_store = Load | Store + +let pp_load_or_store ppf = function + | Load -> Format.fprintf ppf "load" + | Store -> Format.fprintf ppf "store" + +let is_store = function + | Load -> false + | Store -> true + +module type L = sig + module Insn : sig + type t + type op + + val create : label:Label.t -> op -> t + + (* General accessors. *) + val op : t -> op + val label : t -> Label.t + + (* Used during analysis. *) + val lhs : op -> Var.Set.t + val offset : op -> scalar option + val copy_of : op -> Var.t option + val escapes : op -> Var.Set.t + + (* Used during replacement. *) + val load_or_store_to : op -> (Var.t * Type.basic * load_or_store) option + val replace_load_or_store_addr : Var.t -> op -> op + val with_op : t -> op -> t + val add : Var.t -> Type.imm_base -> Var.t -> int64 -> op + end + + module Ctrl : sig + type t + val escapes : t -> Var.Set.t + val locals : t -> (Label.t * Var.t list) list + end + + module Blk : sig + type t + val label : t -> Label.t + val args : ?rev:bool -> t -> Var.t seq + val insns : ?rev:bool -> t -> Insn.t seq + val ctrl : t -> Ctrl.t + val with_insns : t -> Insn.t list -> t + end + + module Func : sig + type t + val slots : ?rev:bool -> t -> Virtual.slot seq + val blks : ?rev:bool -> t -> Blk.t seq + val map_of_blks : t -> Blk.t Label.Tree.t + val with_blks : t -> Blk.t list -> t Or_error.t + val insert_slot : t -> Virtual.slot -> t + end + + module Cfg : sig + include Label.Graph_s + val create : Func.t -> t + end +end + +module Make(M : L) = struct + open M + + (* Set all known scalars to `Top` according to `f`, which is the + set of variables that escape. *) + let escaping f x s = + Set.fold (f x) ~init:s ~f:(fun s v -> + match Map.find s v with + | Some Offset (ptr, _) -> + Map.set s ~key:ptr ~data:Top + | Some _ | None -> s) + + (* Transfer function for a single instruction. *) + let transfer_op slots s op = + let value = match Insn.offset op with + | Some (ptr, offset) -> State.derive slots s ptr offset + | None -> Insn.copy_of op |> Option.bind ~f:(Map.find s) in + let s = match value with + | None -> s + | Some v -> + Insn.lhs op |> Set.fold ~init:s + ~f:(fun s key -> Map.set s ~key ~data:v) in + escaping Insn.escapes op s + + let blkargs blks (l, xs) = + Label.Tree.find blks l |> + Option.value_map ~default:[] ~f:(fun b -> + let args = Seq.to_list @@ Blk.args b in + match List.zip xs args with + | Unequal_lengths -> [] + | Ok args' -> args') + + (* Transfer for control-flow instruction. *) + let transfer_ctrl blks s c = + let init = escaping Ctrl.escapes c s in + (* Propagate the block parameters we are passing. *) + Ctrl.locals c |> List.bind ~f:(blkargs blks) |> + List.fold ~init ~f:(fun acc (src, dst) -> + if Var.(src = dst) then acc + else match Map.find acc src with + | Some v -> Map.set acc ~key:dst ~data:v + | None -> acc) + + (* Transfer function for a block. *) + let transfer slots blks l s = + Label.Tree.find blks l |> + Option.value_map ~default:s ~f:(fun b -> + Blk.insns b |> Seq.map ~f:Insn.op |> + Seq.fold ~init:s ~f:(transfer_op slots) |> + transfer_ctrl blks @< Blk.ctrl b) + + (* Initial constraints. *) + let initialize slots blks = + (* Set all slots to point to their own base address. *) + let init = Map.mapi slots ~f:(fun ~key ~data:_ -> Offset (key, 0L)) in + (* Any slot that directly escapes should immediately be set to `Top`. *) + Label.Tree.fold blks ~init ~f:(fun ~key:_ ~data init -> + Blk.insns data |> Seq.fold ~init ~f:(fun s i -> + escaping Insn.escapes (Insn.op i) s) |> + escaping Ctrl.escapes (Blk.ctrl data)) |> + Label.Map.singleton Label.pseudoentry |> + Solution.create @< State.empty + + (* All slots mapped to their names. *) + let collect_slots fn = + Func.slots fn |> Seq.fold ~init:Var.Map.empty ~f:(fun acc s -> + Map.set acc ~key:(Virtual.Slot.var s) ~data:s) + + (* Run the dataflow analysis. *) + let analyze slots fn : solution = + let cfg = Cfg.create fn in + let blks = Func.map_of_blks fn in + Graphlib.fixpoint (module Cfg) cfg + ~init:(initialize slots blks) + ~start:Label.pseudoentry + ~equal:State.equal + ~merge:State.merge + ~f:(transfer slots blks) +end From 28e26f44a7aa7dce9d1f314520a7872c8d0b99b0 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 16 Nov 2025 11:17:21 -0500 Subject: [PATCH 12/62] Adds Allen's Interval Algebra --- src/lib/allen_interval_algebra.ml | 88 +++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 src/lib/allen_interval_algebra.ml diff --git a/src/lib/allen_interval_algebra.ml b/src/lib/allen_interval_algebra.ml new file mode 100644 index 00000000..fa0060aa --- /dev/null +++ b/src/lib/allen_interval_algebra.ml @@ -0,0 +1,88 @@ +(** Allen's Interval Algebra. + + {:https://en.wikipedia.org/wiki/Allen%27s_interval_algebra} +*) + +type t = + | Before + | Meets + | Overlaps + | Finished_by + | Contains + | Starts + | Equal + | Started_by + | During + | Finishes + | Overlapped_by + | Met_by + | After +[@@deriving compare, equal, sexp] + +(** Returns the converse of the relation. *) +let converse = function + | Before -> After + | After -> Before + | Meets -> Met_by + | Met_by -> Meets + | Overlaps -> Overlapped_by + | Overlapped_by -> Overlaps + | Starts -> Started_by + | Started_by -> Starts + | Finishes -> Finished_by + | Finished_by -> Finishes + | During -> Contains + | Contains -> During + | Equal -> Equal + +(** The input interval type. *) +module type S = sig + (** A point in the interval. *) + type point + + (** An inclusive interval. *) + type t + + (** The lower-bound of the interval. *) + val lo : t -> point + + (** The upper-bound of the interval. *) + val hi : t -> point + + include Base.Comparisons.Infix with type t := point +end + +(** Create the relations for an interval type. *) +module Make(M : S) = struct + open M + + let before a b = hi a < lo b [@@inline] + let meets a b = hi a = lo b [@@inline] + let overlaps a b = lo a < lo b && lo b < hi a && hi a < hi b [@@inline] + let finished_by a b = lo b < lo a && hi a = hi b [@@inline] + let contains a b = lo a < lo b && hi b < hi a [@@inline] + let starts a b = lo a = lo b && hi a < hi b [@@inline] + let equal a b = lo a = lo b && hi a = hi b [@@inline] + let started_by a b = lo a = lo b && hi a > hi b [@@inline] + let during a b = lo b < lo a && hi a < hi b [@@inline] + let finishes a b = lo a > lo b && hi a = hi b [@@inline] + let overlapped_by a b = lo b < lo a && lo a < hi b && hi b < hi a [@@inline] + let met_by a b = lo a = hi b [@@inline] + let after a b = lo a > hi b [@@inline] + + (** Relates two intervals. *) + let relate a b = + if before a b then Before + else if meets a b then Meets + else if overlaps a b then Overlaps + else if finished_by a b then Finished_by + else if contains a b then Contains + else if starts a b then Starts + else if equal a b then Equal + else if started_by a b then Started_by + else if during a b then During + else if finishes a b then Finishes + else if overlapped_by a b then Overlapped_by + else if met_by a b then Met_by + else After +end From 49b6a40629e4facf31f303cb10b984afcd8280ff Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 16 Nov 2025 12:21:42 -0500 Subject: [PATCH 13/62] Remove redundant stores in `Abi_loadopt` --- src/lib/passes/abi_loadopt.ml | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/lib/passes/abi_loadopt.ml b/src/lib/passes/abi_loadopt.ml index 8c6041b9..c6eccfea 100644 --- a/src/lib/passes/abi_loadopt.ml +++ b/src/lib/passes/abi_loadopt.ml @@ -73,6 +73,7 @@ type t = { blks : Abi.blk Label.Table.t; mems : store Mem.Table.t; vars : operand Var.Table.t; + nop : Label.Hash_set.t; mutable mem : Label.t option; mutable memo : operand Hashcons.t; } @@ -107,8 +108,9 @@ let init fn = let blks = Label.Table.create () in let mems = Mem.Table.create () in let vars = Var.Table.create () in + let nop = Label.Hash_set.create () in let mem = None and memo = Hashcons.empty in - {reso; dom; rdom; lst; blks; mems; mem; vars; memo} + {reso; dom; rdom; lst; blks; mems; mem; vars; memo; nop} module Optimize = struct let var t x = match Hashtbl.find t.vars x with @@ -151,10 +153,20 @@ module Optimize = struct | _ -> `var x in t.memo <- Hashcons.set t.memo ~key:op ~data + let same_store t v v' l l' = + equal_operand v v' && t.rdom ~parent:l' l + let store t l ty v a = let v = operand t v in let a = operand t a in let key = {label = l; addr = a; ty} in + (* Mark redundant stores only if we can prove that + it is storing the same value to the same address. *) + Option.iter t.mem ~f:(fun m -> + match Hashtbl.find t.mems {key with label = m} with + | Some Value (v', l') when same_store t v v' l l' -> + Hash_set.add t.nop l + | Some _ | None -> ()); Hashtbl.set t.mems ~key ~data:(Value (v, l)); t.mem <- Some l; `store (ty, v, a) @@ -279,8 +291,13 @@ let run fn = Semi_nca.Tree.children t.dom l |> Seq.iter ~f:(fun l -> Stack.push q (l, t.mem, t.memo))); Abi.Func.map_blks fn ~f:(fun b -> - Abi.Blk.label b |> Hashtbl.find t.blks |> - Option.value ~default:b) + let b = + Abi.Blk.label b |> Hashtbl.find t.blks |> + Option.value ~default:b in + if Hash_set.is_empty t.nop then b else + Abi.Blk.insns b |> Seq.filter ~f:(fun i -> + not @@ Hash_set.mem t.nop @@ Abi.Insn.label i) |> + Seq.to_list |> Abi.Blk.with_insns b) else E.failf "In Abi_loadopt: function $%s is not in SSA form" (Abi.Func.name fn) () From a287d64061d7fb402b5981af9f00c1ad5af1007a Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 16 Nov 2025 12:44:12 -0500 Subject: [PATCH 14/62] simple slot coalescing --- .../passes/coalesce_slots/coalesce_slots.ml | 88 ++++++++ .../coalesce_slots/coalesce_slots_impl.ml | 191 ++++++++++++++++++ src/lib/passes/passes.ml | 8 +- src/lib/passes/passes.mli | 10 + src/lib/passes/sroa/sroa.ml | 104 +--------- src/lib/passes/sroa_coalesce_common.ml | 94 +++++++++ src/lib/scalars.ml | 11 +- src/test/data/opt/gcdext.vir.opt.sysv | 19 +- src/test/data/opt/sumphi.vir.opt.sysv | 27 +-- .../opt/sumphi.vir.opt.sysv.amd64.regalloc | 23 +-- src/test/data/opt/unref.vir.opt.sysv | 27 +-- .../opt/vaarg1.vir.opt.sysv.amd64.regalloc | 20 +- 12 files changed, 450 insertions(+), 172 deletions(-) create mode 100644 src/lib/passes/coalesce_slots/coalesce_slots.ml create mode 100644 src/lib/passes/coalesce_slots/coalesce_slots_impl.ml create mode 100644 src/lib/passes/sroa_coalesce_common.ml diff --git a/src/lib/passes/coalesce_slots/coalesce_slots.ml b/src/lib/passes/coalesce_slots/coalesce_slots.ml new file mode 100644 index 00000000..23b790dc --- /dev/null +++ b/src/lib/passes/coalesce_slots/coalesce_slots.ml @@ -0,0 +1,88 @@ +open Core +open Monads.Std +open Regular.Std +open Virtual +open Scalars +open Coalesce_slots_impl +open Sroa_coalesce_common + +module E = Monad.Result.Error + +module V = Make(struct + module Insn = struct + type t = Insn.t + type op = Insn.op + let create ~label op = Insn.create op ~label + let with_op = Insn.with_op + let op = Insn.op + let label = Insn.label + let lhs = var_set_of_option @. Insn.lhs_of_op + let offset = (offset :> op -> _) + let copy_of = (copy_of :> op -> _) + let fv = Insn.free_vars_of_op + let escapes = (escapes is_named fv :> op -> _) + let load_or_store_to = (load_or_store_to :> op -> _) + let replace_load_or_store_addr a = (replace_load_or_store_addr a :> op -> _) + let add x ty b o = (add x ty b o :> op) + end + module Ctrl = struct + type t = ctrl + let free_vars = Ctrl.free_vars + let escapes = (escapes_ctrl free_vars :> t -> _) + let locals = (locals Ctrl.Table.enum :> t -> _) + end + module Blk = Blk + module Func = Func + module Cfg = Cfg + end) + +module A = Make(struct + open Abi + module Insn = struct + type t = Insn.t + type op = Insn.op + let create ~label op = Insn.create op ~label + let with_op = Insn.with_op + let op = Insn.op + let label = Insn.label + let lhs = Insn.def_of_op + let offset = (offset :> op -> _) + let copy_of = (copy_of :> op -> _) + let fv = Insn.free_vars_of_op + let escapes = (escapes (const false) fv :> op -> _) + let load_or_store_to = (load_or_store_to :> op -> _) + let replace_load_or_store_addr a = (replace_load_or_store_addr a :> op -> _) + let add x ty b o = (add x ty b o :> op) + end + module Ctrl = struct + type t = ctrl + let free_vars = Ctrl.free_vars + let escapes = (escapes_ctrl free_vars :> t -> _) + let locals = (locals Ctrl.Table.enum :> t -> _) + end + module Blk = Blk + module Func = Func + module Cfg = Cfg + end) + +open E.Let + +let check_ssa msg n d = + if Dict.mem d Tags.ssa then Ok () + else E.failf "In Coalesce_slots%s: function $%s is not in SSA form" msg n () + +let run fn = + let+ () = check_ssa "" (Func.name fn) (Func.dict fn) in + let subst = V.run fn in + if Map.is_empty subst then fn else + Func.map_blks fn ~f:(fun b -> + let insns, ctrl = Subst_mapper.map_blk subst b in + Blk.with_ctrl (Blk.with_insns b insns) ctrl) + +let run_abi fn = + let+ () = check_ssa " (ABI)" (Abi.Func.name fn) (Abi.Func.dict fn) in + let subst = A.run fn in + if Map.is_empty subst then fn else + Abi.Func.map_blks fn ~f:(fun b -> + let insns, ctrl = Subst_mapper_abi.map_blk subst b in + Abi.Blk.with_ctrl (Abi.Blk.with_insns b insns) ctrl) diff --git a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml new file mode 100644 index 00000000..94dd67e6 --- /dev/null +++ b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml @@ -0,0 +1,191 @@ +open Core +open Regular.Std +open Graphlib.Std +open Scalars + +module Slot = Virtual.Slot +module Allen = Allen_interval_algebra + +let debug = false + +type range = { + lo : int; + hi : int; +} [@@deriving compare, equal, sexp] + +module Range = struct + type t = range [@@deriving compare, equal, sexp] + + let pp ppf r = Format.fprintf ppf "[%d, %d]" r.lo r.hi + + let bad = {lo = Int.min_value; hi = Int.max_value} + let is_bad = equal bad + + let singleton n = {lo = n; hi = n} + + (* Extend the upper-bound on the live range. *) + let use r n = {r with hi = Int.max r.hi n} + + (* Shrink the lower-bound on the live range. + + Also, a defintion counts as a use, because we need to + reference the slot, so extend the upper-bound. + *) + let def r n = { + lo = Int.min r.lo n; + hi = Int.max r.hi n; + } + + module Algebra = Allen.Make(struct + type point = int + type nonrec t = t + let lo t = t.lo [@@inline] + let hi t = t.hi [@@inline] + include Int.Replace_polymorphic_compare + end) +end + +let slot_sa slots x = + let sx = Map.find_exn slots x in + Slot.(size sx, align sx) + +let compat_size_align slots x y = + let sx, ax = slot_sa slots x in + let sy, ay = slot_sa slots y in + (* The smaller slot must not have a higher alignment. *) + not ((sx < sy && ax > ay) || (sy < sx && ay > ax)) + +(* Find compatible slots. Most importantly, their live ranges must + not interfere. *) +let equiv_range slots rs x y = + compat_size_align slots x y && + let rx = Map.find_exn rs x in + let ry = Map.find_exn rs y in + let a : Allen.t = Range.Algebra.relate rx ry in + if debug then + Format.eprintf "%a, %a: %a\n%!" + Var.pp x Var.pp y Sexp.pp (Allen.sexp_of_t a); + match a with + | Before | After -> true + | _ -> false + +let non_interfering slots rs = + Map.to_sequence rs |> + (* The results will still be correct if we omit this, but + it is more efficient to just not consider them at all. *) + Seq.filter ~f:(not @. Range.is_bad @. snd) |> + Seq.map ~f:fst |> + Var.Set.of_sequence |> + Partition.trivial |> + Partition.refine ~cmp:Var.compare ~equiv:(equiv_range slots rs) + +(* invariant: a group is never empty *) +let canon_elt slots g = + Group.enum g |> Seq.max_elt ~compare:(fun x y -> + (* Assuming that the sizes and alignments are compatible, + just pick the biggest one. *) + let sx, ax = slot_sa slots x in + let sy, ay = slot_sa slots y in + match Int.compare sy sx with + | 0 -> Int.compare ay ax + | c -> c) |> + Option.value_exn + +let make_subst slots p = + Partition.groups p |> + Seq.fold ~init:Var.Map.empty ~f:(fun init g -> + let canon = canon_elt slots g in + Group.enum g |> Seq.filter ~f:(not @. Var.equal canon) |> + Seq.fold ~init ~f:(fun acc x -> Map.set acc ~key:x ~data:(`var canon))) + +module Make(M : Scalars.L) = struct + open M + + module Analysis = Scalars.Make(M) + + let mkdef s x n = Map.update s x ~f:(function + | None -> Range.singleton n + | Some r -> Range.def r n) + + let mkuse s x n = Map.change s x ~f:(function + | Some r -> Some (Range.use r n) + | None -> None) + + let update acc s x n def = match Map.find s x with + | Some Offset (base, _) -> + if def then mkdef acc base n else mkuse acc base n + | Some Top -> Map.set acc ~key:x ~data:Range.bad + | None -> acc + + let liveness_insn acc s ip i = + let op = Insn.op i in + match Insn.load_or_store_to op with + | Some (ptr, _, ldst) -> + update acc s ptr ip @@ is_store ldst + | None -> match Insn.offset op with + | Some (ptr, _) -> + update acc s ptr ip false + | None -> match Insn.copy_of op with + | Some x -> update acc s x ip false + | None -> acc + + let liveness_ctrl acc s ip c = + Ctrl.free_vars c |> Set.fold ~init:acc + ~f:(fun acc x -> update acc s x ip false) + + let liveness cfg blks slots (s : solution) = + let ip = ref 0 in + let nums = Vec.create () in + let acc = + Graphlib.reverse_postorder_traverse + (module Cfg) ~start:Label.pseudoentry cfg |> + Seq.fold ~init:Var.Map.empty ~f:(fun acc l -> + match Label.Tree.find blks l with + | None -> acc + | Some b -> + let s = ref @@ Solution.get s l in + let acc = Blk.insns b |> Seq.fold ~init:acc ~f:(fun acc i -> + let op = Insn.op i in + let acc = liveness_insn acc !s !ip i in + Vec.push nums (Insn.label i); + s := Analysis.transfer_op slots !s op; + incr ip; + acc) in + let acc = liveness_ctrl acc !s !ip @@ Blk.ctrl b in + Vec.push nums l; + incr ip; + acc) in + acc, nums + + let run fn = + let slots = Analysis.collect_slots fn in + if Map.is_empty slots then Var.Map.empty else + let cfg = Cfg.create fn in + let blks = Func.map_of_blks fn in + let s = Analysis.analyze ~cfg ~blks slots fn in + let rs, nums = liveness cfg blks slots s in + if debug then + Map.iter_keys slots ~f:(fun x -> + match Map.find rs x with + | None -> + Format.eprintf "%a: dead\n%!" Var.pp x + | Some r when Range.is_bad r -> + Format.eprintf "%a: top\n%!" Var.pp x + | Some r -> + Format.eprintf "%a: %a (%a to %a)\n%!" Var.pp x Range.pp r + Label.pp (Vec.get_exn nums r.lo) + Label.pp (Vec.get_exn nums r.hi) + ); + let p = non_interfering slots rs in + if debug then + Partition.groups p |> Seq.iter ~f:(fun g -> + Format.eprintf "%a\n%!" (Group.pp Var.pp) g + ); + (* TODO: detect singleton ranges: these should be dead stores *) + let subst = make_subst slots p in + if debug then + Map.iteri subst ~f:(fun ~key ~data -> + Format.eprintf "%a => %a\n%!" + Var.pp key Virtual.pp_operand data); + subst +end diff --git a/src/lib/passes/passes.ml b/src/lib/passes/passes.ml index 6d5279d4..f74244c7 100644 --- a/src/lib/passes/passes.ml +++ b/src/lib/passes/passes.ml @@ -3,6 +3,7 @@ open Virtual open Context.Syntax module Abi_loadopt = Abi_loadopt +module Coalesce_slots = Coalesce_slots module Egraph_opt = Egraph_opt module Lower_abi = Lower_abi module Promote_slots = Promote_slots @@ -26,9 +27,10 @@ let retype tenv m = let optimize tenv m = let module Cv = Context.Virtual in - let* m = Context.Virtual.Module.map_funs m ~f:Sroa.run in + let*? m = Module.map_funs_err m ~f:Coalesce_slots.run in + let*? m = Module.map_funs_err m ~f:Resolve_constant_blk_args.run in let*? m = Module.map_funs_err m ~f:Remove_dead_vars.run in - let*? tenv = retype tenv m in + let* m = Context.Virtual.Module.map_funs m ~f:Sroa.run in let*? m = Module.map_funs_err m ~f:Promote_slots.run in let*? tenv = retype tenv m in let*? m = Module.map_funs_err m ~f:(Sccp.run tenv) in @@ -60,7 +62,9 @@ let to_abi tenv m = ~data:(Seq.to_list @@ Module.data m) let optimize_abi m = + let*? m = Abi.Module.map_funs_err m ~f:Coalesce_slots.run_abi in let*? m = Abi.Module.map_funs_err m ~f:Resolve_constant_blk_args.run_abi in + let*? m = Abi.Module.map_funs_err m ~f:Remove_dead_vars.run_abi in let* m = Context.Virtual.Module.map_funs_abi m ~f:Sroa.run_abi in let*? m = Abi.Module.map_funs_err m ~f:Promote_slots.run_abi in let*? m = Abi.Module.map_funs_err m ~f:Abi_loadopt.run in diff --git a/src/lib/passes/passes.mli b/src/lib/passes/passes.mli index b81867a7..fdc57fba 100644 --- a/src/lib/passes/passes.mli +++ b/src/lib/passes/passes.mli @@ -64,6 +64,16 @@ module Abi_loadopt : sig val run : Abi.func -> Abi.func Or_error.t end +(** Attempts to coalesce slots of compatible size and alignment which + do not interfere (i.e. are never live at the same time). + + Assumes the function is in SSA form. +*) +module Coalesce_slots : sig + val run : func -> func Or_error.t + val run_abi : Abi.func -> Abi.func Or_error.t +end + (** Uses the [Egraph] module to perform a variety of optimizations to a function. diff --git a/src/lib/passes/sroa/sroa.ml b/src/lib/passes/sroa/sroa.ml index 957056c6..ef1eb408 100644 --- a/src/lib/passes/sroa/sroa.ml +++ b/src/lib/passes/sroa/sroa.ml @@ -3,99 +3,7 @@ open Regular.Std open Virtual open Scalars open Sroa_impl - -let var_set_of_option = function - | Some x -> Var.Set.singleton x - | None -> Var.Set.empty - -(* Instructions that can produce a scalar `Offset` value. *) -let offset = function - | `bop (_, `add _, `var x, `int (i, _)) - | `bop (_, `add _, `int (i, _), `var x) -> - Some (x, Bv.to_int64 i) - | `bop (_, `sub _, `var x, `int (i, _)) -> - Some (x, Int64.neg @@ Bv.to_int64 i) - | _ -> None - -(* Instructions that behave like `copy` instead of causing - a variable to "escape". - - TODO: more cases? -*) -let copy_of = function - | `uop (_, `copy _, `var x) -> Some x - | `bop (_, `add _, `var x, `int (i, _)) - | `bop (_, `add _, `int (i, _), `var x) - | `bop (_, `sub _, `var x, `int (i, _)) - when Bv.(i = zero) -> Some x - | _ -> None - -let escapes mty fv = function - | `store (ty, `var x, `var y) when mty ty -> Var.Set.of_list [x; y] - | `store (_, `var x, _) -> Var.Set.singleton x - | `load (_, ty, `var x) when mty ty -> Var.Set.singleton x - | `load _ -> Var.Set.empty - | o when Option.is_some (offset o) -> Var.Set.empty - | o when Option.is_some (copy_of o) -> Var.Set.empty - | o -> fv o -[@@specialise] - -let load_or_store_to = function - | `load (_, (#Type.basic as b), `var x) -> Some (x, b, Load) - | `store ((#Type.basic as b), _, `var x) -> Some (x, b, Store) - | _ -> None - -let replace_load_or_store_addr a = function - | `load (x, (#Type.basic as b), _) -> `load (x, b, `var a) - | `store ((#Type.basic as b), v, _) -> `store (b, v, `var a) - | op -> op - -let add x ty b o = - assert Int64.(o > 0L); - let i = Bv.(int64 o mod modulus (Type.sizeof_imm_base ty)) in - `bop (x, `add (ty :> Type.basic), `var b, `int (i, (ty :> Type.imm))) - -let is_named = function - | `name _ -> true - | _ -> false - -let local l args = l, List.filter_map args ~f:var_of_operand - -let table enum d ds tbl = - enum tbl |> Seq.map ~f:snd |> - Seq.map ~f:(fun (`label (l, args)) -> local l args) |> - Seq.to_list |> List.cons (local d ds) - -let locals enum = function - | `hlt -> [] - | `jmp #global -> [] - | `jmp `label (l, args) -> - [local l args] - | `br (_, #global, #global) -> [] - | `br (_, `label (y, ys), #global) -> - [local y ys] - | `br (_, #global, `label (n, ns)) -> - [local n ns] - | `br (_, `label (y, ys), `label (n, ns)) -> - [local y ys; local n ns] - | `ret _ -> [] - | `sw (_, _, `label (d, ds), tbl) -> - table enum d ds tbl - -(* XXX: we don't yet have a story for how to handle block - parameters. *) -let escapes_ctrl fv = function - | `hlt -> Var.Set.empty - | `jmp `var x -> Var.Set.singleton x - | `jmp _ -> Var.Set.empty - | `br (c, `var y, `var n) -> Var.Set.of_list [c; y; n] - | `br (c, _, `var n) -> Var.Set.of_list [c; n] - | `br (c, `var y, _) -> Var.Set.of_list [c; y] - | `br (c, _, _) -> Var.Set.singleton c - | `ret _ as c -> fv c - | `sw (_, `var i, _, _) -> Var.Set.singleton i - | `sw _ -> Var.Set.empty -[@@ocaml.warning "-32"] +open Sroa_coalesce_common module V = Make(struct module Insn = struct @@ -116,9 +24,8 @@ module V = Make(struct end module Ctrl = struct type t = ctrl - let fv = Ctrl.free_vars - let escapes = fv - (* let escapes = (escapes_ctrl fv :> t -> _) *) + let free_vars = Ctrl.free_vars + let escapes = free_vars let locals = (locals Ctrl.Table.enum :> t -> _) end module Blk = Blk @@ -146,9 +53,8 @@ module A = Make(struct end module Ctrl = struct type t = ctrl - let fv = Ctrl.free_vars - let escapes = fv - (* let escapes = (escapes_ctrl fv :> t -> _) *) + let free_vars = Ctrl.free_vars + let escapes = free_vars let locals = (locals Ctrl.Table.enum :> t -> _) end module Blk = Blk diff --git a/src/lib/passes/sroa_coalesce_common.ml b/src/lib/passes/sroa_coalesce_common.ml new file mode 100644 index 00000000..6c9baf32 --- /dev/null +++ b/src/lib/passes/sroa_coalesce_common.ml @@ -0,0 +1,94 @@ +open Core +open Regular.Std +open Virtual +open Scalars + +let var_set_of_option = function + | Some x -> Var.Set.singleton x + | None -> Var.Set.empty + +(* Instructions that can produce a scalar `Offset` value. *) +let offset = function + | `bop (_, `add _, `var x, `int (i, _)) + | `bop (_, `add _, `int (i, _), `var x) -> + Some (x, Bv.to_int64 i) + | `bop (_, `sub _, `var x, `int (i, _)) -> + Some (x, Int64.neg @@ Bv.to_int64 i) + | _ -> None + +(* Instructions that behave like `copy` instead of causing + a variable to "escape". + + TODO: more cases? +*) +let copy_of = function + | `uop (_, `copy _, `var x) -> Some x + | `bop (_, `add _, `var x, `int (i, _)) + | `bop (_, `add _, `int (i, _), `var x) + | `bop (_, `sub _, `var x, `int (i, _)) + when Bv.(i = zero) -> Some x + | _ -> None + +let escapes mty fv = function + | `store (ty, `var x, `var y) when mty ty -> Var.Set.of_list [x; y] + | `store (_, `var x, _) -> Var.Set.singleton x + | `load (_, ty, `var x) when mty ty -> Var.Set.singleton x + | `load _ -> Var.Set.empty + | o when Option.is_some (offset o) -> Var.Set.empty + | o when Option.is_some (copy_of o) -> Var.Set.empty + | o -> fv o +[@@specialise] + +let load_or_store_to = function + | `load (_, (#Type.basic as b), `var x) -> Some (x, b, Load) + | `store ((#Type.basic as b), _, `var x) -> Some (x, b, Store) + | _ -> None + +let replace_load_or_store_addr a = function + | `load (x, (#Type.basic as b), _) -> `load (x, b, `var a) + | `store ((#Type.basic as b), v, _) -> `store (b, v, `var a) + | op -> op + +let add x ty b o = + assert Int64.(o > 0L); + let i = Bv.(int64 o mod modulus (Type.sizeof_imm_base ty)) in + `bop (x, `add (ty :> Type.basic), `var b, `int (i, (ty :> Type.imm))) + +let is_named = function + | `name _ -> true + | _ -> false + +let local l args = l, List.filter_map args ~f:var_of_operand + +let table enum d ds tbl = + enum tbl |> Seq.map ~f:snd |> + Seq.map ~f:(fun (`label (l, args)) -> local l args) |> + Seq.to_list |> List.cons (local d ds) + +let locals enum = function + | `hlt -> [] + | `jmp #global -> [] + | `jmp `label (l, args) -> + [local l args] + | `br (_, #global, #global) -> [] + | `br (_, `label (y, ys), #global) -> + [local y ys] + | `br (_, #global, `label (n, ns)) -> + [local n ns] + | `br (_, `label (y, ys), `label (n, ns)) -> + [local y ys; local n ns] + | `ret _ -> [] + | `sw (_, _, `label (d, ds), tbl) -> + table enum d ds tbl + +let escapes_ctrl fv = function + | `hlt -> Var.Set.empty + | `jmp `var x -> Var.Set.singleton x + | `jmp _ -> Var.Set.empty + | `br (c, `var y, `var n) -> Var.Set.of_list [c; y; n] + | `br (c, _, `var n) -> Var.Set.of_list [c; n] + | `br (c, `var y, _) -> Var.Set.of_list [c; y] + | `br (c, _, _) -> Var.Set.singleton c + | `ret _ as c -> fv c + | `sw (_, `var i, _, _) -> Var.Set.singleton i + | `sw _ -> Var.Set.empty diff --git a/src/lib/scalars.ml b/src/lib/scalars.ml index 38ac8bf5..80a09e41 100644 --- a/src/lib/scalars.ml +++ b/src/lib/scalars.ml @@ -129,6 +129,7 @@ module type L = sig module Ctrl : sig type t + val free_vars : t -> Var.Set.t val escapes : t -> Var.Set.t val locals : t -> (Label.t * Var.t list) list end @@ -226,9 +227,13 @@ module Make(M : L) = struct Map.set acc ~key:(Virtual.Slot.var s) ~data:s) (* Run the dataflow analysis. *) - let analyze slots fn : solution = - let cfg = Cfg.create fn in - let blks = Func.map_of_blks fn in + let analyze ?cfg ?blks slots fn : solution = + let cfg = match cfg with + | None -> Cfg.create fn + | Some cfg -> cfg in + let blks = match blks with + | None -> Func.map_of_blks fn + | Some blks -> blks in Graphlib.fixpoint (module Cfg) cfg ~init:(initialize slots blks) ~start:Label.pseudoentry diff --git a/src/test/data/opt/gcdext.vir.opt.sysv b/src/test/data/opt/gcdext.vir.opt.sysv index bf24853a..8ae9b8e9 100644 --- a/src/test/data/opt/gcdext.vir.opt.sysv +++ b/src/test/data/opt/gcdext.vir.opt.sysv @@ -3,8 +3,6 @@ module gcdext export function $gcd(w %a/rdi, w %b/rsi) { %res = slot 16, align 8 %r = slot 16, align 8 - %13 = slot 16, align 8 - %18 = slot 16, align 8 @2: %1.1 = eq.w %a, 0x0_w ; @30 %2.1 = add.l %res, 0x8_l ; @31 @@ -15,11 +13,8 @@ export function $gcd(w %a/rdi, w %b/rsi) { st.w 0x0_w, %3.1 ; @8 st.w 0x1_w, %2.1 ; @10 %19.1 = ld.l %res ; @49 - st.l %19.1, %18 ; @50 %21.1 = ld.l %2.1 ; @52 - %22.1 = add.l %18, 0x8_l ; @53 - st.l %21.1, %22.1 ; @54 - jmp @29(%18) + jmp @29(%21.1, %19.1) @4: %m.1 = rem.w %b, %a ; @12 %27.1/l/rax, %28.1/l/rdx = call $gcd(%m.1/rdi, %a/rsi) ; @13 @@ -37,14 +32,8 @@ export function $gcd(w %a/rdi, w %b/rsi) { st.w %7.1, %3.1 ; @25 st.w %rx.1, %2.1 ; @27 %14.1 = ld.l %res ; @43 - st.l %14.1, %13 ; @44 %16.1 = ld.l %2.1 ; @46 - %17.1 = add.l %13, 0x8_l ; @47 - st.l %16.1, %17.1 ; @48 - jmp @29(%13) -@29(%0.1): - %23.1 = ld.l %0.1 ; @55 - %24.1 = add.l %0.1, 0x8_l ; @56 - %25.1 = ld.l %24.1 ; @57 - ret rax/%23.1, rdx/%25.1 + jmp @29(%16.1, %14.1) +@29(%30.1, %29.1): + ret rax/%29.1, rdx/%30.1 } diff --git a/src/test/data/opt/sumphi.vir.opt.sysv b/src/test/data/opt/sumphi.vir.opt.sysv index 30bdfc89..815a23ba 100644 --- a/src/test/data/opt/sumphi.vir.opt.sysv +++ b/src/test/data/opt/sumphi.vir.opt.sysv @@ -1,33 +1,28 @@ module sumphi function $sum(l %s/rdi) { - %p = slot 8, align 8 + %3 = slot 8, align 8 @2: - st.l %s, %p ; @24 - %a.1 = ld.w %p ; @4 - %0.1 = add.l %p, 0x4_l ; @19 - %b.1 = ld.w %0.1 ; @6 - %1.1 = add.w %a.1, %b.1 ; @20 - ret rax/%1.1 + st.l %s, %3 ; @22 + %a.1 = ld.w %3 ; @4 + %0 = add.l %3, 0x4_l ; @19 + %b.1 = ld.w %0 ; @6 + %1 = add.w %a.1, %b.1 ; @20 + ret rax/%1 } export function $sumphi(l %a/rdi, l %b/rsi, w %x/rdx) { %ra = slot 8, align 8 - %9 = slot 8, align 8 - %11 = slot 8, align 8 @8: st.l %a, %ra ; @28 %2.1 = slt.w %x, 0x0_w ; @21 br %2.1, @9, @10 @9: - st.l %a, %11 ; @34 - jmp @14(%11) + jmp @14(%a) @10: - st.l %b, %9 ; @32 - jmp @14(%9) -@14(%u.1): + jmp @14(%b) +@14(%6.3): st.w 0x5_w, %ra ; @17 - %13.1 = ld.l %u.1 ; @35 - %s.1/w/rax = call $sum(%13.1/rdi) ; @18 + %s.1/w/rax = call $sum(%6.3/rdi) ; @18 ret rax/%s.1 } diff --git a/src/test/data/opt/sumphi.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/sumphi.vir.opt.sysv.amd64.regalloc index 2ca3471b..1b8ae6d6 100644 --- a/src/test/data/opt/sumphi.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/sumphi.vir.opt.sysv.amd64.regalloc @@ -12,23 +12,18 @@ function $sum { ; returns: rax export function $sumphi { ; returns: rax @8: - push rbp ; @59 - mov rbp, rsp ; @60 - sub rsp, 0x20_l ; @61 - mov qword ptr [rbp - 0x20], rdi ; @54 + push rbp ; @57 + mov rbp, rsp ; @58 + sub rsp, 0x10_l ; @59 + mov qword ptr [rbp - 0x10], rdi ; @54 test edx, edx ; @47 - js @9 ; @48 -@10: - mov qword ptr [rbp - 0x18], rsi ; @32 - lea rax, qword ptr [rbp - 0x18] ; @45 - jmp @14 ; @46 + jns @14 ; @48 @9: - mov qword ptr [rbp - 0x10], rdi ; @34 - lea rax, qword ptr [rbp - 0x10] ; @43 + mov rsi, rdi ; @43 @14: - mov dword ptr [rbp - 0x20], 0x5_w ; @17 - mov rdi, qword ptr [rax] ; @35 + mov dword ptr [rbp - 0x10], 0x5_w ; @17 + mov rdi, rsi ; @18 call $sum ; rdi ; @41 - leave ; @62 + leave ; @60 ret ; @40 } diff --git a/src/test/data/opt/unref.vir.opt.sysv b/src/test/data/opt/unref.vir.opt.sysv index f92df03c..8ef5e49d 100644 --- a/src/test/data/opt/unref.vir.opt.sysv +++ b/src/test/data/opt/unref.vir.opt.sysv @@ -1,14 +1,14 @@ module unref function $sum(l %s/rdi) { - %p = slot 8, align 8 + %3 = slot 8, align 8 @2: - st.l %s, %p ; @24 - %a.1 = ld.w %p ; @4 - %0.1 = add.l %p, 0x4_l ; @19 - %b.1 = ld.w %0.1 ; @6 - %1.1 = add.w %a.1, %b.1 ; @20 - ret rax/%1.1 + st.l %s, %3 ; @22 + %a.1 = ld.w %3 ; @4 + %0 = add.l %3, 0x4_l ; @19 + %b.1 = ld.w %0 ; @6 + %1 = add.w %a.1, %b.1 ; @20 + ret rax/%1 } export function $sump(l %p/rdi) { @@ -19,13 +19,14 @@ export function $sump(l %p/rdi) { } export function $mkt(w %a/rdi, w %b/rsi) { - %r = slot 8, align 8 + %8 = slot 8, align 8 @11: - st.w %a, %r ; @12 - %2.1 = add.l %r, 0x4_l ; @21 - st.w %b, %2.1 ; @14 - %9.1 = ld.l %r ; @28 - ret rax/%9.1 + st.w %a, %8 ; @12 + %2 = add.l %8, 0x4_l ; @21 + st.w %b, %2 ; @14 + %9 = ld.l %8 ; @28 + st.l %9, %8 ; @29 + ret rax/%9 } export function $sumt(w %a/rdi, w %b/rsi) { diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc index 0fcdf52b..df70e6e4 100644 --- a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc @@ -37,29 +37,29 @@ export function $foo { ; returns: rax cmp edx, 0x28_w ; @104 jbe @64 ; @105 @65: - mov rax, qword ptr [rbp - 0x28] ; @83 - lea rcx, qword ptr [rax + 0x8] ; @84 - lea rdx, qword ptr [rax + 0x10] ; @85 + mov rcx, qword ptr [rbp - 0x28] ; @83 + lea rax, qword ptr [rcx + 0x8] ; @84 + lea rdx, qword ptr [rcx + 0x10] ; @85 mov qword ptr [rbp - 0x28], rdx ; @86 jmp @66 ; @103 @64: - lea rax, qword ptr [rbp + rsi*1 - 0xe0] ; @75 - lea rcx, qword ptr [rbp + rdx*1 - 0xe0] ; @77 + lea rcx, qword ptr [rbp + rsi*1 - 0xe0] ; @75 + lea rax, qword ptr [rbp + rdx*1 - 0xe0] ; @77 add esi, 0x10_w ; @78 add edx, 0x8_w ; @79 mov dword ptr [rbp - 0x2c], esi ; @80 mov dword ptr [rbp - 0x30], edx ; @81 @66: - mov rax, qword ptr [rax] ; @87 - mov rcx, qword ptr [rcx] ; @89 + mov rcx, qword ptr [rcx] ; @87 + mov rdx, qword ptr [rax] ; @89 @61: - mov qword ptr [rbp - 0x18], rax ; @48 - mov qword ptr [rbp - 0x10], rcx ; @52 + mov qword ptr [rbp - 0x18], rcx ; @48 + mov qword ptr [rbp - 0x10], rdx ; @52 movsd xmm0, qword ptr [rbp - 0x18] ; @6 addsd xmm0, qword ptr [rip + @94] ; @95 .fp64 @94, 1.234 ; @96 cvtsd2si rax, xmm0 ; @15 - add rax, rcx ; @16 + add rax, rdx ; @16 add rax, rdi ; @17 leave ; @127 ret ; @93 From c049aec1c58952b37d032eac7a85b261b2d27f10 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 16 Nov 2025 13:56:48 -0500 Subject: [PATCH 15/62] Use the `logs` package for debug logging `fmt` is also required --- src/bin/cli.ml | 19 +++++- src/bin/dune | 2 +- src/cgen.opam | 2 + src/dune-project | 2 + src/lib/dune | 1 + .../coalesce_slots/coalesce_slots_impl.ml | 46 ++++++------- .../promote_slots/promote_slots_impl.ml | 14 ++-- .../remove_dead_vars/remove_dead_vars_impl.ml | 6 +- src/lib/passes/sroa/sroa_impl.ml | 66 +++++++++---------- 9 files changed, 86 insertions(+), 72 deletions(-) diff --git a/src/bin/cli.ml b/src/bin/cli.ml index 0c8510b7..0368c51b 100644 --- a/src/bin/cli.ml +++ b/src/bin/cli.ml @@ -134,7 +134,14 @@ type t = { target : Cgen.Target.t; } -let go f file output dump nc target = +let log_env = Cmd.Env.info "CGEN_LOG" + +let setup_log level = + Logs.set_level level; + Logs.set_reporter @@ Logs_fmt.reporter () + +let go f file output dump nc target log_level = + setup_log log_level; let file = match file with | "" -> Istdin | _ -> Ifile file in @@ -149,7 +156,15 @@ let go f file output dump nc target = | Some t -> t in f {file; output; dump; nc; target} -let t f = Term.(const (go f) $ file $ output $ dump $ dump_no_comment $ target) +let t f = + let open Term in + const (go f) $ + file $ + output $ + dump $ + dump_no_comment $ + target $ + Logs_cli.level ~env:log_env () let man = List.concat [ man_dump; diff --git a/src/bin/dune b/src/bin/dune index 879350de..625e4056 100644 --- a/src/bin/dune +++ b/src/bin/dune @@ -2,7 +2,7 @@ (name cgen_main) (public_name cgen) (package cgen) - (libraries cgen cmdliner) + (libraries cgen cmdliner logs.cli logs.fmt) (modules cgen_main cli) (flags -w -58) (ocamlopt_flags -O2) diff --git a/src/cgen.opam b/src/cgen.opam index 06808bff..58624a0e 100644 --- a/src/cgen.opam +++ b/src/cgen.opam @@ -17,7 +17,9 @@ depends: [ "core" {>= "v0.15"} "core_kernel" {>= "v0.15"} "dune" {>= "3.15"} + "fmt" "graphlib" + "logs" "menhir" "monads" "ocamldiff" diff --git a/src/dune-project b/src/dune-project index d54bc4bb..6fa0f2f1 100644 --- a/src/dune-project +++ b/src/dune-project @@ -23,7 +23,9 @@ (core (>= v0.15)) (core_kernel (>= v0.15)) dune + fmt graphlib + logs menhir monads ocamldiff diff --git a/src/lib/dune b/src/lib/dune index 5b5a17e9..c97d23ca 100644 --- a/src/lib/dune +++ b/src/lib/dune @@ -20,6 +20,7 @@ core_kernel.pairing_heap core_kernel.uopt graphlib + logs monads regular zarith) diff --git a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml index 94dd67e6..d624654b 100644 --- a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml +++ b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml @@ -6,8 +6,6 @@ open Scalars module Slot = Virtual.Slot module Allen = Allen_interval_algebra -let debug = false - type range = { lo : int; hi : int; @@ -62,9 +60,9 @@ let equiv_range slots rs x y = let rx = Map.find_exn rs x in let ry = Map.find_exn rs y in let a : Allen.t = Range.Algebra.relate rx ry in - if debug then - Format.eprintf "%a, %a: %a\n%!" - Var.pp x Var.pp y Sexp.pp (Allen.sexp_of_t a); + Logs.debug (fun m -> + m "%a, %a: %a%!" + Var.pp x Var.pp y Sexp.pp (Allen.sexp_of_t a)); match a with | Before | After -> true | _ -> false @@ -164,28 +162,26 @@ module Make(M : Scalars.L) = struct let blks = Func.map_of_blks fn in let s = Analysis.analyze ~cfg ~blks slots fn in let rs, nums = liveness cfg blks slots s in - if debug then - Map.iter_keys slots ~f:(fun x -> - match Map.find rs x with - | None -> - Format.eprintf "%a: dead\n%!" Var.pp x - | Some r when Range.is_bad r -> - Format.eprintf "%a: top\n%!" Var.pp x - | Some r -> - Format.eprintf "%a: %a (%a to %a)\n%!" Var.pp x Range.pp r - Label.pp (Vec.get_exn nums r.lo) - Label.pp (Vec.get_exn nums r.hi) - ); + Logs.debug (fun m -> + Map.iter_keys slots ~f:(fun x -> + let ppr ppf x = match Map.find rs x with + | None -> Format.fprintf ppf "dead" + | Some r when Range.is_bad r -> + Format.fprintf ppf "top" + | Some r -> + Format.fprintf ppf "%a (%a to %a)" + Range.pp r + Label.pp (Vec.get_exn nums r.lo) + Label.pp (Vec.get_exn nums r.hi) in + m "%a: %a%!" Var.pp x ppr x)); let p = non_interfering slots rs in - if debug then - Partition.groups p |> Seq.iter ~f:(fun g -> - Format.eprintf "%a\n%!" (Group.pp Var.pp) g - ); + Logs.debug (fun m -> + Partition.groups p |> Seq.iter ~f:(fun g -> + m "%a%!" (Group.pp Var.pp) g)); (* TODO: detect singleton ranges: these should be dead stores *) let subst = make_subst slots p in - if debug then - Map.iteri subst ~f:(fun ~key ~data -> - Format.eprintf "%a => %a\n%!" - Var.pp key Virtual.pp_operand data); + Logs.debug (fun m -> + Map.iteri subst ~f:(fun ~key ~data -> + m "coalesce slot: %a => %a%!" Var.pp key Virtual.pp_operand data)); subst end diff --git a/src/lib/passes/promote_slots/promote_slots_impl.ml b/src/lib/passes/promote_slots/promote_slots_impl.ml index 213048c4..7fc41c33 100644 --- a/src/lib/passes/promote_slots/promote_slots_impl.ml +++ b/src/lib/passes/promote_slots/promote_slots_impl.ml @@ -5,8 +5,6 @@ open Virtual module E = Monad.Result.Error -let debug = false - open E.Let module type L = sig @@ -119,17 +117,15 @@ module Make(M : L) = struct Seq.fold ~init:Var.Map.empty ~f:(fun acc s -> match Qualify.go env s with | Bad -> - if debug then - Format.eprintf "slot %a is bad\n%!" Var.pp (Slot.var s); + Logs.debug (fun m -> m "cannot promote %a%!" Var.pp (Slot.var s)); acc | Write (_, t) -> - if debug then - Format.eprintf "promoting %a\n%!" Var.pp (Slot.var s); + Logs.debug (fun m -> m "promoting %a%!" Var.pp (Slot.var s)); Map.set acc ~key:(Slot.var s) ~data:t | Read _ -> - if debug then - Format.eprintf "slot %a is read, but never written to\n%!" - Var.pp (Slot.var s); + Logs.debug (fun m -> + m "slot %a is read, but never written to%!" + Var.pp (Slot.var s)); (* In this case, we read from the slot but never stored anything to it. It's undefined behavior, but it's also what the programmer intended, so we should cancel this promotion. *) diff --git a/src/lib/passes/remove_dead_vars/remove_dead_vars_impl.ml b/src/lib/passes/remove_dead_vars/remove_dead_vars_impl.ml index 9b30ccf6..2ccc71cd 100644 --- a/src/lib/passes/remove_dead_vars/remove_dead_vars_impl.ml +++ b/src/lib/passes/remove_dead_vars/remove_dead_vars_impl.ml @@ -100,11 +100,15 @@ module Make(M : S) = struct | Some x -> i :: acc, changed, alive -- x ++ Insn.free_vars i | None -> i :: acc, changed, alive ++ Insn.free_vars i + let remove_slot fn x = + Logs.debug (fun m -> m "%s: slot %a is dead%!" __FUNCTION__ Var.pp x); + Func.remove_slot fn x + let finalize fn blks live = let ins = Live.ins live @@ Func.entry fn in Func.slots fn |> Seq.map ~f:Slot.var |> Seq.filter ~f:(Fn.non @@ Set.mem ins) |> - Seq.fold ~init:fn ~f:Func.remove_slot |> + Seq.fold ~init:fn ~f:remove_slot |> Fn.flip Func.update_blks' blks let rec run fn blks cfg = diff --git a/src/lib/passes/sroa/sroa_impl.ml b/src/lib/passes/sroa/sroa_impl.ml index ae3738ee..caf31f1c 100644 --- a/src/lib/passes/sroa/sroa_impl.ml +++ b/src/lib/passes/sroa/sroa_impl.ml @@ -13,7 +13,7 @@ open Regular.Std open Graphlib.Std open Scalars -let debug = false +module Slot = Virtual.Slot module Make(M : Scalars.L) : sig val run : M.Func.t -> M.Func.t Context.t @@ -76,8 +76,8 @@ end = struct | x :: ((y :: _) as xs) -> check x y && ok xs | [] | [_] -> true in let res = ok data in - if debug && not res then - Format.eprintf "filtering out accesses for %a\n%!" Var.pp key; + if not res then + Logs.debug (fun m -> m "filtering out accesses for %a%!" Var.pp key); res) let overlaps oa sa ob sb = @@ -99,7 +99,7 @@ end = struct (* Check if a partition covers the entire slot `s`. *) let is_entire_slot s p = match p.off with - | 0L -> Virtual.Slot.size s = p.size + | 0L -> Slot.size s = p.size | _ -> false let pp_partition ppf p = @@ -161,10 +161,10 @@ end = struct by one or more `ld.w`). If so, this partition could be broken down further if we modify the store instruction(s). *) let* x = Context.Var.fresh in - let*? s = Virtual.Slot.create x ~size:p.size ~align:p.size in - if debug then - Format.eprintf "new slot %a, base=%a, off=0x%Lx, size=%d\n%!" - Var.pp x Var.pp base p.off p.size; + let*? s = Slot.create x ~size:p.size ~align:p.size in + Logs.debug (fun m -> + m "new slot %a, base=%a, off=0x%Lx, size=%d%!" + Var.pp x Var.pp base p.off p.size); !!(Map.set acc ~key:(base, p.off) ~data:s))) (* Find the corresponding partition for [base+off, base+off+size). *) @@ -176,33 +176,28 @@ end = struct (* Exact cover for a scalar at `base + off`. *) let rewrite_insn_exact (m : scalars) i ~exact ~base ~off = let open Context.Syntax in - if debug then - Format.eprintf "exact=0x%Lx, off=0x%Lx, base=%a\n%!" - exact off Var.pp base; + Logs.debug (fun m -> + m "exact=0x%Lx, off=0x%Lx, base=%a%!" + exact off Var.pp base); let op = Insn.op i in let delta = Int64.(off - exact) in match Map.find m (base, exact) with | None -> - if debug then - Format.eprintf "no slot found\n%!"; + Logs.debug (fun m -> m "no slot found%!"); !![i] | Some s when Int64.(delta = 0L) -> - if debug then - Format.eprintf "found slot %a (base)\n%!" - Var.pp (Virtual.Slot.var s); + Logs.debug (fun m -> m "found slot %a (base)%!" Var.pp (Slot.var s)); (* Store to base of new slot. *) - let addr = Virtual.Slot.var s in + let addr = Slot.var s in let op' = Insn.replace_load_or_store_addr addr op in !![Insn.with_op i op'] | Some s -> - if debug then - Format.eprintf "found slot %a (delta 0x%Lx)\n%!" - Var.pp (Virtual.Slot.var s) delta; + Logs.debug (fun m -> m "found slot %a (delta 0x%Lx)%!" Var.pp (Slot.var s) delta); (* Compute offset of new slot and store to it. *) let* l = Context.Label.fresh in let* y = Context.Var.fresh in let+ word = Context.target >>| Target.word in - let a = Insn.add y word (Virtual.Slot.var s) delta in + let a = Insn.add y word (Slot.var s) delta in let op' = Insn.replace_load_or_store_addr y op in [Insn.create ~label:l a; Insn.with_op i op'] @@ -213,20 +208,19 @@ end = struct match Insn.load_or_store_to op with | None -> !![i] | Some (ptr, ty, ldst) -> - if debug then - Format.eprintf "%a: looking at %a.%a to %a\n%!" - Label.pp (Insn.label i) - pp_load_or_store ldst - Type.pp_basic ty - Var.pp ptr; + Logs.debug (fun m -> + m "%a: looking at %a.%a to %a%!" + Label.pp (Insn.label i) + pp_load_or_store ldst + Type.pp_basic ty + Var.pp ptr); match Map.find s ptr with | Some Top | None -> !![i] | Some Offset (base, off) -> match find_partition parts base off @@ basic_size ty with | Some p -> rewrite_insn_exact m i ~exact:p.off ~base ~off | None -> - if debug then - Format.eprintf "no parts found\n%!"; + Logs.debug (fun m -> m "no parts found%!"); !![i] let rewrite_with_partitions slots fn (s : solution) parts m = @@ -250,11 +244,15 @@ end = struct let s = Analysis.analyze slots fn in let accs = collect_accesses slots fn s in let parts = partition_acesses accs in - if debug then - Map.iteri parts ~f:(fun ~key ~data -> - Format.eprintf "partitions for %a:\n%!" Var.pp key; - List.iter data ~f:(fun p -> - Format.eprintf " %a\n%!" pp_partition p)); + Logs.debug (fun m -> + Map.iteri parts ~f:(fun ~key ~data -> + if not @@ List.is_empty data then + m "partitions for %a:\n%a%!" + Var.pp key + (Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n%!") + (fun ppf a -> Format.fprintf ppf " %a%!" pp_partition a)) + data)); let* m = materialize_partitions slots parts in let fn = insert_new_slots fn m in rewrite_with_partitions slots fn s parts m From fcb355756bbbf8c7484526b313488dec9b1d58f0 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 16 Nov 2025 13:59:54 -0500 Subject: [PATCH 16/62] Fix warnings --- src/lib/egraph/egraph_rewrite.ml | 2 +- src/lib/passes/coalesce_slots/coalesce_slots.ml | 1 - src/lib/passes/sroa/sroa.ml | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/lib/egraph/egraph_rewrite.ml b/src/lib/egraph/egraph_rewrite.ml index bd4df29b..0d8b9dc6 100644 --- a/src/lib/egraph/egraph_rewrite.ml +++ b/src/lib/egraph/egraph_rewrite.ml @@ -109,7 +109,7 @@ and optimize ?ty ?l ~d t n id = | U _ -> assert false | N _ when d <= 0 -> id | N _ when Matcher.is_empty t.rules -> id - | N (o, cs) -> + | N _ -> let vm = VM.create () in let rws = {id; budget = t.match_limit} in if VM.init ~lookup:(node t) vm t.rules id then begin diff --git a/src/lib/passes/coalesce_slots/coalesce_slots.ml b/src/lib/passes/coalesce_slots/coalesce_slots.ml index 23b790dc..dfae3457 100644 --- a/src/lib/passes/coalesce_slots/coalesce_slots.ml +++ b/src/lib/passes/coalesce_slots/coalesce_slots.ml @@ -1,6 +1,5 @@ open Core open Monads.Std -open Regular.Std open Virtual open Scalars open Coalesce_slots_impl diff --git a/src/lib/passes/sroa/sroa.ml b/src/lib/passes/sroa/sroa.ml index ef1eb408..b3b2ec1f 100644 --- a/src/lib/passes/sroa/sroa.ml +++ b/src/lib/passes/sroa/sroa.ml @@ -1,5 +1,4 @@ open Core -open Regular.Std open Virtual open Scalars open Sroa_impl From 70ab27c50e373393f0e7fac11d9ea687bfa1b066 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 16 Nov 2025 14:04:11 -0500 Subject: [PATCH 17/62] Specify build profile with `DEBUG` var --- src/Makefile | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Makefile b/src/Makefile index 0708372c..daf688b1 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,5 +1,10 @@ .PHONY: clean install uninstall test doc deps bap-deps +PROFILE = release +ifeq ($(DEBUG),1) + PROFILE = dev +endif + BAP_DEPS = \ bitvec \ bitvec-binprot \ @@ -12,7 +17,7 @@ BAP_DEPS = \ BAP_REPO := git+https://github.com/BinaryAnalysisPlatform/opam-repository\#testing all: - dune build --profile=release + dune build --profile=$(PROFILE) clean: dune clean @@ -24,7 +29,7 @@ uninstall: dune uninstall test: - dune test --profile=release + dune test --profile=$(PROFILE) doc: dune build @doc From d9126ab607376d9e466865c4a1e630ee5e256284 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 16 Nov 2025 14:37:59 -0500 Subject: [PATCH 18/62] Filter out dead stores to slots --- .../passes/coalesce_slots/coalesce_slots.ml | 41 +++++-- .../coalesce_slots/coalesce_slots_impl.ml | 108 ++++++++++++++---- 2 files changed, 115 insertions(+), 34 deletions(-) diff --git a/src/lib/passes/coalesce_slots/coalesce_slots.ml b/src/lib/passes/coalesce_slots/coalesce_slots.ml index dfae3457..643ff244 100644 --- a/src/lib/passes/coalesce_slots/coalesce_slots.ml +++ b/src/lib/passes/coalesce_slots/coalesce_slots.ml @@ -1,5 +1,6 @@ open Core open Monads.Std +open Regular.Std open Virtual open Scalars open Coalesce_slots_impl @@ -70,18 +71,38 @@ let check_ssa msg n d = if Dict.mem d Tags.ssa then Ok () else E.failf "In Coalesce_slots%s: function $%s is not in SSA form" msg n () +let no_deads f d is = List.filter is ~f:(not @. Lset.mem d @. f) + let run fn = let+ () = check_ssa "" (Func.name fn) (Func.dict fn) in - let subst = V.run fn in - if Map.is_empty subst then fn else - Func.map_blks fn ~f:(fun b -> - let insns, ctrl = Subst_mapper.map_blk subst b in - Blk.with_ctrl (Blk.with_insns b insns) ctrl) + let t = V.run fn in + if is_empty t then fn else + let f = + if Map.is_empty t.subst then + fun b -> + Blk.insns b |> Seq.to_list |> + no_deads Insn.label t.deads |> + Blk.with_insns b + else + fun b -> + let insns, ctrl = Subst_mapper.map_blk t.subst b in + let insns = no_deads Insn.label t.deads insns in + Blk.with_ctrl (Blk.with_insns b insns) ctrl in + Func.map_blks fn ~f let run_abi fn = let+ () = check_ssa " (ABI)" (Abi.Func.name fn) (Abi.Func.dict fn) in - let subst = A.run fn in - if Map.is_empty subst then fn else - Abi.Func.map_blks fn ~f:(fun b -> - let insns, ctrl = Subst_mapper_abi.map_blk subst b in - Abi.Blk.with_ctrl (Abi.Blk.with_insns b insns) ctrl) + let t = A.run fn in + if is_empty t then fn else + let f = + if Map.is_empty t.subst then + fun b -> + Abi.Blk.insns b |> Seq.to_list |> + no_deads Abi.Insn.label t.deads |> + Abi.Blk.with_insns b + else + fun b -> + let insns, ctrl = Subst_mapper_abi.map_blk t.subst b in + let insns = no_deads Abi.Insn.label t.deads insns in + Abi.Blk.with_ctrl (Abi.Blk.with_insns b insns) ctrl in + Abi.Func.map_blks fn ~f diff --git a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml index d624654b..6a719282 100644 --- a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml +++ b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml @@ -3,26 +3,42 @@ open Regular.Std open Graphlib.Std open Scalars +module Ltree = Label.Tree +module Lset = Label.Tree_set module Slot = Virtual.Slot module Allen = Allen_interval_algebra +type tag = Def | Use | Both [@@deriving compare, equal, sexp] + +let pp_tag ppf = function + | Def -> Format.fprintf ppf "def" + | Use -> Format.fprintf ppf "use" + | Both -> Format.fprintf ppf "both" + +let join_tag a b = if equal_tag a b then a else Both + type range = { lo : int; hi : int; + tg : tag; } [@@deriving compare, equal, sexp] module Range = struct type t = range [@@deriving compare, equal, sexp] - let pp ppf r = Format.fprintf ppf "[%d, %d]" r.lo r.hi + let pp ppf r = Format.fprintf ppf "%a[%d, %d]" pp_tag r.tg r.lo r.hi - let bad = {lo = Int.min_value; hi = Int.max_value} + let bad = {lo = Int.min_value; hi = Int.max_value; tg = Both} let is_bad = equal bad - let singleton n = {lo = n; hi = n} + let singleton n = {lo = n; hi = n; tg = Def} (* Extend the upper-bound on the live range. *) - let use r n = {r with hi = Int.max r.hi n} + let use r n = { + r with + hi = Int.max r.hi n; + tg = join_tag r.tg Use; + } (* Shrink the lower-bound on the live range. @@ -32,6 +48,7 @@ module Range = struct let def r n = { lo = Int.min r.lo n; hi = Int.max r.hi n; + tg = join_tag r.tg Def; } module Algebra = Allen.Make(struct @@ -96,6 +113,19 @@ let make_subst slots p = Group.enum g |> Seq.filter ~f:(not @. Var.equal canon) |> Seq.fold ~init ~f:(fun acc x -> Map.set acc ~key:x ~data:(`var canon))) +type t = { + subst : Subst_mapper.t; (* Map from coalesced to canonical slots *) + deads : Lset.t; (* Stores to dead slots. *) +} + +let empty = { + subst = Var.Map.empty; + deads = Lset.empty; +} + +let is_empty t = + Map.is_empty t.subst && Lset.is_empty t.deads + module Make(M : Scalars.L) = struct open M @@ -137,27 +167,49 @@ module Make(M : Scalars.L) = struct let acc = Graphlib.reverse_postorder_traverse (module Cfg) ~start:Label.pseudoentry cfg |> - Seq.fold ~init:Var.Map.empty ~f:(fun acc l -> - match Label.Tree.find blks l with - | None -> acc - | Some b -> - let s = ref @@ Solution.get s l in - let acc = Blk.insns b |> Seq.fold ~init:acc ~f:(fun acc i -> - let op = Insn.op i in - let acc = liveness_insn acc !s !ip i in - Vec.push nums (Insn.label i); - s := Analysis.transfer_op slots !s op; - incr ip; - acc) in - let acc = liveness_ctrl acc !s !ip @@ Blk.ctrl b in - Vec.push nums l; - incr ip; - acc) in + Seq.filter_map ~f:(Ltree.find blks) |> + Seq.fold ~init:Var.Map.empty ~f:(fun acc b -> + let l = Blk.label b in + let s = ref @@ Solution.get s l in + let acc = Blk.insns b |> Seq.fold ~init:acc ~f:(fun acc i -> + let op = Insn.op i in + let acc = liveness_insn acc !s !ip i in + Vec.push nums (Insn.label i); + s := Analysis.transfer_op slots !s op; + incr ip; + acc) in + let acc = liveness_ctrl acc !s !ip @@ Blk.ctrl b in + Vec.push nums l; + incr ip; + acc) in acc, nums + let collect_deads blks slots rs s = + Ltree.fold blks ~init:Lset.empty + ~f:(fun ~key ~data:b init -> + let s = ref @@ Solution.get s key in + Blk.insns b |> Seq.fold ~init ~f:(fun acc i -> + let op = Insn.op i in + let acc = match Insn.load_or_store_to op with + | Some (ptr, _, Store) -> + begin match Map.find !s ptr with + | Some Offset (base, _) -> + begin match Map.find rs base with + | Some {tg = Def; _} -> + (* This slot is only ever stored to, so we can + safely remove it. *) + Lset.add acc @@ Insn.label i + | _ -> acc + end + | _ -> acc + end + | _ -> acc in + s := Analysis.transfer_op slots !s op; + acc)) + let run fn = let slots = Analysis.collect_slots fn in - if Map.is_empty slots then Var.Map.empty else + if Map.is_empty slots then empty else let cfg = Cfg.create fn in let blks = Func.map_of_blks fn in let s = Analysis.analyze ~cfg ~blks slots fn in @@ -178,10 +230,18 @@ module Make(M : Scalars.L) = struct Logs.debug (fun m -> Partition.groups p |> Seq.iter ~f:(fun g -> m "%a%!" (Group.pp Var.pp) g)); - (* TODO: detect singleton ranges: these should be dead stores *) + let deads = collect_deads blks slots rs s in + Logs.debug (fun m -> + if not @@ Lset.is_empty deads then + m "dead stores: %a%!" + (Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") + Label.pp) + (Lset.to_list deads)); let subst = make_subst slots p in Logs.debug (fun m -> Map.iteri subst ~f:(fun ~key ~data -> - m "coalesce slot: %a => %a%!" Var.pp key Virtual.pp_operand data)); - subst + m "coalesce slot: %a => %a%!" + Var.pp key Virtual.pp_operand data)); + {subst; deads} end From 54dd6aa386783c61df218f022a58d687226ddd31 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 16 Nov 2025 15:17:45 -0500 Subject: [PATCH 19/62] Mark current function in debug logs --- .../coalesce_slots/coalesce_slots_impl.ml | 15 ++--- .../promote_slots/promote_slots_impl.ml | 12 ++-- .../remove_dead_vars/remove_dead_vars_impl.ml | 7 ++- src/lib/passes/sroa/sroa_impl.ml | 55 ++++++++++++------- 4 files changed, 57 insertions(+), 32 deletions(-) diff --git a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml index 6a719282..b5a8d747 100644 --- a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml +++ b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml @@ -78,8 +78,8 @@ let equiv_range slots rs x y = let ry = Map.find_exn rs y in let a : Allen.t = Range.Algebra.relate rx ry in Logs.debug (fun m -> - m "%a, %a: %a%!" - Var.pp x Var.pp y Sexp.pp (Allen.sexp_of_t a)); + m "%s: %a, %a: %a%!" + __FUNCTION__ Var.pp x Var.pp y Sexp.pp (Allen.sexp_of_t a)); match a with | Before | After -> true | _ -> false @@ -225,15 +225,16 @@ module Make(M : Scalars.L) = struct Range.pp r Label.pp (Vec.get_exn nums r.lo) Label.pp (Vec.get_exn nums r.hi) in - m "%a: %a%!" Var.pp x ppr x)); + m "%s: %a: %a%!" __FUNCTION__ Var.pp x ppr x)); let p = non_interfering slots rs in Logs.debug (fun m -> Partition.groups p |> Seq.iter ~f:(fun g -> - m "%a%!" (Group.pp Var.pp) g)); + m "%s: group: %a%!" __FUNCTION__ (Group.pp Var.pp) g)); let deads = collect_deads blks slots rs s in Logs.debug (fun m -> if not @@ Lset.is_empty deads then - m "dead stores: %a%!" + m "%s: dead stores: %a%!" + __FUNCTION__ (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") Label.pp) @@ -241,7 +242,7 @@ module Make(M : Scalars.L) = struct let subst = make_subst slots p in Logs.debug (fun m -> Map.iteri subst ~f:(fun ~key ~data -> - m "coalesce slot: %a => %a%!" - Var.pp key Virtual.pp_operand data)); + m "%s: coalesce slot: %a => %a%!" + __FUNCTION__ Var.pp key Virtual.pp_operand data)); {subst; deads} end diff --git a/src/lib/passes/promote_slots/promote_slots_impl.ml b/src/lib/passes/promote_slots/promote_slots_impl.ml index 7fc41c33..7f7a9b73 100644 --- a/src/lib/passes/promote_slots/promote_slots_impl.ml +++ b/src/lib/passes/promote_slots/promote_slots_impl.ml @@ -117,15 +117,19 @@ module Make(M : L) = struct Seq.fold ~init:Var.Map.empty ~f:(fun acc s -> match Qualify.go env s with | Bad -> - Logs.debug (fun m -> m "cannot promote %a%!" Var.pp (Slot.var s)); + Logs.debug (fun m -> + m "%s: cannot promote %a%!" + __FUNCTION__ Var.pp (Slot.var s)); acc | Write (_, t) -> - Logs.debug (fun m -> m "promoting %a%!" Var.pp (Slot.var s)); + Logs.debug (fun m -> + m "%s: promoting %a%!" + __FUNCTION__ Var.pp (Slot.var s)); Map.set acc ~key:(Slot.var s) ~data:t | Read _ -> Logs.debug (fun m -> - m "slot %a is read, but never written to%!" - Var.pp (Slot.var s)); + m "%s: slot %a is read, but never written to%!" + __FUNCTION__ Var.pp (Slot.var s)); (* In this case, we read from the slot but never stored anything to it. It's undefined behavior, but it's also what the programmer intended, so we should cancel this promotion. *) diff --git a/src/lib/passes/remove_dead_vars/remove_dead_vars_impl.ml b/src/lib/passes/remove_dead_vars/remove_dead_vars_impl.ml index 2ccc71cd..9522becf 100644 --- a/src/lib/passes/remove_dead_vars/remove_dead_vars_impl.ml +++ b/src/lib/passes/remove_dead_vars/remove_dead_vars_impl.ml @@ -16,6 +16,7 @@ module type S = sig module Insn : sig type t + val label : t -> Label.t val check_div_rem : t -> bool val is_effectful : t -> bool val lhs : t -> Var.t option @@ -96,7 +97,11 @@ module Make(M : S) = struct function is in SSA form then keeping in them in the alive set shouldn't affect the results. *) let insn (acc, changed, alive) i = match Insn.lhs i with - | Some x when not @@ keep i x alive -> acc, true, alive + | Some x when not @@ keep i x alive -> + Logs.debug (fun m -> + m "%s: %a: %a is dead%!" __FUNCTION__ + Label.pp (Insn.label i) Var.pp x); + acc, true, alive | Some x -> i :: acc, changed, alive -- x ++ Insn.free_vars i | None -> i :: acc, changed, alive ++ Insn.free_vars i diff --git a/src/lib/passes/sroa/sroa_impl.ml b/src/lib/passes/sroa/sroa_impl.ml index caf31f1c..ff6c40d5 100644 --- a/src/lib/passes/sroa/sroa_impl.ml +++ b/src/lib/passes/sroa/sroa_impl.ml @@ -77,7 +77,9 @@ end = struct | [] | [_] -> true in let res = ok data in if not res then - Logs.debug (fun m -> m "filtering out accesses for %a%!" Var.pp key); + Logs.debug (fun m -> + m "%s: filtering out accesses for %a%!" + __FUNCTION__ Var.pp key); res) let overlaps oa sa ob sb = @@ -103,7 +105,7 @@ end = struct | _ -> false let pp_partition ppf p = - Format.fprintf ppf "0x%Lx:%d: @[%a@]" + Format.fprintf ppf "0x%Lx:%d: %a" p.off p.size (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf " ") @@ -163,8 +165,8 @@ end = struct let* x = Context.Var.fresh in let*? s = Slot.create x ~size:p.size ~align:p.size in Logs.debug (fun m -> - m "new slot %a, base=%a, off=0x%Lx, size=%d%!" - Var.pp x Var.pp base p.off p.size); + m "%s: new slot %a, base=%a, off=0x%Lx, size=%d%!" + __FUNCTION__ Var.pp x Var.pp base p.off p.size); !!(Map.set acc ~key:(base, p.off) ~data:s))) (* Find the corresponding partition for [base+off, base+off+size). *) @@ -177,22 +179,26 @@ end = struct let rewrite_insn_exact (m : scalars) i ~exact ~base ~off = let open Context.Syntax in Logs.debug (fun m -> - m "exact=0x%Lx, off=0x%Lx, base=%a%!" - exact off Var.pp base); + m "%s: exact=0x%Lx, off=0x%Lx, base=%a%!" + __FUNCTION__ exact off Var.pp base); let op = Insn.op i in let delta = Int64.(off - exact) in match Map.find m (base, exact) with | None -> - Logs.debug (fun m -> m "no slot found%!"); + Logs.debug (fun m -> m "%s: no slot found%!" __FUNCTION__); !![i] | Some s when Int64.(delta = 0L) -> - Logs.debug (fun m -> m "found slot %a (base)%!" Var.pp (Slot.var s)); + Logs.debug (fun m -> + m "%s: found slot %a (base)%!" + __FUNCTION__ Var.pp (Slot.var s)); (* Store to base of new slot. *) let addr = Slot.var s in let op' = Insn.replace_load_or_store_addr addr op in !![Insn.with_op i op'] | Some s -> - Logs.debug (fun m -> m "found slot %a (delta 0x%Lx)%!" Var.pp (Slot.var s) delta); + Logs.debug (fun m -> + m "%s: found slot %a (delta 0x%Lx)%!" + __FUNCTION__ Var.pp (Slot.var s) delta); (* Compute offset of new slot and store to it. *) let* l = Context.Label.fresh in let* y = Context.Var.fresh in @@ -209,7 +215,8 @@ end = struct | None -> !![i] | Some (ptr, ty, ldst) -> Logs.debug (fun m -> - m "%a: looking at %a.%a to %a%!" + m "%s: %a: looking at %a.%a to %a%!" + __FUNCTION__ Label.pp (Insn.label i) pp_load_or_store ldst Type.pp_basic ty @@ -220,7 +227,7 @@ end = struct match find_partition parts base off @@ basic_size ty with | Some p -> rewrite_insn_exact m i ~exact:p.off ~base ~off | None -> - Logs.debug (fun m -> m "no parts found%!"); + Logs.debug (fun m -> m "%s: no parts found%!" __FUNCTION__); !![i] let rewrite_with_partitions slots fn (s : solution) parts m = @@ -237,6 +244,22 @@ end = struct let insert_new_slots fn m = Map.fold m ~init:fn ~f:(fun ~key:_ ~data fn -> Func.insert_slot fn data) + let debug_show_parts parts = + if not @@ Map.is_empty parts then + Logs.debug (fun m -> + m "%s: partitions:\n%a%!" + __FUNCTION__ + (Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n") + (fun ppf (key, data) -> + Format.fprintf ppf "%a:\n%a" + Var.pp key + (Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n") + (fun ppf -> Format.fprintf ppf " %a" pp_partition)) + data)) + (Map.to_alist parts)) + let run fn = let open Context.Syntax in let slots = Analysis.collect_slots fn in @@ -244,15 +267,7 @@ end = struct let s = Analysis.analyze slots fn in let accs = collect_accesses slots fn s in let parts = partition_acesses accs in - Logs.debug (fun m -> - Map.iteri parts ~f:(fun ~key ~data -> - if not @@ List.is_empty data then - m "partitions for %a:\n%a%!" - Var.pp key - (Format.pp_print_list - ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n%!") - (fun ppf a -> Format.fprintf ppf " %a%!" pp_partition a)) - data)); + debug_show_parts parts; let* m = materialize_partitions slots parts in let fn = insert_new_slots fn m in rewrite_with_partitions slots fn s parts m From 4538f39dd4705dd67dd5135fa728203c8cc55622 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 16 Nov 2025 19:34:23 -0500 Subject: [PATCH 20/62] Fixes several bugs 1. SSA renaming would sometimes fail. We should use the `base` of the var to index into the `stk` hashtable. 2. Coalescing would pick the wrong canonical slot. The order of the comparison was backwards, so the resuling code could sometimes segfault when writing out of bounds (because it picked the *smallest* slot). 3. We were a bit too conservative with the `escapes` function on store instructions. This exposed some bugs, but should also open some more optimization opportunity. 4. `Abi_loadopt` now has to do more work with constant folding. --- src/lib/passes/abi_loadopt.ml | 58 ++++++-- .../passes/coalesce_slots/coalesce_slots.ml | 14 +- .../coalesce_slots/coalesce_slots_impl.ml | 7 +- src/lib/passes/passes.ml | 3 + src/lib/passes/sroa/sroa.ml | 14 +- src/lib/passes/sroa_coalesce_common.ml | 2 + src/lib/passes/ssa/ssa_impl_rename.ml | 23 +-- src/lib/scalars.ml | 4 +- src/lib/virtual/abi/abi.ml | 1 + src/lib/virtual/abi/abi_eval.ml | 1 + src/lib/virtual/virtual.mli | 50 +------ src/lib/virtual/virtual_eval_intf.ml | 49 +++++++ src/test/data/opt/gcdext.vir.opt.sysv | 21 ++- src/test/data/opt/sumphi.vir.opt.sysv | 3 - .../opt/sumphi.vir.opt.sysv.amd64.regalloc | 10 +- src/test/data/opt/vaarg1.vir.opt.sysv | 136 +++++++++--------- src/test/data/opt/vaarg1.vir.opt.sysv.amd64 | 68 ++++----- .../opt/vaarg1.vir.opt.sysv.amd64.regalloc | 12 +- src/test/data/opt/vaarg2.vir.opt.sysv | 90 +++++------- src/test/data/opt/vasum.vir.opt.sysv | 107 ++++++-------- src/test/data/opt/vasum.vir.opt.sysv.amd64 | 112 +++++++-------- 21 files changed, 416 insertions(+), 369 deletions(-) create mode 100644 src/lib/virtual/abi/abi_eval.ml create mode 100644 src/lib/virtual/virtual_eval_intf.ml diff --git a/src/lib/passes/abi_loadopt.ml b/src/lib/passes/abi_loadopt.ml index c6eccfea..58bf4b09 100644 --- a/src/lib/passes/abi_loadopt.ml +++ b/src/lib/passes/abi_loadopt.ml @@ -215,17 +215,55 @@ module Optimize = struct t.mem <- Some l; `call (xs, f, args) + let eval_bop o a b = match a, b with + | `int (a, _), `int (b, _) -> + (Abi.Eval.binop_int o a b :> const option) + | `float a, `float b -> + (Abi.Eval.binop_single o a b :> const option) + | `double a, `double b -> + (Abi.Eval.binop_double o a b :> const option) + | _ -> None + + let eval_uop o = function + | `int (a, ty) -> + (Abi.Eval.unop_int o a ty :> const option) + | `float a -> + (Abi.Eval.unop_single o a :> const option) + | `double a -> + (Abi.Eval.unop_double o a :> const option) + | _ -> None + + let bop t l x o a b = + let a' = operand t a in + let b' = operand t b in + let op = `bop (x, o, a', b') in + begin match eval_bop o a' b' with + | None -> + let k = Op.of_insn op in + canonicalize t x k; + Op.commute k |> Option.iter ~f:(canonicalize t x) + | Some c -> + Hashtbl.set t.vars ~key:x ~data:(c :> operand); + Hash_set.add t.nop l + end; + op + + let uop t l x o a = + let a' = operand t a in + let op = `uop (x, o, a') in + begin match eval_uop o a' with + | None -> + let k = Op.of_insn op in + canonicalize t x k + | Some c -> + Hashtbl.set t.vars ~key:x ~data:(c :> operand); + Hash_set.add t.nop l + end; + op + let insn t l : Abi.Insn.op -> Abi.Insn.op = function - | `bop (x, o, a, b) -> - let op = `bop (x, o, operand t a, operand t b) in - let k = Op.of_insn op in - canonicalize t x k; - Op.commute k |> Option.iter ~f:(canonicalize t x); - op - | `uop (x, o, a) -> - let op = `uop (x, o, operand t a) in - canonicalize t x @@ Op.of_insn op; - op + | `bop (x, o, a, b) -> bop t l x o a b + | `uop (x, o, a) -> uop t l x o a | `sel (x, ty, c, y, n) -> sel t x ty c y n | `call (xs, f, args) -> call t l xs f args | `store (ty, v, a) -> store t l ty v a diff --git a/src/lib/passes/coalesce_slots/coalesce_slots.ml b/src/lib/passes/coalesce_slots/coalesce_slots.ml index 643ff244..2ad5d08e 100644 --- a/src/lib/passes/coalesce_slots/coalesce_slots.ml +++ b/src/lib/passes/coalesce_slots/coalesce_slots.ml @@ -15,12 +15,15 @@ module V = Make(struct let create ~label op = Insn.create op ~label let with_op = Insn.with_op let op = Insn.op + let special = function + | #Insn.variadic -> true + | _ -> false let label = Insn.label let lhs = var_set_of_option @. Insn.lhs_of_op let offset = (offset :> op -> _) let copy_of = (copy_of :> op -> _) - let fv = Insn.free_vars_of_op - let escapes = (escapes is_named fv :> op -> _) + let free_vars = Insn.free_vars_of_op + let escapes = (escapes is_named free_vars :> op -> _) let load_or_store_to = (load_or_store_to :> op -> _) let replace_load_or_store_addr a = (replace_load_or_store_addr a :> op -> _) let add x ty b o = (add x ty b o :> op) @@ -44,12 +47,15 @@ module A = Make(struct let create ~label op = Insn.create op ~label let with_op = Insn.with_op let op = Insn.op + let special = function + | #Insn.extra -> true + | _ -> false let label = Insn.label let lhs = Insn.def_of_op let offset = (offset :> op -> _) let copy_of = (copy_of :> op -> _) - let fv = Insn.free_vars_of_op - let escapes = (escapes (const false) fv :> op -> _) + let free_vars = Insn.free_vars_of_op + let escapes = (escapes (const false) free_vars :> op -> _) let load_or_store_to = (load_or_store_to :> op -> _) let replace_load_or_store_addr a = (replace_load_or_store_addr a :> op -> _) let add x ty b o = (add x ty b o :> op) diff --git a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml index b5a8d747..6bb06791 100644 --- a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml +++ b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml @@ -101,8 +101,8 @@ let canon_elt slots g = just pick the biggest one. *) let sx, ax = slot_sa slots x in let sy, ay = slot_sa slots y in - match Int.compare sy sx with - | 0 -> Int.compare ay ax + match Int.compare sx sy with + | 0 -> Int.compare ax ay | c -> c) |> Option.value_exn @@ -155,6 +155,9 @@ module Make(M : Scalars.L) = struct update acc s ptr ip false | None -> match Insn.copy_of op with | Some x -> update acc s x ip false + | None when Insn.special op -> + Insn.free_vars op |> Set.fold ~init:acc + ~f:(fun acc x -> Map.set acc ~key:x ~data:Range.bad) | None -> acc let liveness_ctrl acc s ip c = diff --git a/src/lib/passes/passes.ml b/src/lib/passes/passes.ml index f74244c7..85282098 100644 --- a/src/lib/passes/passes.ml +++ b/src/lib/passes/passes.ml @@ -70,6 +70,9 @@ let optimize_abi m = let*? m = Abi.Module.map_funs_err m ~f:Abi_loadopt.run in let m = Abi.Module.map_funs m ~f:Remove_disjoint_blks.run_abi in let*? m = Abi.Module.map_funs_err m ~f:Remove_dead_vars.run_abi in + let*? m = Abi.Module.map_funs_err m ~f:Coalesce_slots.run_abi in + let*? m = Abi.Module.map_funs_err m ~f:Resolve_constant_blk_args.run_abi in + let*? m = Abi.Module.map_funs_err m ~f:Remove_dead_vars.run_abi in let* () = Context.iter_seq_err (Abi.Module.funs m) ~f:Ssa.check_abi in !!m diff --git a/src/lib/passes/sroa/sroa.ml b/src/lib/passes/sroa/sroa.ml index b3b2ec1f..a045b574 100644 --- a/src/lib/passes/sroa/sroa.ml +++ b/src/lib/passes/sroa/sroa.ml @@ -11,12 +11,15 @@ module V = Make(struct let create ~label op = Insn.create op ~label let with_op = Insn.with_op let op = Insn.op + let special = function + | #Insn.variadic -> true + | _ -> false let label = Insn.label let lhs = var_set_of_option @. Insn.lhs_of_op let offset = (offset :> op -> _) let copy_of = (copy_of :> op -> _) - let fv = Insn.free_vars_of_op - let escapes = (escapes is_named fv :> op -> _) + let free_vars = Insn.free_vars_of_op + let escapes = (escapes is_named free_vars :> op -> _) let load_or_store_to = (load_or_store_to :> op -> _) let replace_load_or_store_addr a = (replace_load_or_store_addr a :> op -> _) let add x ty b o = (add x ty b o :> op) @@ -40,12 +43,15 @@ module A = Make(struct let create ~label op = Insn.create op ~label let with_op = Insn.with_op let op = Insn.op + let special = function + | #Insn.extra -> true + | _ -> false let label = Insn.label let lhs = Insn.def_of_op let offset = (offset :> op -> _) let copy_of = (copy_of :> op -> _) - let fv = Insn.free_vars_of_op - let escapes = (escapes (const false) fv :> op -> _) + let free_vars = Insn.free_vars_of_op + let escapes = (escapes (const false) free_vars :> op -> _) let load_or_store_to = (load_or_store_to :> op -> _) let replace_load_or_store_addr a = (replace_load_or_store_addr a :> op -> _) let add x ty b o = (add x ty b o :> op) diff --git a/src/lib/passes/sroa_coalesce_common.ml b/src/lib/passes/sroa_coalesce_common.ml index 6c9baf32..057f0bb0 100644 --- a/src/lib/passes/sroa_coalesce_common.ml +++ b/src/lib/passes/sroa_coalesce_common.ml @@ -31,7 +31,9 @@ let copy_of = function let escapes mty fv = function | `store (ty, `var x, `var y) when mty ty -> Var.Set.of_list [x; y] + | `store (ty, _, `var y) when mty ty -> Var.Set.of_list [y] | `store (_, `var x, _) -> Var.Set.singleton x + | `store _ -> Var.Set.empty | `load (_, ty, `var x) when mty ty -> Var.Set.singleton x | `load _ -> Var.Set.empty | o when Option.is_some (offset o) -> Var.Set.empty diff --git a/src/lib/passes/ssa/ssa_impl_rename.ml b/src/lib/passes/ssa/ssa_impl_rename.ml index 2b58c832..35e22da7 100644 --- a/src/lib/passes/ssa/ssa_impl_rename.ml +++ b/src/lib/passes/ssa/ssa_impl_rename.ml @@ -15,20 +15,27 @@ end = struct | None -> raise_notrace @@ Missing_blk l | Some _ as b -> b + let fresh_version nums base = + let i = ref 1 in + Hashtbl.update nums base ~f:(function + | None -> !i + | Some n -> + let n = n + 1 in + i := n; + n); + !i + let new_name stk nums x = - let key = Var.base x in - let default = 1 in - let n = ref default in - let upd x = n := x + 1; !n in - Hashtbl.update nums key ~f:(Option.value_map ~default ~f:upd); - let y = Var.with_index x !n in - Hashtbl.add_multi stk ~key ~data:y; + let base = Var.base x in + let idx = fresh_version nums base in + let y = Var.with_index x idx in + Hashtbl.add_multi stk ~key:base ~data:y; y let rename_args stk nums b = Blk.args b |> Seq.map ~f:(new_name stk nums) |> Seq.to_list - let map_var stk x = match Hashtbl.find stk x with + let map_var stk x = match Hashtbl.find stk @@ Var.base x with | Some [] | None -> x | Some (y :: _) -> y diff --git a/src/lib/scalars.ml b/src/lib/scalars.ml index 80a09e41..e8dca7dc 100644 --- a/src/lib/scalars.ml +++ b/src/lib/scalars.ml @@ -118,7 +118,9 @@ module type L = sig val lhs : op -> Var.Set.t val offset : op -> scalar option val copy_of : op -> Var.t option + val free_vars : op -> Var.Set.t val escapes : op -> Var.Set.t + val special : op -> bool (* Used during replacement. *) val load_or_store_to : op -> (Var.t * Type.basic * load_or_store) option @@ -221,7 +223,7 @@ module Make(M : L) = struct Label.Map.singleton Label.pseudoentry |> Solution.create @< State.empty - (* All slots mapped to their names. *) + (* All slots mapped to their names. *) let collect_slots fn = Func.slots fn |> Seq.fold ~init:Var.Map.empty ~f:(fun acc s -> Map.set acc ~key:(Virtual.Slot.var s) ~data:s) diff --git a/src/lib/virtual/abi/abi.ml b/src/lib/virtual/abi/abi.ml index 5a2a384a..d9114623 100644 --- a/src/lib/virtual/abi/abi.ml +++ b/src/lib/virtual/abi/abi.ml @@ -15,3 +15,4 @@ module Module = Abi_module module Cfg = Abi_cfg module Live = Abi_live module Resolver = Abi_resolver +module Eval = Abi_eval diff --git a/src/lib/virtual/abi/abi_eval.ml b/src/lib/virtual/abi/abi_eval.ml new file mode 100644 index 00000000..5550ec36 --- /dev/null +++ b/src/lib/virtual/abi/abi_eval.ml @@ -0,0 +1 @@ +include Virtual_eval diff --git a/src/lib/virtual/virtual.mli b/src/lib/virtual/virtual.mli index 49a00e3a..d2588ee7 100644 --- a/src/lib/virtual/virtual.mli +++ b/src/lib/virtual/virtual.mli @@ -231,53 +231,7 @@ end type insn = Insn.t [@@deriving bin_io, compare, equal, sexp] (** Evaluation of instructions. *) -module Eval : sig - (** [binop_int o x y] evaluates a binary operator [o] over integers - [x] and [y], and returns the result if it is defined. *) - val binop_int : - Insn.binop -> - Bv.t -> - Bv.t -> - [`bool of bool | `int of Bv.t * Type.imm] option - - (** [binop_single o x y] evaluates a binary operator [o] over 32-bit - floats [x] and [y], and returns the result if it is defined. *) - val binop_single : - Insn.binop -> - Float32.t -> - Float32.t -> - [`bool of bool | `float of Float32.t] option - - (** [binop_double o x y] evaluates a binary operator [o] over 64-bit - floats [x] and [y], and returns the result if it is defined. *) - val binop_double : - Insn.binop -> - float -> - float -> - [`bool of bool | `double of float] option - - (** [unop_int o x ty] evaluates a unary operator [o] over the integer - [x] with type [ty], and returns the result if it is defined. *) - val unop_int : - Insn.unop -> - Bv.t -> - Type.imm -> - [`double of float | `float of Float32.t | `int of Bv.t * Type.imm] option - - (** [unop_single o x] evaluates a unary operator [o] over the 32-bit float - [x], and returns the result if it is defined. *) - val unop_single : - Insn.unop -> - Float32.t -> - [`double of float | `float of Float32.t | `int of Bv.t * Type.imm] option - - (** [unop_double o x] evaluates a unary operator [o] over the 64-bit float - [x], and returns the result if it is defined. *) - val unop_double : - Insn.unop -> - float -> - [`double of float | `int of Bv.t * Type.imm] option -end +module Eval : Virtual_eval_intf.S (** Control-flow-effectful instructions. *) module Ctrl : Virtual_ctrl_intf.S @@ -711,6 +665,8 @@ module Abi : sig type insn = Insn.t [@@deriving bin_io, compare, equal, sexp] + module Eval : Virtual_eval_intf.S + module Ctrl : Virtual_ctrl_intf.S with type operand := operand and type local := local diff --git a/src/lib/virtual/virtual_eval_intf.ml b/src/lib/virtual/virtual_eval_intf.ml new file mode 100644 index 00000000..9f63915e --- /dev/null +++ b/src/lib/virtual/virtual_eval_intf.ml @@ -0,0 +1,49 @@ +module Insn = Virtual_insn + +module type S = sig + (** [binop_int o x y] evaluates a binary operator [o] over integers + [x] and [y], and returns the result if it is defined. *) + val binop_int : + Insn.binop -> + Bv.t -> + Bv.t -> + [`bool of bool | `int of Bv.t * Type.imm] option + + (** [binop_single o x y] evaluates a binary operator [o] over 32-bit + floats [x] and [y], and returns the result if it is defined. *) + val binop_single : + Insn.binop -> + Float32.t -> + Float32.t -> + [`bool of bool | `float of Float32.t] option + + (** [binop_double o x y] evaluates a binary operator [o] over 64-bit + floats [x] and [y], and returns the result if it is defined. *) + val binop_double : + Insn.binop -> + float -> + float -> + [`bool of bool | `double of float] option + + (** [unop_int o x ty] evaluates a unary operator [o] over the integer + [x] with type [ty], and returns the result if it is defined. *) + val unop_int : + Insn.unop -> + Bv.t -> + Type.imm -> + [`double of float | `float of Float32.t | `int of Bv.t * Type.imm] option + + (** [unop_single o x] evaluates a unary operator [o] over the 32-bit float + [x], and returns the result if it is defined. *) + val unop_single : + Insn.unop -> + Float32.t -> + [`double of float | `float of Float32.t | `int of Bv.t * Type.imm] option + + (** [unop_double o x] evaluates a unary operator [o] over the 64-bit float + [x], and returns the result if it is defined. *) + val unop_double : + Insn.unop -> + float -> + [`double of float | `int of Bv.t * Type.imm] option +end diff --git a/src/test/data/opt/gcdext.vir.opt.sysv b/src/test/data/opt/gcdext.vir.opt.sysv index 8ae9b8e9..1de15d3b 100644 --- a/src/test/data/opt/gcdext.vir.opt.sysv +++ b/src/test/data/opt/gcdext.vir.opt.sysv @@ -1,37 +1,34 @@ module gcdext export function $gcd(w %a/rdi, w %b/rsi) { - %res = slot 16, align 8 %r = slot 16, align 8 @2: %1.1 = eq.w %a, 0x0_w ; @30 - %2.1 = add.l %res, 0x8_l ; @31 - %3.1 = add.l %res, 0x4_l ; @32 + %2.1 = add.l %r, 0x8_l ; @31 + %3.1 = add.l %r, 0x4_l ; @32 br %1.1, @3, @4 @3: - st.w %b, %res ; @6 + st.w %b, %r ; @6 st.w 0x0_w, %3.1 ; @8 st.w 0x1_w, %2.1 ; @10 - %19.1 = ld.l %res ; @49 + %19.1 = ld.l %r ; @49 %21.1 = ld.l %2.1 ; @52 jmp @29(%21.1, %19.1) @4: %m.1 = rem.w %b, %a ; @12 %27.1/l/rax, %28.1/l/rdx = call $gcd(%m.1/rdi, %a/rsi) ; @13 st.l %27.1, %r ; @38 - %12.1 = add.l %r, 0x8_l ; @41 - st.l %28.1, %12.1 ; @42 + st.l %28.1, %2.1 ; @42 %rg.1 = ld.w %r ; @15 - %4.1 = add.l %r, 0x4_l ; @33 - %rx.1 = ld.w %4.1 ; @17 - %ry.1 = ld.w %12.1 ; @19 - st.w %rg.1, %res ; @20 + %rx.1 = ld.w %3.1 ; @17 + %ry.1 = ld.w %2.1 ; @19 + st.w %rg.1, %r ; @20 %nx.1 = div.w %b, %a ; @21 %6.1 = mul.w %nx.1, %rx.1 ; @35 %7.1 = sub.w %ry.1, %6.1 ; @36 st.w %7.1, %3.1 ; @25 st.w %rx.1, %2.1 ; @27 - %14.1 = ld.l %res ; @43 + %14.1 = ld.l %r ; @43 %16.1 = ld.l %2.1 ; @46 jmp @29(%16.1, %14.1) @29(%30.1, %29.1): diff --git a/src/test/data/opt/sumphi.vir.opt.sysv b/src/test/data/opt/sumphi.vir.opt.sysv index 815a23ba..914fd25f 100644 --- a/src/test/data/opt/sumphi.vir.opt.sysv +++ b/src/test/data/opt/sumphi.vir.opt.sysv @@ -12,9 +12,7 @@ function $sum(l %s/rdi) { } export function $sumphi(l %a/rdi, l %b/rsi, w %x/rdx) { - %ra = slot 8, align 8 @8: - st.l %a, %ra ; @28 %2.1 = slt.w %x, 0x0_w ; @21 br %2.1, @9, @10 @9: @@ -22,7 +20,6 @@ export function $sumphi(l %a/rdi, l %b/rsi, w %x/rdx) { @10: jmp @14(%b) @14(%6.3): - st.w 0x5_w, %ra ; @17 %s.1/w/rax = call $sum(%6.3/rdi) ; @18 ret rax/%s.1 } diff --git a/src/test/data/opt/sumphi.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/sumphi.vir.opt.sysv.amd64.regalloc index 1b8ae6d6..058c6ada 100644 --- a/src/test/data/opt/sumphi.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/sumphi.vir.opt.sysv.amd64.regalloc @@ -14,16 +14,14 @@ export function $sumphi { ; returns: rax @8: push rbp ; @57 mov rbp, rsp ; @58 - sub rsp, 0x10_l ; @59 - mov qword ptr [rbp - 0x10], rdi ; @54 + mov rax, rdi ; @21 + mov rdi, rsi ; @50 test edx, edx ; @47 jns @14 ; @48 @9: - mov rsi, rdi ; @43 + mov rdi, rax ; @43 @14: - mov dword ptr [rbp - 0x10], 0x5_w ; @17 - mov rdi, rsi ; @18 call $sum ; rdi ; @41 - leave ; @60 + leave ; @59 ret ; @40 } diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv b/src/test/data/opt/vaarg1.vir.opt.sysv index 00c3a0e6..ffdc7852 100644 --- a/src/test/data/opt/vaarg1.vir.opt.sysv +++ b/src/test/data/opt/vaarg1.vir.opt.sysv @@ -5,82 +5,86 @@ export function $foo(b %11/rax, l %i/rdi, ...) { %r = slot 16, align 8 %5 = slot 176, align 16 @19: - %6.1 = add.l %5, 0x8_l ; @20 - regstore rsi, %6.1 ; @21 - %7.1 = add.l %5, 0x10_l ; @22 - regstore rdx, %7.1 ; @23 - %8.1 = add.l %5, 0x18_l ; @24 - regstore rcx, %8.1 ; @25 - %9.1 = add.l %5, 0x20_l ; @26 - regstore r8, %9.1 ; @27 - %10.1 = add.l %5, 0x28_l ; @28 - regstore r9, %10.1 ; @29 - %12.1 = eq.b %11, 0x0_b ; @30 - br %12.1, @2, @18 + %6 = add.l %5, 0x8_l ; @20 + regstore rsi, %6 ; @21 + %7 = add.l %5, 0x10_l ; @22 + regstore rdx, %7 ; @23 + %8 = add.l %5, 0x18_l ; @24 + regstore rcx, %8 ; @25 + %9 = add.l %5, 0x20_l ; @26 + regstore r8, %9 ; @27 + %10 = add.l %5, 0x28_l ; @28 + regstore r9, %10 ; @29 + %12 = eq.b %11, 0x0_b ; @30 + br %12, @2, @18 @18: - %13.1 = add.l %5, 0x30_l ; @31 - regstore xmm0, %13.1 ; @32 - %14.1 = add.l %5, 0x40_l ; @33 - regstore xmm1, %14.1 ; @34 - %15.1 = add.l %5, 0x50_l ; @35 - regstore xmm2, %15.1 ; @36 - %16.1 = add.l %5, 0x60_l ; @37 - regstore xmm3, %16.1 ; @38 - %17.1 = add.l %5, 0x70_l ; @39 - regstore xmm4, %17.1 ; @40 - %18.1 = add.l %5, 0x80_l ; @41 - regstore xmm5, %18.1 ; @42 - %19.1 = add.l %5, 0x90_l ; @43 - regstore xmm6, %19.1 ; @44 - %20.1 = add.l %5, 0xa0_l ; @45 - regstore xmm7, %20.1 ; @46 + %13 = add.l %5, 0x30_l ; @31 + regstore xmm0, %13 ; @32 + %14 = add.l %5, 0x40_l ; @33 + regstore xmm1, %14 ; @34 + %15 = add.l %5, 0x50_l ; @35 + regstore xmm2, %15 ; @36 + %16 = add.l %5, 0x60_l ; @37 + regstore xmm3, %16 ; @38 + %17 = add.l %5, 0x70_l ; @39 + regstore xmm4, %17 ; @40 + %18 = add.l %5, 0x80_l ; @41 + regstore xmm5, %18 ; @42 + %19 = add.l %5, 0x90_l ; @43 + regstore xmm6, %19 ; @44 + %20 = add.l %5, 0xa0_l ; @45 + regstore xmm7, %20 ; @46 jmp @2 @2: st.w 0x8_w, %ap ; @53 - %26.1 = add.l %ap, 0x4_l ; @54 - st.w 0x30_w, %26.1 ; @55 - %27.1 = stkargs ; @56 - %28.1 = add.l %ap, 0x8_l ; @57 - st.l %27.1, %28.1 ; @58 - %29.1 = add.l %ap, 0x10_l ; @59 - st.l %5, %29.1 ; @60 + %26 = add.l %ap, 0x4_l ; @54 + st.w 0x30_w, %26 ; @55 + %27 = stkargs ; @56 + %28 = add.l %ap, 0x8_l ; @57 + st.l %27, %28 ; @58 + %29 = add.l %ap, 0x10_l ; @59 + st.l %5, %29 ; @60 jmp @62 @62: - %31.1 = ld.w %26.1 ; @68 - %32.1 = le.w %31.1, 0xa0_w ; @69 - br %32.1, @63, @65 + %31 = ld.w %26 ; @68 + %32 = le.w %31, 0xa0_w ; @69 + br %32, @63, @65 @63: - %33.1 = ld.w %ap ; @70 - %34.1 = le.w %33.1, 0x28_w ; @71 - br %34.1, @64, @65 + %33 = ld.w %ap ; @70 + %34 = le.w %33, 0x28_w ; @71 + br %34, @64, @65 @64: - %37.1 = zext.l %31.1 ; @74 - %38.1 = add.l %5, %37.1 ; @75 - %39.1 = zext.l %33.1 ; @76 - %40.1 = add.l %5, %39.1 ; @77 - %41.1 = add.w %31.1, 0x10_l ; @78 - %42.1 = add.w %33.1, 0x8_l ; @79 - st.w %41.1, %26.1 ; @80 - st.w %42.1, %ap ; @81 - jmp @66(%38.1, %40.1) + %37 = zext.l %31 ; @74 + %38 = add.l %5, %37 ; @75 + %39 = zext.l %33 ; @76 + %40 = add.l %5, %39 ; @77 + %41 = add.w %31, 0x10_l ; @78 + %42 = add.w %33, 0x8_l ; @79 + st.w %41, %26 ; @80 + st.w %42, %ap ; @81 + jmp @66(%38, %40) @65: - %44.1 = ld.l %28.1 ; @83 - %45.1 = add.l %44.1, 0x8_l ; @84 - %46.1 = add.l %44.1, 0x10_l ; @85 - st.l %46.1, %28.1 ; @86 - jmp @66(%44.1, %45.1) -@66(%47.1, %48.1): - %49.1 = ld.l %47.1 ; @87 - %50.1 = ld.l %48.1 ; @89 + %44 = ld.l %28 ; @83 + %45 = add.l %44, 0x8_l ; @84 + %46 = add.l %44, 0x10_l ; @85 + st.l %46, %28 ; @86 + jmp @66(%44, %45) +@66(%47, %48): + %49 = ld.l %47 ; @87 + st.l %49, %ap ; @88 + %50 = ld.l %48 ; @89 + st.l %50, %28 ; @91 jmp @61 @61: - st.l %49.1, %r ; @48 - %25.1 = add.l %r, 0x8_l ; @51 - st.l %50.1, %25.1 ; @52 + %22 = ld.l %ap ; @47 + st.l %22, %r ; @48 + %24 = ld.l %28 ; @50 + %25 = add.l %r, 0x8_l ; @51 + st.l %24, %25 ; @52 %f1.1 = ld.d %r ; @6 - %0.1 = add.d %f1.1, 1.234_d ; @13 - %2.1 = ftosi.d.l %0.1 ; @15 - %3.1 = add.l %50.1, %2.1 ; @16 - %4.1 = add.l %3.1, %i ; @17 - ret rax/%4.1 + %0 = add.d %f1.1, 1.234_d ; @13 + %2 = ftosi.d.l %0 ; @15 + %3 = add.l %24, %2 ; @16 + %4 = add.l %3, %i ; @17 + ret rax/%4 } diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 index 0f74a027..6c5eb8c5 100644 --- a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 +++ b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 @@ -28,54 +28,58 @@ export function $foo { ; returns: rax @2: mov dword ptr [%ap], 0x8_w ; @53 mov dword ptr [%ap + 0x4], 0x30_w ; @55 - lea %27.1:l, qword ptr [rbp + 0x10] ; @56 - mov qword ptr [%ap + 0x8], %27.1:l ; @58 + lea %27:l, qword ptr [rbp + 0x10] ; @56 + mov qword ptr [%ap + 0x8], %27:l ; @58 mov qword ptr [%ap + 0x10], %5:l ; @60 jmp @62 ; @114 @62: - mov %31.1:w, dword ptr [%ap + 0x4] ; @68 - cmp %31.1:w, 0xa0_w ; @109 + mov %31:w, dword ptr [%ap + 0x4] ; @68 + cmp %31:w, 0xa0_w ; @109 jbe @63 ; @110 jmp @65 ; @111 @63: - mov %33.1:w, dword ptr [%ap] ; @70 - cmp %33.1:w, 0x28_w ; @104 + mov %33:w, dword ptr [%ap] ; @70 + cmp %33:w, 0x28_w ; @104 jbe @64 ; @105 jmp @65 ; @106 @65: - mov %44.1:l, qword ptr [%ap + 0x8] ; @83 - lea %45.1:l, qword ptr [%44.1 + 0x8] ; @84 - lea %46.1:l, qword ptr [%44.1 + 0x10] ; @85 - mov qword ptr [%ap + 0x8], %46.1:l ; @86 - mov %47.1:l, %44.1:l ; @101 - mov %48.1:l, %45.1:l ; @102 + mov %44:l, qword ptr [%ap + 0x8] ; @83 + lea %45:l, qword ptr [%44 + 0x8] ; @84 + lea %46:l, qword ptr [%44 + 0x10] ; @85 + mov qword ptr [%ap + 0x8], %46:l ; @86 + mov %47:l, %44:l ; @101 + mov %48:l, %45:l ; @102 jmp @66 ; @103 @64: - mov %37.1:w, %31.1:w ; @74 - lea %38.1:l, qword ptr [%5 + %37.1*1] ; @75 - mov %39.1:w, %33.1:w ; @76 - lea %40.1:l, qword ptr [%5 + %39.1*1] ; @77 - lea %41.1:w, qword ptr [%31.1 + 0x10] ; @78 - lea %42.1:w, qword ptr [%33.1 + 0x8] ; @79 - mov dword ptr [%ap + 0x4], %41.1:w ; @80 - mov dword ptr [%ap], %42.1:w ; @81 - mov %47.1:l, %38.1:l ; @98 - mov %48.1:l, %40.1:l ; @99 + mov %37:w, %31:w ; @74 + lea %38:l, qword ptr [%5 + %37*1] ; @75 + mov %39:w, %33:w ; @76 + lea %40:l, qword ptr [%5 + %39*1] ; @77 + lea %41:w, qword ptr [%31 + 0x10] ; @78 + lea %42:w, qword ptr [%33 + 0x8] ; @79 + mov dword ptr [%ap + 0x4], %41:w ; @80 + mov dword ptr [%ap], %42:w ; @81 + mov %47:l, %38:l ; @98 + mov %48:l, %40:l ; @99 jmp @66 ; @100 @66: - mov %49.1:l, qword ptr [%47.1] ; @87 - mov %50.1:l, qword ptr [%48.1] ; @89 + mov %49:l, qword ptr [%47] ; @87 + mov qword ptr [%ap], %49:l ; @88 + mov %50:l, qword ptr [%48] ; @89 + mov qword ptr [%ap + 0x8], %50:l ; @91 jmp @61 ; @97 @61: - mov qword ptr [%r], %49.1:l ; @48 - mov qword ptr [%r + 0x8], %50.1:l ; @52 + mov %22:l, qword ptr [%ap] ; @47 + mov qword ptr [%r], %22:l ; @48 + mov %24:l, qword ptr [%ap + 0x8] ; @50 + mov qword ptr [%r + 0x8], %24:l ; @52 movsd %f1.1:d, qword ptr [%r] ; @6 - movsd %0.1:d, %f1.1:d ; @13 - addsd %0.1:d, qword ptr [rip + @94] ; @95 + movsd %0:d, %f1.1:d ; @13 + addsd %0:d, qword ptr [rip + @94] ; @95 .fp64 @94, 1.234 ; @96 - cvtsd2si %2.1:l, %0.1:d ; @15 - lea %3.1:l, qword ptr [%50.1 + %2.1*1] ; @16 - lea %4.1:l, qword ptr [%3.1 + %i*1] ; @17 - mov rax, %4.1:l ; @92 + cvtsd2si %2:l, %0:d ; @15 + lea %3:l, qword ptr [%24 + %2*1] ; @16 + lea %4:l, qword ptr [%3 + %i*1] ; @17 + mov rax, %4:l ; @92 ret ; @93 } diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc index df70e6e4..a51e6a21 100644 --- a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc @@ -51,15 +51,19 @@ export function $foo { ; returns: rax mov dword ptr [rbp - 0x30], edx ; @81 @66: mov rcx, qword ptr [rcx] ; @87 - mov rdx, qword ptr [rax] ; @89 + mov qword ptr [rbp - 0x30], rcx ; @88 + mov rax, qword ptr [rax] ; @89 + mov qword ptr [rbp - 0x28], rax ; @91 @61: - mov qword ptr [rbp - 0x18], rcx ; @48 - mov qword ptr [rbp - 0x10], rdx ; @52 + mov rax, qword ptr [rbp - 0x30] ; @47 + mov qword ptr [rbp - 0x18], rax ; @48 + mov rcx, qword ptr [rbp - 0x28] ; @50 + mov qword ptr [rbp - 0x10], rcx ; @52 movsd xmm0, qword ptr [rbp - 0x18] ; @6 addsd xmm0, qword ptr [rip + @94] ; @95 .fp64 @94, 1.234 ; @96 cvtsd2si rax, xmm0 ; @15 - add rax, rdx ; @16 + add rax, rcx ; @16 add rax, rdi ; @17 leave ; @127 ret ; @93 diff --git a/src/test/data/opt/vaarg2.vir.opt.sysv b/src/test/data/opt/vaarg2.vir.opt.sysv index d860794a..3f5d6d05 100644 --- a/src/test/data/opt/vaarg2.vir.opt.sysv +++ b/src/test/data/opt/vaarg2.vir.opt.sysv @@ -61,77 +61,61 @@ export function $foo(b %9/rax, l %i/rdi, ...) { jmp @64(%36.1) @64(%38.1): %39.1 = ld.l %38.1 ; @77 + st.l %39.1, %ap ; @78 %40.1 = add.l %38.1, 0x8_l ; @79 %41.1 = ld.l %40.1 ; @80 + st.l %41.1, %26.1 ; @82 jmp @60 @60: - %1.1 = add.l %39.1, %41.1 ; @15 + %20.1 = ld.l %ap ; @46 + %1.1 = add.l %20.1, %41.1 ; @15 %2.1 = add.l %1.1, %i ; @16 ret rax/%2.1 } export function $bar(b %49/rax, ...) { - %ap = slot 24, align 8 %43 = slot 176, align 16 @84: regstore rdi, %43 ; @85 - %44 = add.l %43, 0x8_l ; @86 - regstore rsi, %44 ; @87 - %45 = add.l %43, 0x10_l ; @88 - regstore rdx, %45 ; @89 - %46 = add.l %43, 0x18_l ; @90 - regstore rcx, %46 ; @91 - %47 = add.l %43, 0x20_l ; @92 - regstore r8, %47 ; @93 - %48 = add.l %43, 0x28_l ; @94 - regstore r9, %48 ; @95 - %50 = eq.b %49, 0x0_b ; @96 - br %50, @11, @83 + %44.1 = add.l %43, 0x8_l ; @86 + regstore rsi, %44.1 ; @87 + %45.1 = add.l %43, 0x10_l ; @88 + regstore rdx, %45.1 ; @89 + %46.1 = add.l %43, 0x18_l ; @90 + regstore rcx, %46.1 ; @91 + %47.1 = add.l %43, 0x20_l ; @92 + regstore r8, %47.1 ; @93 + %48.1 = add.l %43, 0x28_l ; @94 + regstore r9, %48.1 ; @95 + %50.1 = eq.b %49, 0x0_b ; @96 + br %50.1, @11, @83 @83: - %51 = add.l %43, 0x30_l ; @97 - regstore xmm0, %51 ; @98 - %52 = add.l %43, 0x40_l ; @99 - regstore xmm1, %52 ; @100 - %53 = add.l %43, 0x50_l ; @101 - regstore xmm2, %53 ; @102 - %54 = add.l %43, 0x60_l ; @103 - regstore xmm3, %54 ; @104 - %55 = add.l %43, 0x70_l ; @105 - regstore xmm4, %55 ; @106 - %56 = add.l %43, 0x80_l ; @107 - regstore xmm5, %56 ; @108 - %57 = add.l %43, 0x90_l ; @109 - regstore xmm6, %57 ; @110 - %58 = add.l %43, 0xa0_l ; @111 - regstore xmm7, %58 ; @112 + %51.1 = add.l %43, 0x30_l ; @97 + regstore xmm0, %51.1 ; @98 + %52.1 = add.l %43, 0x40_l ; @99 + regstore xmm1, %52.1 ; @100 + %53.1 = add.l %43, 0x50_l ; @101 + regstore xmm2, %53.1 ; @102 + %54.1 = add.l %43, 0x60_l ; @103 + regstore xmm3, %54.1 ; @104 + %55.1 = add.l %43, 0x70_l ; @105 + regstore xmm4, %55.1 ; @106 + %56.1 = add.l %43, 0x80_l ; @107 + regstore xmm5, %56.1 ; @108 + %57.1 = add.l %43, 0x90_l ; @109 + regstore xmm6, %57.1 ; @110 + %58.1 = add.l %43, 0xa0_l ; @111 + regstore xmm7, %58.1 ; @112 jmp @11 @11: - st.w 0x0_w, %ap ; @113 - %59 = add.l %ap, 0x4_l ; @114 - st.w 0x30_w, %59 ; @115 - %60 = stkargs ; @116 - %61 = add.l %ap, 0x8_l ; @117 - st.l %60, %61 ; @118 - %62 = add.l %ap, 0x10_l ; @119 - st.l %43, %62 ; @120 jmp @122 @122: - %63 = ld.w %ap ; @126 - %64 = le.w %63, 0x28_w ; @127 - br %64, @123, @124 + jmp @123 @123: - %67 = zext.l %63 ; @130 - %68 = add.l %43, %67 ; @131 - %69 = add.w %63, 0x8_w ; @132 - st.w %69, %ap ; @133 - jmp @125(%68) -@124: - %71 = ld.l %61 ; @135 - %72 = add.l %71, 0x8_l ; @136 - st.l %72, %61 ; @137 - jmp @125(%71) -@125(%73): - %t.1 = ld.b %73 ; @138 + %68.1 = add.l %43, 0x0_l ; @131 + jmp @125 +@125: + %t.1 = ld.b %68.1 ; @138 jmp @121 @121: ret rax/%t.1 diff --git a/src/test/data/opt/vasum.vir.opt.sysv b/src/test/data/opt/vasum.vir.opt.sysv index 1183d6fd..3406b829 100644 --- a/src/test/data/opt/vasum.vir.opt.sysv +++ b/src/test/data/opt/vasum.vir.opt.sysv @@ -1,79 +1,66 @@ module vasum export function $sum(b %9/rax, w %n/rdi, ...) { - %ap = slot 24, align 8 %3 = slot 176, align 16 @19: - %4 = add.l %3, 0x8_l ; @20 - regstore rsi, %4 ; @21 - %5 = add.l %3, 0x10_l ; @22 - regstore rdx, %5 ; @23 - %6 = add.l %3, 0x18_l ; @24 - regstore rcx, %6 ; @25 - %7 = add.l %3, 0x20_l ; @26 - regstore r8, %7 ; @27 - %8 = add.l %3, 0x28_l ; @28 - regstore r9, %8 ; @29 - %10 = eq.b %9, 0x0_b ; @30 - br %10, @2, @18 + %4.1 = add.l %3, 0x8_l ; @20 + regstore rsi, %4.1 ; @21 + %5.1 = add.l %3, 0x10_l ; @22 + regstore rdx, %5.1 ; @23 + %6.1 = add.l %3, 0x18_l ; @24 + regstore rcx, %6.1 ; @25 + %7.1 = add.l %3, 0x20_l ; @26 + regstore r8, %7.1 ; @27 + %8.1 = add.l %3, 0x28_l ; @28 + regstore r9, %8.1 ; @29 + %10.1 = eq.b %9, 0x0_b ; @30 + br %10.1, @2, @18 @18: - %11 = add.l %3, 0x30_l ; @31 - regstore xmm0, %11 ; @32 - %12 = add.l %3, 0x40_l ; @33 - regstore xmm1, %12 ; @34 - %13 = add.l %3, 0x50_l ; @35 - regstore xmm2, %13 ; @36 - %14 = add.l %3, 0x60_l ; @37 - regstore xmm3, %14 ; @38 - %15 = add.l %3, 0x70_l ; @39 - regstore xmm4, %15 ; @40 - %16 = add.l %3, 0x80_l ; @41 - regstore xmm5, %16 ; @42 - %17 = add.l %3, 0x90_l ; @43 - regstore xmm6, %17 ; @44 - %18 = add.l %3, 0xa0_l ; @45 - regstore xmm7, %18 ; @46 + %11.1 = add.l %3, 0x30_l ; @31 + regstore xmm0, %11.1 ; @32 + %12.1 = add.l %3, 0x40_l ; @33 + regstore xmm1, %12.1 ; @34 + %13.1 = add.l %3, 0x50_l ; @35 + regstore xmm2, %13.1 ; @36 + %14.1 = add.l %3, 0x60_l ; @37 + regstore xmm3, %14.1 ; @38 + %15.1 = add.l %3, 0x70_l ; @39 + regstore xmm4, %15.1 ; @40 + %16.1 = add.l %3, 0x80_l ; @41 + regstore xmm5, %16.1 ; @42 + %17.1 = add.l %3, 0x90_l ; @43 + regstore xmm6, %17.1 ; @44 + %18.1 = add.l %3, 0xa0_l ; @45 + regstore xmm7, %18.1 ; @46 jmp @2 @2: - st.w 0x8_w, %ap ; @47 - %19 = add.l %ap, 0x4_l ; @48 - st.w 0x30_w, %19 ; @49 - %20 = stkargs ; @50 - %21 = add.l %ap, 0x8_l ; @51 - st.l %20, %21 ; @52 - %22 = add.l %ap, 0x10_l ; @53 - st.l %3, %22 ; @54 - jmp @3(0x0_w, 0x0_w) -@3(%x.2, %i.2): - %0 = lt.w %i.2, %n ; @15 - br %0, @7, @8 + %20.1 = stkargs ; @50 + jmp @3(%20.1, 0x8_w, 0x0_w, 0x0_w) +@3(%36.2, %34.2, %x.1, %i.1): + %0.1 = lt.w %i.1, %n ; @15 + br %0.1, @7, @8 @7: jmp @56 @56: - %23 = ld.w %ap ; @60 - %24 = le.w %23, 0x28_w ; @61 - br %24, @57, @58 + %24.1 = le.w %34.2, 0x28_w ; @61 + br %24.1, @57, @58 @57: - %26 = ld.l %22 ; @63 - %27 = zext.l %23 ; @64 - %28 = add.l %26, %27 ; @65 - %29 = add.w %23, 0x8_w ; @66 - st.w %29, %ap ; @67 - jmp @59(%28) + %27.1 = zext.l %34.2 ; @64 + %28.1 = add.l %3, %27.1 ; @65 + %29.1 = add.w %34.2, 0x8_w ; @66 + jmp @59(%36.2, %29.1, %28.1) @58: - %31 = ld.l %21 ; @69 - %32 = add.l %31, 0x8_l ; @70 - st.l %32, %21 ; @71 - jmp @59(%31) -@59(%33): - %y.1 = ld.w %33 ; @72 + %32.1 = add.l %36.2, 0x8_l ; @70 + jmp @59(%32.1, %34.2, %36.2) +@59(%36.3, %34.3, %33.1): + %y.1 = ld.w %33.1 ; @72 jmp @55 @55: - %1 = add.w %x.2, %y.1 ; @16 - %2 = add.w %i.2, 0x1_w ; @17 - jmp @3(%1, %2) + %1.1 = add.w %x.1, %y.1 ; @16 + %2.1 = add.w %i.1, 0x1_w ; @17 + jmp @3(%36.3, %34.3, %1.1, %2.1) @8: - ret rax/%x.2 + ret rax/%x.1 } export function $twenty_eight() { diff --git a/src/test/data/opt/vasum.vir.opt.sysv.amd64 b/src/test/data/opt/vasum.vir.opt.sysv.amd64 index 887fe9c4..cd1095a8 100644 --- a/src/test/data/opt/vasum.vir.opt.sysv.amd64 +++ b/src/test/data/opt/vasum.vir.opt.sysv.amd64 @@ -1,19 +1,18 @@ module vasum export function $sum { ; returns: rax - %ap = slot 24, align 8 %3 = slot 176, align 16 @19: mov %9:b, al ; @20 - mov %n:w, edi ; @103 + mov %n:w, edi ; @111 mov qword ptr [%3 + 0x8], rsi ; @21 mov qword ptr [%3 + 0x10], rdx ; @23 mov qword ptr [%3 + 0x18], rcx ; @25 mov qword ptr [%3 + 0x20], r8 ; @27 mov qword ptr [%3 + 0x28], r9 ; @29 - test %9:b, %9:b ; @98 - je @2 ; @99 - jmp @18 ; @100 + test %9:b, %9:b ; @106 + je @2 ; @107 + jmp @18 ; @108 @18: movdqa xmmword ptr [%3 + 0x30], xmm0 ; @32 movdqa xmmword ptr [%3 + 0x40], xmm1 ; @34 @@ -23,70 +22,69 @@ export function $sum { ; returns: rax movdqa xmmword ptr [%3 + 0x80], xmm5 ; @42 movdqa xmmword ptr [%3 + 0x90], xmm6 ; @44 movdqa xmmword ptr [%3 + 0xa0], xmm7 ; @46 - jmp @2 ; @97 + jmp @2 ; @105 @2: - mov dword ptr [%ap], 0x8_w ; @47 - mov dword ptr [%ap + 0x4], 0x30_w ; @49 - lea %20:l, qword ptr [rbp + 0x10] ; @50 - mov qword ptr [%ap + 0x8], %20:l ; @52 - mov qword ptr [%ap + 0x10], %3:l ; @54 - xor %x.2:w, %x.2:w ; @94 - xor %i.2:w, %i.2:w ; @95 - jmp @3 ; @96 + lea %20.1:l, qword ptr [rbp + 0x10] ; @50 + mov %36.2:l, %20.1:l ; @100 + mov %34.2:w, 0x8_w ; @101 + xor %x.1:w, %x.1:w ; @102 + xor %i.1:w, %i.1:w ; @103 + jmp @3 ; @104 @3: - cmp %i.2:w, %n:w ; @89 - jb @7 ; @90 - jmp @8 ; @91 + cmp %i.1:w, %n:w ; @95 + jb @7 ; @96 + jmp @8 ; @97 @8: - mov eax, %x.2:w ; @87 - ret ; @88 + mov eax, %x.1:w ; @93 + ret ; @94 @7: - jmp @56 ; @86 + jmp @56 ; @92 @56: - mov %23:w, dword ptr [%ap] ; @60 - cmp %23:w, 0x28_w ; @81 - jbe @57 ; @82 - jmp @58 ; @83 + cmp %34.2:w, 0x28_w ; @87 + jbe @57 ; @88 + jmp @58 ; @89 @58: - mov %31:l, qword ptr [%ap + 0x8] ; @69 - lea %32:l, qword ptr [%31 + 0x8] ; @70 - mov qword ptr [%ap + 0x8], %32:l ; @71 - mov %33:l, %31:l ; @79 - jmp @59 ; @80 + lea %32.1:l, qword ptr [%36.2 + 0x8] ; @70 + mov %36.3:l, %32.1:l ; @83 + mov %34.3:w, %34.2:w ; @84 + mov %33.1:l, %36.2:l ; @85 + jmp @59 ; @86 @57: - mov %26:l, qword ptr [%ap + 0x10] ; @63 - mov %27:w, %23:w ; @64 - lea %28:l, qword ptr [%26 + %27*1] ; @65 - lea %29:w, qword ptr [%23 + 0x8] ; @66 - mov dword ptr [%ap], %29:w ; @67 - mov %33:l, %28:l ; @77 - jmp @59 ; @78 + mov %27.1:w, %34.2:w ; @64 + lea %28.1:l, qword ptr [%3 + %27.1*1] ; @65 + lea %29.1:w, qword ptr [%34.2 + 0x8] ; @66 + mov %36.3:l, %36.2:l ; @79 + mov %34.3:w, %29.1:w ; @80 + mov %33.1:l, %28.1:l ; @81 + jmp @59 ; @82 @59: - mov %y.1:w, dword ptr [%33] ; @72 - jmp @55 ; @76 + mov %y.1:w, dword ptr [%33.1] ; @72 + jmp @55 ; @78 @55: - lea %1:w, qword ptr [%x.2 + %y.1*1] ; @16 - lea %2:w, qword ptr [%i.2 + 0x1] ; @17 - mov %x.2:w, %1:w ; @73 - mov %i.2:w, %2:w ; @74 - jmp @3 ; @75 + lea %1.1:w, qword ptr [%x.1 + %y.1*1] ; @16 + lea %2.1:w, qword ptr [%i.1 + 0x1] ; @17 + mov %36.2:l, %36.3:l ; @73 + mov %34.2:w, %34.3:w ; @74 + mov %x.1:w, %1.1:w ; @75 + mov %i.1:w, %2.1:w ; @76 + jmp @3 ; @77 } export function $twenty_eight { ; returns: rax @13: mov edi, 0x7_w ; @14 - mov esi, 0x1_w ; @107 - mov edx, 0x2_w ; @108 - mov ecx, 0x3_w ; @109 - mov r8d, 0x4_w ; @110 - mov r9d, 0x5_w ; @111 - sub rsp, 0x10_l ; @112 - mov dword ptr [rsp], 0x6_w ; @113 - mov dword ptr [rsp + 0x8], 0x7_w ; @114 - xor al, al ; @115 - call $sum ; r8 r9 rax rcx rdi rdx rsi ; @116 - add rsp, 0x10_l ; @117 - mov %x.1:w, eax ; @118 - mov eax, %x.1:w ; @105 - ret ; @106 + mov esi, 0x1_w ; @115 + mov edx, 0x2_w ; @116 + mov ecx, 0x3_w ; @117 + mov r8d, 0x4_w ; @118 + mov r9d, 0x5_w ; @119 + sub rsp, 0x10_l ; @120 + mov dword ptr [rsp], 0x6_w ; @121 + mov dword ptr [rsp + 0x8], 0x7_w ; @122 + xor al, al ; @123 + call $sum ; r8 r9 rax rcx rdi rdx rsi ; @124 + add rsp, 0x10_l ; @125 + mov %x.1:w, eax ; @126 + mov eax, %x.1:w ; @113 + ret ; @114 } From 90115b2d69c039a8c6967a6d08d8e0190e255bb6 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 16 Nov 2025 23:08:00 -0500 Subject: [PATCH 21/62] Actually propagate block args Not sure why this wasn't working before --- src/lib/passes/sroa/sroa.ml | 4 ++-- src/lib/passes/sroa_coalesce_common.ml | 1 + src/lib/scalars.ml | 6 ++++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/lib/passes/sroa/sroa.ml b/src/lib/passes/sroa/sroa.ml index a045b574..bca26845 100644 --- a/src/lib/passes/sroa/sroa.ml +++ b/src/lib/passes/sroa/sroa.ml @@ -27,7 +27,7 @@ module V = Make(struct module Ctrl = struct type t = ctrl let free_vars = Ctrl.free_vars - let escapes = free_vars + let escapes = (escapes_ctrl free_vars :> t -> _) let locals = (locals Ctrl.Table.enum :> t -> _) end module Blk = Blk @@ -59,7 +59,7 @@ module A = Make(struct module Ctrl = struct type t = ctrl let free_vars = Ctrl.free_vars - let escapes = free_vars + let escapes = (escapes_ctrl free_vars :> t -> _) let locals = (locals Ctrl.Table.enum :> t -> _) end module Blk = Blk diff --git a/src/lib/passes/sroa_coalesce_common.ml b/src/lib/passes/sroa_coalesce_common.ml index 057f0bb0..584a8e3f 100644 --- a/src/lib/passes/sroa_coalesce_common.ml +++ b/src/lib/passes/sroa_coalesce_common.ml @@ -94,3 +94,4 @@ let escapes_ctrl fv = function | `ret _ as c -> fv c | `sw (_, `var i, _, _) -> Var.Set.singleton i | `sw _ -> Var.Set.empty +[@@specialise] diff --git a/src/lib/scalars.ml b/src/lib/scalars.ml index e8dca7dc..a7ba9335 100644 --- a/src/lib/scalars.ml +++ b/src/lib/scalars.ml @@ -200,8 +200,10 @@ module Make(M : L) = struct List.fold ~init ~f:(fun acc (src, dst) -> if Var.(src = dst) then acc else match Map.find acc src with - | Some v -> Map.set acc ~key:dst ~data:v - | None -> acc) + | None -> acc + | Some v -> Map.update acc dst ~f:(function + | Some v' -> Value.merge v v' + | None -> v)) (* Transfer function for a block. *) let transfer slots blks l s = From 0e93ef035b1c519d87d3bba85f7e9cc6e26969a7 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 16 Nov 2025 23:25:11 -0500 Subject: [PATCH 22/62] Another native test --- src/test/data/opt/unref.driver.sysv.amd64.c | 41 +++++++++++++++++++++ src/test/test_opt.ml | 1 + 2 files changed, 42 insertions(+) create mode 100644 src/test/data/opt/unref.driver.sysv.amd64.c diff --git a/src/test/data/opt/unref.driver.sysv.amd64.c b/src/test/data/opt/unref.driver.sysv.amd64.c new file mode 100644 index 00000000..60aec190 --- /dev/null +++ b/src/test/data/opt/unref.driver.sysv.amd64.c @@ -0,0 +1,41 @@ +#include + +struct t { + int a; + int b; +}; + +extern int sump(struct t *); +extern struct t mkt(int, int); +extern int sumt(int, int); + +int main() { + struct t t1 = mkt(1, 2); + struct t t2 = mkt(3, 4); + struct t t3 = mkt(5, 6); + struct t t4 = mkt(7, 8); + struct t t5 = mkt(9, 10); + + assert(t1.a == 1); + assert(t1.b == 2); + assert(t2.a == 3); + assert(t2.b == 4); + assert(t3.a == 5); + assert(t3.b == 6); + assert(t4.a == 7); + assert(t4.b == 8); + assert(t5.a == 9); + assert(t5.b == 10); + + assert(sump(&t1) == 3); + assert(sump(&t2) == 7); + assert(sump(&t3) == 11); + assert(sump(&t4) == 15); + assert(sump(&t5) == 19); + + assert(sumt(1, 2) == 3); + assert(sumt(3, 4) == 7); + assert(sumt(5, 6) == 11); + assert(sumt(7, 8) == 15); + assert(sumt(9, 10) == 19); +} diff --git a/src/test/test_opt.ml b/src/test/test_opt.ml index 975d134d..e453149e 100644 --- a/src/test/test_opt.ml +++ b/src/test/test_opt.ml @@ -441,6 +441,7 @@ let native_suite = "Test native code" >::: [ "Unsigned remainder by 7 (SysV AMD64)" >:: test_sysv_amd64_native "uremby7"; "Slot promotion 2 (GCD, partial) (SysV AMD64)" >:: test_sysv_amd64_native "promote2-partial"; "Struct in a block argument (SysV AMD64)" >:: test_sysv_amd64_native "sumphi"; + "Returning, passing, and dereferencing a struct (SysV AMD64)" >:: test_sysv_amd64_native "unref"; ] let () = run_test_tt_main @@ test_list [ From c36f4b019384225e3ca73be182461fc2b6ec071a Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 16 Nov 2025 23:35:37 -0500 Subject: [PATCH 23/62] Input modules for SROA and coalesce are the same --- .../passes/coalesce_slots/coalesce_slots.ml | 121 +++++------------- src/lib/passes/sroa/sroa.ml | 64 +-------- src/lib/passes/sroa_coalesce_common.ml | 63 +++++++++ 3 files changed, 96 insertions(+), 152 deletions(-) diff --git a/src/lib/passes/coalesce_slots/coalesce_slots.ml b/src/lib/passes/coalesce_slots/coalesce_slots.ml index 2ad5d08e..2f504300 100644 --- a/src/lib/passes/coalesce_slots/coalesce_slots.ml +++ b/src/lib/passes/coalesce_slots/coalesce_slots.ml @@ -8,68 +8,8 @@ open Sroa_coalesce_common module E = Monad.Result.Error -module V = Make(struct - module Insn = struct - type t = Insn.t - type op = Insn.op - let create ~label op = Insn.create op ~label - let with_op = Insn.with_op - let op = Insn.op - let special = function - | #Insn.variadic -> true - | _ -> false - let label = Insn.label - let lhs = var_set_of_option @. Insn.lhs_of_op - let offset = (offset :> op -> _) - let copy_of = (copy_of :> op -> _) - let free_vars = Insn.free_vars_of_op - let escapes = (escapes is_named free_vars :> op -> _) - let load_or_store_to = (load_or_store_to :> op -> _) - let replace_load_or_store_addr a = (replace_load_or_store_addr a :> op -> _) - let add x ty b o = (add x ty b o :> op) - end - module Ctrl = struct - type t = ctrl - let free_vars = Ctrl.free_vars - let escapes = (escapes_ctrl free_vars :> t -> _) - let locals = (locals Ctrl.Table.enum :> t -> _) - end - module Blk = Blk - module Func = Func - module Cfg = Cfg - end) - -module A = Make(struct - open Abi - module Insn = struct - type t = Insn.t - type op = Insn.op - let create ~label op = Insn.create op ~label - let with_op = Insn.with_op - let op = Insn.op - let special = function - | #Insn.extra -> true - | _ -> false - let label = Insn.label - let lhs = Insn.def_of_op - let offset = (offset :> op -> _) - let copy_of = (copy_of :> op -> _) - let free_vars = Insn.free_vars_of_op - let escapes = (escapes (const false) free_vars :> op -> _) - let load_or_store_to = (load_or_store_to :> op -> _) - let replace_load_or_store_addr a = (replace_load_or_store_addr a :> op -> _) - let add x ty b o = (add x ty b o :> op) - end - module Ctrl = struct - type t = ctrl - let free_vars = Ctrl.free_vars - let escapes = (escapes_ctrl free_vars :> t -> _) - let locals = (locals Ctrl.Table.enum :> t -> _) - end - module Blk = Blk - module Func = Func - module Cfg = Cfg - end) +module V = Make(VL) +module A = Make(AL) open E.Let @@ -77,38 +17,39 @@ let check_ssa msg n d = if Dict.mem d Tags.ssa then Ok () else E.failf "In Coalesce_slots%s: function $%s is not in SSA form" msg n () -let no_deads f d is = List.filter is ~f:(not @. Lset.mem d @. f) +let apply t fn map_blks map_blk insns with_insns with_ctrl label = + let not_dead = not @. Lset.mem t.deads @. label in + if is_empty t then fn else + let f = if Map.is_empty t.subst then fun b -> + insns b |> Seq.filter ~f:not_dead |> + Seq.to_list |> with_insns b + else fun b -> + let is, c = map_blk t.subst b in + let is = if Lset.is_empty t.deads then is + else List.filter is ~f:not_dead in + with_ctrl (with_insns b is) c in + map_blks fn ~f +[@@specialise] let run fn = let+ () = check_ssa "" (Func.name fn) (Func.dict fn) in let t = V.run fn in - if is_empty t then fn else - let f = - if Map.is_empty t.subst then - fun b -> - Blk.insns b |> Seq.to_list |> - no_deads Insn.label t.deads |> - Blk.with_insns b - else - fun b -> - let insns, ctrl = Subst_mapper.map_blk t.subst b in - let insns = no_deads Insn.label t.deads insns in - Blk.with_ctrl (Blk.with_insns b insns) ctrl in - Func.map_blks fn ~f + apply t fn + Func.map_blks + Subst_mapper.map_blk + Blk.insns + Blk.with_insns + Blk.with_ctrl + Insn.label let run_abi fn = - let+ () = check_ssa " (ABI)" (Abi.Func.name fn) (Abi.Func.dict fn) in + let open Abi in + let+ () = check_ssa " (ABI)" (Func.name fn) (Func.dict fn) in let t = A.run fn in - if is_empty t then fn else - let f = - if Map.is_empty t.subst then - fun b -> - Abi.Blk.insns b |> Seq.to_list |> - no_deads Abi.Insn.label t.deads |> - Abi.Blk.with_insns b - else - fun b -> - let insns, ctrl = Subst_mapper_abi.map_blk t.subst b in - let insns = no_deads Abi.Insn.label t.deads insns in - Abi.Blk.with_ctrl (Abi.Blk.with_insns b insns) ctrl in - Abi.Func.map_blks fn ~f + apply t fn + Func.map_blks + Subst_mapper_abi.map_blk + Blk.insns + Blk.with_insns + Blk.with_ctrl + Insn.label diff --git a/src/lib/passes/sroa/sroa.ml b/src/lib/passes/sroa/sroa.ml index bca26845..49b8f4b8 100644 --- a/src/lib/passes/sroa/sroa.ml +++ b/src/lib/passes/sroa/sroa.ml @@ -4,68 +4,8 @@ open Scalars open Sroa_impl open Sroa_coalesce_common -module V = Make(struct - module Insn = struct - type t = Insn.t - type op = Insn.op - let create ~label op = Insn.create op ~label - let with_op = Insn.with_op - let op = Insn.op - let special = function - | #Insn.variadic -> true - | _ -> false - let label = Insn.label - let lhs = var_set_of_option @. Insn.lhs_of_op - let offset = (offset :> op -> _) - let copy_of = (copy_of :> op -> _) - let free_vars = Insn.free_vars_of_op - let escapes = (escapes is_named free_vars :> op -> _) - let load_or_store_to = (load_or_store_to :> op -> _) - let replace_load_or_store_addr a = (replace_load_or_store_addr a :> op -> _) - let add x ty b o = (add x ty b o :> op) - end - module Ctrl = struct - type t = ctrl - let free_vars = Ctrl.free_vars - let escapes = (escapes_ctrl free_vars :> t -> _) - let locals = (locals Ctrl.Table.enum :> t -> _) - end - module Blk = Blk - module Func = Func - module Cfg = Cfg - end) - -module A = Make(struct - open Abi - module Insn = struct - type t = Insn.t - type op = Insn.op - let create ~label op = Insn.create op ~label - let with_op = Insn.with_op - let op = Insn.op - let special = function - | #Insn.extra -> true - | _ -> false - let label = Insn.label - let lhs = Insn.def_of_op - let offset = (offset :> op -> _) - let copy_of = (copy_of :> op -> _) - let free_vars = Insn.free_vars_of_op - let escapes = (escapes (const false) free_vars :> op -> _) - let load_or_store_to = (load_or_store_to :> op -> _) - let replace_load_or_store_addr a = (replace_load_or_store_addr a :> op -> _) - let add x ty b o = (add x ty b o :> op) - end - module Ctrl = struct - type t = ctrl - let free_vars = Ctrl.free_vars - let escapes = (escapes_ctrl free_vars :> t -> _) - let locals = (locals Ctrl.Table.enum :> t -> _) - end - module Blk = Blk - module Func = Func - module Cfg = Cfg - end) +module V = Make(VL) +module A = Make(AL) open Context.Syntax diff --git a/src/lib/passes/sroa_coalesce_common.ml b/src/lib/passes/sroa_coalesce_common.ml index 584a8e3f..2c3971ed 100644 --- a/src/lib/passes/sroa_coalesce_common.ml +++ b/src/lib/passes/sroa_coalesce_common.ml @@ -95,3 +95,66 @@ let escapes_ctrl fv = function | `sw (_, `var i, _, _) -> Var.Set.singleton i | `sw _ -> Var.Set.empty [@@specialise] + +module VL = struct + module Insn = struct + type t = Insn.t + type op = Insn.op + let create ~label op = Insn.create op ~label + let with_op = Insn.with_op + let op = Insn.op + let special = function + | #Insn.variadic -> true + | _ -> false + let label = Insn.label + let lhs = var_set_of_option @. Insn.lhs_of_op + let offset = (offset :> op -> _) + let copy_of = (copy_of :> op -> _) + let free_vars = Insn.free_vars_of_op + let escapes = (escapes is_named free_vars :> op -> _) + let load_or_store_to = (load_or_store_to :> op -> _) + let replace_load_or_store_addr a = (replace_load_or_store_addr a :> op -> _) + let add x ty b o = (add x ty b o :> op) + end + module Ctrl = struct + type t = ctrl + let free_vars = Ctrl.free_vars + let escapes = (escapes_ctrl free_vars :> t -> _) + let locals = (locals Ctrl.Table.enum :> t -> _) + end + module Blk = Blk + module Func = Func + module Cfg = Cfg +end + +module AL = struct + open Abi + module Insn = struct + type t = Insn.t + type op = Insn.op + let create ~label op = Insn.create op ~label + let with_op = Insn.with_op + let op = Insn.op + let special = function + | #Insn.extra -> true + | _ -> false + let label = Insn.label + let lhs = Insn.def_of_op + let offset = (offset :> op -> _) + let copy_of = (copy_of :> op -> _) + let free_vars = Insn.free_vars_of_op + let escapes = (escapes (const false) free_vars :> op -> _) + let load_or_store_to = (load_or_store_to :> op -> _) + let replace_load_or_store_addr a = (replace_load_or_store_addr a :> op -> _) + let add x ty b o = (add x ty b o :> op) + end + module Ctrl = struct + type t = ctrl + let free_vars = Ctrl.free_vars + let escapes = (escapes_ctrl free_vars :> t -> _) + let locals = (locals Ctrl.Table.enum :> t -> _) + end + module Blk = Blk + module Func = Func + module Cfg = Cfg +end From 01e6a36cb09d771afa2641071a39bc8cbbd892ab Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Mon, 17 Nov 2025 00:12:53 -0500 Subject: [PATCH 24/62] Some cleanup --- src/lib/egraph/egraph_input.ml | 18 +-- .../coalesce_slots/coalesce_slots_impl.ml | 57 ++++----- src/lib/passes/resolve_constant_blk_args.ml | 88 ++++++-------- src/lib/passes/sroa/sroa_impl.ml | 114 ++++++++++-------- src/lib/passes/sroa_coalesce_common.ml | 27 +---- src/lib/phi_values.ml | 17 +++ src/lib/scalars.ml | 55 +++++---- 7 files changed, 183 insertions(+), 193 deletions(-) diff --git a/src/lib/egraph/egraph_input.ml b/src/lib/egraph/egraph_input.ml index 7ccf2040..a6f378a0 100644 --- a/src/lib/egraph/egraph_input.ml +++ b/src/lib/egraph/egraph_input.ml @@ -15,24 +15,8 @@ module Operands = Set.Make(struct module Phis_lang = struct module Ctrl = struct type t = ctrl - - let table d ds tbl = - Ctrl.Table.enum tbl |> Seq.map ~f:snd |> - Seq.map ~f:(fun (`label (l, args)) -> l, args) |> - Seq.to_list |> List.cons (d, ds) - - let locals = function - | `hlt -> [] - | `jmp #global -> [] - | `jmp `label (l, args) -> [l, args] - | `br (_, #global, #global) -> [] - | `br (_, `label (y, ys), #global) -> [y, ys] - | `br (_, #global, `label (n, ns)) -> [n, ns] - | `br (_, `label (y, ys), `label (n, ns)) -> [y, ys; n, ns] - | `ret _ -> [] - | `sw (_, _, `label (d, ds), tbl) -> table d ds tbl + let locals = (Phi_values.locals Ctrl.Table.enum :> t -> _) end - module Blk = Blk module Func = Func module Cfg = Cfg diff --git a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml index 6bb06791..3debfa3d 100644 --- a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml +++ b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml @@ -210,6 +210,35 @@ module Make(M : Scalars.L) = struct s := Analysis.transfer_op slots !s op; acc)) + let debug_show slots rs nums deads p subst = + Logs.debug (fun m -> + Map.iter_keys slots ~f:(fun x -> + let ppr ppf x = match Map.find rs x with + | None -> Format.fprintf ppf "none" + | Some r when Range.is_bad r -> + Format.fprintf ppf "escapes" + | Some r -> + Format.fprintf ppf "%a (%a to %a)" + Range.pp r + Label.pp (Vec.get_exn nums r.lo) + Label.pp (Vec.get_exn nums r.hi) in + m "%s: %a: %a%!" __FUNCTION__ Var.pp x ppr x)); + Logs.debug (fun m -> + Partition.groups p |> Seq.iter ~f:(fun g -> + m "%s: group: %a%!" __FUNCTION__ (Group.pp Var.pp) g)); + Logs.debug (fun m -> + if not @@ Lset.is_empty deads then + m "%s: dead stores: %a%!" + __FUNCTION__ + (Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") + Label.pp) + (Lset.to_list deads)); + Logs.debug (fun m -> + Map.iteri subst ~f:(fun ~key ~data -> + m "%s: coalesce slot: %a => %a%!" + __FUNCTION__ Var.pp key Virtual.pp_operand data)) + let run fn = let slots = Analysis.collect_slots fn in if Map.is_empty slots then empty else @@ -217,35 +246,9 @@ module Make(M : Scalars.L) = struct let blks = Func.map_of_blks fn in let s = Analysis.analyze ~cfg ~blks slots fn in let rs, nums = liveness cfg blks slots s in - Logs.debug (fun m -> - Map.iter_keys slots ~f:(fun x -> - let ppr ppf x = match Map.find rs x with - | None -> Format.fprintf ppf "dead" - | Some r when Range.is_bad r -> - Format.fprintf ppf "top" - | Some r -> - Format.fprintf ppf "%a (%a to %a)" - Range.pp r - Label.pp (Vec.get_exn nums r.lo) - Label.pp (Vec.get_exn nums r.hi) in - m "%s: %a: %a%!" __FUNCTION__ Var.pp x ppr x)); let p = non_interfering slots rs in - Logs.debug (fun m -> - Partition.groups p |> Seq.iter ~f:(fun g -> - m "%s: group: %a%!" __FUNCTION__ (Group.pp Var.pp) g)); let deads = collect_deads blks slots rs s in - Logs.debug (fun m -> - if not @@ Lset.is_empty deads then - m "%s: dead stores: %a%!" - __FUNCTION__ - (Format.pp_print_list - ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") - Label.pp) - (Lset.to_list deads)); let subst = make_subst slots p in - Logs.debug (fun m -> - Map.iteri subst ~f:(fun ~key ~data -> - m "%s: coalesce slot: %a => %a%!" - __FUNCTION__ Var.pp key Virtual.pp_operand data)); + debug_show slots rs nums deads p subst; {subst; deads} end diff --git a/src/lib/passes/resolve_constant_blk_args.ml b/src/lib/passes/resolve_constant_blk_args.ml index d4b426a8..7d5f99b5 100644 --- a/src/lib/passes/resolve_constant_blk_args.ml +++ b/src/lib/passes/resolve_constant_blk_args.ml @@ -1,22 +1,7 @@ open Core open Regular.Std open Virtual - -let table enum d ds tbl = - enum tbl |> Seq.map ~f:snd |> - Seq.map ~f:(fun (`label (l, args)) -> l, args) |> - Seq.to_list |> List.cons (d, ds) - -let locals enum = function - | `hlt -> [] - | `jmp #global -> [] - | `jmp `label (l, args) -> [l, args] - | `br (_, #global, #global) -> [] - | `br (_, `label (y, ys), #global) -> [y, ys] - | `br (_, #global, `label (n, ns)) -> [n, ns] - | `br (_, `label (y, ys), `label (n, ns)) -> [y, ys; n, ns] - | `ret _ -> [] - | `sw (_, _, `label (d, ds), tbl) -> table enum d ds tbl +open Phi_values module D = struct (* `None` indicates that there may be many values for the phi, @@ -32,7 +17,7 @@ module D = struct let join x y = if equal x y then x else None end -module Phis_v = Phi_values.Make(struct +module V = Make(struct module Ctrl = struct type t = ctrl let locals = (locals Ctrl.Table.enum :> t -> _) @@ -42,7 +27,7 @@ module Phis_v = Phi_values.Make(struct module Cfg = Cfg end)(D) -module Phis_a = Phi_values.Make(struct +module A = Make(struct open Abi module Ctrl = struct type t = ctrl @@ -53,39 +38,42 @@ module Phis_a = Phi_values.Make(struct module Cfg = Cfg end)(D) -let run fn = - if Dict.mem (Func.dict fn) Tags.ssa then - let cfg = Cfg.create fn in - let blks = Func.map_of_blks fn in - let s = - Map.filter_map ~f:Fn.id @@ - Phis_v.analyze ~blk:(Label.Tree.find blks) cfg in - let fn = - if not @@ Map.is_empty s then Func.map_blks fn ~f:(fun b -> - let is, k = Subst_mapper.map_blk s b in - Blk.(with_ctrl (with_insns b is) k)) - else fn in - Ok fn - else +let check_ssa msg n d fn f = + if Dict.mem (d fn) Tags.ssa then Ok (f ()) else Or_error.errorf - "In Resolve_constant_blk_args: function $%s is \ - not in SSA form" (Func.name fn) + "In Resolve_constant_blk_args%s: function $%s is \ + not in SSA form" msg (n fn) + +let apply fn cfg mb analyze map_blks map_blk with_ctrl with_insns = + let cfg = cfg fn in + let blks = mb fn in + let s = Map.filter_map ~f:Fn.id @@ + analyze ~blk:(Label.Tree.find blks) cfg in + if not @@ Map.is_empty s then map_blks fn ~f:(fun b -> + let is, k = map_blk s b in + with_ctrl (with_insns b is) k) + else fn +[@@specialise] + +let run fn = + check_ssa "" Func.name Func.dict fn @@ fun () -> + apply fn + Cfg.create + Func.map_of_blks + V.analyze + Func.map_blks + Subst_mapper.map_blk + Blk.with_ctrl + Blk.with_insns let run_abi fn = let open Abi in - if Dict.mem (Func.dict fn) Tags.ssa then - let cfg = Cfg.create fn in - let blks = Func.map_of_blks fn in - let s = - Map.filter_map ~f:Fn.id @@ - Phis_a.analyze ~blk:(Label.Tree.find blks) cfg in - let fn = - if not @@ Map.is_empty s then Func.map_blks fn ~f:(fun b -> - let is, k = Subst_mapper_abi.map_blk s b in - Blk.(with_ctrl (with_insns b is) k)) - else fn in - Ok fn - else - Or_error.errorf - "In Resolve_constant_blk_args (ABI): function $%s is \ - not in SSA form" (Func.name fn) + check_ssa " (ABI)" Func.name Func.dict fn @@ fun () -> + apply fn + Cfg.create + Func.map_of_blks + A.analyze + Func.map_blks + Subst_mapper_abi.map_blk + Blk.with_ctrl + Blk.with_insns diff --git a/src/lib/passes/sroa/sroa_impl.ml b/src/lib/passes/sroa/sroa_impl.ml index ff6c40d5..03f8f1a3 100644 --- a/src/lib/passes/sroa/sroa_impl.ml +++ b/src/lib/passes/sroa/sroa_impl.ml @@ -66,7 +66,7 @@ end = struct (* Filter out slots that are not splittable. *) Map.map ~f:(List.sort ~compare:cmp_access) |> Map.filteri ~f:(fun ~key ~data -> - let check x y= + let check x y = let sx = sizeof_access x in (* No partial overlaps. *) Int64.(x.off + of_int sx <= y.off) || @@ -111,44 +111,43 @@ end = struct ~pp_sep:(fun ppf () -> Format.fprintf ppf " ") pp_access) p.mems + let match_part x c = {c with mems = x :: c.mems} + let grow_part off size x c = {off; size; mems = x :: c.mems} + let new_part off size x = {off; size; mems = [x]} + (* Sort the memory accesses into self-contained, non-overlapping partitions, which are the fully-or-partially scalarized sub-objects of the aggregate. *) - let partition_acesses m : partitions = + let partition_acesses : accesses -> partitions = fun m -> let rec merge acc c = function | [] -> List.sort (c :: acc) ~compare:cmp_partition | x :: xs -> let sx = sizeof_access x in if Int64.(c.off = x.off) && c.size = sx then (* Access exactly matches the current partition. *) - merge acc {c with mems = x :: c.mems} xs + let p = match_part x c in + merge acc p xs else if overlaps c.off c.size x.off sx then (* Access overlaps with current partition, so the partition must increase in size. *) - let o' = Int64.min c.off x.off in - let e' = Int64.(max (c.off + of_int c.size) (x.off + of_int sx)) in - let s' = Int64.(to_int_exn (e' - o')) in - merge acc { - off = o'; - size = s'; - mems = x :: c.mems; - } xs + let open Int64 in + let o' = min c.off x.off in + let ec = c.off + of_int c.size in + let ex = x.off + of_int sx in + let e' = max ec ex in + let s' = to_int_exn (e' - o') in + let p = grow_part o' s' x c in + merge acc p xs else (* No overlap, so we start a new partition. *) - merge (c :: acc) { - off = x.off; - size = sx; - mems = [x]; - } xs in + let p = new_part x.off sx x in + merge (c :: acc) p xs in (* pre: each access list is sorted *) - Map.map m ~f:(function - | [] -> [] - | (x : access) :: xs -> - merge [] { - off = x.off; - size = sizeof_access x; - mems = [x]; - } xs) + Map.filter_map m ~f:(function + | [] -> None + | x :: xs -> + let p = new_part x.off (sizeof_access x) x in + Some (merge [] p xs)) (* Turn each partition into a concrete slot. *) let materialize_partitions slots parts : scalars Context.t = @@ -207,6 +206,24 @@ end = struct let op' = Insn.replace_load_or_store_addr y op in [Insn.create ~label:l a; Insn.with_op i op'] + let debug_show_insn i ptr ty ldst = + Logs.debug (fun m -> + m "%s: %a: looking at %a.%a to %a%!" + __FUNCTION__ + Label.pp (Insn.label i) + pp_load_or_store ldst + Type.pp_basic ty + Var.pp ptr) + + let debug_show_bad_val ptr v = + Logs.debug (fun m -> + m "%s: cannot rewrite: %a is %a" + __FUNCTION__ Var.pp ptr + (Format.pp_print_option + ~none:pp_bot + pp_value) + v) + (* Rewrite an instruction. *) let rewrite_insn parts (m : scalars) (s : state) i = let open Context.Syntax in @@ -214,15 +231,11 @@ end = struct match Insn.load_or_store_to op with | None -> !![i] | Some (ptr, ty, ldst) -> - Logs.debug (fun m -> - m "%s: %a: looking at %a.%a to %a%!" - __FUNCTION__ - Label.pp (Insn.label i) - pp_load_or_store ldst - Type.pp_basic ty - Var.pp ptr); + debug_show_insn i ptr ty ldst; match Map.find s ptr with - | Some Top | None -> !![i] + | (Some Top | None) as v -> + debug_show_bad_val ptr v; + !![i] | Some Offset (base, off) -> match find_partition parts base off @@ basic_size ty with | Some p -> rewrite_insn_exact m i ~exact:p.off ~base ~off @@ -245,20 +258,19 @@ end = struct ~f:(fun ~key:_ ~data fn -> Func.insert_slot fn data) let debug_show_parts parts = - if not @@ Map.is_empty parts then - Logs.debug (fun m -> - m "%s: partitions:\n%a%!" - __FUNCTION__ - (Format.pp_print_list - ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n") - (fun ppf (key, data) -> - Format.fprintf ppf "%a:\n%a" - Var.pp key - (Format.pp_print_list - ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n") - (fun ppf -> Format.fprintf ppf " %a" pp_partition)) - data)) - (Map.to_alist parts)) + Logs.debug (fun m -> + m "%s: partitions:\n%a%!" + __FUNCTION__ + (Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n") + (fun ppf (key, data) -> + Format.fprintf ppf "%a:\n%a" + Var.pp key + (Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n") + (fun ppf -> Format.fprintf ppf " %a" pp_partition)) + data)) + (Map.to_alist parts)) let run fn = let open Context.Syntax in @@ -267,8 +279,10 @@ end = struct let s = Analysis.analyze slots fn in let accs = collect_accesses slots fn s in let parts = partition_acesses accs in - debug_show_parts parts; - let* m = materialize_partitions slots parts in - let fn = insert_new_slots fn m in - rewrite_with_partitions slots fn s parts m + if Map.is_empty parts then !!fn else + let () = debug_show_parts parts in + let* m = materialize_partitions slots parts in + if Map.is_empty m then !!fn else + let fn = insert_new_slots fn m in + rewrite_with_partitions slots fn s parts m end diff --git a/src/lib/passes/sroa_coalesce_common.ml b/src/lib/passes/sroa_coalesce_common.ml index 2c3971ed..5256ec94 100644 --- a/src/lib/passes/sroa_coalesce_common.ml +++ b/src/lib/passes/sroa_coalesce_common.ml @@ -60,29 +60,6 @@ let is_named = function | `name _ -> true | _ -> false -let local l args = l, List.filter_map args ~f:var_of_operand - -let table enum d ds tbl = - enum tbl |> Seq.map ~f:snd |> - Seq.map ~f:(fun (`label (l, args)) -> local l args) |> - Seq.to_list |> List.cons (local d ds) - -let locals enum = function - | `hlt -> [] - | `jmp #global -> [] - | `jmp `label (l, args) -> - [local l args] - | `br (_, #global, #global) -> [] - | `br (_, `label (y, ys), #global) -> - [local y ys] - | `br (_, #global, `label (n, ns)) -> - [local n ns] - | `br (_, `label (y, ys), `label (n, ns)) -> - [local y ys; local n ns] - | `ret _ -> [] - | `sw (_, _, `label (d, ds), tbl) -> - table enum d ds tbl - let escapes_ctrl fv = function | `hlt -> Var.Set.empty | `jmp `var x -> Var.Set.singleton x @@ -120,7 +97,7 @@ module VL = struct type t = ctrl let free_vars = Ctrl.free_vars let escapes = (escapes_ctrl free_vars :> t -> _) - let locals = (locals Ctrl.Table.enum :> t -> _) + let locals = (Phi_values.locals Ctrl.Table.enum :> t -> _) end module Blk = Blk module Func = Func @@ -152,7 +129,7 @@ module AL = struct type t = ctrl let free_vars = Ctrl.free_vars let escapes = (escapes_ctrl free_vars :> t -> _) - let locals = (locals Ctrl.Table.enum :> t -> _) + let locals = (Phi_values.locals Ctrl.Table.enum :> t -> _) end module Blk = Blk module Func = Func diff --git a/src/lib/phi_values.ml b/src/lib/phi_values.ml index 3a5137b3..7ccea306 100644 --- a/src/lib/phi_values.ml +++ b/src/lib/phi_values.ml @@ -4,6 +4,23 @@ open Core open Regular.Std open Graphlib.Std +let table enum d ds tbl = + enum tbl |> Seq.map ~f:snd |> + Seq.map ~f:(fun (`label (l, args)) -> l, args) |> + Seq.to_list |> List.cons (d, ds) + +let locals enum = + let open Virtual in function + | `hlt -> [] + | `jmp #global -> [] + | `jmp `label (l, args) -> [l, args] + | `br (_, #global, #global) -> [] + | `br (_, `label (y, ys), #global) -> [y, ys] + | `br (_, #global, `label (n, ns)) -> [n, ns] + | `br (_, `label (y, ys), `label (n, ns)) -> [y, ys; n, ns] + | `ret _ -> [] + | `sw (_, _, `label (d, ds), tbl) -> table enum d ds tbl + module type Domain = sig type t [@@deriving equal] val one : Virtual.operand -> t diff --git a/src/lib/scalars.ml b/src/lib/scalars.ml index a7ba9335..6960063a 100644 --- a/src/lib/scalars.ml +++ b/src/lib/scalars.ml @@ -5,6 +5,8 @@ open Graphlib.Std let (@.) = Fn.compose let (@<) = Fn.flip +module Slot = Virtual.Slot + (* A scalar access. *) module Scalar = struct module T = struct @@ -16,7 +18,7 @@ module Scalar = struct end type scalar = Scalar.t [@@deriving compare, equal, hash, sexp] -type scalars = Virtual.slot Scalar.Map.t +type scalars = Slot.t Scalar.Map.t (* Lattice of scalar accesses. @@ -45,7 +47,7 @@ module Value = struct | _ -> Top end -type slots = Virtual.slot Var.Map.t +type slots = Slot.t Var.Map.t module State : sig type t = value Var.Map.t [@@deriving equal, sexp] @@ -63,9 +65,7 @@ end = struct let is_bad slots ptr offset = Int64.(offset < 0L) || match Map.find slots ptr with - | Some s -> - let size = Int64.of_int @@ Virtual.Slot.size s in - Int64.(offset >= size) + | Some s -> Int64.(offset >= of_int (Slot.size s)) | None -> false (* Normalize the scalar referred to by `ptr` and `offset`. *) @@ -133,7 +133,7 @@ module type L = sig type t val free_vars : t -> Var.Set.t val escapes : t -> Var.Set.t - val locals : t -> (Label.t * Var.t list) list + val locals : t -> (Label.t * Virtual.operand list) list end module Blk : sig @@ -147,11 +147,11 @@ module type L = sig module Func : sig type t - val slots : ?rev:bool -> t -> Virtual.slot seq + val slots : ?rev:bool -> t -> Slot.t seq val blks : ?rev:bool -> t -> Blk.t seq val map_of_blks : t -> Blk.t Label.Tree.t val with_blks : t -> Blk.t list -> t Or_error.t - val insert_slot : t -> Virtual.slot -> t + val insert_slot : t -> Slot.t -> t end module Cfg : sig @@ -184,26 +184,34 @@ module Make(M : L) = struct ~f:(fun s key -> Map.set s ~key ~data:v) in escaping Insn.escapes op s - let blkargs blks (l, xs) = + let merge_blkarg acc src dst = match src with + | `var src when Var.(src = dst) -> acc + | `var src -> + begin match Map.find acc src with + | None -> acc + | Some v -> Map.update acc dst ~f:(function + | Some v' -> Value.merge v v' + | None -> v) + end + | _ -> acc + + let blkargs blks s (l, xs) = Label.Tree.find blks l |> - Option.value_map ~default:[] ~f:(fun b -> + Option.value_map ~default:s ~f:(fun b -> let args = Seq.to_list @@ Blk.args b in - match List.zip xs args with - | Unequal_lengths -> [] - | Ok args' -> args') + List.fold2 xs args ~init:s ~f:merge_blkarg |> function + | Ok s -> s + | Unequal_lengths -> + Logs.warn (fun m -> + m "%s: unequal lengths (%d vs %d) for %a%!" + __FUNCTION__ (List.length xs) (List.length args) Label.pp l); + s) (* Transfer for control-flow instruction. *) let transfer_ctrl blks s c = let init = escaping Ctrl.escapes c s in (* Propagate the block parameters we are passing. *) - Ctrl.locals c |> List.bind ~f:(blkargs blks) |> - List.fold ~init ~f:(fun acc (src, dst) -> - if Var.(src = dst) then acc - else match Map.find acc src with - | None -> acc - | Some v -> Map.update acc dst ~f:(function - | Some v' -> Value.merge v v' - | None -> v)) + Ctrl.locals c |> List.fold ~init ~f:(blkargs blks) (* Transfer function for a block. *) let transfer slots blks l s = @@ -226,9 +234,8 @@ module Make(M : L) = struct Solution.create @< State.empty (* All slots mapped to their names. *) - let collect_slots fn = - Func.slots fn |> Seq.fold ~init:Var.Map.empty ~f:(fun acc s -> - Map.set acc ~key:(Virtual.Slot.var s) ~data:s) + let collect_slots fn = Func.slots fn |> Seq.fold ~init:Var.Map.empty + ~f:(fun acc s -> Map.set acc ~key:(Slot.var s) ~data:s) (* Run the dataflow analysis. *) let analyze ?cfg ?blks slots fn : solution = From a9c6b44a679f737ce3ce04e4d0487e9e6583ed57 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Mon, 17 Nov 2025 01:29:05 -0500 Subject: [PATCH 25/62] Explicit test --- src/test/data/opt/sroa.vir | 22 ++++++++++++++++++++++ src/test/data/opt/sroa.vir.opt | 9 +++++++++ src/test/test_opt.ml | 1 + 3 files changed, 32 insertions(+) create mode 100644 src/test/data/opt/sroa.vir create mode 100644 src/test/data/opt/sroa.vir.opt diff --git a/src/test/data/opt/sroa.vir b/src/test/data/opt/sroa.vir new file mode 100644 index 00000000..8227d9eb --- /dev/null +++ b/src/test/data/opt/sroa.vir @@ -0,0 +1,22 @@ +module sroa + +export function w $foo(w %a, w %b, w %c, w %d) { + %s = slot 16, align 4 +@start: + %p1 = add.l %s, 0_l + st.w %a, %p1 + %p2 = add.l %s, 4_l + st.w %b, %p2 + %p3 = add.l %s, 8_l + st.w %c, %p3 + %p4 = add.l %s, 12_l + st.w %d, %p4 + %a = ld.w %p1 + %b = ld.w %p2 + %c = ld.w %p3 + %d = ld.w %p4 + %x = add.w %a, %b + %x = add.w %x, %c + %x = add.w %x, %d + ret %x +} diff --git a/src/test/data/opt/sroa.vir.opt b/src/test/data/opt/sroa.vir.opt new file mode 100644 index 00000000..ed03491b --- /dev/null +++ b/src/test/data/opt/sroa.vir.opt @@ -0,0 +1,9 @@ +module sroa + +export function w $foo(w %a, w %b, w %c, w %d) { +@2: + %4 = add.w %a, %b ; @18 + %6 = add.w %c, %d ; @20 + %7 = add.w %4, %6 ; @21 + ret %7 +} diff --git a/src/test/test_opt.ml b/src/test/test_opt.ml index e453149e..ec528730 100644 --- a/src/test/test_opt.ml +++ b/src/test/test_opt.ml @@ -307,6 +307,7 @@ let opt_suite = "Test optimizations" >::: [ "No sinking" >:: test "nosink"; "Spill test 2" >:: test "spill2"; "Parallel moves" >:: test "parallel"; + "SROA" >:: test "sroa"; ] let abi_suite = "Test ABI lowering" >::: [ From 0c2497d9f34810a11668bb5f9c99ac3497df1574 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Tue, 18 Nov 2025 19:23:32 -0500 Subject: [PATCH 26/62] Simplify `liveness_insn` for coalescing This isn't a real "liveness" analysis, but instead it is a lens by which we view the results of the `Scalars` analysis. --- .../coalesce_slots/coalesce_slots_impl.ml | 20 ++++++++----------- src/lib/passes/sroa_coalesce_common.ml | 8 ++------ src/lib/scalars.ml | 1 - 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml index 3debfa3d..3e90ef82 100644 --- a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml +++ b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml @@ -147,18 +147,14 @@ module Make(M : Scalars.L) = struct let liveness_insn acc s ip i = let op = Insn.op i in - match Insn.load_or_store_to op with - | Some (ptr, _, ldst) -> - update acc s ptr ip @@ is_store ldst - | None -> match Insn.offset op with - | Some (ptr, _) -> - update acc s ptr ip false - | None -> match Insn.copy_of op with - | Some x -> update acc s x ip false - | None when Insn.special op -> - Insn.free_vars op |> Set.fold ~init:acc - ~f:(fun acc x -> Map.set acc ~key:x ~data:Range.bad) - | None -> acc + let r = Insn.free_vars op in + let r, w = match Insn.load_or_store_to op with + | Some (ptr, _, Store) -> Set.remove r ptr, Some ptr + | Some _ | None -> r, None in + Option.fold w ~init:acc ~f:(fun acc x -> + update acc s x ip true) |> fun init -> + Set.fold r ~init ~f:(fun acc x -> + update acc s x ip false) let liveness_ctrl acc s ip c = Ctrl.free_vars c |> Set.fold ~init:acc diff --git a/src/lib/passes/sroa_coalesce_common.ml b/src/lib/passes/sroa_coalesce_common.ml index 5256ec94..77ce6ed7 100644 --- a/src/lib/passes/sroa_coalesce_common.ml +++ b/src/lib/passes/sroa_coalesce_common.ml @@ -73,6 +73,7 @@ let escapes_ctrl fv = function | `sw _ -> Var.Set.empty [@@specialise] +(* Virtual language *) module VL = struct module Insn = struct type t = Insn.t @@ -80,9 +81,6 @@ module VL = struct let create ~label op = Insn.create op ~label let with_op = Insn.with_op let op = Insn.op - let special = function - | #Insn.variadic -> true - | _ -> false let label = Insn.label let lhs = var_set_of_option @. Insn.lhs_of_op let offset = (offset :> op -> _) @@ -104,6 +102,7 @@ module VL = struct module Cfg = Cfg end +(* Virtual.Abi language *) module AL = struct open Abi module Insn = struct @@ -112,9 +111,6 @@ module AL = struct let create ~label op = Insn.create op ~label let with_op = Insn.with_op let op = Insn.op - let special = function - | #Insn.extra -> true - | _ -> false let label = Insn.label let lhs = Insn.def_of_op let offset = (offset :> op -> _) diff --git a/src/lib/scalars.ml b/src/lib/scalars.ml index 6960063a..540a531e 100644 --- a/src/lib/scalars.ml +++ b/src/lib/scalars.ml @@ -120,7 +120,6 @@ module type L = sig val copy_of : op -> Var.t option val free_vars : op -> Var.Set.t val escapes : op -> Var.Set.t - val special : op -> bool (* Used during replacement. *) val load_or_store_to : op -> (Var.t * Type.basic * load_or_store) option From ceee06b8e387eb8a27cfcf7f5cb20c7bce87774f Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Tue, 18 Nov 2025 19:53:25 -0500 Subject: [PATCH 27/62] Exclude propagating via block params in SROA for now This could be done in the future, but the transformation needs to be more sophisticated to avoid miscompilation. --- src/lib/passes/sroa/sroa_impl.ml | 14 +++++++++++++- src/lib/scalars.ml | 27 ++++++++++++++++++--------- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/src/lib/passes/sroa/sroa_impl.ml b/src/lib/passes/sroa/sroa_impl.ml index 03f8f1a3..7e50e862 100644 --- a/src/lib/passes/sroa/sroa_impl.ml +++ b/src/lib/passes/sroa/sroa_impl.ml @@ -272,11 +272,23 @@ end = struct data)) (Map.to_alist parts)) + (* XXX: allowing the analysis to propagate through block parameters + could possibly be done, but the current transformation isn't + set up to properly handle it. + + The main issue arises from pointers into the slots that get + aliased via a block parameter. That dangling reference will + currently be left unchanged, leading to garbage values being + read from the old slot. + *) + let analyze slots fn = + Analysis.analyze ~blkparam:false slots fn + let run fn = let open Context.Syntax in let slots = Analysis.collect_slots fn in if Map.is_empty slots then !!fn else - let s = Analysis.analyze slots fn in + let s = analyze slots fn in let accs = collect_accesses slots fn s in let parts = partition_acesses accs in if Map.is_empty parts then !!fn else diff --git a/src/lib/scalars.ml b/src/lib/scalars.ml index 540a531e..1a0a3f99 100644 --- a/src/lib/scalars.ml +++ b/src/lib/scalars.ml @@ -206,38 +206,46 @@ module Make(M : L) = struct __FUNCTION__ (List.length xs) (List.length args) Label.pp l); s) + let ctrl_esc blkparam = + if blkparam then Ctrl.escapes else Ctrl.free_vars + [@@inline] + (* Transfer for control-flow instruction. *) - let transfer_ctrl blks s c = - let init = escaping Ctrl.escapes c s in + let transfer_ctrl ?(blkparam = true) blks s c = + let init = escaping (ctrl_esc blkparam) c s in (* Propagate the block parameters we are passing. *) Ctrl.locals c |> List.fold ~init ~f:(blkargs blks) + [@@specialise] (* Transfer function for a block. *) - let transfer slots blks l s = + let transfer ?(blkparam = true) slots blks l s = Label.Tree.find blks l |> Option.value_map ~default:s ~f:(fun b -> Blk.insns b |> Seq.map ~f:Insn.op |> Seq.fold ~init:s ~f:(transfer_op slots) |> - transfer_ctrl blks @< Blk.ctrl b) + transfer_ctrl ~blkparam blks @< Blk.ctrl b) + [@@specialise] (* Initial constraints. *) - let initialize slots blks = + let initialize ?(blkparam = true) slots blks = (* Set all slots to point to their own base address. *) let init = Map.mapi slots ~f:(fun ~key ~data:_ -> Offset (key, 0L)) in (* Any slot that directly escapes should immediately be set to `Top`. *) + let k = ctrl_esc blkparam in Label.Tree.fold blks ~init ~f:(fun ~key:_ ~data init -> Blk.insns data |> Seq.fold ~init ~f:(fun s i -> escaping Insn.escapes (Insn.op i) s) |> - escaping Ctrl.escapes (Blk.ctrl data)) |> + escaping k (Blk.ctrl data)) |> Label.Map.singleton Label.pseudoentry |> Solution.create @< State.empty + [@@specialise] (* All slots mapped to their names. *) let collect_slots fn = Func.slots fn |> Seq.fold ~init:Var.Map.empty ~f:(fun acc s -> Map.set acc ~key:(Slot.var s) ~data:s) (* Run the dataflow analysis. *) - let analyze ?cfg ?blks slots fn : solution = + let analyze ?(blkparam = true) ?cfg ?blks slots fn : solution = let cfg = match cfg with | None -> Cfg.create fn | Some cfg -> cfg in @@ -245,9 +253,10 @@ module Make(M : L) = struct | None -> Func.map_of_blks fn | Some blks -> blks in Graphlib.fixpoint (module Cfg) cfg - ~init:(initialize slots blks) + ~init:(initialize ~blkparam slots blks) ~start:Label.pseudoentry ~equal:State.equal ~merge:State.merge - ~f:(transfer slots blks) + ~f:(transfer ~blkparam slots blks) + [@@specialise] end From 138993af658be9ef6e725cbf49988b6e45b16a07 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Tue, 18 Nov 2025 20:26:40 -0500 Subject: [PATCH 28/62] Don't prematurely filter out partially overlapping accesses This is already handled by `partition_accesses`, and it was rejecting valid partitions. --- src/lib/passes/sroa/sroa_impl.ml | 19 +-- src/test/data/opt/gcdext.vir.opt.sysv | 37 ++--- src/test/data/opt/vaarg1.vir.opt.sysv | 131 ++++++++---------- src/test/data/opt/vaarg1.vir.opt.sysv.amd64 | 99 ++++++------- .../opt/vaarg1.vir.opt.sysv.amd64.regalloc | 91 +++++------- src/test/data/opt/vaarg2.vir.opt.sysv | 27 ++-- 6 files changed, 166 insertions(+), 238 deletions(-) diff --git a/src/lib/passes/sroa/sroa_impl.ml b/src/lib/passes/sroa/sroa_impl.ml index 7e50e862..82c9670b 100644 --- a/src/lib/passes/sroa/sroa_impl.ml +++ b/src/lib/passes/sroa/sroa_impl.ml @@ -63,24 +63,7 @@ end = struct | _ -> acc in s := Analysis.transfer_op slots !s op; acc)) |> - (* Filter out slots that are not splittable. *) - Map.map ~f:(List.sort ~compare:cmp_access) |> - Map.filteri ~f:(fun ~key ~data -> - let check x y = - let sx = sizeof_access x in - (* No partial overlaps. *) - Int64.(x.off + of_int sx <= y.off) || - (* Allow exact re-use of the same region. *) - cmp_access x y = 0 in - let rec ok = function - | x :: ((y :: _) as xs) -> check x y && ok xs - | [] | [_] -> true in - let res = ok data in - if not res then - Logs.debug (fun m -> - m "%s: filtering out accesses for %a%!" - __FUNCTION__ Var.pp key); - res) + Map.map ~f:(List.sort ~compare:cmp_access) let overlaps oa sa ob sb = Int64.(oa < ob + of_int sb && ob < oa + of_int sa) diff --git a/src/test/data/opt/gcdext.vir.opt.sysv b/src/test/data/opt/gcdext.vir.opt.sysv index 1de15d3b..7b3659db 100644 --- a/src/test/data/opt/gcdext.vir.opt.sysv +++ b/src/test/data/opt/gcdext.vir.opt.sysv @@ -1,35 +1,36 @@ module gcdext export function $gcd(w %a/rdi, w %b/rsi) { - %r = slot 16, align 8 + %31 = slot 8, align 8 + %32 = slot 8, align 8 @2: %1.1 = eq.w %a, 0x0_w ; @30 - %2.1 = add.l %r, 0x8_l ; @31 - %3.1 = add.l %r, 0x4_l ; @32 br %1.1, @3, @4 @3: - st.w %b, %r ; @6 - st.w 0x0_w, %3.1 ; @8 - st.w 0x1_w, %2.1 ; @10 - %19.1 = ld.l %r ; @49 - %21.1 = ld.l %2.1 ; @52 + st.w %b, %31 ; @6 + %33.1 = add.l %31, 0x4_l ; @61 + st.w 0x0_w, %33.1 ; @8 + st.w 0x1_w, %32 ; @10 + %19.1 = ld.l %31 ; @49 + %21.1 = ld.l %32 ; @52 jmp @29(%21.1, %19.1) @4: %m.1 = rem.w %b, %a ; @12 %27.1/l/rax, %28.1/l/rdx = call $gcd(%m.1/rdi, %a/rsi) ; @13 - st.l %27.1, %r ; @38 - st.l %28.1, %2.1 ; @42 - %rg.1 = ld.w %r ; @15 - %rx.1 = ld.w %3.1 ; @17 - %ry.1 = ld.w %2.1 ; @19 - st.w %rg.1, %r ; @20 + st.l %27.1, %31 ; @38 + st.l %28.1, %32 ; @42 + %rg.1 = ld.w %31 ; @15 + %34.1 = add.l %31, 0x4_l ; @62 + %rx.1 = ld.w %34.1 ; @17 + %ry.1 = ld.w %32 ; @19 + st.w %rg.1, %31 ; @20 %nx.1 = div.w %b, %a ; @21 %6.1 = mul.w %nx.1, %rx.1 ; @35 %7.1 = sub.w %ry.1, %6.1 ; @36 - st.w %7.1, %3.1 ; @25 - st.w %rx.1, %2.1 ; @27 - %14.1 = ld.l %r ; @43 - %16.1 = ld.l %2.1 ; @46 + st.w %7.1, %34.1 ; @25 + st.w %rx.1, %32 ; @27 + %14.1 = ld.l %31 ; @43 + %16.1 = ld.l %32 ; @46 jmp @29(%16.1, %14.1) @29(%30.1, %29.1): ret rax/%29.1, rdx/%30.1 diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv b/src/test/data/opt/vaarg1.vir.opt.sysv index ffdc7852..d0d00986 100644 --- a/src/test/data/opt/vaarg1.vir.opt.sysv +++ b/src/test/data/opt/vaarg1.vir.opt.sysv @@ -1,90 +1,73 @@ module vaarg1 export function $foo(b %11/rax, l %i/rdi, ...) { - %ap = slot 24, align 8 - %r = slot 16, align 8 %5 = slot 176, align 16 + %52 = slot 8, align 8 @19: - %6 = add.l %5, 0x8_l ; @20 - regstore rsi, %6 ; @21 - %7 = add.l %5, 0x10_l ; @22 - regstore rdx, %7 ; @23 - %8 = add.l %5, 0x18_l ; @24 - regstore rcx, %8 ; @25 - %9 = add.l %5, 0x20_l ; @26 - regstore r8, %9 ; @27 - %10 = add.l %5, 0x28_l ; @28 - regstore r9, %10 ; @29 - %12 = eq.b %11, 0x0_b ; @30 - br %12, @2, @18 + %6.1 = add.l %5, 0x8_l ; @20 + regstore rsi, %6.1 ; @21 + %7.1 = add.l %5, 0x10_l ; @22 + regstore rdx, %7.1 ; @23 + %8.1 = add.l %5, 0x18_l ; @24 + regstore rcx, %8.1 ; @25 + %9.1 = add.l %5, 0x20_l ; @26 + regstore r8, %9.1 ; @27 + %10.1 = add.l %5, 0x28_l ; @28 + regstore r9, %10.1 ; @29 + %12.1 = eq.b %11, 0x0_b ; @30 + br %12.1, @2, @18 @18: - %13 = add.l %5, 0x30_l ; @31 - regstore xmm0, %13 ; @32 - %14 = add.l %5, 0x40_l ; @33 - regstore xmm1, %14 ; @34 - %15 = add.l %5, 0x50_l ; @35 - regstore xmm2, %15 ; @36 - %16 = add.l %5, 0x60_l ; @37 - regstore xmm3, %16 ; @38 - %17 = add.l %5, 0x70_l ; @39 - regstore xmm4, %17 ; @40 - %18 = add.l %5, 0x80_l ; @41 - regstore xmm5, %18 ; @42 - %19 = add.l %5, 0x90_l ; @43 - regstore xmm6, %19 ; @44 - %20 = add.l %5, 0xa0_l ; @45 - regstore xmm7, %20 ; @46 + %13.1 = add.l %5, 0x30_l ; @31 + regstore xmm0, %13.1 ; @32 + %14.1 = add.l %5, 0x40_l ; @33 + regstore xmm1, %14.1 ; @34 + %15.1 = add.l %5, 0x50_l ; @35 + regstore xmm2, %15.1 ; @36 + %16.1 = add.l %5, 0x60_l ; @37 + regstore xmm3, %16.1 ; @38 + %17.1 = add.l %5, 0x70_l ; @39 + regstore xmm4, %17.1 ; @40 + %18.1 = add.l %5, 0x80_l ; @41 + regstore xmm5, %18.1 ; @42 + %19.1 = add.l %5, 0x90_l ; @43 + regstore xmm6, %19.1 ; @44 + %20.1 = add.l %5, 0xa0_l ; @45 + regstore xmm7, %20.1 ; @46 jmp @2 @2: - st.w 0x8_w, %ap ; @53 - %26 = add.l %ap, 0x4_l ; @54 - st.w 0x30_w, %26 ; @55 - %27 = stkargs ; @56 - %28 = add.l %ap, 0x8_l ; @57 - st.l %27, %28 ; @58 - %29 = add.l %ap, 0x10_l ; @59 - st.l %5, %29 ; @60 + st.w 0x8_w, %52 ; @53 + %57.1 = add.l %52, 0x4_l ; @92 + st.w 0x30_w, %57.1 ; @55 + %27.1 = stkargs ; @56 jmp @62 @62: - %31 = ld.w %26 ; @68 - %32 = le.w %31, 0xa0_w ; @69 - br %32, @63, @65 + jmp @63 @63: - %33 = ld.w %ap ; @70 - %34 = le.w %33, 0x28_w ; @71 - br %34, @64, @65 + %33.1 = ld.w %52 ; @70 + %34.1 = le.w %33.1, 0x28_w ; @71 + br %34.1, @64, @65 @64: - %37 = zext.l %31 ; @74 - %38 = add.l %5, %37 ; @75 - %39 = zext.l %33 ; @76 - %40 = add.l %5, %39 ; @77 - %41 = add.w %31, 0x10_l ; @78 - %42 = add.w %33, 0x8_l ; @79 - st.w %41, %26 ; @80 - st.w %42, %ap ; @81 - jmp @66(%38, %40) + %38.1 = add.l %5, 0x30_l ; @75 + %39.1 = zext.l %33.1 ; @76 + %40.1 = add.l %5, %39.1 ; @77 + %42.1 = add.w %33.1, 0x8_l ; @79 + st.w 0x40_w, %57.1 ; @80 + st.w %42.1, %52 ; @81 + jmp @66(%38.1, %40.1) @65: - %44 = ld.l %28 ; @83 - %45 = add.l %44, 0x8_l ; @84 - %46 = add.l %44, 0x10_l ; @85 - st.l %46, %28 ; @86 - jmp @66(%44, %45) -@66(%47, %48): - %49 = ld.l %47 ; @87 - st.l %49, %ap ; @88 - %50 = ld.l %48 ; @89 - st.l %50, %28 ; @91 + %45.1 = add.l %27.1, 0x8_l ; @84 + jmp @66(%27.1, %45.1) +@66(%47.1, %48.1): + %49.1 = ld.l %47.1 ; @87 + st.l %49.1, %52 ; @88 + %50.1 = ld.l %48.1 ; @89 jmp @61 @61: - %22 = ld.l %ap ; @47 - st.l %22, %r ; @48 - %24 = ld.l %28 ; @50 - %25 = add.l %r, 0x8_l ; @51 - st.l %24, %25 ; @52 - %f1.1 = ld.d %r ; @6 - %0 = add.d %f1.1, 1.234_d ; @13 - %2 = ftosi.d.l %0 ; @15 - %3 = add.l %24, %2 ; @16 - %4 = add.l %3, %i ; @17 - ret rax/%4 + st.l %49.1, %52 ; @48 + %f1.1 = ld.d %52 ; @6 + %0.1 = add.d %f1.1, 1.234_d ; @13 + %2.1 = ftosi.d.l %0.1 ; @15 + %3.1 = add.l %50.1, %2.1 ; @16 + %4.1 = add.l %3.1, %i ; @17 + ret rax/%4.1 } diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 index 6c5eb8c5..2660ce2a 100644 --- a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 +++ b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 @@ -1,20 +1,19 @@ module vaarg1 export function $foo { ; returns: rax - %ap = slot 24, align 8 - %r = slot 16, align 8 %5 = slot 176, align 16 + %52 = slot 8, align 8 @19: mov %11:b, al ; @20 - mov %i:l, rdi ; @121 + mov %i:l, rdi ; @120 mov qword ptr [%5 + 0x8], rsi ; @21 mov qword ptr [%5 + 0x10], rdx ; @23 mov qword ptr [%5 + 0x18], rcx ; @25 mov qword ptr [%5 + 0x20], r8 ; @27 mov qword ptr [%5 + 0x28], r9 ; @29 - test %11:b, %11:b ; @116 - je @2 ; @117 - jmp @18 ; @118 + test %11:b, %11:b ; @115 + je @2 ; @116 + jmp @18 ; @117 @18: movdqa xmmword ptr [%5 + 0x30], xmm0 ; @32 movdqa xmmword ptr [%5 + 0x40], xmm1 ; @34 @@ -24,62 +23,48 @@ export function $foo { ; returns: rax movdqa xmmword ptr [%5 + 0x80], xmm5 ; @42 movdqa xmmword ptr [%5 + 0x90], xmm6 ; @44 movdqa xmmword ptr [%5 + 0xa0], xmm7 ; @46 - jmp @2 ; @115 + jmp @2 ; @114 @2: - mov dword ptr [%ap], 0x8_w ; @53 - mov dword ptr [%ap + 0x4], 0x30_w ; @55 - lea %27:l, qword ptr [rbp + 0x10] ; @56 - mov qword ptr [%ap + 0x8], %27:l ; @58 - mov qword ptr [%ap + 0x10], %5:l ; @60 - jmp @62 ; @114 + mov dword ptr [%52], 0x8_w ; @53 + mov dword ptr [%52 + 0x4], 0x30_w ; @55 + lea %27.1:l, qword ptr [rbp + 0x10] ; @56 + jmp @62 ; @113 @62: - mov %31:w, dword ptr [%ap + 0x4] ; @68 - cmp %31:w, 0xa0_w ; @109 - jbe @63 ; @110 - jmp @65 ; @111 + jmp @63 ; @112 @63: - mov %33:w, dword ptr [%ap] ; @70 - cmp %33:w, 0x28_w ; @104 - jbe @64 ; @105 - jmp @65 ; @106 + mov %33.1:w, dword ptr [%52] ; @70 + cmp %33.1:w, 0x28_w ; @107 + jbe @64 ; @108 + jmp @65 ; @109 @65: - mov %44:l, qword ptr [%ap + 0x8] ; @83 - lea %45:l, qword ptr [%44 + 0x8] ; @84 - lea %46:l, qword ptr [%44 + 0x10] ; @85 - mov qword ptr [%ap + 0x8], %46:l ; @86 - mov %47:l, %44:l ; @101 - mov %48:l, %45:l ; @102 - jmp @66 ; @103 + lea %45.1:l, qword ptr [%27.1 + 0x8] ; @84 + mov %47.1:l, %27.1:l ; @104 + mov %48.1:l, %45.1:l ; @105 + jmp @66 ; @106 @64: - mov %37:w, %31:w ; @74 - lea %38:l, qword ptr [%5 + %37*1] ; @75 - mov %39:w, %33:w ; @76 - lea %40:l, qword ptr [%5 + %39*1] ; @77 - lea %41:w, qword ptr [%31 + 0x10] ; @78 - lea %42:w, qword ptr [%33 + 0x8] ; @79 - mov dword ptr [%ap + 0x4], %41:w ; @80 - mov dword ptr [%ap], %42:w ; @81 - mov %47:l, %38:l ; @98 - mov %48:l, %40:l ; @99 - jmp @66 ; @100 + lea %38.1:l, qword ptr [%5 + 0x30] ; @75 + mov %39.1:w, %33.1:w ; @76 + lea %40.1:l, qword ptr [%5 + %39.1*1] ; @77 + lea %42.1:w, qword ptr [%33.1 + 0x8] ; @79 + mov dword ptr [%52 + 0x4], 0x40_w ; @80 + mov dword ptr [%52], %42.1:w ; @81 + mov %47.1:l, %38.1:l ; @101 + mov %48.1:l, %40.1:l ; @102 + jmp @66 ; @103 @66: - mov %49:l, qword ptr [%47] ; @87 - mov qword ptr [%ap], %49:l ; @88 - mov %50:l, qword ptr [%48] ; @89 - mov qword ptr [%ap + 0x8], %50:l ; @91 - jmp @61 ; @97 + mov %49.1:l, qword ptr [%47.1] ; @87 + mov qword ptr [%52], %49.1:l ; @88 + mov %50.1:l, qword ptr [%48.1] ; @89 + jmp @61 ; @100 @61: - mov %22:l, qword ptr [%ap] ; @47 - mov qword ptr [%r], %22:l ; @48 - mov %24:l, qword ptr [%ap + 0x8] ; @50 - mov qword ptr [%r + 0x8], %24:l ; @52 - movsd %f1.1:d, qword ptr [%r] ; @6 - movsd %0:d, %f1.1:d ; @13 - addsd %0:d, qword ptr [rip + @94] ; @95 - .fp64 @94, 1.234 ; @96 - cvtsd2si %2:l, %0:d ; @15 - lea %3:l, qword ptr [%24 + %2*1] ; @16 - lea %4:l, qword ptr [%3 + %i*1] ; @17 - mov rax, %4:l ; @92 - ret ; @93 + mov qword ptr [%52], %49.1:l ; @48 + movsd %f1.1:d, qword ptr [%52] ; @6 + movsd %0.1:d, %f1.1:d ; @13 + addsd %0.1:d, qword ptr [rip + @97] ; @98 + .fp64 @97, 1.234 ; @99 + cvtsd2si %2.1:l, %0.1:d ; @15 + lea %3.1:l, qword ptr [%50.1 + %2.1*1] ; @16 + lea %4.1:l, qword ptr [%3.1 + %i*1] ; @17 + mov rax, %4.1:l ; @95 + ret ; @96 } diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc index a51e6a21..a601061e 100644 --- a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc @@ -2,69 +2,54 @@ module vaarg1 export function $foo { ; returns: rax @19: - push rbp ; @124 - mov rbp, rsp ; @125 - sub rsp, 0xe0_l ; @126 - mov qword ptr [rbp - 0xd8], rsi ; @21 - mov qword ptr [rbp - 0xd0], rdx ; @23 - mov qword ptr [rbp - 0xc8], rcx ; @25 - mov qword ptr [rbp - 0xc0], r8 ; @27 - mov qword ptr [rbp - 0xb8], r9 ; @29 - test al, al ; @116 - je @2 ; @117 + push rbp ; @122 + mov rbp, rsp ; @123 + sub rsp, 0xc0_l ; @124 + mov qword ptr [rbp - 0xb8], rsi ; @21 + mov qword ptr [rbp - 0xb0], rdx ; @23 + mov qword ptr [rbp - 0xa8], rcx ; @25 + mov qword ptr [rbp - 0xa0], r8 ; @27 + mov qword ptr [rbp - 0x98], r9 ; @29 + test al, al ; @115 + je @2 ; @116 @18: - movdqa xmmword ptr [rbp - 0xb0], xmm0 ; @32 - movdqa xmmword ptr [rbp - 0xa0], xmm1 ; @34 - movdqa xmmword ptr [rbp - 0x90], xmm2 ; @36 - movdqa xmmword ptr [rbp - 0x80], xmm3 ; @38 - movdqa xmmword ptr [rbp - 0x70], xmm4 ; @40 - movdqa xmmword ptr [rbp - 0x60], xmm5 ; @42 - movdqa xmmword ptr [rbp - 0x50], xmm6 ; @44 - movdqa xmmword ptr [rbp - 0x40], xmm7 ; @46 + movdqa xmmword ptr [rbp - 0x90], xmm0 ; @32 + movdqa xmmword ptr [rbp - 0x80], xmm1 ; @34 + movdqa xmmword ptr [rbp - 0x70], xmm2 ; @36 + movdqa xmmword ptr [rbp - 0x60], xmm3 ; @38 + movdqa xmmword ptr [rbp - 0x50], xmm4 ; @40 + movdqa xmmword ptr [rbp - 0x40], xmm5 ; @42 + movdqa xmmword ptr [rbp - 0x30], xmm6 ; @44 + movdqa xmmword ptr [rbp - 0x20], xmm7 ; @46 @2: - mov dword ptr [rbp - 0x30], 0x8_w ; @53 - mov dword ptr [rbp - 0x2c], 0x30_w ; @55 + mov dword ptr [rbp - 0x10], 0x8_w ; @53 + mov dword ptr [rbp - 0xc], 0x30_w ; @55 lea rax, qword ptr [rbp + 0x10] ; @56 - mov qword ptr [rbp - 0x28], rax ; @58 - lea rax, qword ptr [rbp - 0xe0] ; @60 - mov qword ptr [rbp - 0x20], rax ; @123 -@62: - mov esi, dword ptr [rbp - 0x2c] ; @68 - cmp esi, 0xa0_w ; @109 - ja @65 ; @110 @63: - mov edx, dword ptr [rbp - 0x30] ; @70 - cmp edx, 0x28_w ; @104 - jbe @64 ; @105 + mov edx, dword ptr [rbp - 0x10] ; @70 + cmp edx, 0x28_w ; @107 + jbe @64 ; @108 @65: - mov rcx, qword ptr [rbp - 0x28] ; @83 - lea rax, qword ptr [rcx + 0x8] ; @84 - lea rdx, qword ptr [rcx + 0x10] ; @85 - mov qword ptr [rbp - 0x28], rdx ; @86 - jmp @66 ; @103 + lea rcx, qword ptr [rax + 0x8] ; @84 + jmp @66 ; @106 @64: - lea rcx, qword ptr [rbp + rsi*1 - 0xe0] ; @75 - lea rax, qword ptr [rbp + rdx*1 - 0xe0] ; @77 - add esi, 0x10_w ; @78 + lea rax, qword ptr [rbp - 0x90] ; @75 + lea rcx, qword ptr [rbp + rdx*1 - 0xc0] ; @77 add edx, 0x8_w ; @79 - mov dword ptr [rbp - 0x2c], esi ; @80 - mov dword ptr [rbp - 0x30], edx ; @81 + mov dword ptr [rbp - 0xc], 0x40_w ; @80 + mov dword ptr [rbp - 0x10], edx ; @81 @66: - mov rcx, qword ptr [rcx] ; @87 - mov qword ptr [rbp - 0x30], rcx ; @88 - mov rax, qword ptr [rax] ; @89 - mov qword ptr [rbp - 0x28], rax ; @91 + mov rax, qword ptr [rax] ; @87 + mov qword ptr [rbp - 0x10], rax ; @88 + mov rcx, qword ptr [rcx] ; @89 @61: - mov rax, qword ptr [rbp - 0x30] ; @47 - mov qword ptr [rbp - 0x18], rax ; @48 - mov rcx, qword ptr [rbp - 0x28] ; @50 - mov qword ptr [rbp - 0x10], rcx ; @52 - movsd xmm0, qword ptr [rbp - 0x18] ; @6 - addsd xmm0, qword ptr [rip + @94] ; @95 - .fp64 @94, 1.234 ; @96 + mov qword ptr [rbp - 0x10], rax ; @48 + movsd xmm0, qword ptr [rbp - 0x10] ; @6 + addsd xmm0, qword ptr [rip + @97] ; @98 + .fp64 @97, 1.234 ; @99 cvtsd2si rax, xmm0 ; @15 add rax, rcx ; @16 add rax, rdi ; @17 - leave ; @127 - ret ; @93 + leave ; @125 + ret ; @96 } diff --git a/src/test/data/opt/vaarg2.vir.opt.sysv b/src/test/data/opt/vaarg2.vir.opt.sysv index 3f5d6d05..f1ff6b17 100644 --- a/src/test/data/opt/vaarg2.vir.opt.sysv +++ b/src/test/data/opt/vaarg2.vir.opt.sysv @@ -1,8 +1,8 @@ module vaarg2 export function $foo(b %9/rax, l %i/rdi, ...) { - %ap = slot 24, align 8 %3 = slot 176, align 16 + %74 = slot 8, align 8 @18: %4.1 = add.l %3, 0x8_l ; @19 regstore rsi, %4.1 ; @20 @@ -35,40 +35,31 @@ export function $foo(b %9/rax, l %i/rdi, ...) { regstore xmm7, %18.1 ; @45 jmp @2 @2: - st.w 0x8_w, %ap ; @52 - %24.1 = add.l %ap, 0x4_l ; @53 - st.w 0x30_w, %24.1 ; @54 + st.w 0x8_w, %74 ; @52 + %79.1 = add.l %74, 0x4_l ; @139 + st.w 0x30_w, %79.1 ; @54 %25.1 = stkargs ; @55 - %26.1 = add.l %ap, 0x8_l ; @56 - st.l %25.1, %26.1 ; @57 - %27.1 = add.l %ap, 0x10_l ; @58 - st.l %3, %27.1 ; @59 jmp @61 @61: - %28.1 = ld.w %ap ; @65 + %28.1 = ld.w %74 ; @65 %29.1 = le.w %28.1, 0x20_w ; @66 br %29.1, @62, @63 @62: %32.1 = zext.l %28.1 ; @69 %33.1 = add.l %3, %32.1 ; @70 %34.1 = add.w %28.1, 0x10_w ; @71 - st.w %34.1, %ap ; @72 + st.w %34.1, %74 ; @72 jmp @64(%33.1) @63: - %36.1 = ld.l %26.1 ; @74 - %37.1 = add.l %36.1, 0x10_l ; @75 - st.l %37.1, %26.1 ; @76 - jmp @64(%36.1) + jmp @64(%25.1) @64(%38.1): %39.1 = ld.l %38.1 ; @77 - st.l %39.1, %ap ; @78 + st.l %39.1, %74 ; @78 %40.1 = add.l %38.1, 0x8_l ; @79 %41.1 = ld.l %40.1 ; @80 - st.l %41.1, %26.1 ; @82 jmp @60 @60: - %20.1 = ld.l %ap ; @46 - %1.1 = add.l %20.1, %41.1 ; @15 + %1.1 = add.l %39.1, %41.1 ; @15 %2.1 = add.l %1.1, %i ; @16 ret rax/%2.1 } From 7884c2652871e135dda6457954f394d29dc84c7c Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Tue, 18 Nov 2025 21:04:21 -0500 Subject: [PATCH 29/62] Fix lowering of `ftosi/ftoui` These should use the `cvtt{sd,ss}2si` variants, not `cvt{sd,ss}2si`. --- src/lib/machine/x86/x86_amd64_common.ml | 12 +++++++ src/lib/machine/x86/x86_amd64_isel.ml | 8 ++--- src/lib/machine/x86/x86_amd64_regalloc.ml | 2 ++ src/test/data/opt/vaarg1.driver.sysv.amd64.c | 31 +++++++++++++++++-- src/test/data/opt/vaarg1.vir.opt.sysv.amd64 | 2 +- .../opt/vaarg1.vir.opt.sysv.amd64.regalloc | 2 +- 6 files changed, 49 insertions(+), 8 deletions(-) diff --git a/src/lib/machine/x86/x86_amd64_common.ml b/src/lib/machine/x86/x86_amd64_common.ml index 643228ab..44aca971 100644 --- a/src/lib/machine/x86/x86_amd64_common.ml +++ b/src/lib/machine/x86/x86_amd64_common.ml @@ -412,11 +412,13 @@ module Insn = struct | CMOVcc of cc | CMP | CVTSD2SI + | CVTTSD2SI | CVTSD2SS | CVTSI2SD | CVTSI2SS | CVTSS2SD | CVTSS2SI + | CVTTSS2SI | DIVSD | DIVSS | IMUL2 @@ -463,11 +465,13 @@ module Insn = struct | CMOVcc cc -> Format.fprintf ppf "cmov%a" pp_cc cc | CMP -> Format.fprintf ppf "cmp" | CVTSD2SI -> Format.fprintf ppf "cvtsd2si" + | CVTTSD2SI -> Format.fprintf ppf "cvttsd2si" | CVTSD2SS -> Format.fprintf ppf "cvtsd2ss" | CVTSI2SD -> Format.fprintf ppf "cvtsi2sd" | CVTSI2SS -> Format.fprintf ppf "cvtsi2ss" | CVTSS2SD -> Format.fprintf ppf "cvtss2sd" | CVTSS2SI -> Format.fprintf ppf "cvtss2si" + | CVTTSS2SI -> Format.fprintf ppf "cvttss2si" | DIVSD -> Format.fprintf ppf "divsd" | DIVSS -> Format.fprintf ppf "divss" | IMUL2 -> Format.fprintf ppf "imul" @@ -649,11 +653,13 @@ module Insn = struct | BSF | BSR | CVTSD2SI + | CVTTSD2SI | CVTSD2SS | CVTSI2SD | CVTSI2SS | CVTSS2SD | CVTSS2SI + | CVTTSS2SI | LEA | MOVSX | MOVSXD @@ -753,11 +759,13 @@ module Insn = struct | ADDSS | CMOVcc _ | CVTSD2SI + | CVTTSD2SI | CVTSD2SS | CVTSI2SD | CVTSI2SS | CVTSS2SD | CVTSS2SI + | CVTTSS2SI | DIVSD | DIVSS | LEA @@ -873,10 +881,12 @@ module Insn = struct | CMOVcc _ (* illegal *) | CMP | CVTSD2SI (* illegal *) + | CVTTSD2SI (* illegal *) | CVTSD2SS (* illegal *) | CVTSI2SD (* illegal *) | CVTSI2SS (* illegal *) | CVTSS2SI (* illegal *) + | CVTTSS2SI (* illegal *) | CVTSS2SD (* illegal *) | IMUL2 | LEA (* illegal *) @@ -963,11 +973,13 @@ module Insn = struct let cmov cc a b = Two (CMOVcc cc, a, b) let cmp a b = Two (CMP, a, b) let cvtsd2si a b = Two (CVTSD2SI, a, b) + let cvttsd2si a b = Two (CVTTSD2SI, a, b) let cvtsd2ss a b = Two (CVTSD2SS, a, b) let cvtsi2sd a b = Two (CVTSI2SD, a, b) let cvtsi2ss a b = Two (CVTSI2SS, a, b) let cvtss2sd a b = Two (CVTSS2SD, a, b) let cvtss2si a b = Two (CVTSS2SI, a, b) + let cvttss2si a b = Two (CVTTSS2SI, a, b) let divsd a b = Two (DIVSD, a, b) let divss a b = Two (DIVSS, a, b) let imul2 a b = Two (IMUL2, a, b) diff --git a/src/lib/machine/x86/x86_amd64_isel.ml b/src/lib/machine/x86/x86_amd64_isel.ml index 1b117ba1..469dcb3d 100644 --- a/src/lib/machine/x86/x86_amd64_isel.ml +++ b/src/lib/machine/x86/x86_amd64_isel.ml @@ -1738,9 +1738,9 @@ end = struct let xt' = ftosi_ty xt in match tf with | `f32 -> - !!![I.cvtss2si (Oreg (x, xt')) (Oreg (y, yt))] + !!![I.cvttss2si (Oreg (x, xt')) (Oreg (y, yt))] | `f64 -> - !!![I.cvtsd2si (Oreg (x, xt')) (Oreg (y, yt))] + !!![I.cvttsd2si (Oreg (x, xt')) (Oreg (y, yt))] | _ -> !!None let ftosi_rf32_x_y ti env = @@ -1769,9 +1769,9 @@ end = struct let xt' = ftoui_ty xt in match tf with | `f32 -> - !!![I.cvtss2si (Oreg (x, xt')) (Oreg (y, yt))] + !!![I.cvttss2si (Oreg (x, xt')) (Oreg (y, yt))] | `f64 -> - !!![I.cvtsd2si (Oreg (x, xt')) (Oreg (y, yt))] + !!![I.cvttsd2si (Oreg (x, xt')) (Oreg (y, yt))] | _ -> !!None let ftoui_rf32_x_y ti env = diff --git a/src/lib/machine/x86/x86_amd64_regalloc.ml b/src/lib/machine/x86/x86_amd64_regalloc.ml index a0998ab4..d0041e70 100644 --- a/src/lib/machine/x86/x86_amd64_regalloc.ml +++ b/src/lib/machine/x86/x86_amd64_regalloc.ml @@ -215,11 +215,13 @@ module Typed_writes = struct | BSR | CMOVcc _ | CVTSD2SI + | CVTTSD2SI | CVTSD2SS | CVTSI2SD | CVTSI2SS | CVTSS2SD | CVTSS2SI + | CVTTSS2SI | DIVSD | DIVSS | IMUL2 diff --git a/src/test/data/opt/vaarg1.driver.sysv.amd64.c b/src/test/data/opt/vaarg1.driver.sysv.amd64.c index 41584fa9..9d9482f7 100644 --- a/src/test/data/opt/vaarg1.driver.sysv.amd64.c +++ b/src/test/data/opt/vaarg1.driver.sysv.amd64.c @@ -8,6 +8,33 @@ struct S { extern long foo(long i, ...); int main() { - struct S s = {0.0, 1}; - assert(foo(1, s) == 3); + struct S s1 = {0.0, 1}; + assert(foo(1, s1) == 3); + + struct S s2 = {1.0, 1}; + assert(foo(1, s2) == 4); + + struct S s3 = {2.0, 5}; + assert(foo(10, s3) == 18); + + struct S s4 = {-2.5, 100}; + assert(foo(-4, s4) == 95); + + struct S s5 = {12.75, -20}; + assert(foo(0, s5) == -7); + + struct S s6 = {0.999, 0}; + assert(foo(1, s6) == 3); + + struct S s7 = {-0.1, 7}; + assert(foo(3, s7) == 11); + + struct S s8 = {-1.234, 42}; + assert(foo(0, s8) == 42); + + struct S s9 = {1000.5, 5000}; + assert(foo(1000, s9) == 7001); + + struct S s10 = {-100.9, -50}; + assert(foo(10, s10) == -139); } diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 index 2660ce2a..084f6d03 100644 --- a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 +++ b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 @@ -62,7 +62,7 @@ export function $foo { ; returns: rax movsd %0.1:d, %f1.1:d ; @13 addsd %0.1:d, qword ptr [rip + @97] ; @98 .fp64 @97, 1.234 ; @99 - cvtsd2si %2.1:l, %0.1:d ; @15 + cvttsd2si %2.1:l, %0.1:d ; @15 lea %3.1:l, qword ptr [%50.1 + %2.1*1] ; @16 lea %4.1:l, qword ptr [%3.1 + %i*1] ; @17 mov rax, %4.1:l ; @95 diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc index a601061e..1bc66f40 100644 --- a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc @@ -47,7 +47,7 @@ export function $foo { ; returns: rax movsd xmm0, qword ptr [rbp - 0x10] ; @6 addsd xmm0, qword ptr [rip + @97] ; @98 .fp64 @97, 1.234 ; @99 - cvtsd2si rax, xmm0 ; @15 + cvttsd2si rax, xmm0 ; @15 add rax, rcx ; @16 add rax, rdi ; @17 leave ; @125 From 58c60a02e736f4441f4aaed088defbf600c0e1ac Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Tue, 18 Nov 2025 23:15:27 -0500 Subject: [PATCH 30/62] Adds tests for Allen algebra `finished_by` had a typo --- src/lib/allen_interval_algebra.ml | 14 ++++-- .../coalesce_slots/coalesce_slots_impl.ml | 3 +- src/test/dune | 2 +- src/test/test_allen.ml | 47 +++++++++++++++++++ 4 files changed, 58 insertions(+), 8 deletions(-) create mode 100644 src/test/test_allen.ml diff --git a/src/lib/allen_interval_algebra.ml b/src/lib/allen_interval_algebra.ml index fa0060aa..711ed8e8 100644 --- a/src/lib/allen_interval_algebra.ml +++ b/src/lib/allen_interval_algebra.ml @@ -1,3 +1,5 @@ +open Core + (** Allen's Interval Algebra. {:https://en.wikipedia.org/wiki/Allen%27s_interval_algebra} @@ -19,6 +21,8 @@ type t = | After [@@deriving compare, equal, sexp] +let pp ppf t = Format.fprintf ppf "%a" Sexp.pp @@ sexp_of_t t + (** Returns the converse of the relation. *) let converse = function | Before -> After @@ -58,15 +62,15 @@ module Make(M : S) = struct let before a b = hi a < lo b [@@inline] let meets a b = hi a = lo b [@@inline] - let overlaps a b = lo a < lo b && lo b < hi a && hi a < hi b [@@inline] - let finished_by a b = lo b < lo a && hi a = hi b [@@inline] - let contains a b = lo a < lo b && hi b < hi a [@@inline] + let overlaps a b = lo a < lo b && hi a > lo b && hi a < hi b [@@inline] + let finished_by a b = lo a < lo b && hi a = hi b [@@inline] + let contains a b = lo a < lo b && hi a > hi b [@@inline] let starts a b = lo a = lo b && hi a < hi b [@@inline] let equal a b = lo a = lo b && hi a = hi b [@@inline] let started_by a b = lo a = lo b && hi a > hi b [@@inline] - let during a b = lo b < lo a && hi a < hi b [@@inline] + let during a b = lo a > lo b && hi a < hi b [@@inline] let finishes a b = lo a > lo b && hi a = hi b [@@inline] - let overlapped_by a b = lo b < lo a && lo a < hi b && hi b < hi a [@@inline] + let overlapped_by a b = lo a > lo b && lo a < hi b && hi a > hi b [@@inline] let met_by a b = lo a = hi b [@@inline] let after a b = lo a > hi b [@@inline] diff --git a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml index 3e90ef82..d48c279c 100644 --- a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml +++ b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml @@ -78,8 +78,7 @@ let equiv_range slots rs x y = let ry = Map.find_exn rs y in let a : Allen.t = Range.Algebra.relate rx ry in Logs.debug (fun m -> - m "%s: %a, %a: %a%!" - __FUNCTION__ Var.pp x Var.pp y Sexp.pp (Allen.sexp_of_t a)); + m "%s: %a, %a: %a%!" __FUNCTION__ Var.pp x Var.pp y Allen.pp a); match a with | Before | After -> true | _ -> false diff --git a/src/test/dune b/src/test/dune index c4688699..4ab88436 100644 --- a/src/test/dune +++ b/src/test/dune @@ -1,5 +1,5 @@ (tests - (names test_bv_interval test_opt test_type) + (names test_allen test_bv_interval test_opt test_type) (libraries ocamldiff ounit2 cgen shexp.process) (ocamlopt_flags -O2) (deps diff --git a/src/test/test_allen.ml b/src/test/test_allen.ml new file mode 100644 index 00000000..99727129 --- /dev/null +++ b/src/test/test_allen.ml @@ -0,0 +1,47 @@ +open Core +open OUnit2 +open Cgen +open Allen_interval_algebra + +type range = { + lo : int; + hi : int; +} + +let (--) lo hi = {lo; hi} + +module A = Make(struct + type point = int + type t = range + let lo t = t.lo + let hi t = t.hi + include Int.Replace_polymorphic_compare + end) + +let printer = Format.asprintf "%a" pp + +let test expect a b _ = + let got = A.relate a b in + assert_equal ~printer expect got; + let expect' = converse expect in + let got' = A.relate b a in + assert_equal ~msg:"Converse" ~printer expect' got' + +let suite = + "Allen interval algebra" >::: [ + "before" >:: test Before (1--3) (5--7); + "meets" >:: test Meets (1--3) (3--8); + "overlaps" >:: test Overlaps (1--5) (3--8); + "finished_by" >:: test Finished_by (1--10) (4--10); + "contains" >:: test Contains (1--10) (3--7); + "starts" >:: test Starts (1--3) (1--10); + "equal" >:: test Equal (2--7) (2--7); + "started_by" >:: test Started_by (1--10) (1--3); + "during" >:: test During (4--6) (1--10); + "finishes" >:: test Finishes (4--10) (1--10); + "overlapped_by" >:: test Overlapped_by (5--10) (1--7); + "met_by" >:: test Met_by (10--15) (5--10); + "after" >:: test After (10--15) (1--7); + ] + +let () = run_test_tt_main suite From 13066416f21c94a3ffc8089535c5ea7ffe775093 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Wed, 19 Nov 2025 16:53:13 -0500 Subject: [PATCH 31/62] Fix warnings --- src/lib/passes/resolve_constant_blk_args.ml | 1 - src/lib/passes/sroa/sroa.ml | 1 - src/lib/passes/sroa_coalesce_common.ml | 1 - 3 files changed, 3 deletions(-) diff --git a/src/lib/passes/resolve_constant_blk_args.ml b/src/lib/passes/resolve_constant_blk_args.ml index 7d5f99b5..69b062e6 100644 --- a/src/lib/passes/resolve_constant_blk_args.ml +++ b/src/lib/passes/resolve_constant_blk_args.ml @@ -1,5 +1,4 @@ open Core -open Regular.Std open Virtual open Phi_values diff --git a/src/lib/passes/sroa/sroa.ml b/src/lib/passes/sroa/sroa.ml index 49b8f4b8..d613f9b9 100644 --- a/src/lib/passes/sroa/sroa.ml +++ b/src/lib/passes/sroa/sroa.ml @@ -1,6 +1,5 @@ open Core open Virtual -open Scalars open Sroa_impl open Sroa_coalesce_common diff --git a/src/lib/passes/sroa_coalesce_common.ml b/src/lib/passes/sroa_coalesce_common.ml index 77ce6ed7..10c273d9 100644 --- a/src/lib/passes/sroa_coalesce_common.ml +++ b/src/lib/passes/sroa_coalesce_common.ml @@ -1,5 +1,4 @@ open Core -open Regular.Std open Virtual open Scalars From 3566480c5b60009c4761e52128d27a177a2b7cc5 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Wed, 19 Nov 2025 17:14:43 -0500 Subject: [PATCH 32/62] Fix partial redundancy calculation The previous implementation used a concept of computing the dominator subtree closure of the iterated dominance frontier. Up until now, I haven't noticed any counterexamples. It turns out that there is a much simpler calculation involving a DFS to determine this property. --- src/lib/egraph/egraph_input.ml | 28 ++--- src/lib/egraph/extractor/extractor_cfg.ml | 117 +++++++++--------- src/test/data/opt/int_pow.vir.opt | 62 +++++----- .../opt/int_pow.vir.opt.sysv.amd64.regalloc | 100 ++++++++------- src/test/data/opt/sink1.driver.sysv.amd64.c | 12 ++ src/test/data/opt/sink1.vir | 15 +++ src/test/data/opt/sink1.vir.opt | 14 +++ src/test/test_opt.ml | 2 + 8 files changed, 195 insertions(+), 155 deletions(-) create mode 100644 src/test/data/opt/sink1.driver.sysv.amd64.c create mode 100644 src/test/data/opt/sink1.vir create mode 100644 src/test/data/opt/sink1.vir.opt diff --git a/src/lib/egraph/egraph_input.ml b/src/lib/egraph/egraph_input.ml index a6f378a0..20ac64d7 100644 --- a/src/lib/egraph/egraph_input.ml +++ b/src/lib/egraph/egraph_input.ml @@ -32,19 +32,18 @@ module Phis = Phi_values.Make(Phis_lang)(Phis_domain) (* General information about the function we're translating. *) type t = { - fn : func; (* The function itself. *) - loop : loops; (* Loops analysis. *) - reso : resolver; (* Labels to blocks/insns. *) - cfg : Cfg.t; (* The CFG. *) - dom : Label.t Semi_nca.tree; (* Dominator tree. *) - domd : int Label.Table.t; (* Dominator tree depths. *) - pdom : Label.t Semi_nca.tree; (* Post-dominator tree. *) - rdom : Dominance.t; (* Fine-grained dominance relation. *) - df : Label.t Semi_nca.frontier; (* Dominance frontiers. *) - lst : Last_stores.t; (* Last stores analysis. *) - tenv : Typecheck.env; (* Typing environment. *) - phis : Phis.state; (* Block argument value sets. *) - rpo : Label.t -> int; (* Reverse post-order number. *) + fn : func; (* The function itself. *) + loop : loops; (* Loops analysis. *) + reso : resolver; (* Labels to blocks/insns. *) + cfg : Cfg.t; (* The CFG. *) + dom : Label.t Semi_nca.tree; (* Dominator tree. *) + domd : int Label.Table.t; (* Dominator tree depths. *) + pdom : Label.t Semi_nca.tree; (* Post-dominator tree. *) + rdom : Dominance.t; (* Fine-grained dominance relation. *) + lst : Last_stores.t; (* Last stores analysis. *) + tenv : Typecheck.env; (* Typing environment. *) + phis : Phis.state; (* Block argument value sets. *) + rpo : Label.t -> int; (* Reverse post-order number. *) } let init_dom_relation reso dom = @@ -95,10 +94,9 @@ let init fn tenv = let loop = Loops.analyze ~name:(Func.name fn) cfg in let dom = Semi_nca.compute (module Cfg) cfg Label.pseudoentry in let pdom = Semi_nca.compute (module Cfg) cfg Label.pseudoexit ~rev:true in - let df = Semi_nca.frontier (module Cfg) cfg dom in let rdom = init_dom_relation reso dom in let lst = init_last_stores cfg reso in let domd = dom_depths dom in let phis = Phis.analyze ~blk:(resolve_blk reso) cfg in let rpo = init_rpo cfg in - {fn; loop; reso; cfg; dom; domd; pdom; rdom; df; lst; tenv; phis; rpo} + {fn; loop; reso; cfg; dom; domd; pdom; rdom; lst; tenv; phis; rpo} diff --git a/src/lib/egraph/extractor/extractor_cfg.ml b/src/lib/egraph/extractor/extractor_cfg.ml index 57c4af4e..bae241c1 100644 --- a/src/lib/egraph/extractor/extractor_cfg.ml +++ b/src/lib/egraph/extractor/extractor_cfg.ml @@ -40,7 +40,6 @@ type env = { insn : Insn.op Label.Table.t; ctrl : ctrl Label.Table.t; news : placed Label.Table.t; - closure : Lset.t Label.Table.t; mutable cur : Label.t; mutable scp : scope; } @@ -49,7 +48,6 @@ let init () = { insn = Label.Table.create (); ctrl = Label.Table.create (); news = Label.Table.create(); - closure = Label.Table.create (); cur = Label.pseudoentry; scp = empty_scope; } @@ -330,38 +328,6 @@ let exp t env l e = | E (_, Ovastart _, _) -> invalid l e module Hoisting = struct - let (++) = Lset.union - let not_pseudo = Fn.non Label.is_pseudo - let descendants t = Semi_nca.Tree.descendants t.eg.input.dom - let dominates t = Semi_nca.Tree.is_descendant_of t.eg.input.dom - let frontier t = Semi_nca.Frontier.enum t.eg.input.df - let to_set = Fn.compose Lset.of_sequence @@ Seq.filter ~f:not_pseudo - - (* Effectively, we are computing the dominator-subtree closure - over the iterated dominance frontier of `l`. - - This helps answer the question of where the computation of a - given instruction is "available" in the CFG. - *) - let rec closure ?(self = true) t env l = - let c = match Hashtbl.find env.closure l with - | Some c -> c - | None -> - let c = - frontier t l |> Seq.filter ~f:not_pseudo |> - (* A block can be in its own dominance frontier, so - we need to avoid an infinite loop. *) - Seq.filter ~f:(Fn.non @@ Label.equal l) |> - (* Additionally, we don't want to follow back-edges. This - can happen when a node in our frontier is, for example, - a loop header. *) - Seq.filter ~f:(fun parent -> not @@ dominates t ~parent l) |> - Seq.map ~f:(closure t env) |> - Seq.fold ~init:(to_set @@ descendants t l) ~f:(++) in - Hashtbl.set env.closure ~key:l ~data:c; - c in - if self then Lset.add c l else Lset.remove c l - (* Try the real ID first before moving on to the canonical ID. This could happen if we rescheduled a newer term before we unioned it with an older term. *) @@ -371,12 +337,14 @@ module Hoisting = struct if id = cid then s else Common.movedof t.eg cid else s + let resolve_label t l = + match Resolver.resolve t.eg.input.reso l with + | Some `insn (_, b, _, _) -> Blk.label b + | Some `blk b -> Blk.label b + | None -> assert false + let moved_blks t id cid = - find_moved t id cid |> Lset.map ~f:(fun l -> - match Resolver.resolve t.eg.input.reso l with - | Some `insn (_, b, _, _) -> Blk.label b - | Some `blk b -> Blk.label b - | None -> assert false) + find_moved t id cid |> Lset.map ~f:(resolve_label t) let rec post_dominated t l bs = match Semi_nca.Tree.parent t.eg.input.pdom l with @@ -398,25 +366,64 @@ module Hoisting = struct Loops.mem t.eg.input.loop l && loop (Stack.singleton (l, Lset.empty)) + (* Given all of the uses we know of, compute the points + where the definition will be killed: + + kills = {k | ∃ u ∈ uses s.t. k ∈ succs(u) ∧ k ∉ uses} + *) + let compute_kills t uses = + Lset.fold uses ~init:Lset.empty ~f:(fun init u -> + Cfg.Node.succs u t.eg.input.cfg |> + Seq.filter ~f:(Fn.non @@ Lset.mem uses) |> + Seq.fold ~init ~f:Lset.add) + + (* See if there exists a path, starting from `l`, that avoids + touching any block in `uses` and either reaches the end of + the function, or one of the blocks in `kills`. + + `kills` is the set of blocks where, if we were to place the + instruction at `l`, its live range would end. + + pre: `kills` and `uses` are disjoint + *) + let is_partial_redundancy_pathwise t l id cid ~uses = + let kills = compute_kills t uses in + let rec loop q = match Stack.pop q with + | None -> false + | Some (n, vis) when Lset.mem vis n -> loop q + | Some (n, _) when Lset.mem uses n -> loop q + | Some (n, _) when Lset.mem kills n -> true + | Some (n, _) when Label.(n = pseudoexit) -> true + | Some (n, vis) -> + let vis = Lset.add vis n in + Cfg.Node.succs n t.eg.input.cfg |> + Seq.iter ~f:(fun s -> Stack.push q (s, vis)); + loop q in + let res = loop @@ Stack.singleton (l, Lset.empty) in + Logs.debug (fun m -> + m "%s: l=%a, id=%d cid=%d, uses=%s, kills=%s, res=%b%!" + __FUNCTION__ Label.pp l id cid + (Lset.to_list uses |> List.to_string ~f:Label.to_string) + (Lset.to_list kills |> List.to_string ~f:Label.to_string) + res); + res + (* When we "move" duplicate nodes up to the LCA (lowest common ancestor) in the dominator tree, we might be introducing a partial redundancy. This means that, at the LCA, the node is not going to be used on all paths that are dominated by it, so we need to do a simple analysis to see if this is the case. *) - let is_partial_redundancy t env l id cid = + let is_partial_redundancy t l id cid = (* If this node is deliberately placed here then allow it. *) not (Common.is_pinned t.eg id) && begin (* Get the blocks associated with the labels that were "moved" for this node. *) - let l = match Resolver.resolve t.eg.input.reso l with - | Some `insn (_, b, _, _) -> Blk.label b - | Some `blk _ -> l - | None -> assert false in let bs = moved_blks t id cid in (* An empty set means that nobody uses this value. *) Lset.is_empty bs || begin (* If we're being used in the candidate block then this is trivially not a partial redundancy. *) + let l = resolve_label t l in not (Lset.mem bs l) && begin (* If one of these blocks post-dominates the block that we're moving to, then it is safe to allow the move to happen, @@ -430,26 +437,16 @@ module Hoisting = struct without visiting any of the blocks we moved from. *) exists_bypass t l bs else - (* For each of these blocks, get its reflexive transitive - closure in the dominator tree, and union them together. *) - let a = Lset.fold bs ~init:Lset.empty - ~f:(fun acc l -> acc ++ closure t env l) in - (* Get the non-reflexive transitive closure of the block - that we moved to. *) - let b = closure t env l ~self:false in - (* If these sets are not equal, then we have a partial - redundancy, and thus need to duplicate code. In the - case where the closure includes our target block `l`, - we want to exclude it, since the closure for `l` will - exclude itself. *) - not @@ Lset.equal (Lset.remove a l) b + (* Check if `bs` intersects on all paths where `id` can + be used. *) + is_partial_redundancy_pathwise t l id cid ~uses:bs end end end - let should_skip t env l id cid = + let should_skip t l id cid = Z.testbit t.impure cid || - is_partial_redundancy t env l id cid + is_partial_redundancy t l id cid (* If any nodes got moved up to this label, then we should check to see if it is eligible for this code motion optimization. @@ -471,7 +468,7 @@ module Hoisting = struct Context.Seq.iter ~f:(fun (id, cid) -> match extract t id with | None -> extract_fail l id | Some e -> - Context.unless (should_skip t env l id cid) @@ fun () -> + Context.unless (should_skip t l id cid) @@ fun () -> pure t env e >>| ignore) end diff --git a/src/test/data/opt/int_pow.vir.opt b/src/test/data/opt/int_pow.vir.opt index b279f5cc..f57d0cfe 100644 --- a/src/test/data/opt/int_pow.vir.opt +++ b/src/test/data/opt/int_pow.vir.opt @@ -11,19 +11,23 @@ export function l $int_pow(l %base, l %exponent) { %2 = mul.l %1, %1 ; @57 %3 = mul.l %2, %1 ; @58 %4 = and.l %exponent.1, 0x3_l ; @59 - %5 = mul.l %ret.2, %1 ; @60 - switch.l %4, @21(%ret.2) [0x1_l -> @21(%5), + switch.l %4, @21(%ret.2) [0x1_l -> @14, 0x2_l -> @15, 0x3_l -> @16] +@14: + %11 = mul.l %ret.2, %1 ; @66 + jmp @21(%11) @15: - %9 = mul.l %5, %1 ; @64 - jmp @21(%9) + %9 = mul.l %ret.2, %1 ; @64 + %10 = mul.l %9, %1 ; @65 + jmp @21(%10) @16: - %7 = mul.l %5, %2 ; @62 + %6 = mul.l %ret.2, %1 ; @61 + %7 = mul.l %6, %2 ; @62 jmp @21(%7) @21(%ret.3): - %6 = asr.l %exponent.1, 0x2_l ; @61 - jmp @3(%ret.3, %3, %1, %6) + %5 = asr.l %exponent.1, 0x2_l ; @60 + jmp @3(%ret.3, %3, %1, %5) @10: ret %ret.2 } @@ -32,31 +36,31 @@ export function l $int_pow_alt(l %base, l %exponent) { %mul = slot 32, align 8 @27: st.l 0x1_l, %mul ; @30 - %10 = add.l %mul, 0x8_l ; @65 - st.l %base, %10 ; @32 - %11 = add.l %mul, 0x18_l ; @66 - st.l 0x1_l, %11 ; @34 - %12 = add.l %mul, 0x10_l ; @67 + %12 = add.l %mul, 0x8_l ; @67 + st.l %base, %12 ; @32 + %13 = add.l %mul, 0x18_l ; @68 + st.l 0x1_l, %13 ; @34 + %14 = add.l %mul, 0x10_l ; @69 jmp @28(0x1_l, %exponent) @28(%ret.2, %exponent.1): - %13 = ne.l %exponent.1, 0x0_l ; @68 - br %13, @35, @36 + %15 = ne.l %exponent.1, 0x0_l ; @70 + br %15, @35, @36 @35: - %mul1.1 = ld.l %10 ; @41 - %mul3.1 = ld.l %11 ; @42 - %14 = mul.l %mul1.1, %mul3.1 ; @69 - st.l %14, %10 ; @44 - %15 = mul.l %14, %14 ; @70 - st.l %15, %12 ; @46 - %16 = mul.l %15, %14 ; @71 - st.l %16, %11 ; @48 - %17 = and.l %exponent.1, 0x3_l ; @72 - %18 = lsl.l %17, 0x3_l ; @73 - %19 = add.l %mul, %18 ; @74 - %mule.1 = ld.l %19 ; @52 - %20 = mul.l %ret.2, %mule.1 ; @75 - %21 = asr.l %exponent.1, 0x2_l ; @76 - jmp @28(%20, %21) + %mul1.1 = ld.l %12 ; @41 + %mul3.1 = ld.l %13 ; @42 + %16 = mul.l %mul1.1, %mul3.1 ; @71 + st.l %16, %12 ; @44 + %17 = mul.l %16, %16 ; @72 + st.l %17, %14 ; @46 + %18 = mul.l %17, %16 ; @73 + st.l %18, %13 ; @48 + %19 = and.l %exponent.1, 0x3_l ; @74 + %20 = lsl.l %19, 0x3_l ; @75 + %21 = add.l %mul, %20 ; @76 + %mule.1 = ld.l %21 ; @52 + %22 = mul.l %ret.2, %mule.1 ; @77 + %23 = asr.l %exponent.1, 0x2_l ; @78 + jmp @28(%22, %23) @36: ret %ret.2 } diff --git a/src/test/data/opt/int_pow.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/int_pow.vir.opt.sysv.amd64.regalloc index d20e0a65..364833bc 100644 --- a/src/test/data/opt/int_pow.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/int_pow.vir.opt.sysv.amd64.regalloc @@ -2,75 +2,73 @@ module int_pow export function $int_pow { ; returns: rax @2: - mov rcx, rdi ; @119 - mov r8d, 0x1_w ; @121 - mov r9d, 0x1_w ; @122 + mov rdx, rsi ; @123 + mov r8d, 0x1_w ; @124 + mov esi, 0x1_w ; @125 @3: - test rsi, rsi ; @114 - jne @9 ; @115 + test rdx, rdx ; @117 + jne @9 ; @118 @10: - mov rax, r8 ; @112 - ret ; @113 + mov rax, r8 ; @115 + ret ; @116 @9: - imul rcx, r9 ; @111 - mov rdx, rcx ; @57 - imul rdx, rcx ; @110 - mov r9, rdx ; @58 - imul r9, rcx ; @109 - mov rax, rsi ; @59 - and rax, 0x3_l ; @108 - mov rdi, r8 ; @60 - imul rdi, rcx ; @107 - test rax, rax ; @96 - je @77 ; @97 - dec rax ; @99 - cmp rax, 0x2_l ; @100 - ja @77 ; @101 - lea r10, qword ptr [rip + @95] ; @102 - movsxd rax, dword ptr [r10 + rax*4] ; @103 - add rax, r10 ; @104 - jmp rax ; @105 - .tbl @95 [@21, @15, @16] ; @106 -@77: - mov rdi, r8 ; @93 - jmp @21 ; @94 + imul rdi, rsi ; @114 + mov rcx, rdi ; @57 + imul rcx, rdi ; @113 + mov rsi, rcx ; @58 + imul rsi, rdi ; @112 + mov rax, rdx ; @59 + and rax, 0x3_l ; @111 + je @21 ; @101 + dec rax ; @103 + cmp rax, 0x2_l ; @104 + ja @21 ; @105 + lea r9, qword ptr [rip + @99] ; @106 + movsxd rax, dword ptr [r9 + rax*4] ; @107 + add rax, r9 ; @108 + jmp rax ; @109 + .tbl @99 [@14, @15, @16] ; @110 @16: - imul rdi, rdx ; @90 - jmp @21 ; @89 + imul r8, rdi ; @96 + imul r8, rcx ; @95 + jmp @21 ; @94 @15: - imul rdi, rcx ; @87 + imul r8, rdi ; @92 + imul r8, rdi ; @91 + jmp @21 ; @90 +@14: + imul r8, rdi ; @88 @21: - sar rsi, 0x2_b ; @84 - mov r8, rdi ; @79 - jmp @3 ; @83 + sar rdx, 0x2_b ; @85 + jmp @3 ; @84 } export function $int_pow_alt { ; returns: rax @27: - sub rsp, 0x28_l ; @148 - mov qword ptr [rsp], 0x1_l ; @147 + sub rsp, 0x28_l ; @151 + mov qword ptr [rsp], 0x1_l ; @150 mov qword ptr [rsp + 0x8], rdi ; @32 mov qword ptr [rsp + 0x18], 0x1_l ; @34 - mov eax, 0x1_w ; @143 + mov eax, 0x1_w ; @146 @28: - test rsi, rsi ; @138 - jne @35 ; @139 + test rsi, rsi ; @141 + jne @35 ; @142 @36: - add rsp, 0x28_l ; @149 - ret ; @137 + add rsp, 0x28_l ; @152 + ret ; @140 @35: mov rdx, qword ptr [rsp + 0x8] ; @41 - imul rdx, qword ptr [rsp + 0x18] ; @135 + imul rdx, qword ptr [rsp + 0x18] ; @138 mov qword ptr [rsp + 0x8], rdx ; @44 - mov rcx, rdx ; @70 - imul rcx, rdx ; @134 + mov rcx, rdx ; @72 + imul rcx, rdx ; @137 mov qword ptr [rsp + 0x10], rcx ; @46 - imul rcx, rdx ; @133 + imul rcx, rdx ; @136 mov qword ptr [rsp + 0x18], rcx ; @48 - mov rcx, rsi ; @72 - and rcx, 0x3_l ; @132 + mov rcx, rsi ; @74 + and rcx, 0x3_l ; @135 mov rcx, qword ptr [rsp + rcx*8] ; @52 - imul rax, rcx ; @130 - sar rsi, 0x2_b ; @129 - jmp @28 ; @128 + imul rax, rcx ; @133 + sar rsi, 0x2_b ; @132 + jmp @28 ; @131 } diff --git a/src/test/data/opt/sink1.driver.sysv.amd64.c b/src/test/data/opt/sink1.driver.sysv.amd64.c new file mode 100644 index 00000000..4bfa3fd1 --- /dev/null +++ b/src/test/data/opt/sink1.driver.sysv.amd64.c @@ -0,0 +1,12 @@ +#include + +extern int foo(int x); + +int main() { + assert(foo(0) == 1); + assert(foo(1) == 0); + assert(foo(-1) == 0); + assert(foo(2) == 0); + assert(foo(1234) == 0); + assert(foo(-999) == 0); +} diff --git a/src/test/data/opt/sink1.vir b/src/test/data/opt/sink1.vir new file mode 100644 index 00000000..4d261a19 --- /dev/null +++ b/src/test/data/opt/sink1.vir @@ -0,0 +1,15 @@ +module sink1 + +export function w $foo(w %x) { +@start: + %y = add.w %x, 1_w + %c = eq.w %x, 0_w + br %c, @zero, @notzero +@zero: + %c = eq.w %y, 1_w + br %c, @one, @notzero +@one: + ret 1_w +@notzero: + ret 0_w +} diff --git a/src/test/data/opt/sink1.vir.opt b/src/test/data/opt/sink1.vir.opt new file mode 100644 index 00000000..8d310f66 --- /dev/null +++ b/src/test/data/opt/sink1.vir.opt @@ -0,0 +1,14 @@ +module sink1 + +export function w $foo(w %x) { +@2: + %2 = eq.w %x, 0x0_w ; @11 + br %2, @3, @9(0x0_w) +@3: + %3 = add.w %x, 0x1_w ; @12 + %4 = eq.w %3, 0x1_w ; @13 + %5 = flag.w %4 ; @14 + jmp @9(%5) +@9(%0): + ret %0 +} diff --git a/src/test/test_opt.ml b/src/test/test_opt.ml index ec528730..dbb22122 100644 --- a/src/test/test_opt.ml +++ b/src/test/test_opt.ml @@ -308,6 +308,7 @@ let opt_suite = "Test optimizations" >::: [ "Spill test 2" >:: test "spill2"; "Parallel moves" >:: test "parallel"; "SROA" >:: test "sroa"; + "Sink 1" >:: test "sink1"; ] let abi_suite = "Test ABI lowering" >::: [ @@ -443,6 +444,7 @@ let native_suite = "Test native code" >::: [ "Slot promotion 2 (GCD, partial) (SysV AMD64)" >:: test_sysv_amd64_native "promote2-partial"; "Struct in a block argument (SysV AMD64)" >:: test_sysv_amd64_native "sumphi"; "Returning, passing, and dereferencing a struct (SysV AMD64)" >:: test_sysv_amd64_native "unref"; + "Sink (SysV AMD64)" >:: test_sysv_amd64_native "sink1"; ] let () = run_test_tt_main @@ test_list [ From 58e5dc851457ef953738baa62ec99a6ad22169be Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Wed, 19 Nov 2025 23:56:39 -0500 Subject: [PATCH 33/62] Some tidying, more logging --- src/lib/egraph/extractor/extractor_cfg.ml | 55 ++++++++++++++-------- src/lib/egraph/extractor/extractor_core.ml | 16 +++---- src/lib/regalloc/regalloc_irc.ml | 39 +++++++++++++-- src/lib/regalloc/regalloc_irc_state.ml | 9 +++- 4 files changed, 87 insertions(+), 32 deletions(-) diff --git a/src/lib/egraph/extractor/extractor_cfg.ml b/src/lib/egraph/extractor/extractor_cfg.ml index bae241c1..bc89bc02 100644 --- a/src/lib/egraph/extractor/extractor_cfg.ml +++ b/src/lib/egraph/extractor/extractor_cfg.ml @@ -91,6 +91,9 @@ let fresh env canon real = let* x = Context.Var.fresh in let+ l = Context.Label.fresh in env.scp <- Id.Tree.set env.scp ~key:canon ~data:(x, l); + Logs.debug (fun m -> + m "%s: placing fresh %a at %a: env.cur=%a canon=%d, real=%d%!" + __FUNCTION__ Var.pp x Label.pp l Label.pp env.cur canon real); Hashtbl.update env.news env.cur ~f:(function | Some p -> add_placed p real l | None -> create_placed real l); @@ -105,14 +108,17 @@ let insn t env a f = let+ op = f x in upd env.insn l op; `var x +[@@specialise] let insn' env l f = let+ op = f () in upd env.insn l op +[@@specialise] let ctrl env l f = let+ c = f () in upd env.ctrl l c +[@@specialise] let sel e x ty c y n = match c with | `var c -> !!(`sel (x, ty, c, y, n)) @@ -120,6 +126,8 @@ let sel e x ty c y n = match c with | `bool false -> !!(`uop (x, `copy ty, n)) | _ -> invalid_pure e +let (let@) x f = x f [@@specialise] [@@inline] + let rec pure t env e : operand Context.t = let pure = pure t env in let insn = insn t env in @@ -128,7 +136,7 @@ let rec pure t env e : operand Context.t = | E (a, Obinop b, [l; r]) -> let* l = pure l in let* r = pure r in - insn a @@ fun x -> + let@ x = insn a in !!(`bop (x, b, l, r)) | E (_, Obool b, []) -> !!(`bool b) | E (_, Ocall (x, _), _) -> !!(`var x) @@ -139,13 +147,13 @@ let rec pure t env e : operand Context.t = let* c = pure c in let* y = pure y in let* n = pure n in - insn a @@ fun x -> + let@ x = insn a in sel e x ty c y n | E (_, Osingle s, []) -> !!(`float s) | E (_, Osym (s, n), []) -> !!(`sym (s, n)) | E (a, Ounop u, [y]) -> let* y = pure y in - insn a @@ fun x -> + let@ x = insn a in !!(`uop (x, u, y)) | E (_, Ovaarg (x, _), [_]) -> !!(`var x) | E (_, Ovar x, []) -> !!(`var x) @@ -255,51 +263,51 @@ let exp t env l e = let* c = pure c in let* y = dst y in let* n = dst n in - ctrl @@ fun () -> + let@ () = ctrl in br l e c y n | E (_, Ocall0 _, [f; args; vargs]) -> let* f = global t env f in let* args = callargs t env args in let* vargs = callargs t env vargs in - insn @@ fun () -> + let@ () = insn in !!(`call (None, f, args, vargs)) | E (_, Ocall (x, ty), [f; args; vargs]) -> let* f = global t env f in let* args = callargs t env args in let* vargs = callargs t env vargs in - insn @@ fun () -> + let@ () = insn in !!(`call (Some (x, ty), f, args, vargs)) | E (_, Oload (x, t), [y]) -> let* y = pure y in - insn @@ fun () -> + let@ () = insn in !!(`load (x, t, y)) | E (_, Ojmp, [d]) -> let* d = dst d in - ctrl @@ fun () -> + let@ () = ctrl in !!(`jmp d) | E (_, Oret, [x]) -> let* x = pure x in - ctrl @@ fun () -> + let@ () = ctrl in !!(`ret (Some x)) | E (_, Oset _, [y]) -> pure y >>| ignore | E (_, Ostore (t, _), [v; x]) -> let* v = pure v in let* x = pure x in - insn @@ fun () -> + let@ () = insn in !!(`store (t, v, x)) | E (_, Osw ty, i :: d :: tbl) -> let* i = pure i in let* d = sw_default t env l d in let* tbl = table t env tbl ty in - ctrl @@ fun () -> + let@ () = ctrl in sw l e ty i d tbl | E (_, Ovaarg (x, t), [a]) -> let* a = pure a in - insn @@ fun () -> + let@ () = insn in vaarg l e x t a | E (_, Ovastart _, [a]) -> let* a = pure a in - insn @@ fun () -> + let@ () = insn in vastart l e a (* The rest are rejected. *) | E (_, Oaddr _, _) @@ -353,7 +361,7 @@ module Hoisting = struct (* See if there exists a cycle starting from `l` that does not intersect with `bs`. *) - let exists_bypass t l bs = + let exists_bypass t l id cid bs = let rec loop q = match Stack.pop q with | None -> false | Some (n, vis) when Lset.mem vis n -> @@ -363,8 +371,15 @@ module Hoisting = struct Cfg.Node.succs n t.eg.input.cfg |> Seq.iter ~f:(fun s -> Stack.push q (s, vis)); loop q in - Loops.mem t.eg.input.loop l && - loop (Stack.singleton (l, Lset.empty)) + let res = + Loops.mem t.eg.input.loop l && + loop (Stack.singleton (l, Lset.empty)) in + Logs.debug (fun m -> + m "%s: %a is post-dominated: bs=%s, id=%d, cid=%d, res=%b%!" + __FUNCTION__ Label.pp l + (Lset.to_list bs |> List.to_string ~f:Label.to_string) + id cid res); + res (* Given all of the uses we know of, compute the points where the definition will be killed: @@ -416,6 +431,9 @@ module Hoisting = struct let is_partial_redundancy t l id cid = (* If this node is deliberately placed here then allow it. *) not (Common.is_pinned t.eg id) && begin + Logs.debug (fun m -> + m "%s: checking %a, id=%d, cid=%d%!" + __FUNCTION__ Label.pp l id cid); (* Get the blocks associated with the labels that were "moved" for this node. *) let bs = moved_blks t id cid in @@ -435,7 +453,7 @@ module Hoisting = struct if post_dominated t l bs then (* Check if we can reach the target block via a backedge without visiting any of the blocks we moved from. *) - exists_bypass t l bs + exists_bypass t l id cid bs else (* Check if `bs` intersects on all paths where `id` can be used. *) @@ -530,8 +548,7 @@ let find_insn env l = Hashtbl.find env.insn l |> Option.map ~f:(Insn.create ~label:l) (* Find any new instructions to be prepended directly before label `l`. - Note that the ordering is, by default, from oldest to newest ID, but - this can be reversed for an efficient `rev_append` as seen below. *) + The order can be reversed for an efficient `rev_append` as seen below. *) let find_news ?(rev = false) env l = Hashtbl.find env.news l |> Option.value_map ~default:[] ~f:(fun p -> diff --git a/src/lib/egraph/extractor/extractor_core.ml b/src/lib/egraph/extractor/extractor_core.ml index 00e8bf55..33bbd9f7 100644 --- a/src/lib/egraph/extractor/extractor_core.ml +++ b/src/lib/egraph/extractor/extractor_core.ml @@ -3,14 +3,14 @@ open Egraph_common open Monads.Std open Virtual -(* We store the canonical and real IDs to help us determine - the ordering when reifying back to the CFG representation. - - Canonical IDs help us extract the best term, but the real - ID of the term determines the ordering; in particular, we - order from oldest to newest. This makes sense since the way - we build terms in the e-graph will be such that terms with - a newer ID will always depend on an ID that is older. +(* Keep track of the provenance of extracted nodes. + + We were previously using the real IDs for determining + the order by which we place new instructions, but that + is no longer guaranteed. In principle, lower IDs never + depend on higher IDs to compute their results, but when + we reschedule instructions in the CFG this is no longer + useful. *) type prov = | Label of Label.t diff --git a/src/lib/regalloc/regalloc_irc.ml b/src/lib/regalloc/regalloc_irc.ml index 3de447de..eabc40ed 100644 --- a/src/lib/regalloc/regalloc_irc.ml +++ b/src/lib/regalloc/regalloc_irc.ml @@ -165,7 +165,10 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct simplifyWorklist := simplifyWorklist \ {n} *) let n = take_one t.wsimplify in (* push(n, selectStack) *) - if can_be_colored t n then Stack.push t.select n; + if can_be_colored t n then begin + Logs.debug (fun m -> m "%s: selecting %a%!" __FUNCTION__ Rv.pp n); + Stack.push t.select n; + end; (* forall m \in Adjacent(n) *) adjacent t n |> Set.iter ~f:(decrement_degree t) @@ -227,7 +230,11 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct of `u`, leaving the degree of `t` unchanged. *) let combine_edge t u v = - if has_edge t u v then + let e = has_edge t u v in + Logs.debug (fun m -> + m "%s: combining edge u=%a, v=%a, has_edge=%b%!" + __FUNCTION__ Rv.pp u Rv.pp v e); + if e then decrement_degree t v else begin add_adjlist t u v; @@ -238,6 +245,9 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct (* Combine `v` with `u` in the interference graph, where `u` is the destination. *) let combine t u v = + Logs.debug (fun m -> + m "%s: combining u=%a with v=%a%!" + __FUNCTION__ Rv.pp u Rv.pp v); (* if v \in freezeWorklist *) if Hash_set.mem t.wfreeze v then (* freezeWorklist := freezeWorklist \ {v} *) @@ -308,12 +318,12 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct Lset.to_sequence t.wmoves |> Seq.map ~f:(fun m -> m, move_priority t m) |> Seq.max_elt ~compare:(fun (_, a) (_, b) -> Float.compare a b) |> - Option.value_exn |> fst + Option.value_exn (* pre: wmoves is not empty *) let coalesce t = (* let m_(=copy(x,y)) \in worklistMoves *) - let m = pick_move t in + let m, score = pick_move t in let c = Hashtbl.find_exn t.copies m in (* x := GetAlias(x) *) let x = alias t c.dst in @@ -324,6 +334,9 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct else let (u,v) = (x,y) *) let u, v = if exclude_from_coloring t y then y, x else x, y in + Logs.debug (fun m_ -> + m_ "%s: looking at move %a, score=%g, u=%a, v=%a%!" + __FUNCTION__ Label.pp m score Rv.pp u Rv.pp v); (* worklistMoves := worklistMoves \ {m} *) t.wmoves <- Lset.remove t.wmoves m; (* if u = v then *) @@ -337,6 +350,8 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct exclude_from_coloring t v || (* (u,v) \in adjSet *) has_edge t u v then begin + Logs.debug (fun m_ -> + m_ "%s: constraining %a%!" __FUNCTION__ Label.pp m); (* constrainedMoves := constrainedMoves U {m} *) t.kmoves <- Lset.add t.kmoves m; (* addWorkList(u) *) @@ -381,6 +396,9 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct (* worklistMoves := worklistMoves \ {m} *) t.wmoves <- Lset.remove t.wmoves m; (* frozenMoves := frozenMoves U {m} *) + Logs.debug (fun m_ -> + m_ "%s: freezing move %a, v=%a" + __FUNCTION__ Label.pp m Rv.pp v); t.fmoves <- Lset.add t.fmoves m; (* if NodeMoves(v) = {} ^ degree[v] < K *) if Lset.is_empty (node_moves t v) @@ -396,6 +414,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct (* let u \in freezeWorklist freezeWorklist := freezeWorklist \ {u} *) let u = take_one t.wfreeze in + Logs.debug (fun m -> m "%s: frozen node u=%a%!" __FUNCTION__ Rv.pp u); (* simplifyWorklist := simplifyWorklist U {u} *) Hash_set.add t.wsimplify u; (* FreezeMoves(u) *) @@ -403,6 +422,9 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let select_spill t = Pairing_heap.pop t.wspill |> Option.iter ~f:(fun m -> + Logs.debug (fun m_ -> + m_ "%s: selecting spill node %a%!" + __FUNCTION__ Rv.pp m); (* spillWorklist := spillWorklist \ {m} *) Hashtbl.remove t.wspill_elts m; (* simplifyWorklist := simplifyworklist U {m} *) @@ -470,9 +492,15 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let r = Rv.var GPR v in match find_reusable_slot t m n size with | Some r' -> + Logs.debug (fun m -> + m "%s: re-using slot %a for spilled node %a%!" + __FUNCTION__ Rv.pp r' Rv.pp r); Hashtbl.set t.slots ~key:r ~data:r'; acc, m | None -> + Logs.debug (fun m -> + m "%s: spilling %a to new slot%!" + __FUNCTION__ Rv.pp r); let s = Virtual.Slot.create_exn v ~size ~align:size in Hashtbl.set t.slots ~key:r ~data:r; s :: acc, Map.add_multi m ~key:size ~data:r) in @@ -617,6 +645,9 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let* () = C.when_ (round > max_rounds) @@ fun () -> C.failf "In Regalloc.main: maximum rounds reached (%d) with no \ convergence on spilling" max_rounds () in + Logs.debug (fun m -> + m "%s: $%s: round %d of %d" + __FUNCTION__ (Func.name t.fn) round max_rounds); (* Build the interference graph. *) let live = Live.compute ~keep:t.keep t.fn in let* () = build t live in diff --git a/src/lib/regalloc/regalloc_irc_state.ml b/src/lib/regalloc/regalloc_irc_state.ml index 2c9c00cb..f9adcce7 100644 --- a/src/lib/regalloc/regalloc_irc_state.ml +++ b/src/lib/regalloc/regalloc_irc_state.ml @@ -152,6 +152,9 @@ module Make(M : Machine_intf.S) = struct let add_spill t n = if can_be_colored t n && not (Hashtbl.mem t.wspill_elts n) then let elt = Pairing_heap.add_removable t.wspill n in + Logs.debug (fun m -> + m "%s: adding %a to spill worklist%!" + __FUNCTION__ Rv.pp n); Hashtbl.set t.wspill_elts ~key:n ~data:elt let remove_spill t n = Hashtbl.change t.wspill_elts n ~f:(function @@ -228,7 +231,11 @@ module Make(M : Machine_intf.S) = struct | Second _ -> Hashtbl.find t.colors n | First r -> Regs.reg_color r - let set_color t n c = Hashtbl.set t.colors ~key:n ~data:c + let set_color t n c = + Logs.debug (fun m -> + m "%s: assigning color %a=%d%!" + __FUNCTION__ Rv.pp n c); + Hashtbl.set t.colors ~key:n ~data:c let add_move t label n = Hashtbl.update t.moves n ~f:(function From 59bcb8b5f0a994c395b85bd42f8345733a96a360 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Fri, 21 Nov 2025 01:48:20 -0500 Subject: [PATCH 34/62] More bug fixes 1. SROA cannot consider any slots that escape at any point in time, so the analysis has to return the global set of escapees. 2. The use of `Partition.refine` in the coalescing was wrong, because the equivalence relation is not given a transitive treatment. So, we have to roll our own partitioning algorithm. --- .../coalesce_slots/coalesce_slots_impl.ml | 74 ++++++++++++++----- src/lib/passes/sroa/sroa_impl.ml | 22 ++++-- src/lib/scalars.ml | 57 ++++++++------ src/test/data/opt/coalesce1.vir | 36 +++++++++ src/test/data/opt/coalesce1.vir.opt | 34 +++++++++ src/test/data/opt/coalesce1a.vir | 36 +++++++++ src/test/data/opt/coalesce1a.vir.opt | 9 +++ src/test/data/opt/esc1.vir | 27 +++++++ src/test/data/opt/esc1.vir.opt | 20 +++++ src/test/data/opt/storetoload1.vir.opt | 9 ++- src/test/data/opt/sumphi.vir.opt.sysv | 4 +- src/test/data/opt/vaarg2.vir.opt.sysv | 12 +-- src/test/test_opt.ml | 18 ++++- 13 files changed, 293 insertions(+), 65 deletions(-) create mode 100644 src/test/data/opt/coalesce1.vir create mode 100644 src/test/data/opt/coalesce1.vir.opt create mode 100644 src/test/data/opt/coalesce1a.vir create mode 100644 src/test/data/opt/coalesce1a.vir.opt create mode 100644 src/test/data/opt/esc1.vir create mode 100644 src/test/data/opt/esc1.vir.opt diff --git a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml index d48c279c..d38dd791 100644 --- a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml +++ b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml @@ -32,6 +32,13 @@ module Range = struct let is_bad = equal bad let singleton n = {lo = n; hi = n; tg = Def} + let size r = r.hi - r.lo + + let distance x y = + if x.hi < y.lo then y.lo - x.hi + else if x.lo > y.hi then x.lo - y.hi + else 0 + [@@ocaml.warning "-32"] (* Extend the upper-bound on the live range. *) let use r n = { @@ -83,19 +90,43 @@ let equiv_range slots rs x y = | Before | After -> true | _ -> false +let partition slots rs vs = + let pick rem = + Set.to_sequence rem |> + Seq.min_elt ~compare:(fun x y -> + (* Prefer shorter live ranges. *) + let rx = Map.find_exn rs x in + let ry = Map.find_exn rs y in + let c = Int.compare (Range.size rx) (Range.size ry) in + if c = 0 then + (* Break ties by comparing on the var. This + is to give a bit more determinism to the + algorithm. *) + Var.compare x y + else c) in + (* Ensure that `x` does not interfere with any of the + slots in the group. *) + let ok g x = Set.for_all g ~f:(equiv_range slots rs x) in + let[@tail_mod_cons] rec go rem = match pick rem with + | None -> [] + | Some seed -> + let g = Set.fold rem ~init:(Var.Set.singleton seed) + ~f:(fun g x -> if ok g x then Set.add g x else g) in + g :: go (Set.diff rem g) in + go vs + let non_interfering slots rs = Map.to_sequence rs |> - (* The results will still be correct if we omit this, but - it is more efficient to just not consider them at all. *) + (* Do not consider escapees. This would mess up + our heuristics for building the groups. *) Seq.filter ~f:(not @. Range.is_bad @. snd) |> Seq.map ~f:fst |> Var.Set.of_sequence |> - Partition.trivial |> - Partition.refine ~cmp:Var.compare ~equiv:(equiv_range slots rs) + partition slots rs (* invariant: a group is never empty *) let canon_elt slots g = - Group.enum g |> Seq.max_elt ~compare:(fun x y -> + Set.to_sequence g |> Seq.max_elt ~compare:(fun x y -> (* Assuming that the sizes and alignments are compatible, just pick the biggest one. *) let sx, ax = slot_sa slots x in @@ -106,11 +137,12 @@ let canon_elt slots g = Option.value_exn let make_subst slots p = - Partition.groups p |> - Seq.fold ~init:Var.Map.empty ~f:(fun init g -> + List.fold p ~init:Var.Map.empty ~f:(fun init g -> let canon = canon_elt slots g in - Group.enum g |> Seq.filter ~f:(not @. Var.equal canon) |> - Seq.fold ~init ~f:(fun acc x -> Map.set acc ~key:x ~data:(`var canon))) + Set.to_sequence g |> + Seq.filter ~f:(not @. Var.equal canon) |> + Seq.fold ~init ~f:(fun acc x -> + Map.set acc ~key:x ~data:(`var canon))) type t = { subst : Subst_mapper.t; (* Map from coalesced to canonical slots *) @@ -159,16 +191,19 @@ module Make(M : Scalars.L) = struct Ctrl.free_vars c |> Set.fold ~init:acc ~f:(fun acc x -> update acc s x ip false) - let liveness cfg blks slots (s : solution) = + let liveness cfg blks slots t = let ip = ref 0 in let nums = Vec.create () in + let init = + Hash_set.fold t.esc ~init:Var.Map.empty + ~f:(fun acc x -> Map.set acc ~key:x ~data:Range.bad) in let acc = Graphlib.reverse_postorder_traverse (module Cfg) ~start:Label.pseudoentry cfg |> Seq.filter_map ~f:(Ltree.find blks) |> - Seq.fold ~init:Var.Map.empty ~f:(fun acc b -> + Seq.fold ~init ~f:(fun acc b -> let l = Blk.label b in - let s = ref @@ Solution.get s l in + let s = ref @@ get t l in let acc = Blk.insns b |> Seq.fold ~init:acc ~f:(fun acc i -> let op = Insn.op i in let acc = liveness_insn acc !s !ip i in @@ -182,10 +217,10 @@ module Make(M : Scalars.L) = struct acc) in acc, nums - let collect_deads blks slots rs s = + let collect_deads blks slots rs t = Ltree.fold blks ~init:Lset.empty ~f:(fun ~key ~data:b init -> - let s = ref @@ Solution.get s key in + let s = ref @@ get t key in Blk.insns b |> Seq.fold ~init ~f:(fun acc i -> let op = Insn.op i in let acc = match Insn.load_or_store_to op with @@ -219,8 +254,9 @@ module Make(M : Scalars.L) = struct Label.pp (Vec.get_exn nums r.hi) in m "%s: %a: %a%!" __FUNCTION__ Var.pp x ppr x)); Logs.debug (fun m -> - Partition.groups p |> Seq.iter ~f:(fun g -> - m "%s: group: %a%!" __FUNCTION__ (Group.pp Var.pp) g)); + List.iter p ~f:(fun g -> + m "%s: group: %s%!" __FUNCTION__ + (Set.to_list g |> List.to_string ~f:Var.to_string))); Logs.debug (fun m -> if not @@ Lset.is_empty deads then m "%s: dead stores: %a%!" @@ -239,10 +275,10 @@ module Make(M : Scalars.L) = struct if Map.is_empty slots then empty else let cfg = Cfg.create fn in let blks = Func.map_of_blks fn in - let s = Analysis.analyze ~cfg ~blks slots fn in - let rs, nums = liveness cfg blks slots s in + let t = Analysis.analyze ~cfg ~blks slots fn in + let rs, nums = liveness cfg blks slots t in let p = non_interfering slots rs in - let deads = collect_deads blks slots rs s in + let deads = collect_deads blks slots rs t in let subst = make_subst slots p in debug_show slots rs nums deads p subst; {subst; deads} diff --git a/src/lib/passes/sroa/sroa_impl.ml b/src/lib/passes/sroa/sroa_impl.ml index 82c9670b..3f60e597 100644 --- a/src/lib/passes/sroa/sroa_impl.ml +++ b/src/lib/passes/sroa/sroa_impl.ml @@ -10,7 +10,6 @@ open Core open Regular.Std -open Graphlib.Std open Scalars module Slot = Virtual.Slot @@ -49,15 +48,22 @@ end = struct Type.pp_basic a.ty pre off - let collect_accesses slots fn (s : solution) : accesses = + let collect_accesses slots fn t : accesses = (* Group all memory accesses by their corresponding slot. *) Func.blks fn |> Seq.fold ~init:Var.Map.empty ~f:(fun init b -> - let s = ref @@ Solution.get s @@ Blk.label b in + let s = ref @@ get t @@ Blk.label b in Blk.insns b |> Seq.fold ~init ~f:(fun acc i -> let op = Insn.op i in let acc = match Insn.load_or_store_to op with | None -> acc | Some (ptr, ty, ldst) -> match Map.find !s ptr with + | Some Offset (base, _) when escaped t base -> + (* Any slot that escaped at any time should not + be considered for partitioning. *) + Logs.debug (fun m -> + m "%s: ignoring escaped pointer %a%!" + __FUNCTION__ Var.pp base); + acc | Some Offset (base, off) -> Map.add_multi acc ~key:base ~data:{insn = i; off; ty; ldst} | _ -> acc in @@ -226,10 +232,10 @@ end = struct Logs.debug (fun m -> m "%s: no parts found%!" __FUNCTION__); !![i] - let rewrite_with_partitions slots fn (s : solution) parts m = + let rewrite_with_partitions slots fn t parts m = let open Context.Syntax in let* blks = Func.blks fn |> Context.Seq.map ~f:(fun b -> - let s = ref @@ Solution.get s @@ Blk.label b in + let s = ref @@ get t @@ Blk.label b in let+ insns = Blk.insns b |> Context.Seq.map ~f:(fun i -> let+ is = rewrite_insn parts m !s i in s := Analysis.transfer_op slots !s @@ Insn.op i; is) @@ -271,13 +277,13 @@ end = struct let open Context.Syntax in let slots = Analysis.collect_slots fn in if Map.is_empty slots then !!fn else - let s = analyze slots fn in - let accs = collect_accesses slots fn s in + let t = analyze slots fn in + let accs = collect_accesses slots fn t in let parts = partition_acesses accs in if Map.is_empty parts then !!fn else let () = debug_show_parts parts in let* m = materialize_partitions slots parts in if Map.is_empty m then !!fn else let fn = insert_new_slots fn m in - rewrite_with_partitions slots fn s parts m + rewrite_with_partitions slots fn t parts m end diff --git a/src/lib/scalars.ml b/src/lib/scalars.ml index 1a0a3f99..b21a3b63 100644 --- a/src/lib/scalars.ml +++ b/src/lib/scalars.ml @@ -159,20 +159,34 @@ module type L = sig end end +type t = { + soln : solution; (* Dataflow solution *) + esc : Var.Hash_set.t; (* All slots that escaped, globally *) +} + +let get t l = Solution.get t.soln l +let escaped t x = Hash_set.mem t.esc x + module Make(M : L) = struct open M (* Set all known scalars to `Top` according to `f`, which is the set of variables that escape. *) - let escaping f x s = + let escaping ?esc f x s = Set.fold (f x) ~init:s ~f:(fun s v -> match Map.find s v with | Some Offset (ptr, _) -> + Option.iter esc ~f:(fun t -> + Hash_set.strict_add t ptr |> + Or_error.iter ~f:(fun () -> + Logs.debug (fun m -> + m "%s: %a escapes via %a%!" + __FUNCTION__ Var.pp ptr Var.pp v))); Map.set s ~key:ptr ~data:Top | Some _ | None -> s) (* Transfer function for a single instruction. *) - let transfer_op slots s op = + let transfer_op ?esc slots s op = let value = match Insn.offset op with | Some (ptr, offset) -> State.derive slots s ptr offset | None -> Insn.copy_of op |> Option.bind ~f:(Map.find s) in @@ -181,7 +195,7 @@ module Make(M : L) = struct | Some v -> Insn.lhs op |> Set.fold ~init:s ~f:(fun s key -> Map.set s ~key ~data:v) in - escaping Insn.escapes op s + escaping ?esc Insn.escapes op s let merge_blkarg acc src dst = match src with | `var src when Var.(src = dst) -> acc @@ -211,32 +225,26 @@ module Make(M : L) = struct [@@inline] (* Transfer for control-flow instruction. *) - let transfer_ctrl ?(blkparam = true) blks s c = - let init = escaping (ctrl_esc blkparam) c s in + let transfer_ctrl ?(blkparam = true) ?esc blks s c = + let init = escaping ?esc (ctrl_esc blkparam) c s in (* Propagate the block parameters we are passing. *) Ctrl.locals c |> List.fold ~init ~f:(blkargs blks) [@@specialise] (* Transfer function for a block. *) - let transfer ?(blkparam = true) slots blks l s = + let transfer ?(blkparam = true) ?esc slots blks l s = Label.Tree.find blks l |> Option.value_map ~default:s ~f:(fun b -> Blk.insns b |> Seq.map ~f:Insn.op |> - Seq.fold ~init:s ~f:(transfer_op slots) |> - transfer_ctrl ~blkparam blks @< Blk.ctrl b) + Seq.fold ~init:s ~f:(transfer_op ?esc slots) |> + transfer_ctrl ~blkparam ?esc blks @< Blk.ctrl b) [@@specialise] (* Initial constraints. *) - let initialize ?(blkparam = true) slots blks = + let initialize slots blks = (* Set all slots to point to their own base address. *) let init = Map.mapi slots ~f:(fun ~key ~data:_ -> Offset (key, 0L)) in - (* Any slot that directly escapes should immediately be set to `Top`. *) - let k = ctrl_esc blkparam in - Label.Tree.fold blks ~init ~f:(fun ~key:_ ~data init -> - Blk.insns data |> Seq.fold ~init ~f:(fun s i -> - escaping Insn.escapes (Insn.op i) s) |> - escaping k (Blk.ctrl data)) |> - Label.Map.singleton Label.pseudoentry |> + Label.Map.singleton Label.pseudoentry init |> Solution.create @< State.empty [@@specialise] @@ -245,18 +253,21 @@ module Make(M : L) = struct ~f:(fun acc s -> Map.set acc ~key:(Slot.var s) ~data:s) (* Run the dataflow analysis. *) - let analyze ?(blkparam = true) ?cfg ?blks slots fn : solution = + let analyze ?(blkparam = true) ?cfg ?blks slots fn = + let esc = Var.Hash_set.create () in let cfg = match cfg with | None -> Cfg.create fn | Some cfg -> cfg in let blks = match blks with | None -> Func.map_of_blks fn | Some blks -> blks in - Graphlib.fixpoint (module Cfg) cfg - ~init:(initialize ~blkparam slots blks) - ~start:Label.pseudoentry - ~equal:State.equal - ~merge:State.merge - ~f:(transfer ~blkparam slots blks) + let s = + Graphlib.fixpoint (module Cfg) cfg + ~init:(initialize slots blks) + ~start:Label.pseudoentry + ~equal:State.equal + ~merge:State.merge + ~f:(transfer ~blkparam ~esc slots blks) in + {soln = s; esc} [@@specialise] end diff --git a/src/test/data/opt/coalesce1.vir b/src/test/data/opt/coalesce1.vir new file mode 100644 index 00000000..35efae18 --- /dev/null +++ b/src/test/data/opt/coalesce1.vir @@ -0,0 +1,36 @@ +module coalesce1 + +;; %a can coalesce with either %b or %c, but not both +export function w $f(w %x, w %y, w %z) { + %a = slot 8, align 8 + %b = slot 8, align 8 + %c = slot 8, align 8 +@start: + st.w %x, %a + jmp @b1 +@b1: + %u1 = ld.w %a + jmp @b2 +@b2: + jmp @b3 +@b3: + st.w %y, %b + jmp @b4 +@b4: + %u2 = ld.w %b + jmp @b5 +@b5: + st.w %z, %c + jmp @b6 +@b6: + %u3 = ld.w %c + jmp @b7 +@b7: + %u4 = ld.w %b + jmp @b8 +@b8: + %r = add.w %u1, %u2 + %r = add.w %r, %u3 + %r = add.w %r, %u4 + ret %r +} diff --git a/src/test/data/opt/coalesce1.vir.opt b/src/test/data/opt/coalesce1.vir.opt new file mode 100644 index 00000000..98954f26 --- /dev/null +++ b/src/test/data/opt/coalesce1.vir.opt @@ -0,0 +1,34 @@ +module coalesce1 + +export function w $f(w %x, w %y, w %z) { + %a = slot 8, align 8 + %c = slot 8, align 8 +@2: + st.w %x, %a ; @4 + jmp @3 +@3: + %u1.1 = ld.w %a ; @6 + jmp @5 +@5: + jmp @7 +@7: + st.w %y, %a ; @9 + jmp @8 +@8: + %u2.1 = ld.w %a ; @11 + jmp @10 +@10: + st.w %z, %c ; @13 + jmp @12 +@12: + %u3.1 = ld.w %c ; @15 + jmp @14 +@14: + %u4.1 = ld.w %a ; @17 + jmp @16 +@16: + %r.1 = add.w %u1.1, %u2.1 ; @18 + %r.2 = add.w %r.1, %u3.1 ; @19 + %r.3 = add.w %r.2, %u4.1 ; @20 + ret %r.3 +} diff --git a/src/test/data/opt/coalesce1a.vir b/src/test/data/opt/coalesce1a.vir new file mode 100644 index 00000000..f7b6ae9c --- /dev/null +++ b/src/test/data/opt/coalesce1a.vir @@ -0,0 +1,36 @@ +module coalesce1a + +;; same as coalesce1, but let all of the opts run +export function w $f(w %x, w %y, w %z) { + %a = slot 8, align 8 + %b = slot 8, align 8 + %c = slot 8, align 8 +@start: + st.w %x, %a + jmp @b1 +@b1: + %u1 = ld.w %a + jmp @b2 +@b2: + jmp @b3 +@b3: + st.w %y, %b + jmp @b4 +@b4: + %u2 = ld.w %b + jmp @b5 +@b5: + st.w %z, %c + jmp @b6 +@b6: + %u3 = ld.w %c + jmp @b7 +@b7: + %u4 = ld.w %b + jmp @b8 +@b8: + %r = add.w %u1, %u2 + %r = add.w %r, %u3 + %r = add.w %r, %u4 + ret %r +} diff --git a/src/test/data/opt/coalesce1a.vir.opt b/src/test/data/opt/coalesce1a.vir.opt new file mode 100644 index 00000000..9234fd83 --- /dev/null +++ b/src/test/data/opt/coalesce1a.vir.opt @@ -0,0 +1,9 @@ +module coalesce1a + +export function w $f(w %x, w %y, w %z) { +@2: + %2 = add.w %x, %y ; @21 + %4 = add.w %z, %y ; @23 + %5 = add.w %2, %4 ; @24 + ret %5 +} diff --git a/src/test/data/opt/esc1.vir b/src/test/data/opt/esc1.vir new file mode 100644 index 00000000..225c0a35 --- /dev/null +++ b/src/test/data/opt/esc1.vir @@ -0,0 +1,27 @@ +module esc1 + +export function w $foo(w %x) { + %a = slot 8, align 8 + %b = slot 8, align 8 +@start: + %x1 = add.w %x, 1_w + %x2 = add.w %x, 2_w + %x3 = add.w %x, 3_w + st.w %x, %a + %a4 = add.l %a, 4_l + st.w %x1, %a4 + st.w %x2, %b + %b4 = add.l %b, 4_l + st.w %x3, %b4 + %c = slt.w %x, 0_w + br %c, @yes, @no +@yes: + %p = copy.l %b4 + jmp @done +@no: + %p = copy.l %a4 + jmp @done +@done: + %r = ld.w %p + ret %r +} diff --git a/src/test/data/opt/esc1.vir.opt b/src/test/data/opt/esc1.vir.opt new file mode 100644 index 00000000..6d33a0e6 --- /dev/null +++ b/src/test/data/opt/esc1.vir.opt @@ -0,0 +1,20 @@ +module esc1 + +export function w $foo(w %x) { + %a = slot 8, align 8 + %b = slot 8, align 8 +@2: + %0 = add.w %x, 0x1_w ; @19 + %1 = add.w %x, 0x2_w ; @20 + %2 = add.w %x, 0x3_w ; @21 + st.w %x, %a ; @8 + %3 = add.l %a, 0x4_l ; @22 + st.w %0, %3 ; @10 + st.w %1, %b ; @11 + %4 = add.l %b, 0x4_l ; @23 + st.w %2, %4 ; @13 + %5 = slt.w %x, 0x0_w ; @24 + %6 = sel.l %5, %4, %3 ; @25 + %r.1 = ld.w %6 ; @18 + ret %r.1 +} diff --git a/src/test/data/opt/storetoload1.vir.opt b/src/test/data/opt/storetoload1.vir.opt index 5ab4aaf0..a86e4b11 100644 --- a/src/test/data/opt/storetoload1.vir.opt +++ b/src/test/data/opt/storetoload1.vir.opt @@ -3,9 +3,10 @@ module foo export function $foo() { %x = slot 16, align 16 @2: - %1 = add.l %x, 0x8_l ; @9 - %2 = and.l %1, 0xf_l ; @10 - %3 = itrunc.w %2 ; @11 - st.w %3, $a ; @8 + %0 = add.l %x, 0x8_l ; @9 + %1 = and.l %0, 0xf_l ; @10 + %2 = itrunc.w %1 ; @11 + st.w %2, %0 ; @6 + st.w %2, $a ; @8 ret } diff --git a/src/test/data/opt/sumphi.vir.opt.sysv b/src/test/data/opt/sumphi.vir.opt.sysv index 914fd25f..36757126 100644 --- a/src/test/data/opt/sumphi.vir.opt.sysv +++ b/src/test/data/opt/sumphi.vir.opt.sysv @@ -19,7 +19,7 @@ export function $sumphi(l %a/rdi, l %b/rsi, w %x/rdx) { jmp @14(%a) @10: jmp @14(%b) -@14(%6.3): - %s.1/w/rax = call $sum(%6.3/rdi) ; @18 +@14(%5.3): + %s.1/w/rax = call $sum(%5.3/rdi) ; @18 ret rax/%s.1 } diff --git a/src/test/data/opt/vaarg2.vir.opt.sysv b/src/test/data/opt/vaarg2.vir.opt.sysv index f1ff6b17..5a781bc6 100644 --- a/src/test/data/opt/vaarg2.vir.opt.sysv +++ b/src/test/data/opt/vaarg2.vir.opt.sysv @@ -2,7 +2,7 @@ module vaarg2 export function $foo(b %9/rax, l %i/rdi, ...) { %3 = slot 176, align 16 - %74 = slot 8, align 8 + %76 = slot 8, align 8 @18: %4.1 = add.l %3, 0x8_l ; @19 regstore rsi, %4.1 ; @20 @@ -35,30 +35,30 @@ export function $foo(b %9/rax, l %i/rdi, ...) { regstore xmm7, %18.1 ; @45 jmp @2 @2: - st.w 0x8_w, %74 ; @52 - %79.1 = add.l %74, 0x4_l ; @139 + st.w 0x8_w, %76 ; @52 + %79.1 = add.l %76, 0x4_l ; @139 st.w 0x30_w, %79.1 ; @54 %25.1 = stkargs ; @55 jmp @61 @61: - %28.1 = ld.w %74 ; @65 + %28.1 = ld.w %76 ; @65 %29.1 = le.w %28.1, 0x20_w ; @66 br %29.1, @62, @63 @62: %32.1 = zext.l %28.1 ; @69 %33.1 = add.l %3, %32.1 ; @70 %34.1 = add.w %28.1, 0x10_w ; @71 - st.w %34.1, %74 ; @72 + st.w %34.1, %76 ; @72 jmp @64(%33.1) @63: jmp @64(%25.1) @64(%38.1): %39.1 = ld.l %38.1 ; @77 - st.l %39.1, %74 ; @78 %40.1 = add.l %38.1, 0x8_l ; @79 %41.1 = ld.l %40.1 ; @80 jmp @60 @60: + st.l %39.1, %76 ; @47 %1.1 = add.l %39.1, %41.1 ; @15 %2.1 = add.l %1.1, %i ; @16 ret rax/%2.1 diff --git a/src/test/test_opt.ml b/src/test/test_opt.ml index dbb22122..c980228e 100644 --- a/src/test/test_opt.ml +++ b/src/test/test_opt.ml @@ -41,20 +41,29 @@ let from_file_abi filename = let* () = Context.iter_seq_err (Virtual.Abi.Module.funs m) ~f:Passes.Ssa.check_abi in Passes.optimize_abi m -let test name _ = +let test ?(f = from_file) name _ = let filename = Format.sprintf "data/opt/%s.vir" name in let filename' = filename ^ ".opt" in let expected = In_channel.read_all filename' in Context.init Machine.X86.Amd64_sysv.target |> Context.eval begin let open Context.Syntax in - let* _, m = from_file filename in + let* _, m = f filename in let* () = Virtual.Module.funs m |> Context.iter_seq_err ~f:Passes.Ssa.check in !!(Format.asprintf "%a" Virtual.Module.pp m) end |> function | Ok p' -> compare_outputs filename' expected p' | Error err -> assert_failure @@ Format.asprintf "%a" Error.pp err +let coalesce_only filename = + let open Context.Syntax in + let* m = Parse.Virtual.from_file filename in + let* tenv, m = Passes.initialize m in + let*? m = Virtual.Module.map_funs_err m ~f:Passes.Coalesce_slots.run in + let*? m = Virtual.Module.map_funs_err m ~f:Passes.Resolve_constant_blk_args.run in + let*? m = Virtual.Module.map_funs_err m ~f:Passes.Remove_dead_vars.run in + !!(tenv, m) + let test_abi target ext name _ = let filename = Format.sprintf "data/opt/%s.vir" name in let filename' = Format.sprintf "%s.opt.%s" filename ext in @@ -309,6 +318,9 @@ let opt_suite = "Test optimizations" >::: [ "Parallel moves" >:: test "parallel"; "SROA" >:: test "sroa"; "Sink 1" >:: test "sink1"; + "Escape 1" >:: test "esc1"; + "Slot coalesce 1 (no other opts)" >:: test ~f:coalesce_only "coalesce1"; + "Slot coalesce 1 (full opts)" >:: test "coalesce1a"; ] let abi_suite = "Test ABI lowering" >::: [ @@ -444,7 +456,7 @@ let native_suite = "Test native code" >::: [ "Slot promotion 2 (GCD, partial) (SysV AMD64)" >:: test_sysv_amd64_native "promote2-partial"; "Struct in a block argument (SysV AMD64)" >:: test_sysv_amd64_native "sumphi"; "Returning, passing, and dereferencing a struct (SysV AMD64)" >:: test_sysv_amd64_native "unref"; - "Sink (SysV AMD64)" >:: test_sysv_amd64_native "sink1"; + "Sink 1 (SysV AMD64)" >:: test_sysv_amd64_native "sink1"; ] let () = run_test_tt_main @@ test_list [ From 816606ceb1f1b580230397d5386c530c82c25f61 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Fri, 21 Nov 2025 11:21:31 -0500 Subject: [PATCH 35/62] Use priority when selecting group members as well --- .../coalesce_slots/coalesce_slots_impl.ml | 124 +++++++++++------- src/lib/passes/passes.ml | 1 + src/test/data/opt/coalesce1.vir.opt | 12 +- src/test/data/opt/vaarg1.vir.opt.sysv | 1 - src/test/data/opt/vaarg1.vir.opt.sysv.amd64 | 1 - .../opt/vaarg1.vir.opt.sysv.amd64.regalloc | 1 - 6 files changed, 87 insertions(+), 53 deletions(-) diff --git a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml index d38dd791..b6806378 100644 --- a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml +++ b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml @@ -79,7 +79,7 @@ let compat_size_align slots x y = (* Find compatible slots. Most importantly, their live ranges must not interfere. *) -let equiv_range slots rs x y = +let compat_range slots rs x y = compat_size_align slots x y && let rx = Map.find_exn rs x in let ry = Map.find_exn rs y in @@ -90,59 +90,95 @@ let equiv_range slots rs x y = | Before | After -> true | _ -> false -let partition slots rs vs = - let pick rem = - Set.to_sequence rem |> - Seq.min_elt ~compare:(fun x y -> - (* Prefer shorter live ranges. *) - let rx = Map.find_exn rs x in - let ry = Map.find_exn rs y in - let c = Int.compare (Range.size rx) (Range.size ry) in - if c = 0 then - (* Break ties by comparing on the var. This - is to give a bit more determinism to the - algorithm. *) - Var.compare x y - else c) in - (* Ensure that `x` does not interfere with any of the - slots in the group. *) - let ok g x = Set.for_all g ~f:(equiv_range slots rs x) in - let[@tail_mod_cons] rec go rem = match pick rem with - | None -> [] - | Some seed -> - let g = Set.fold rem ~init:(Var.Set.singleton seed) - ~f:(fun g x -> if ok g x then Set.add g x else g) in - g :: go (Set.diff rem g) in - go vs - -let non_interfering slots rs = +let range_priority rs x y = + (* Prefer shorter live ranges. *) + let rx = Map.find_exn rs x in + let ry = Map.find_exn rs y in + let c = Int.compare (Range.size rx) (Range.size ry) in + if c = 0 then + (* Break ties by comparing on the var. This + is to give a bit more determinism to the + algorithm. *) + Var.compare x y + else c + +let size_priority slots x y = + (* Assuming that the sizes and alignments are compatible, + just pick the biggest one. *) + let sx, ax = slot_sa slots x in + let sy, ay = slot_sa slots y in + match Int.compare sx sy with + | 0 -> Int.compare ax ay + | c -> c + +let candidates rs = + let vs = Vec.create ~capacity:(Map.length rs) () in Map.to_sequence rs |> (* Do not consider escapees. This would mess up our heuristics for building the groups. *) Seq.filter ~f:(not @. Range.is_bad @. snd) |> - Seq.map ~f:fst |> - Var.Set.of_sequence |> - partition slots rs + Seq.map ~f:fst |> Seq.iter ~f:(Vec.push vs); + vs + +let is_adjacent slots rs x y = + Var.(x <> y) && compat_range slots rs x y + +(* Greedy partitioning algorithm. *) +let partition slots rs = + let vs = candidates rs in + match Vec.length vs with + | 0 -> [] + | 1 -> [Var.Set.singleton @@ Vec.front_exn vs] + | len -> + assert (len > 1); + let gs = ref [] in + let adj = Var.Table.create ~size:len () in + let assigned = Var.Hash_set.create ~size:len () in + (* Compute the adjacency table. *) + Vec.iter vs ~f:(fun x -> + Vec.to_sequence_mutable vs |> + Seq.filter ~f:(is_adjacent slots rs x) |> + Var.Set.of_sequence |> function + | s when Set.is_empty s -> () + | s -> Hashtbl.set adj ~key:x ~data:s); + (* Use an ascending order. *) + Vec.sort vs ~compare:(fun x y -> range_priority rs y x); + while not @@ Vec.is_empty vs do + let x = Vec.pop_exn vs in + Hash_set.strict_add assigned x |> + Or_error.iter ~f:(fun () -> + Logs.debug (fun m -> + m "%s: processing %a%!" + __FUNCTION__ Var.pp x); + let g = Vec.fold_right vs + ~init:(Var.Set.singleton x) + ~f:(fun y g -> + (* Ensure that all groups are disjoint. *) + if Hash_set.mem assigned y then g + else match Hashtbl.find adj y with + | Some a when Set.is_subset g ~of_:a -> + (* Freeze `y` to this group. *) + Hash_set.add assigned y; + Set.add g y + | Some _ | None -> g) in + gs := g :: !gs) + done; + !gs (* invariant: a group is never empty *) let canon_elt slots g = - Set.to_sequence g |> Seq.max_elt ~compare:(fun x y -> - (* Assuming that the sizes and alignments are compatible, - just pick the biggest one. *) - let sx, ax = slot_sa slots x in - let sy, ay = slot_sa slots y in - match Int.compare sx sy with - | 0 -> Int.compare ax ay - | c -> c) |> + Set.to_sequence g |> + Seq.max_elt ~compare:(size_priority slots) |> Option.value_exn let make_subst slots p = List.fold p ~init:Var.Map.empty ~f:(fun init g -> - let canon = canon_elt slots g in - Set.to_sequence g |> - Seq.filter ~f:(not @. Var.equal canon) |> - Seq.fold ~init ~f:(fun acc x -> - Map.set acc ~key:x ~data:(`var canon))) + if Set.length g <= 1 then init else + let canon = canon_elt slots g in + Set.to_sequence g |> + Seq.filter ~f:(not @. Var.equal canon) |> + Seq.fold ~init ~f:(fun acc x -> + Map.set acc ~key:x ~data:(`var canon))) type t = { subst : Subst_mapper.t; (* Map from coalesced to canonical slots *) @@ -277,7 +313,7 @@ module Make(M : Scalars.L) = struct let blks = Func.map_of_blks fn in let t = Analysis.analyze ~cfg ~blks slots fn in let rs, nums = liveness cfg blks slots t in - let p = non_interfering slots rs in + let p = partition slots rs in let deads = collect_deads blks slots rs t in let subst = make_subst slots p in debug_show slots rs nums deads p subst; diff --git a/src/lib/passes/passes.ml b/src/lib/passes/passes.ml index 85282098..c9d0103f 100644 --- a/src/lib/passes/passes.ml +++ b/src/lib/passes/passes.ml @@ -72,6 +72,7 @@ let optimize_abi m = let*? m = Abi.Module.map_funs_err m ~f:Remove_dead_vars.run_abi in let*? m = Abi.Module.map_funs_err m ~f:Coalesce_slots.run_abi in let*? m = Abi.Module.map_funs_err m ~f:Resolve_constant_blk_args.run_abi in + let*? m = Abi.Module.map_funs_err m ~f:Abi_loadopt.run in let*? m = Abi.Module.map_funs_err m ~f:Remove_dead_vars.run_abi in let* () = Context.iter_seq_err (Abi.Module.funs m) ~f:Ssa.check_abi in !!m diff --git a/src/test/data/opt/coalesce1.vir.opt b/src/test/data/opt/coalesce1.vir.opt index 98954f26..b5646e98 100644 --- a/src/test/data/opt/coalesce1.vir.opt +++ b/src/test/data/opt/coalesce1.vir.opt @@ -2,7 +2,7 @@ module coalesce1 export function w $f(w %x, w %y, w %z) { %a = slot 8, align 8 - %c = slot 8, align 8 + %b = slot 8, align 8 @2: st.w %x, %a ; @4 jmp @3 @@ -12,19 +12,19 @@ export function w $f(w %x, w %y, w %z) { @5: jmp @7 @7: - st.w %y, %a ; @9 + st.w %y, %b ; @9 jmp @8 @8: - %u2.1 = ld.w %a ; @11 + %u2.1 = ld.w %b ; @11 jmp @10 @10: - st.w %z, %c ; @13 + st.w %z, %a ; @13 jmp @12 @12: - %u3.1 = ld.w %c ; @15 + %u3.1 = ld.w %a ; @15 jmp @14 @14: - %u4.1 = ld.w %a ; @17 + %u4.1 = ld.w %b ; @17 jmp @16 @16: %r.1 = add.w %u1.1, %u2.1 ; @18 diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv b/src/test/data/opt/vaarg1.vir.opt.sysv index d0d00986..46807d32 100644 --- a/src/test/data/opt/vaarg1.vir.opt.sysv +++ b/src/test/data/opt/vaarg1.vir.opt.sysv @@ -63,7 +63,6 @@ export function $foo(b %11/rax, l %i/rdi, ...) { %50.1 = ld.l %48.1 ; @89 jmp @61 @61: - st.l %49.1, %52 ; @48 %f1.1 = ld.d %52 ; @6 %0.1 = add.d %f1.1, 1.234_d ; @13 %2.1 = ftosi.d.l %0.1 ; @15 diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 index 084f6d03..6c098966 100644 --- a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 +++ b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 @@ -57,7 +57,6 @@ export function $foo { ; returns: rax mov %50.1:l, qword ptr [%48.1] ; @89 jmp @61 ; @100 @61: - mov qword ptr [%52], %49.1:l ; @48 movsd %f1.1:d, qword ptr [%52] ; @6 movsd %0.1:d, %f1.1:d ; @13 addsd %0.1:d, qword ptr [rip + @97] ; @98 diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc index 1bc66f40..d49b71d7 100644 --- a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc @@ -43,7 +43,6 @@ export function $foo { ; returns: rax mov qword ptr [rbp - 0x10], rax ; @88 mov rcx, qword ptr [rcx] ; @89 @61: - mov qword ptr [rbp - 0x10], rax ; @48 movsd xmm0, qword ptr [rbp - 0x10] ; @6 addsd xmm0, qword ptr [rip + @97] ; @98 .fp64 @97, 1.234 ; @99 From 5222b063771791392040a080bf49337d59f189a7 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sat, 22 Nov 2025 00:07:07 -0500 Subject: [PATCH 36/62] Handle load before store It's UB, but we don't want to use it as a license to "improve" the program by having the optimizer potentially eliminate the provenance of a bug. --- .../coalesce_slots/coalesce_slots_impl.ml | 36 +++++++++++++------ 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml index b6806378..86b64c04 100644 --- a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml +++ b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml @@ -202,30 +202,44 @@ module Make(M : Scalars.L) = struct | None -> Range.singleton n | Some r -> Range.def r n) - let mkuse s x n = Map.change s x ~f:(function + let mkuse f s x n = Map.change s x ~f:(function | Some r -> Some (Range.use r n) - | None -> None) + | None -> f n) - let update acc s x n def = match Map.find s x with - | Some Offset (base, _) -> - if def then mkdef acc base n else mkuse acc base n + let update acc s x n ldst = match Map.find s x with | Some Top -> Map.set acc ~key:x ~data:Range.bad + | Some Offset (base, _) -> + begin match ldst with + | None -> mkuse (const None) acc base n + | Some Store -> mkdef acc base n + | Some Load -> + (* If we end up with a load from an uninitialized slot, + then it is UB, and we shouldn't try to coalesce it + with anything else. *) + let f _ = + Logs.debug (fun m -> + m "%s: slot %a is loaded before being initialized" + __FUNCTION__ Var.pp base); + Some Range.bad in + mkuse f acc base n + end | None -> acc let liveness_insn acc s ip i = let op = Insn.op i in let r = Insn.free_vars op in - let r, w = match Insn.load_or_store_to op with - | Some (ptr, _, Store) -> Set.remove r ptr, Some ptr - | Some _ | None -> r, None in + let r, w, ldst = match Insn.load_or_store_to op with + | None -> r, None, None + | Some (ptr, _, ldst) -> + Set.remove r ptr, Some ptr, Some ldst in Option.fold w ~init:acc ~f:(fun acc x -> - update acc s x ip true) |> fun init -> + update acc s x ip ldst) |> fun init -> Set.fold r ~init ~f:(fun acc x -> - update acc s x ip false) + update acc s x ip None) let liveness_ctrl acc s ip c = Ctrl.free_vars c |> Set.fold ~init:acc - ~f:(fun acc x -> update acc s x ip false) + ~f:(fun acc x -> update acc s x ip None) let liveness cfg blks slots t = let ip = ref 0 in From 6da2a6e44a5fb687ed93bb1b1f3c680706538538 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sat, 22 Nov 2025 12:26:02 -0500 Subject: [PATCH 37/62] Prevent promotion errors with uninitialized loads There were still some cases where we would potentially try to promote a slot where it loaded an uninitialized value. The code would then fail the SSA check, so at least we didn't end up with miscompilation errors. However, we probably want to catch these cases and leave the behavior as-is (i.e. don't do the promotion) --- src/lib/dune | 1 + .../coalesce_slots/coalesce_slots_impl.ml | 2 +- src/lib/passes/promote_slots/promote_slots.ml | 22 ++++- .../promote_slots/promote_slots_impl.ml | 18 ++-- src/lib/passes/sroa/sroa_impl.ml | 4 +- src/lib/scalars.ml | 8 +- src/lib/slot_initialization.ml | 82 +++++++++++++++++++ src/test/data/opt/badload1.vir | 21 +++++ src/test/data/opt/badload1.vir.opt | 18 ++++ src/test/data/opt/badload2.vir | 20 +++++ src/test/data/opt/badload2.vir.opt | 16 ++++ src/test/test_opt.ml | 2 + 12 files changed, 193 insertions(+), 21 deletions(-) create mode 100644 src/lib/slot_initialization.ml create mode 100644 src/test/data/opt/badload1.vir create mode 100644 src/test/data/opt/badload1.vir.opt create mode 100644 src/test/data/opt/badload2.vir create mode 100644 src/test/data/opt/badload2.vir.opt diff --git a/src/lib/dune b/src/lib/dune index c97d23ca..38d6ce89 100644 --- a/src/lib/dune +++ b/src/lib/dune @@ -41,6 +41,7 @@ phi_values resolver_intf scalars + slot_initialization sm subst_mapper subst_mapper_abi diff --git a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml index 86b64c04..e8d1f3e1 100644 --- a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml +++ b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml @@ -325,7 +325,7 @@ module Make(M : Scalars.L) = struct if Map.is_empty slots then empty else let cfg = Cfg.create fn in let blks = Func.map_of_blks fn in - let t = Analysis.analyze ~cfg ~blks slots fn in + let t = Analysis.analyze cfg blks slots in let rs, nums = liveness cfg blks slots t in let p = partition slots rs in let deads = collect_deads blks slots rs t in diff --git a/src/lib/passes/promote_slots/promote_slots.ml b/src/lib/passes/promote_slots/promote_slots.ml index 1b0a5d14..17b05255 100644 --- a/src/lib/passes/promote_slots/promote_slots.ml +++ b/src/lib/passes/promote_slots/promote_slots.ml @@ -1,3 +1,4 @@ +open Core open Virtual open Promote_slots_impl @@ -49,16 +50,29 @@ open E.Syntax let run fn = if Dict.mem (Func.dict fn) Tags.ssa then - V.run fn >>= Ssa.run + let module S = Slot_initialization.Make(Sroa_coalesce_common.VL) in + let slots = S.Analysis.collect_slots fn in + let cfg = Cfg.create fn in + let blks = Func.map_of_blks fn in + let s = S.analyze cfg blks slots in + let undef l = Hash_set.mem s.bad l in + V.run fn ~undef >>= Ssa.run else E.failf "In Promote_slots: expected SSA form for function $%s" (Func.name fn) () let run_abi fn = - if Dict.mem (Abi.Func.dict fn) Tags.ssa then - A.run fn >>= Ssa.run_abi + let open Abi in + if Dict.mem (Func.dict fn) Tags.ssa then + let module S = Slot_initialization.Make(Sroa_coalesce_common.AL) in + let slots = S.Analysis.collect_slots fn in + let cfg = Cfg.create fn in + let blks = Func.map_of_blks fn in + let s = S.analyze cfg blks slots in + let undef l = Hash_set.mem s.bad l in + A.run fn ~undef >>= Ssa.run_abi else E.failf "In Promote_slots (ABI): expected SSA form for function $%s" - (Abi.Func.name fn) () + (Func.name fn) () diff --git a/src/lib/passes/promote_slots/promote_slots_impl.ml b/src/lib/passes/promote_slots/promote_slots_impl.ml index 7f7a9b73..28e7f488 100644 --- a/src/lib/passes/promote_slots/promote_slots_impl.ml +++ b/src/lib/passes/promote_slots/promote_slots_impl.ml @@ -101,21 +101,23 @@ module Make(M : L) = struct | Write (_, t') when not @@ Type.equal_basic t t' -> Bad | Read _ | Write _ -> Write (s, t) - let go env s = + let go env s ~undef = let x = Slot.var s in Resolver.uses env.reso x |> List.fold_until ~init:Bad ~finish:Fn.id ~f:(fun acc -> function |`blk _ -> Stop Bad - | `insn (i, _, _, _) -> match infer acc x i with - | (Read _ | Write _) as acc -> Continue acc - | Bad -> Stop Bad) + | `insn (i, _, _, _) -> + if undef @@ Insn.label i then Stop Bad + else match infer acc x i with + | (Read _ | Write _) as acc -> Continue acc + | Bad -> Stop Bad) end - let collect env = + let collect env ~undef = Func.slots env.fn |> Seq.fold ~init:Var.Map.empty ~f:(fun acc s -> - match Qualify.go env s with + match Qualify.go env s ~undef with | Bad -> Logs.debug (fun m -> m "%s: cannot promote %a%!" @@ -162,9 +164,9 @@ module Make(M : L) = struct | Some (y, t') -> replace_load env x l y t t' | None -> assert false)) - let run fn = + let run fn ~undef = let+ env = init fn in - let xs = collect env in + let xs = collect env ~undef in if not @@ Map.is_empty xs then let fn = remove_slots fn xs in replace env xs; diff --git a/src/lib/passes/sroa/sroa_impl.ml b/src/lib/passes/sroa/sroa_impl.ml index 3f60e597..04d6709f 100644 --- a/src/lib/passes/sroa/sroa_impl.ml +++ b/src/lib/passes/sroa/sroa_impl.ml @@ -277,7 +277,9 @@ end = struct let open Context.Syntax in let slots = Analysis.collect_slots fn in if Map.is_empty slots then !!fn else - let t = analyze slots fn in + let cfg = Cfg.create fn in + let blks = Func.map_of_blks fn in + let t = analyze cfg blks slots in let accs = collect_accesses slots fn t in let parts = partition_acesses accs in if Map.is_empty parts then !!fn else diff --git a/src/lib/scalars.ml b/src/lib/scalars.ml index b21a3b63..a8910ebb 100644 --- a/src/lib/scalars.ml +++ b/src/lib/scalars.ml @@ -253,14 +253,8 @@ module Make(M : L) = struct ~f:(fun acc s -> Map.set acc ~key:(Slot.var s) ~data:s) (* Run the dataflow analysis. *) - let analyze ?(blkparam = true) ?cfg ?blks slots fn = + let analyze ?(blkparam = true) cfg blks slots = let esc = Var.Hash_set.create () in - let cfg = match cfg with - | None -> Cfg.create fn - | Some cfg -> cfg in - let blks = match blks with - | None -> Func.map_of_blks fn - | Some blks -> blks in let s = Graphlib.fixpoint (module Cfg) cfg ~init:(initialize slots blks) diff --git a/src/lib/slot_initialization.ml b/src/lib/slot_initialization.ml new file mode 100644 index 00000000..ec1955c2 --- /dev/null +++ b/src/lib/slot_initialization.ml @@ -0,0 +1,82 @@ +open Core +open Regular.Std +open Graphlib.Std + +type state = Var.Set.t [@@deriving equal] + +let empty_state : state = Var.Set.empty + +(* Starting constraint has the entry block with no incoming + initializations. *) +let init_constraints : state Label.Map.t = + Label.Map.singleton Label.pseudoentry empty_state + +(* Our top element, which is every slot having been initialized. *) +let top_state slots : state = + Var.Set.of_list @@ Map.keys slots + +(* Since this is a "must" forward-flow analysis, incoming + predecessor states must intersect. *) +let merge_state = Set.inter + +type solution = (Label.t, state) Solution.t + +type t = { + soln : solution; + bad : Label.Hash_set.t; +} + +module Make(M : Scalars.L) = struct + open M + + module Analysis = Scalars.Make(M) + + let transfer_store acc ptr (s : Scalars.state) = + match Map.find s ptr with + | Some Offset (base, _) -> Set.add acc base + | _ -> acc + + let transfer_load bad acc l ptr (s : Scalars.state) = + match Map.find s ptr with + | Some Offset (base, _) -> + (* If the slot is not always initialized by the + time we reach the load, then we have UB. *) + if not @@ Set.mem acc base then Hash_set.add bad l; + acc + | _ -> acc + + let transfer bad t blks slots l st = + match Label.Tree.find blks l with + | None -> st + | Some b -> + let s = ref @@ Scalars.get t l in + Blk.insns b |> Seq.fold ~init:st ~f:(fun acc i -> + let op = Insn.op i and l = Insn.label i in + let acc = match Insn.load_or_store_to op with + | Some (ptr, _, Store) -> transfer_store acc ptr !s + | Some (ptr, _, Load) -> transfer_load bad acc l ptr !s + | _ -> acc in + s := Analysis.transfer_op slots !s op; + acc) + + let analyze cfg blks slots = + let t = Analysis.analyze cfg blks slots in + let bad = Label.Hash_set.create () in + let s = Graphlib.fixpoint (module Cfg) cfg + ~init:(Solution.create init_constraints @@ top_state slots) + ~start:Label.pseudoentry + ~equal:equal_state + ~merge:merge_state + ~f:(transfer bad t blks slots) in + Logs.debug (fun m -> + Label.Tree.iter blks ~f:(fun ~key ~data:_ -> + let s = Solution.get s key in + m "%s: %a: incoming must-initialize: %s%!" + __FUNCTION__ Label.pp key + (Set.to_list s |> List.to_string ~f:Var.to_string))); + Logs.debug (fun m -> + Hash_set.iter bad ~f:(fun l -> + m "%s: load at %a is potentially uninitialized%!" + __FUNCTION__ Label.pp l)); + {soln = s; bad} +end diff --git a/src/test/data/opt/badload1.vir b/src/test/data/opt/badload1.vir new file mode 100644 index 00000000..9fcc5a48 --- /dev/null +++ b/src/test/data/opt/badload1.vir @@ -0,0 +1,21 @@ +module badload1 + +export function w $foo(w %x) { + %a = slot 4, align 4 + %b = slot 4, align 4 +@start: + %c = slt.w %x, 0_w + br %c, @neg, @nonneg +@neg: + %v = ld.w %a + jmp @stuff +@nonneg: + st.w 5_w, %a + %v = ld.w %a + jmp @stuff +@stuff: + st.w 4_w, %b + %w = ld.w %b + %r = add.w %v, %w + ret %r +} diff --git a/src/test/data/opt/badload1.vir.opt b/src/test/data/opt/badload1.vir.opt new file mode 100644 index 00000000..881fdd40 --- /dev/null +++ b/src/test/data/opt/badload1.vir.opt @@ -0,0 +1,18 @@ +module badload1 + +export function w $foo(w %x) { + %a = slot 4, align 4 +@2: + %0 = slt.w %x, 0x0_w ; @13 + br %0, @3, @4 +@3: + %v.3 = ld.w %a ; @7 + jmp @6(%v.3) +@4: + st.w 0x5_w, %a ; @8 + jmp @6(0x5_w) +@6(%v.1): + st.w 0x4_w, %a ; @10 + %1 = add.w %v.1, 0x4_w ; @14 + ret %1 +} diff --git a/src/test/data/opt/badload2.vir b/src/test/data/opt/badload2.vir new file mode 100644 index 00000000..1308e2d0 --- /dev/null +++ b/src/test/data/opt/badload2.vir @@ -0,0 +1,20 @@ +module badload2 + +export function w $foo(w %x) { + %a = slot 4, align 4 + %b = slot 4, align 4 +@start: + %c = slt.w %x, 0_w + br %c, @neg, @nonneg +@neg: + jmp @stuff +@nonneg: + st.w 5_w, %a + jmp @stuff +@stuff: + %v = ld.w %a + st.w 4_w, %b + %w = ld.w %b + %r = add.w %v, %w + ret %r +} diff --git a/src/test/data/opt/badload2.vir.opt b/src/test/data/opt/badload2.vir.opt new file mode 100644 index 00000000..e5c11e17 --- /dev/null +++ b/src/test/data/opt/badload2.vir.opt @@ -0,0 +1,16 @@ +module badload2 + +export function w $foo(w %x) { + %a = slot 4, align 4 +@2: + %0 = slt.w %x, 0x0_w ; @12 + br %0, @6, @4 +@4: + st.w 0x5_w, %a ; @7 + jmp @6 +@6: + %v.1 = ld.w %a ; @8 + st.w 0x4_w, %a ; @9 + %1 = add.w %v.1, 0x4_w ; @13 + ret %1 +} diff --git a/src/test/test_opt.ml b/src/test/test_opt.ml index c980228e..3f911c0c 100644 --- a/src/test/test_opt.ml +++ b/src/test/test_opt.ml @@ -321,6 +321,8 @@ let opt_suite = "Test optimizations" >::: [ "Escape 1" >:: test "esc1"; "Slot coalesce 1 (no other opts)" >:: test ~f:coalesce_only "coalesce1"; "Slot coalesce 1 (full opts)" >:: test "coalesce1a"; + "Bad load 1" >:: test "badload1"; + "Bad load 2" >:: test "badload2"; ] let abi_suite = "Test ABI lowering" >::: [ From 256935a525751970474e360d1ceed090fd5eea35 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sat, 22 Nov 2025 21:22:52 -0500 Subject: [PATCH 38/62] Some performance improvements to coalesce partitioning We were doing lots of repeated lookups in maps/tables, so it makes sense to just have a data structure for the candidates themselves. The ordering of which slot gets picked as the canonical element is changed, but still deterministic. --- .../coalesce_slots/coalesce_slots_impl.ml | 142 +++++++++--------- src/test/data/opt/coalesce1.vir.opt | 10 +- src/test/data/opt/sumphi.vir.opt.sysv | 12 +- src/test/data/opt/unref.vir.opt.sysv | 18 +-- 4 files changed, 94 insertions(+), 88 deletions(-) diff --git a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml index e8d1f3e1..f483aa5a 100644 --- a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml +++ b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml @@ -67,118 +67,123 @@ module Range = struct end) end -let slot_sa slots x = - let sx = Map.find_exn slots x in - Slot.(size sx, align sx) +type candidate = { + var : Var.t; + size : int; + align : int; + range : range; + mutable adj : Var.Set.t; + mutable assigned : bool; +} + +let create_candidate slots rs v = + let slot = Map.find_exn slots v in { + var = v; + size = Slot.size slot; + align = Slot.align slot; + range = Map.find_exn rs v; + adj = Var.Set.empty; + assigned = false; + } -let compat_size_align slots x y = - let sx, ax = slot_sa slots x in - let sy, ay = slot_sa slots y in +let compat_size_align x y = (* The smaller slot must not have a higher alignment. *) - not ((sx < sy && ax > ay) || (sy < sx && ay > ax)) + not ((x.size < y.size && x.align > y.align) || + (y.size < x.size && y.align > x.align)) (* Find compatible slots. Most importantly, their live ranges must not interfere. *) -let compat_range slots rs x y = - compat_size_align slots x y && - let rx = Map.find_exn rs x in - let ry = Map.find_exn rs y in - let a : Allen.t = Range.Algebra.relate rx ry in +let compat_range x y = + compat_size_align x y && + let a : Allen.t = Range.Algebra.relate x.range y.range in Logs.debug (fun m -> - m "%s: %a, %a: %a%!" __FUNCTION__ Var.pp x Var.pp y Allen.pp a); + m "%s: %a, %a: %a%!" __FUNCTION__ + Var.pp x.var Var.pp y.var Allen.pp a); match a with | Before | After -> true | _ -> false -let range_priority rs x y = +let range_priority x y = (* Prefer shorter live ranges. *) - let rx = Map.find_exn rs x in - let ry = Map.find_exn rs y in - let c = Int.compare (Range.size rx) (Range.size ry) in + let c = Int.compare (Range.size x.range) (Range.size y.range) in if c = 0 then (* Break ties by comparing on the var. This is to give a bit more determinism to the algorithm. *) - Var.compare x y + Var.compare x.var y.var else c -let size_priority slots x y = +let size_priority x y = (* Assuming that the sizes and alignments are compatible, just pick the biggest one. *) - let sx, ax = slot_sa slots x in - let sy, ay = slot_sa slots y in - match Int.compare sx sy with - | 0 -> Int.compare ax ay + match Int.compare x.size y.size with + | 0 -> Int.compare x.align y.align | c -> c -let candidates rs = +let candidates slots rs = let vs = Vec.create ~capacity:(Map.length rs) () in Map.to_sequence rs |> (* Do not consider escapees. This would mess up our heuristics for building the groups. *) Seq.filter ~f:(not @. Range.is_bad @. snd) |> - Seq.map ~f:fst |> Seq.iter ~f:(Vec.push vs); + Seq.map ~f:(create_candidate slots rs @. fst) |> + Seq.iter ~f:(Vec.push vs); vs -let is_adjacent slots rs x y = - Var.(x <> y) && compat_range slots rs x y +let is_subset g y = List.for_all g ~f:(fun x -> Set.mem y.adj x.var) (* Greedy partitioning algorithm. *) let partition slots rs = - let vs = candidates rs in + let vs = candidates slots rs in match Vec.length vs with | 0 -> [] - | 1 -> [Var.Set.singleton @@ Vec.front_exn vs] + | 1 -> [[Vec.front_exn vs]] | len -> assert (len > 1); let gs = ref [] in - let adj = Var.Table.create ~size:len () in - let assigned = Var.Hash_set.create ~size:len () in - (* Compute the adjacency table. *) - Vec.iter vs ~f:(fun x -> - Vec.to_sequence_mutable vs |> - Seq.filter ~f:(is_adjacent slots rs x) |> - Var.Set.of_sequence |> function - | s when Set.is_empty s -> () - | s -> Hashtbl.set adj ~key:x ~data:s); + (* Compute the adjacency sets. *) + for i = 0 to len - 1 do + let x = Vec.unsafe_get vs i in + for j = 0 to len - 1 do + if i <> j then + let y = Vec.unsafe_get vs j in + if compat_range x y then + x.adj <- Set.add x.adj y.var + done + done; (* Use an ascending order. *) - Vec.sort vs ~compare:(fun x y -> range_priority rs y x); + Vec.sort vs ~compare:(fun x y -> range_priority y x); while not @@ Vec.is_empty vs do let x = Vec.pop_exn vs in - Hash_set.strict_add assigned x |> - Or_error.iter ~f:(fun () -> - Logs.debug (fun m -> - m "%s: processing %a%!" - __FUNCTION__ Var.pp x); - let g = Vec.fold_right vs - ~init:(Var.Set.singleton x) - ~f:(fun y g -> - (* Ensure that all groups are disjoint. *) - if Hash_set.mem assigned y then g - else match Hashtbl.find adj y with - | Some a when Set.is_subset g ~of_:a -> - (* Freeze `y` to this group. *) - Hash_set.add assigned y; - Set.add g y - | Some _ | None -> g) in - gs := g :: !gs) + if not x.assigned then + let () = x.assigned <- true in + Logs.debug (fun m -> + m "%s: processing %a%!" + __FUNCTION__ Var.pp x.var); + let g = ref [x] in + for i = Vec.length vs - 1 downto 0 do + let y = Vec.unsafe_get vs i in + if not y.assigned && is_subset !g y then + let () = y.assigned <- true in + g := y :: !g + done; + gs := !g :: !gs done; !gs (* invariant: a group is never empty *) -let canon_elt slots g = - Set.to_sequence g |> - Seq.max_elt ~compare:(size_priority slots) |> - Option.value_exn +let canon_elt g = List.max_elt g ~compare:size_priority |> Option.value_exn let make_subst slots p = - List.fold p ~init:Var.Map.empty ~f:(fun init g -> - if Set.length g <= 1 then init else - let canon = canon_elt slots g in - Set.to_sequence g |> - Seq.filter ~f:(not @. Var.equal canon) |> - Seq.fold ~init ~f:(fun acc x -> - Map.set acc ~key:x ~data:(`var canon))) + List.fold p ~init:Var.Map.empty + ~f:(fun init -> function + | [] | [_] -> init + | g -> + let canon = canon_elt g in + let data = `var canon.var in + List.fold g ~init ~f:(fun acc x -> + if Var.(x.var = canon.var) then acc + else Map.set acc ~key:x.var ~data)) type t = { subst : Subst_mapper.t; (* Map from coalesced to canonical slots *) @@ -306,7 +311,8 @@ module Make(M : Scalars.L) = struct Logs.debug (fun m -> List.iter p ~f:(fun g -> m "%s: group: %s%!" __FUNCTION__ - (Set.to_list g |> List.to_string ~f:Var.to_string))); + (List.to_string g ~f:(fun x -> + Var.to_string x.var)))); Logs.debug (fun m -> if not @@ Lset.is_empty deads then m "%s: dead stores: %a%!" diff --git a/src/test/data/opt/coalesce1.vir.opt b/src/test/data/opt/coalesce1.vir.opt index b5646e98..7bc292e4 100644 --- a/src/test/data/opt/coalesce1.vir.opt +++ b/src/test/data/opt/coalesce1.vir.opt @@ -1,13 +1,13 @@ module coalesce1 export function w $f(w %x, w %y, w %z) { - %a = slot 8, align 8 %b = slot 8, align 8 + %c = slot 8, align 8 @2: - st.w %x, %a ; @4 + st.w %x, %c ; @4 jmp @3 @3: - %u1.1 = ld.w %a ; @6 + %u1.1 = ld.w %c ; @6 jmp @5 @5: jmp @7 @@ -18,10 +18,10 @@ export function w $f(w %x, w %y, w %z) { %u2.1 = ld.w %b ; @11 jmp @10 @10: - st.w %z, %a ; @13 + st.w %z, %c ; @13 jmp @12 @12: - %u3.1 = ld.w %a ; @15 + %u3.1 = ld.w %c ; @15 jmp @14 @14: %u4.1 = ld.w %b ; @17 diff --git a/src/test/data/opt/sumphi.vir.opt.sysv b/src/test/data/opt/sumphi.vir.opt.sysv index 36757126..2e38fb39 100644 --- a/src/test/data/opt/sumphi.vir.opt.sysv +++ b/src/test/data/opt/sumphi.vir.opt.sysv @@ -1,11 +1,11 @@ module sumphi function $sum(l %s/rdi) { - %3 = slot 8, align 8 + %p = slot 8, align 8 @2: - st.l %s, %3 ; @22 - %a.1 = ld.w %3 ; @4 - %0 = add.l %3, 0x4_l ; @19 + st.l %s, %p ; @22 + %a.1 = ld.w %p ; @4 + %0 = add.l %p, 0x4_l ; @19 %b.1 = ld.w %0 ; @6 %1 = add.w %a.1, %b.1 ; @20 ret rax/%1 @@ -19,7 +19,7 @@ export function $sumphi(l %a/rdi, l %b/rsi, w %x/rdx) { jmp @14(%a) @10: jmp @14(%b) -@14(%5.3): - %s.1/w/rax = call $sum(%5.3/rdi) ; @18 +@14(%rb.3): + %s.1/w/rax = call $sum(%rb.3/rdi) ; @18 ret rax/%s.1 } diff --git a/src/test/data/opt/unref.vir.opt.sysv b/src/test/data/opt/unref.vir.opt.sysv index 8ef5e49d..3e6cf750 100644 --- a/src/test/data/opt/unref.vir.opt.sysv +++ b/src/test/data/opt/unref.vir.opt.sysv @@ -1,11 +1,11 @@ module unref function $sum(l %s/rdi) { - %3 = slot 8, align 8 + %p = slot 8, align 8 @2: - st.l %s, %3 ; @22 - %a.1 = ld.w %3 ; @4 - %0 = add.l %3, 0x4_l ; @19 + st.l %s, %p ; @22 + %a.1 = ld.w %p ; @4 + %0 = add.l %p, 0x4_l ; @19 %b.1 = ld.w %0 ; @6 %1 = add.w %a.1, %b.1 ; @20 ret rax/%1 @@ -19,13 +19,13 @@ export function $sump(l %p/rdi) { } export function $mkt(w %a/rdi, w %b/rsi) { - %8 = slot 8, align 8 + %r = slot 8, align 8 @11: - st.w %a, %8 ; @12 - %2 = add.l %8, 0x4_l ; @21 + st.w %a, %r ; @12 + %2 = add.l %r, 0x4_l ; @21 st.w %b, %2 ; @14 - %9 = ld.l %8 ; @28 - st.l %9, %8 ; @29 + %9 = ld.l %r ; @28 + st.l %9, %r ; @29 ret rax/%9 } From 66f175159b6b2fed452494182a5535967746ab1f Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sat, 22 Nov 2025 23:35:59 -0500 Subject: [PATCH 39/62] Some cleanups in the amd64 code We shouldn't be emitting `endbr64` yet, without a generic way of passing metadata about all control-flow targets that require it. --- src/lib/machine/x86/x86_amd64_common.ml | 502 +++++++++++----------- src/lib/machine/x86/x86_amd64_emit.ml | 49 ++- src/lib/machine/x86/x86_amd64_regalloc.ml | 408 +++++++++--------- 3 files changed, 479 insertions(+), 480 deletions(-) diff --git a/src/lib/machine/x86/x86_amd64_common.ml b/src/lib/machine/x86/x86_amd64_common.ml index 44aca971..df61f012 100644 --- a/src/lib/machine/x86/x86_amd64_common.ml +++ b/src/lib/machine/x86/x86_amd64_common.ml @@ -612,109 +612,109 @@ module Insn = struct Regvar.Set.of_list @@ List.map regs ~f:Regvar.reg - (* Registers read by an instruction. *) - let reads = function - | Two (op, a, b) -> - begin match op with - | ADD - | ADDSD - | ADDSS - | AND - | CMP - | DIVSD - | DIVSS - | IMUL2 - | MULSD - | MULSS - | OR - | ROL - | ROR - | SAR - | SHL - | SHR - | SUB - | SUBSD - | SUBSS - | TEST_ - | UCOMISD - | UCOMISS - -> rset [a; b] - | CMOVcc _ - (* We introduce a dependency on `a` so that any previous - writes to it will be preserved. - - This is morally equivalent to: - - `x := cc ? b : a` - *) - -> - Set.union (rset' [`rflags]) - (Set.union (rset_reg [a]) (rset [b])) - | BSF - | BSR - | CVTSD2SI - | CVTTSD2SI - | CVTSD2SS - | CVTSI2SD - | CVTSI2SS - | CVTSS2SD - | CVTSS2SI - | CVTTSS2SI - | LEA - | MOVSX - | MOVSXD - | MOVZX - | POPCNT - -> rset [b] - | MOV - | MOV_ - | MOVD - | MOVDQA - | MOVQ - | MOVSD - | MOVSS - -> Set.union (rset_mem [a]) (rset [b]) - | XOR - | XORPD - | XORPS -> - begin match a, b with - | Oreg (a, _), Oreg (b, _) when Regvar.(a = b) - (* Special case for XORing with self: this isn't really a use - of the register, since we're just assigning 0. *) - -> Regvar.Set.empty - | _ -> rset [a; b] - end + let binop_reads op a b = match op with + | ADD + | ADDSD + | ADDSS + | AND + | CMP + | DIVSD + | DIVSS + | IMUL2 + | MULSD + | MULSS + | OR + | ROL + | ROR + | SAR + | SHL + | SHR + | SUB + | SUBSD + | SUBSS + | TEST_ + | UCOMISD + | UCOMISS + -> rset [a; b] + | CMOVcc _ + (* We introduce a dependency on `a` so that any previous + writes to it will be preserved. + + This is morally equivalent to: + + `x := cc ? b : a` + *) + -> + Set.union (rset' [`rflags]) + (Set.union (rset_reg [a]) (rset [b])) + | BSF + | BSR + | CVTSD2SI + | CVTTSD2SI + | CVTSD2SS + | CVTSI2SD + | CVTSI2SS + | CVTSS2SD + | CVTSS2SI + | CVTTSS2SI + | LEA + | MOVSX + | MOVSXD + | MOVZX + | POPCNT + -> rset [b] + | MOV + | MOV_ + | MOVD + | MOVDQA + | MOVQ + | MOVSD + | MOVSS + -> Set.union (rset_mem [a]) (rset [b]) + | XOR + | XORPD + | XORPS -> + begin match a, b with + | Oreg (a, _), Oreg (b, _) when Regvar.(a = b) + (* Special case for XORing with self: this isn't really a use + of the register, since we're just assigning 0. *) + -> Regvar.Set.empty + | _ -> rset [a; b] end - | One (op, a) -> - begin match op with - | CALL args -> - Set.union (rset' [`rsp]) @@ - Set.union (rset_mem [a]) @@ - Regvar.Set.of_list args - | DEC - | INC - | NEG - | NOT - -> rset [a] - | DIV - | IDIV -> - begin match a with - | Oreg (_, `i8) - -> Set.union (rset' [`rax]) (rset [a]) - | _ - -> Set.union (rset' [`rax; `rdx]) (rset [a]) - end - | IMUL1 - | MUL + + let unop_reads op a = match op with + | CALL args -> + Set.union (rset' [`rsp]) @@ + Set.union (rset_mem [a]) @@ + Regvar.Set.of_list args + | DEC + | INC + | NEG + | NOT + -> rset [a] + | DIV + | IDIV -> + begin match a with + | Oreg (_, `i8) -> Set.union (rset' [`rax]) (rset [a]) - | SETcc _ - (* SETcc will "read" the destination in the sense that - only the lower 8 bits are changed. *) - -> Set.union (rset' [`rflags]) (rset [a]) - | POP - | PUSH - -> Set.union (rset' [`rsp]) (rset_mem [a]) + | _ + -> Set.union (rset' [`rax; `rdx]) (rset [a]) end + | IMUL1 + | MUL + -> Set.union (rset' [`rax]) (rset [a]) + | SETcc _ + (* SETcc will "read" the destination in the sense that + only the lower 8 bits are changed. *) + -> Set.union (rset' [`rflags]) (rset [a]) + | POP + | PUSH + -> Set.union (rset' [`rsp]) (rset_mem [a]) + + (* Registers read by an instruction. *) + let reads = function + | Two (op, a, b) -> binop_reads op a b + | One (op, a) -> unop_reads op a | JMP (Jind a) -> rset [a] | CDQ | CQO @@ -736,88 +736,88 @@ module Insn = struct | FP64 _ -> Regvar.Set.empty + let binop_writes op a _b = match op with + | ADD + | AND + | BSF + | BSR + | IMUL2 + | OR + | POPCNT + | ROL + | ROR + | SAR + | SHL + | SHR + | SUB + | XOR + -> Set.union (rset' [`rflags]) (rset_reg [a]) + | ADDSD + | ADDSS + | CMOVcc _ + | CVTSD2SI + | CVTTSD2SI + | CVTSD2SS + | CVTSI2SD + | CVTSI2SS + | CVTSS2SD + | CVTSS2SI + | CVTTSS2SI + | DIVSD + | DIVSS + | LEA + | MOV + | MOV_ + | MOVD + | MOVDQA + | MOVQ + | MOVSD + | MOVSS + | MOVSX + | MOVSXD + | MOVZX + | MULSD + | MULSS + | SUBSD + | SUBSS + | XORPD + | XORPS + -> rset_reg [a] + | CMP + | TEST_ + | UCOMISD + | UCOMISS + -> rset' [`rflags] + + let unop_writes call op a = match op with + | DEC + | INC + | NEG + | NOT + -> Set.union (rset' [`rflags]) (rset_reg [a]) + | SETcc _ + -> rset_reg [a] + | CALL _ + -> Set.union (rset' [`rsp; `rflags]) call + | PUSH + -> rset' [`rsp] + | DIV + | IDIV + | IMUL1 + | MUL -> + begin match a with + | Oreg (_, `i8) + -> rset' [`rax; `rflags] + | _ + -> rset' [`rax; `rdx; `rflags] + end + | POP + -> Set.union (rset' [`rsp]) (rset_reg [a]) + (* Registers written to by an instruction. *) let writes call = function - | Two (op, a, _) -> - begin match op with - | ADD - | AND - | BSF - | BSR - | IMUL2 - | OR - | POPCNT - | ROL - | ROR - | SAR - | SHL - | SHR - | SUB - | XOR - -> Set.union (rset' [`rflags]) (rset_reg [a]) - | ADDSD - | ADDSS - | CMOVcc _ - | CVTSD2SI - | CVTTSD2SI - | CVTSD2SS - | CVTSI2SD - | CVTSI2SS - | CVTSS2SD - | CVTSS2SI - | CVTTSS2SI - | DIVSD - | DIVSS - | LEA - | MOV - | MOV_ - | MOVD - | MOVDQA - | MOVQ - | MOVSD - | MOVSS - | MOVSX - | MOVSXD - | MOVZX - | MULSD - | MULSS - | SUBSD - | SUBSS - | XORPD - | XORPS - -> rset_reg [a] - | CMP - | TEST_ - | UCOMISD - | UCOMISS - -> rset' [`rflags] - end - | One (op, a) -> - begin match op with - | DEC - | INC - | NEG - | NOT - -> Set.union (rset' [`rflags]) (rset_reg [a]) - | SETcc _ - -> rset_reg [a] - | CALL _ - -> Set.union (rset' [`rsp; `rflags]) call - | PUSH - -> rset' [`rsp] - | DIV - | IDIV - | IMUL1 - | MUL -> - begin match a with - | Oreg (_, `i8) - -> rset' [`rax; `rflags] - | _ - -> rset' [`rax; `rdx; `rflags] - end - | POP - -> Set.union (rset' [`rsp]) (rset_reg [a]) - end + | Two (op, a, b) -> binop_writes op a b + | One (op, a) -> unop_writes call op a | IMUL3 (a, _, _) -> Set.union (rset' [`rflags]) (rset_reg [a]) | RET @@ -845,80 +845,80 @@ module Insn = struct | Omem _ -> true | _ -> false + let binop_writes_to_memory op a _b = match op with + | ADD + | ADDSD + | ADDSS + | AND + | DIVSD + | DIVSS + | MOV + | MOV_ + | MOVD + | MOVDQA + | MOVQ + | MOVSD + | MOVSS + | MULSD + | MULSS + | OR + | ROL + | ROR + | SAR + | SHL + | SHR + | SUB + | SUBSD + | SUBSS + | XOR + -> is_mem a + | BSF (* illegal *) + | BSR (* illegal *) + | CMOVcc _ (* illegal *) + | CMP + | CVTSD2SI (* illegal *) + | CVTTSD2SI (* illegal *) + | CVTSD2SS (* illegal *) + | CVTSI2SD (* illegal *) + | CVTSI2SS (* illegal *) + | CVTSS2SI (* illegal *) + | CVTTSS2SI (* illegal *) + | CVTSS2SD (* illegal *) + | IMUL2 + | LEA (* illegal *) + | MOVSX (* illegal *) + | MOVZX (* illegal *) + | MOVSXD (* illegal *) + | POPCNT (* illegal *) + | TEST_ + | UCOMISD + | UCOMISS + | XORPD (* illegal *) + | XORPS (* illegal *) + -> false + + let unop_writes_to_memory op a = match op with + | DEC + | INC + | NEG + | NOT + | POP + | SETcc _ + -> is_mem a + | CALL _ (* writes to stack *) + | PUSH (* writes to stack *) + -> true + | DIV + | IDIV + | IMUL1 + | MUL + -> false + (* "illegal" here means that it is illegal to have a memory operand in the destination. *) let writes_to_memory = function - | Two (op, a, _) -> - begin match op with - | ADD - | ADDSD - | ADDSS - | AND - | DIVSD - | DIVSS - | MOV - | MOV_ - | MOVD - | MOVDQA - | MOVQ - | MOVSD - | MOVSS - | MULSD - | MULSS - | OR - | ROL - | ROR - | SAR - | SHL - | SHR - | SUB - | SUBSD - | SUBSS - | XOR - -> is_mem a - | BSF (* illegal *) - | BSR (* illegal *) - | CMOVcc _ (* illegal *) - | CMP - | CVTSD2SI (* illegal *) - | CVTTSD2SI (* illegal *) - | CVTSD2SS (* illegal *) - | CVTSI2SD (* illegal *) - | CVTSI2SS (* illegal *) - | CVTSS2SI (* illegal *) - | CVTTSS2SI (* illegal *) - | CVTSS2SD (* illegal *) - | IMUL2 - | LEA (* illegal *) - | MOVSX (* illegal *) - | MOVZX (* illegal *) - | MOVSXD (* illegal *) - | POPCNT (* illegal *) - | TEST_ - | UCOMISD - | UCOMISS - | XORPD (* illegal *) - | XORPS (* illegal *) - -> false - end - | One (op, a) -> - begin match op with - | DEC - | INC - | NEG - | NOT - | POP - | SETcc _ - -> is_mem a - | CALL _ (* writes to stack *) - | PUSH (* writes to stack *) - -> true - | DIV - | IDIV - | IMUL1 - | MUL - -> false - end + | Two (op, a, b) -> binop_writes_to_memory op a b + | One (op, a) -> unop_writes_to_memory op a | CDQ | CQO | CWD diff --git a/src/lib/machine/x86/x86_amd64_emit.ml b/src/lib/machine/x86/x86_amd64_emit.ml index 6182031f..e5fa631c 100644 --- a/src/lib/machine/x86/x86_amd64_emit.ml +++ b/src/lib/machine/x86/x86_amd64_emit.ml @@ -66,36 +66,35 @@ let emit_func ppf (name, lnk) = Format.fprintf ppf ".section %s\n" section; if Linkage.export lnk then global ppf name; Format.fprintf ppf ".p2align 4\n"; - Format.fprintf ppf "%s:\n" name; - Format.fprintf ppf " endbr64\n" + Format.fprintf ppf "%s:\n" name let emit_blk ppf (l : Label.t) = Format.fprintf ppf "%a:\n" label l +let emit_reg t ppf r = match r, t with + | (#Reg.sse as r), (#Type.fp | `v128) -> + Format.fprintf ppf "%a" Reg.pp_sse r + | (#Reg.gpr as r), `i8 -> + Format.fprintf ppf "%a" Reg.pp_gpr8 r + | (#Reg.gpr as r), `i16 -> + Format.fprintf ppf "%a" Reg.pp_gpr16 r + | (#Reg.gpr as r), `i32 -> + Format.fprintf ppf "%a" Reg.pp_gpr32 r + | (#Reg.gpr as r), `i64 -> + Format.fprintf ppf "%a" Reg.pp_gpr r + | `rip, `i64 -> + Format.fprintf ppf "rip" + | _ -> + invalid_argf "invalid register/type combo: %s/%s" + (Format.asprintf "%a" Reg.pp r) + (match t with + | `v128 -> "v128" + | #Type.basic as t -> + Format.asprintf "%a" Type.pp_basic t) + () + let emit_regvar t ppf rv = match Regvar.which rv with - | First r -> - begin match r, t with - | (#Reg.sse as r), (#Type.fp | `v128) -> - Format.fprintf ppf "%a" Reg.pp_sse r - | (#Reg.gpr as r), `i8 -> - Format.fprintf ppf "%a" Reg.pp_gpr8 r - | (#Reg.gpr as r), `i16 -> - Format.fprintf ppf "%a" Reg.pp_gpr16 r - | (#Reg.gpr as r), `i32 -> - Format.fprintf ppf "%a" Reg.pp_gpr32 r - | (#Reg.gpr as r), `i64 -> - Format.fprintf ppf "%a" Reg.pp_gpr r - | `rip, `i64 -> - Format.fprintf ppf "rip" - | _ -> - invalid_argf "invalid register/type combo: %s/%s" - (Format.asprintf "%a" Reg.pp r) - (match t with - | `v128 -> "v128" - | #Type.basic as t -> - Format.asprintf "%a" Type.pp_basic t) - () - end + | First r -> emit_reg t ppf r | Second (x, _) -> invalid_argf "tried to emit a variable %s" (Format.asprintf "%a" Var.pp x) () diff --git a/src/lib/machine/x86/x86_amd64_regalloc.ml b/src/lib/machine/x86/x86_amd64_regalloc.ml index d0041e70..9c4c75d7 100644 --- a/src/lib/machine/x86/x86_amd64_regalloc.ml +++ b/src/lib/machine/x86/x86_amd64_regalloc.ml @@ -100,46 +100,46 @@ let substitute_operand f = function | Osym _ as s -> s | Oah -> Oah +(* Attempt some peephole optimizations after the substitution. + XXX: what if the FLAGS register is live? *) +let substitute_binop' o a b op = match o, op a, op b with + | LEA, Oreg (x, ty), Omem (Abd (y, d), _) when Regvar.(x = y) -> + (* lea x, [x+d] => add x, d or sub x, -d *) + if Int32.(d < 0l) then + let d = Int32.neg d in + let d' = Int64.(of_int32 d land 0xFFFFFFFFL) in + if Int64.(d' = 1L) then + One (DEC, Oreg (x, ty)) + else + Two (SUB, Oreg (x, ty), Oimm (d', immty ty)) + else + let d' = Int64.(of_int32 d land 0xFFFFFFFFL) in + if Int64.(d' = 1L) then + One (INC, Oreg (x, ty)) + else + Two (ADD, Oreg (x, ty), Oimm (d', immty ty)) + | LEA, Oreg (x, ty), Omem (Abis (y, z, S1), _) when Regvar.(x = y) -> + (* lea x, [x+y*1] => add x, y *) + Two (ADD, Oreg (x, ty), Oreg (z, ty)) + | LEA, Oreg (x, ty), Omem (Abis (y, z, S1), _) when Regvar.(x = z) -> + (* lea x, [y+x*1] => add x, y *) + Two (ADD, Oreg (x, ty), Oreg (y, ty)) + | LEA, Oreg (x, ty), Omem (Aisd (y, S2, 0l), _) when Regvar.(x = y) -> + (* lea x, [x*2] => shl x, 1 *) + Two (SHL, Oreg (x, ty), Oimm (1L, `i8)) + | LEA, Oreg (x, ty), Omem (Aisd (y, S4, 0l), _) when Regvar.(x = y) -> + (* lea x, [x*4] => shl x, 2 *) + Two (SHL, Oreg (x, ty), Oimm (2L, `i8)) + | LEA, Oreg (x, ty), Omem (Aisd (y, S8, 0l), _) when Regvar.(x = y) -> + (* lea x, [x*8] => shl x, 3 *) + Two (SHL, Oreg (x, ty), Oimm (3L, `i8)) + | o, a, b -> + (* Default case. *) + Two (o, a, b) + let substitute' i op = match i with | One (o, a) -> One (o, op a) - | Two (o, a, b) -> - (* Attempt some peephole optimizations after the substitution. - XXX: what if the FLAGS register is live? *) - begin match o, op a, op b with - | LEA, Oreg (x, ty), Omem (Abd (y, d), _) when Regvar.(x = y) -> - (* lea x, [x+d] => add x, d or sub x, -d *) - if Int32.(d < 0l) then - let d = Int32.neg d in - let d' = Int64.(of_int32 d land 0xFFFFFFFFL) in - if Int64.(d' = 1L) then - One (DEC, Oreg (x, ty)) - else - Two (SUB, Oreg (x, ty), Oimm (d', immty ty)) - else - let d' = Int64.(of_int32 d land 0xFFFFFFFFL) in - if Int64.(d' = 1L) then - One (INC, Oreg (x, ty)) - else - Two (ADD, Oreg (x, ty), Oimm (d', immty ty)) - | LEA, Oreg (x, ty), Omem (Abis (y, z, S1), _) when Regvar.(x = y) -> - (* lea x, [x+y*1] => add x, y *) - Two (ADD, Oreg (x, ty), Oreg (z, ty)) - | LEA, Oreg (x, ty), Omem (Abis (y, z, S1), _) when Regvar.(x = z) -> - (* lea x, [y+x*1] => add x, y *) - Two (ADD, Oreg (x, ty), Oreg (y, ty)) - | LEA, Oreg (x, ty), Omem (Aisd (y, S2, 0l), _) when Regvar.(x = y) -> - (* lea x, [x*2] => shl x, 1 *) - Two (SHL, Oreg (x, ty), Oimm (1L, `i8)) - | LEA, Oreg (x, ty), Omem (Aisd (y, S4, 0l), _) when Regvar.(x = y) -> - (* lea x, [x*4] => shl x, 2 *) - Two (SHL, Oreg (x, ty), Oimm (2L, `i8)) - | LEA, Oreg (x, ty), Omem (Aisd (y, S8, 0l), _) when Regvar.(x = y) -> - (* lea x, [x*8] => shl x, 3 *) - Two (SHL, Oreg (x, ty), Oimm (3L, `i8)) - | o, a, b -> - (* Default case. *) - Two (o, a, b) - end + | Two (o, a, b) -> substitute_binop' o a b op | IMUL3 (a, b, c) -> IMUL3 (op a, op b, c) | JMP (Jind a) -> JMP (Jind (op a)) | CDQ @@ -203,90 +203,90 @@ module Typed_writes = struct Regvar.Map.of_alist_reduce ~f:reduce @@ List.map l ~f:(fun (r, t) -> Regvar.reg r, t) - (* Registers written to by an instruction. *) - let writes call = function - | Two (o, a, _) -> - begin match o with - | ADD - | ADDSD - | ADDSS - | AND - | BSF - | BSR - | CMOVcc _ - | CVTSD2SI - | CVTTSD2SI - | CVTSD2SS - | CVTSI2SD - | CVTSI2SS - | CVTSS2SD - | CVTSS2SI - | CVTTSS2SI - | DIVSD - | DIVSS - | IMUL2 - | LEA - | MOV - | MOV_ - | MOVD - | MOVDQA - | MOVQ - | MOVSD - | MOVSS - | MOVSX - | MOVSXD - | MOVZX - | MULSD - | MULSS - | OR - | POPCNT - | ROL - | ROR - | SAR - | SHL - | SHR - | SUB - | SUBSD - | SUBSS - | XOR - | XORPD - | XORPS - -> rmap_reg [a] - | CMP - | TEST_ - | UCOMISD - | UCOMISS - -> Regvar.Map.empty - end - | One (o, a) -> - begin match o with - | CALL _ - -> call - | DEC - | INC - | NEG - | NOT - | SETcc _ - | POP - -> rmap_reg [a] - | PUSH + let binop_writes o a _b = match o with + | ADD + | ADDSD + | ADDSS + | AND + | BSF + | BSR + | CMOVcc _ + | CVTSD2SI + | CVTTSD2SI + | CVTSD2SS + | CVTSI2SD + | CVTSI2SS + | CVTSS2SD + | CVTSS2SI + | CVTTSS2SI + | DIVSD + | DIVSS + | IMUL2 + | LEA + | MOV + | MOV_ + | MOVD + | MOVDQA + | MOVQ + | MOVSD + | MOVSS + | MOVSX + | MOVSXD + | MOVZX + | MULSD + | MULSS + | OR + | POPCNT + | ROL + | ROR + | SAR + | SHL + | SHR + | SUB + | SUBSD + | SUBSS + | XOR + | XORPD + | XORPS + -> rmap_reg [a] + | CMP + | TEST_ + | UCOMISD + | UCOMISS + -> Regvar.Map.empty + + let unop_writes call o a = match o with + | CALL _ + -> call + | DEC + | INC + | NEG + | NOT + | SETcc _ + | POP + -> rmap_reg [a] + | PUSH + -> Regvar.Map.empty + | DIV + | IDIV + | IMUL1 + | MUL -> + begin match a with + | Oreg (_, `i8) + -> rmap' [`rax, `i8] + | Oreg (_, t) + -> rmap' [`rax, wty t; `rdx, wty t] + | Omem (_, t) + -> rmap' [`rax, wty t; `rdx, wty t] + | _ + (* invalid forms *) -> Regvar.Map.empty - | DIV - | IDIV - | IMUL1 - | MUL -> - begin match a with - | Oreg (_, `i8) - -> rmap' [`rax, `i8] - | Oreg (_, t) - -> rmap' [`rax, wty t; `rdx, wty t] - | Omem (_, t) - -> rmap' [`rax, wty t; `rdx, wty t] - | _ - (* invalid forms *) - -> Regvar.Map.empty - end end + + (* Registers written to by an instruction. *) + let writes call = function + | Two (o, a, b) -> binop_writes o a b + | One (o, a) -> unop_writes call o a | IMUL3 (a, _, _) -> rmap_reg [a] | Jcc _ @@ -320,65 +320,65 @@ module Pre_assign_slots(C : Context_intf.S) = struct let off' = Int32.of_int_exn off in let d' = Int32.(d + off') in if (off > 0 && Int32.(d' < d)) || - (off < 0 && Int32.(d' > d)) then begin + (off < 0 && Int32.(d' > d)) then let+ x, is = freshen base off' in Second (x, is) - end else !!(First d') + else !!(First d') + + let assign_ab find base a b = match find b with + | Some 0 -> !!(Ab base, []) + | Some o -> !!(Abd (base, Int32.of_int_exn o), []) + | None -> !!(a, []) + + let assign_abd find base a b d = match find b with + | None -> !!(a, []) + | Some o -> add_disp base o d >>| function + | First d' -> Abd (base, d'), [] + | Second (b', bi) -> Abd (b', d), bi + + let assign_abis find base a b i s = match find b, find i with + | None, None -> !!(a, []) + | Some 0, None -> !!(Abis (base, i, s), []) + | Some o, None -> !!(Abisd (base, i, s, Int32.of_int_exn o), []) + | None, Some o -> + let+ i', ii = freshen base (Int32.of_int_exn o) in + Abis (b, i', s), ii + | Some ob, Some oi -> + let+ i', ii = freshen base (Int32.of_int_exn oi) in + Abisd (base, i', s, Int32.of_int_exn ob), ii + + let assign_aisd find base a i s d = match find i with + | None -> !!(a, []) + | Some o -> + let+ i', ii = freshen base (Int32.of_int_exn o) in + Aisd (i', s, d), ii let rec assign_amode find base a = match a with | Albl _ | Asym _ -> !!(a, []) - | Ab b -> - begin match find b with - | Some 0 -> !!(Ab base, []) - | Some o -> !!(Abd (base, Int32.of_int_exn o), []) - | None -> !!(a, []) - end - | Abd (b, d) -> - begin match find b with - | None -> !!(a, []) - | Some o -> add_disp base o d >>| function - | First d' -> Abd (base, d'), [] - | Second (b', bi) -> Abd (b', d), bi - end - | Abis (b, i, s) -> - begin match find b, find i with - | None, None -> !!(a, []) - | Some 0, None -> !!(Abis (base, i, s), []) - | Some o, None -> !!(Abisd (base, i, s, Int32.of_int_exn o), []) - | None, Some o -> - let+ i', ii = freshen base (Int32.of_int_exn o) in - Abis (b, i', s), ii - | Some ob, Some oi -> - let+ i', ii = freshen base (Int32.of_int_exn oi) in - Abisd (base, i', s, Int32.of_int_exn ob), ii - end + | Ab b -> assign_ab find base a b + | Abd (b, d) -> assign_abd find base a b d + | Abis (b, i, s) -> assign_abis find base a b i s | Aisd (i, S1, d) -> assign_amode find base @@ Abd (i, d) - | Aisd (i, s, d) -> - begin match find i with - | None -> !!(a, []) - | Some o -> - let+ i', ii = freshen base (Int32.of_int_exn o) in - Aisd (i', s, d), ii - end - | Abisd (b, i, s, d) -> - begin match find b, find i with - | None, None -> !!(a, []) - | None, Some _ when equal_scale s S1 -> - assign_amode find base @@ Abisd (i, b, S1, d) - | None, Some o -> - let+ i', ii = freshen base (Int32.of_int_exn o) in - Abisd (b, i', s, d), ii - | Some o, None -> - begin add_disp base o d >>| function - | First d' -> Abisd (base, i, s, d'), [] - | Second (b', bi) -> Abisd (b', i, s, d), bi - end - | Some ob, Some oi -> - let* i', ii = freshen base (Int32.of_int_exn oi) in - add_disp base ob d >>| function - | First d' -> Abisd (base, i', s, d'), ii - | Second (b', bi) -> Abisd (b', i', s, d), bi @ ii + | Aisd (i, s, d) -> assign_aisd find base a i s d + | Abisd (b, i, s, d) -> assign_abisd find base a b i s d + + and assign_abisd find base a b i s d = match find b, find i with + | None, None -> !!(a, []) + | None, Some _ when equal_scale s S1 -> + assign_amode find base @@ Abisd (i, b, S1, d) + | None, Some o -> + let+ i', ii = freshen base (Int32.of_int_exn o) in + Abisd (b, i', s, d), ii + | Some o, None -> + begin add_disp base o d >>| function + | First d' -> Abisd (base, i, s, d'), [] + | Second (b', bi) -> Abisd (b', i, s, d), bi end + | Some ob, Some oi -> + let* i', ii = freshen base (Int32.of_int_exn oi) in + add_disp base ob d >>| function + | First d' -> Abisd (base, i', s, d'), ii + | Second (b', bi) -> Abisd (b', i', s, d), bi @ ii let assign_operand find base op = match op with | Oreg (r, t) -> @@ -430,41 +430,41 @@ module Pre_assign_slots(C : Context_intf.S) = struct | FP64 _ -> !![i] end +(* NB: this makes assumptions based on the results of `Pre_assign_slots`. *) module Post_assign_slots = struct - (* NB: this makes assumptions based on the results of `Pre_assign_slots`. *) + (* All spills/reloads should be using this form. *) + let assign_ab find base a b = match find b with + | Some 0 -> Ab base + | Some o -> Abd (base, Int32.of_int_exn o) + | None -> a + + let assign_abd find base a b _d = match find b with + | None -> a + | Some _ -> assert false + + let assign_abis find base a b i _s = match find b, find i with + | None, None -> a + | Some _, None -> assert false + | None, Some _ -> assert false + | Some _, Some _ -> assert false + + let assign_aisd find base a i _s _d = match find i with + | None -> a + | Some _ -> assert false + + let assign_abisd find base a b i _s _d = match find b, find i with + | None, None -> a + | None, Some _ -> assert false + | Some _, Some _ -> assert false + | Some _, None -> assert false + let assign_amode find base a = match a with | Albl _ | Asym _ -> a - | Ab b -> - (* All spills/reloads should be using this form. *) - begin match find b with - | Some 0 -> Ab base - | Some o -> Abd (base, Int32.of_int_exn o) - | None -> a - end - | Abd (b, _d) -> - begin match find b with - | None -> a - | Some _ -> assert false - end - | Abis (b, i, _s) -> - begin match find b, find i with - | None, None -> a - | Some _, None -> assert false - | None, Some _ -> assert false - | Some _, Some _ -> assert false - end - | Aisd (i, _s, _d) -> - begin match find i with - | None -> a - | Some _ -> assert false - end - | Abisd (b, i, _s, _d) -> - begin match find b, find i with - | None, None -> a - | None, Some _ -> assert false - | Some _, Some _ -> assert false - | Some _, None -> assert false - end + | Ab b -> assign_ab find base a b + | Abd (b, d) -> assign_abd find base a b d + | Abis (b, i, s) -> assign_abis find base a b i s + | Aisd (i, s, d) -> assign_aisd find base a i s d + | Abisd (b, i, s, d) -> assign_abisd find base a b i s d let assign_operand find base op = match op with | Oreg (r, _) -> From 2bf771892dd963ee92d70f8b071c4908350b6d30 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 23 Nov 2025 00:24:04 -0500 Subject: [PATCH 40/62] Some cleanups in the isel code --- src/lib/isel/isel.ml | 2 +- src/lib/isel/isel_builder.ml | 86 +++++++++++++++++++++--------------- src/lib/isel/isel_common.ml | 14 +++++- src/lib/isel/isel_match.ml | 50 ++++++++++++++++----- 4 files changed, 103 insertions(+), 49 deletions(-) diff --git a/src/lib/isel/isel.ml b/src/lib/isel/isel.ml index 15b45f8c..93a169ec 100644 --- a/src/lib/isel/isel.ml +++ b/src/lib/isel/isel.ml @@ -34,12 +34,12 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct fn; node = Vec.create (); typs = Vec.create (); + id2r = Vec.create (); cfg; dom = Semi_nca.compute (module Cfg) cfg Label.pseudoentry; rpo = init_rpo cfg; blks = Func.map_of_blks fn; v2id = Var.Table.create (); - id2r = Id.Table.create (); insn = Label.Table.create (); extra = Label.Table.create (); frame = needs_stack_frame fn; diff --git a/src/lib/isel/isel_builder.ml b/src/lib/isel/isel_builder.ml index d56bf05b..4b7512ff 100644 --- a/src/lib/isel/isel_builder.ml +++ b/src/lib/isel/isel_builder.ml @@ -21,20 +21,14 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct "In Isel_builder.reg: invalid register %s in function $%s" r (Func.name t.fn) () - let var t x = match Hashtbl.find t.v2id x with + let var t x = match getvar t x with | Some id -> !!id | None -> C.failf "In Isel_builder.var: unbound variable %a in function $%s" Var.pp x (Func.name t.fn) () - let new_var t x ty = Hashtbl.find_or_add t.v2id x ~default:(fun () -> - let v = Rv.var (regcls ty) x in - let id = new_node ~ty t @@ Rv v in - Hashtbl.set t.v2id ~key:x ~data:id; - Hashtbl.set t.id2r ~key:id ~data:v; - id) - + let newvar = newvar ~f:Rv.var let word = (Target.word M.target :> ty) let typeof_operand t : Virtual.operand -> ty C.t = function @@ -122,7 +116,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct | src -> src in let emit ?i dst src = let+ ty, id = operand' t @@ rewrite src in - let n = N (Omove, [new_var t dst ty; id]) in + let n = N (Omove, [newvar t dst ty; id]) in ignore @@ new_node ~l t n; Option.iter i ~f:(fun i -> status.(i) <- `moved) in let rec move_one i = @@ -168,7 +162,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct moves to be inserted into. *) let* l', ld' = if br then let+ l' = C.Label.fresh in - Hashtbl.add_multi t.extra ~key:l ~data:l'; + addextra t l l'; l', l' else !!(l, ld) in let+ () = windmill t l' moves in @@ -189,17 +183,38 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct | #Virtual.global as g -> global t g | #Virtual.local as loc -> local ~br t l loc >>| snd + let eval_binop o a b = match a, b with + | `int (a, _), `int (b, _) -> + (Eval.binop_int o a b :> Virtual.const option) + | `float a, `float b -> + (Eval.binop_single o a b :> Virtual.const option) + | `double a, `double b -> + (Eval.binop_double o a b :> Virtual.const option) + | _ -> None + + let eval_unop o = function + | `int (a, ty) -> + (Eval.unop_int o a ty :> Virtual.const option) + | `float a -> + (Eval.unop_single o a :> Virtual.const option) + | `double a -> + (Eval.unop_double o a :> Virtual.const option) + | _ -> None + let binop t l x o a b = - let* a = operand t a in - let+ b = operand t b in - let n = N (Obinop o, [a; b]) in let ty = infer_ty_binop o in - let id = new_node ~ty t n in + let+ id = match eval_binop o a b with + | Some c -> !!(constant t c) + | None -> + let* a = operand t a in + let+ b = operand t b in + let n = N (Obinop o, [a; b]) in + new_node ~ty t n in let r = Rv.var (regcls ty) x in let rid = new_node ~ty t @@ Rv r in ignore @@ new_node ~l t @@ N (Omove, [rid; id]); - Hashtbl.set t.v2id ~key:x ~data:id; - Hashtbl.set t.id2r ~key:id ~data:r + setvar t x id; + setrv t id r let unop t l x o a = let* () = match o with @@ -208,19 +223,20 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct "In Isel_builder.unop: uitof is not supported by target %a" Target.pp M.target () | _ -> !!() in - let+ a = operand t a in let ty = infer_ty_unop o in - (* Copy propagation *) - let id = match o with - | `copy _ -> a - | _ -> - let n = N (Ounop o, [a]) in - new_node ~ty t n in + let+ id = match eval_unop o a with + | Some c -> !!(constant t c) + | None -> match o with + | `copy _ -> operand t a (* copy propagation *) + | _ -> + let+ a = operand t a in + let n = N (Ounop o, [a]) in + new_node ~ty t n in let r = Rv.var (regcls ty) x in let rid = new_node ~ty t @@ Rv r in ignore @@ new_node ~l t @@ N (Omove, [rid; id]); - Hashtbl.set t.v2id ~key:x ~data:id; - Hashtbl.set t.id2r ~key:id ~data:r + setvar t x id; + setrv t id r let sel t l x ty c y n = let* c = var t c in @@ -232,8 +248,8 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let r = Rv.var (regcls ty) x in let rid = new_node ~ty t @@ Rv r in ignore @@ new_node ~l t @@ N (Omove, [rid; id]); - Hashtbl.set t.v2id ~key:x ~data:id; - Hashtbl.set t.id2r ~key:id ~data:r + setvar t x id; + setrv t id r let call_args_stack_size t l f args = C.List.fold args ~init:0 ~f:(fun sz -> function @@ -282,7 +298,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let+ r = reg t r in let ty = (ty :> ty) in let rid = new_node ~ty t @@ Rv (Rv.reg r) in - let xid = new_var t x ty in + let xid = newvar t x ty in let n = N (Omove, [xid; rid]) in ignore @@ new_node ~l t n @@ -317,7 +333,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let ty' = (ty :> ty) in let n = N (Oload ty, [a]) in let lid = new_node ~ty:ty' t n in - let vid = new_var t x ty' in + let vid = newvar t x ty' in (* TODO: see if we can do a pessimistic alias analysis to forward the `Oload` node where this var appears, where possible. *) ignore @@ new_node ~l t @@ N (Omove, [vid; lid]) @@ -333,7 +349,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let+ r = reg t r in let ty = (ty :> ty) in let rid = new_node ~ty t @@ Rv (Rv.reg r) in - let xid = new_var t x ty in + let xid = newvar t x ty in let n = N (Omove, [xid; rid]) in ignore @@ new_node ~l t n @@ -356,7 +372,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let stkargs t l x = assert t.frame; let rid = new_node ~ty:word t @@ Rv (Rv.reg R.fp) in - let xid = new_var t x word in + let xid = newvar t x word in let w = Target.word M.target in let off = Bv.(int M.stack_args_offset mod modulus (Type.sizeof_imm_base w)) in let oid = new_node ~ty:word t @@ N (Oint (off, (w :> Type.imm)), []) in @@ -402,7 +418,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let sw t l ty i d tbl = let ty' = (ty :> ty) in let i = match i with - | `var x -> new_var t x ty' + | `var x -> newvar t x ty' | `sym (s, o) -> new_node ~ty:ty' t @@ N (Osym (s, o), []) in let* d, _ = local ~br:true t l d in let+ tbl = @@ -423,7 +439,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let+ r = reg t r in let ty = (ty :> ty) in let rid = new_node ~ty t @@ Rv (Rv.reg r) in - let xid = new_var t x ty in + let xid = newvar t x ty in ignore @@ new_node ~l t @@ N (Omove, [xid; rid]) let stkparam t l (x, o, ty) = @@ -431,7 +447,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let w = Target.word M.target in let wi = (w :> Type.imm) in let wb = (w :> Type.basic) in - let xid = new_var t x ty' in + let xid = newvar t x ty' in (* Use the frame pointer. It will make our lives much easier. *) let rid = new_node ~ty:word t @@ Rv (Rv.reg R.fp) in let o' = o + M.stack_args_offset in @@ -449,7 +465,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct at that point we will have more information on how to lay out the stack. *) let slot t _l s = - let _sid = new_var t (Slot.var s) word in + let _sid = newvar t (Slot.var s) word in () let step t l = match Label.Tree.find t.blks l with diff --git a/src/lib/isel/isel_common.ml b/src/lib/isel/isel_common.ml index 55db348d..654617dc 100644 --- a/src/lib/isel/isel_common.ml +++ b/src/lib/isel/isel_common.ml @@ -23,12 +23,12 @@ type 'r t = { fn : func; node : 'r node Vec.t; typs : ty Uopt.t Vec.t; + id2r : 'r Uopt.t Vec.t; cfg : Cfg.t; dom : Label.t Semi_nca.tree; rpo : Label.t -> int; blks : blk Label.Tree.t; v2id : Id.t Var.Table.t; - id2r : 'r Id.Table.t; insn : Id.t list Label.Table.t; extra : Label.t list Label.Table.t; frame : bool; @@ -41,12 +41,24 @@ let new_node ?l ?ty t n : Id.t = assert (id = Vec.length t.typs); Vec.push t.node n; Vec.push t.typs @@ Uopt.of_option ty; + Vec.push t.id2r Uopt.none; Option.iter l ~f:(fun key -> Hashtbl.add_multi t.insn ~key ~data:id); id let node t id = Vec.get_exn t.node id let typeof t id = Uopt.to_option @@ Vec.get_exn t.typs id +let setvar t x id = Hashtbl.set t.v2id ~key:x ~data:id +let getvar t x = Hashtbl.find t.v2id x +let setrv t id r = Vec.set_exn t.id2r id @@ Uopt.some r +let getrv t id = Uopt.to_option @@ Vec.get_exn t.id2r id +let addextra t key data = Hashtbl.add_multi t.extra ~key ~data + +let newvar ~f t x ty = Hashtbl.find_or_add t.v2id x ~default:(fun () -> + let v = f (regcls ty) x in + let id = new_node ~ty t @@ Rv v in + setrv t id v; + id) let rec pp_node t ppr ppf id = match node t id with | Rv r -> Format.fprintf ppf "%a" ppr r diff --git a/src/lib/isel/isel_match.ml b/src/lib/isel/isel_match.ml index 2984d7a0..e54d1a23 100644 --- a/src/lib/isel/isel_match.ml +++ b/src/lib/isel/isel_match.ml @@ -16,6 +16,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let wordi = (word :> Type.imm) let wordb = (word :> Type.basic) + type subst = Rv.t S.t type rule = (Rv.t, M.Insn.t) R.t type callback = (Rv.t, M.Insn.t) R.callback @@ -70,13 +71,17 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct (* Translate a substitution we got from the matcher into one that our rule callbacks can understand. *) - let map_subst_terms t (s : Matcher.subst) : Rv.t S.t = + let map_subst_terms t (s : Matcher.subst) : subst = let open S in let regvar id r = match typeof t id with | Some (#Type.basic as ty) -> Regvar (r, ty) | Some `v128 -> Regvar_v r | Some `flag -> Regvar (r, wordb) - | None -> raise_notrace Mismatch in + | None -> + Logs.debug (fun m -> + m "%s: no regvar for term %d: %a%!" + __FUNCTION__ id (pp_node t) id); + raise_notrace Mismatch in Map.map s ~f:(fun id -> let tm = match node t id with | N (Oaddr a, []) -> Imm (a, wordi) @@ -89,9 +94,14 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct | Callargs rs -> Callargs rs | Tbl (d, tbl) -> Table (d, tbl) | Rv r -> regvar id r - | N _ -> match Hashtbl.find t.id2r id with - | None -> raise_notrace Mismatch - | Some r -> regvar id r in + | N _ -> match getrv t id with + | Some r -> regvar id r + | None -> + Logs.debug (fun m -> + m "%s: no regvar for term %d: %a%!" + __FUNCTION__ id (pp_node t) id); + raise_notrace Mismatch + in S.{id; tm}) let fail_init_matcher t l id = @@ -112,13 +122,19 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct result of computing ?y because future instructions may want to match on it. *) - let check_blank_move s n (p : Matcher.pat) = match n, p with + let check_blank_move t (s : subst) id (p : Matcher.pat) = + match node t id, p with | N (Omove, [_; _]), P (Omove, [V x; V y]) -> begin match Map.(find s x, find s y) with | Some x, Some y - when x.S.id = y.S.id + when x.id = y.id || S.equal_term Rv.equal x.tm y.tm -> + Logs.debug (fun m -> + m "%s: term %d: blank move to x=%d: %a from y=%d: %a%!" + __FUNCTION__ id + x.id (pp_node t) x.id + y.id (pp_node t) y.id); raise_notrace Mismatch | _ -> () end @@ -129,13 +145,23 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let* () = C.unless init @@ fail_init_matcher t l id in let rec loop () = match VM.one vm prog with | None -> !!None - | Some y -> - try + | Some y -> try + Logs.debug (fun m -> + let s = Map.to_alist @@ Y.subst y in + m "%s: insn %a, term %d, yielded:\n rule: %d\n subst: %s\n pat: %a%!" + __FUNCTION__ Label.pp l id (Y.rule y) + (List.to_string s ~f:(fun (x, id) -> + Format.asprintf "%s=%d" x id)) + Matcher.pp_pat (Y.pat y)); let s = map_subst_terms t @@ Y.subst y in - check_blank_move s (node t id) @@ Y.pat y; + check_blank_move t s id @@ Y.pat y; R.try_ s (Y.payload y) >>= function | Some _ as is -> !!is - | None -> loop () + | None -> + Logs.debug (fun m -> + m "%s: no callbacks succeeded, looping again%!" + __FUNCTION__); + loop () with Mismatch -> loop () in loop () >>= function | None -> fail_match t l id () @@ -200,8 +226,8 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct >>| List.dedup_and_sort ~compare:M.Reg.compare let run t = - let* blks = transl_blks t in let* rets = transl_rets t in + let* blks = transl_blks t in let dict = Func.dict t.fn in let dict = if not t.frame then dict else Dict.set dict Pseudo.Func.Tag.needs_stack_frame () in From 6559aff2750f9cde36272e2926ebbddf0e315825 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 23 Nov 2025 12:17:53 -0500 Subject: [PATCH 41/62] Use `Slot_initialization` analysis when building live ranges --- .../coalesce_slots/coalesce_slots_impl.ml | 67 +++++++++---------- src/lib/slot_initialization.ml | 7 +- src/test/data/opt/badload1.vir.opt | 1 - src/test/data/opt/badload2.vir.opt | 1 - 4 files changed, 37 insertions(+), 39 deletions(-) diff --git a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml index f483aa5a..b2ae86ae 100644 --- a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml +++ b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml @@ -201,52 +201,48 @@ let is_empty t = module Make(M : Scalars.L) = struct open M - module Analysis = Scalars.Make(M) + module S = Slot_initialization.Make(M) let mkdef s x n = Map.update s x ~f:(function | None -> Range.singleton n | Some r -> Range.def r n) - let mkuse f s x n = Map.change s x ~f:(function + let mkuse s x n = Map.change s x ~f:(function | Some r -> Some (Range.use r n) - | None -> f n) + | None -> None) - let update acc s x n ldst = match Map.find s x with - | Some Top -> Map.set acc ~key:x ~data:Range.bad - | Some Offset (base, _) -> - begin match ldst with - | None -> mkuse (const None) acc base n - | Some Store -> mkdef acc base n - | Some Load -> - (* If we end up with a load from an uninitialized slot, - then it is UB, and we shouldn't try to coalesce it - with anything else. *) - let f _ = - Logs.debug (fun m -> - m "%s: slot %a is loaded before being initialized" - __FUNCTION__ Var.pp base); - Some Range.bad in - mkuse f acc base n - end + let update (si : Slot_initialization.t) acc s x n l ldst = + match Map.find s x with | None -> acc + | Some Top -> Map.set acc ~key:x ~data:Range.bad + | Some Offset (base, _) -> match ldst with + | Some Store -> mkdef acc base n + | Some Load when Hash_set.mem si.bad l -> + (* Uninitialized load is UB: forbid this slot as + a candidate for coalescing. *) + Logs.debug (fun m -> + m "%s: uninitialized load at %a from %a: marking bad%!" + __FUNCTION__ Label.pp l Var.pp base); + Map.set acc ~key:base ~data:Range.bad + | Some Load | None -> mkuse acc base n - let liveness_insn acc s ip i = - let op = Insn.op i in + let liveness_insn si acc s ip i = + let l = Insn.label i and op = Insn.op i in let r = Insn.free_vars op in let r, w, ldst = match Insn.load_or_store_to op with | None -> r, None, None | Some (ptr, _, ldst) -> Set.remove r ptr, Some ptr, Some ldst in Option.fold w ~init:acc ~f:(fun acc x -> - update acc s x ip ldst) |> fun init -> + update si acc s x ip l ldst) |> fun init -> Set.fold r ~init ~f:(fun acc x -> - update acc s x ip None) + update si acc s x ip l None) - let liveness_ctrl acc s ip c = + let liveness_ctrl si acc s ip l c = Ctrl.free_vars c |> Set.fold ~init:acc - ~f:(fun acc x -> update acc s x ip None) + ~f:(fun acc x -> update si acc s x ip l None) - let liveness cfg blks slots t = + let liveness cfg blks slots t si = let ip = ref 0 in let nums = Vec.create () in let init = @@ -261,12 +257,12 @@ module Make(M : Scalars.L) = struct let s = ref @@ get t l in let acc = Blk.insns b |> Seq.fold ~init:acc ~f:(fun acc i -> let op = Insn.op i in - let acc = liveness_insn acc !s !ip i in + let acc = liveness_insn si acc !s !ip i in Vec.push nums (Insn.label i); - s := Analysis.transfer_op slots !s op; + s := S.Analysis.transfer_op slots !s op; incr ip; acc) in - let acc = liveness_ctrl acc !s !ip @@ Blk.ctrl b in + let acc = liveness_ctrl si acc !s !ip l @@ Blk.ctrl b in Vec.push nums l; incr ip; acc) in @@ -292,7 +288,7 @@ module Make(M : Scalars.L) = struct | _ -> acc end | _ -> acc in - s := Analysis.transfer_op slots !s op; + s := S.Analysis.transfer_op slots !s op; acc)) let debug_show slots rs nums deads p subst = @@ -301,7 +297,7 @@ module Make(M : Scalars.L) = struct let ppr ppf x = match Map.find rs x with | None -> Format.fprintf ppf "none" | Some r when Range.is_bad r -> - Format.fprintf ppf "escapes" + Format.fprintf ppf "bad" | Some r -> Format.fprintf ppf "%a (%a to %a)" Range.pp r @@ -327,12 +323,13 @@ module Make(M : Scalars.L) = struct __FUNCTION__ Var.pp key Virtual.pp_operand data)) let run fn = - let slots = Analysis.collect_slots fn in + let slots = S.Analysis.collect_slots fn in if Map.is_empty slots then empty else let cfg = Cfg.create fn in let blks = Func.map_of_blks fn in - let t = Analysis.analyze cfg blks slots in - let rs, nums = liveness cfg blks slots t in + let t = S.Analysis.analyze cfg blks slots in + let si = S.analyze' t cfg blks slots in + let rs, nums = liveness cfg blks slots t si in let p = partition slots rs in let deads = collect_deads blks slots rs t in let subst = make_subst slots p in diff --git a/src/lib/slot_initialization.ml b/src/lib/slot_initialization.ml index ec1955c2..54e80d41 100644 --- a/src/lib/slot_initialization.ml +++ b/src/lib/slot_initialization.ml @@ -59,8 +59,7 @@ module Make(M : Scalars.L) = struct s := Analysis.transfer_op slots !s op; acc) - let analyze cfg blks slots = - let t = Analysis.analyze cfg blks slots in + let analyze' t cfg blks slots = let bad = Label.Hash_set.create () in let s = Graphlib.fixpoint (module Cfg) cfg ~init:(Solution.create init_constraints @@ top_state slots) @@ -79,4 +78,8 @@ module Make(M : Scalars.L) = struct m "%s: load at %a is potentially uninitialized%!" __FUNCTION__ Label.pp l)); {soln = s; bad} + + let analyze cfg blks slots = + let t = Analysis.analyze cfg blks slots in + analyze' t cfg blks slots end diff --git a/src/test/data/opt/badload1.vir.opt b/src/test/data/opt/badload1.vir.opt index 881fdd40..c0937b1a 100644 --- a/src/test/data/opt/badload1.vir.opt +++ b/src/test/data/opt/badload1.vir.opt @@ -12,7 +12,6 @@ export function w $foo(w %x) { st.w 0x5_w, %a ; @8 jmp @6(0x5_w) @6(%v.1): - st.w 0x4_w, %a ; @10 %1 = add.w %v.1, 0x4_w ; @14 ret %1 } diff --git a/src/test/data/opt/badload2.vir.opt b/src/test/data/opt/badload2.vir.opt index e5c11e17..1cd43f63 100644 --- a/src/test/data/opt/badload2.vir.opt +++ b/src/test/data/opt/badload2.vir.opt @@ -10,7 +10,6 @@ export function w $foo(w %x) { jmp @6 @6: %v.1 = ld.w %a ; @8 - st.w 0x4_w, %a ; @9 %1 = add.w %v.1, 0x4_w ; @13 ret %1 } From f8c2a3dd5bd2ff3582d0f2de9f6741cd23f5121d Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 23 Nov 2025 12:53:24 -0500 Subject: [PATCH 42/62] Make `peephole` run to a (bounded) fixpoint So far no more than 2 rounds are utilized on the testsuite --- src/lib/machine/x86/x86_amd64_peephole.ml | 122 ++++++++++++---------- 1 file changed, 68 insertions(+), 54 deletions(-) diff --git a/src/lib/machine/x86/x86_amd64_peephole.ml b/src/lib/machine/x86/x86_amd64_peephole.ml index d351eb7b..484dbb74 100644 --- a/src/lib/machine/x86/x86_amd64_peephole.ml +++ b/src/lib/machine/x86/x86_amd64_peephole.ml @@ -10,20 +10,22 @@ module Lset = Label.Tree_set let decomp i = Insn.label i, Insn.insn i -let map_insns fn t = - if Ltree.is_empty t then fn - else Func.map_blks fn ~f:(fun b -> - Blk.map_insns b ~f:(fun i -> - match Ltree.find t @@ Insn.label i with - | Some insn -> Insn.with_insn i insn - | None -> i)) - -let filter_not_in fn t = - if Lset.is_empty t then fn - else Func.map_blks fn ~f:(fun b -> - Blk.insns b |> Seq.filter ~f:(fun i -> - not @@ Lset.mem t @@ Insn.label i) |> - Seq.to_list |> Blk.with_insns b) +let map_insns changed fn t = + if Ltree.is_empty t then fn else + let () = changed := true in + Func.map_blks fn ~f:(fun b -> + Blk.map_insns b ~f:(fun i -> + match Ltree.find t @@ Insn.label i with + | Some insn -> Insn.with_insn i insn + | None -> i)) + +let filter_not_in changed fn t = + if Lset.is_empty t then fn else + let () = changed := true in + Func.map_blks fn ~f:(fun b -> + Blk.insns b |> Seq.filter ~f:(fun i -> + not @@ Lset.mem t @@ Insn.label i) |> + Seq.to_list |> Blk.with_insns b) (* Blocks that consist of a single instruction of the form: @@ -46,10 +48,10 @@ let collect_singles fn = | _ -> acc) (* Union-find with path compression. *) -let find_with_compression m l = +let find_with_compression changed m l = let parent l = Ltree.find !m l |> Option.value ~default:l in - let l = ref l in - let p = ref @@ parent !l in + let l = ref l and orig = l in + let p = ref @@ parent orig in while Label.(!l <> !p) do let g = parent !p in if Label.(g <> !p) then begin @@ -58,6 +60,7 @@ let find_with_compression m l = end; l := g done; + if Label.(!p <> orig) then changed := true; !p (* For blocks collected in the above analysis, thread them through to @@ -83,10 +86,10 @@ let find_with_compression m l = @3: ... *) -let jump_threading fn = +let jump_threading changed fn = let singles = collect_singles fn in if not @@ Label.Tree.is_empty singles then - let find = find_with_compression @@ ref singles in + let find = find_with_compression changed @@ ref singles in Func.map_blks fn ~f:(fun b -> Blk.map_insns b ~f:(fun i -> Insn.with_insn i @@ match Insn.insn i with @@ -97,7 +100,7 @@ let jump_threading fn = else fn (* Remove blocks that are not reachable from the entry block. *) -let remove_disjoint fn = +let remove_disjoint changed fn = let reachable = with_return @@ fun {return} -> let cfg = Cfg.create ~is_barrier ~dests fn in let start = Func.entry fn in @@ -106,9 +109,12 @@ let remove_disjoint fn = ~start_tree:(fun n s -> if Label.(n = start) then s else return s) ~enter_node:(fun _ n s -> Lset.add s n) in - Func.blks fn |> Seq.map ~f:Blk.label |> - Seq.filter ~f:(Fn.non @@ Lset.mem reachable) |> - Seq.to_list |> Func.remove_blks_exn fn + let dead = + Func.blks fn |> Seq.map ~f:Blk.label |> + Seq.filter ~f:(Fn.non @@ Lset.mem reachable) |> + Seq.to_list in + if not @@ List.is_empty dead then changed := true; + Func.remove_blks_exn fn dead (* Invert conditional branches based on the block layout. @@ -146,8 +152,8 @@ let collect_invert_branches afters fn = ~key:lb ~data:(JMP (Jlbl a)) | _ -> acc)) -let invert_branches afters fn = - map_insns fn @@ collect_invert_branches afters fn +let invert_branches changed afters fn = + map_insns changed fn @@ collect_invert_branches afters fn (* Eliminate useless unconditional jumps where a fallthrough would suffice. @@ -176,8 +182,8 @@ let collect_implicit_fallthroughs afters fn = Lset.add acc la | _ -> acc)) -let implicit_fallthroughs afters fn = - filter_not_in fn @@ collect_implicit_fallthroughs afters fn +let implicit_fallthroughs changed afters fn = + filter_not_in changed fn @@ collect_implicit_fallthroughs afters fn (* Deallocating the stack pointer followed by a LEAVE instruction is redundant. @@ -212,8 +218,8 @@ let collect_dealloc_stack_before_leave fn = | _ :: xs -> go acc xs in go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b) -let dealloc_stack_before_leave fn = - filter_not_in fn @@ collect_dealloc_stack_before_leave fn +let dealloc_stack_before_leave changed fn = + filter_not_in changed fn @@ collect_dealloc_stack_before_leave fn let collect_redundant_spill_after_reload fn = Func.blks fn |> Seq.fold ~init:Lset.empty ~f:(fun acc b -> @@ -229,8 +235,8 @@ let collect_redundant_spill_after_reload fn = | _ :: xs -> go acc xs in go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b) -let redundant_spill_after_reload fn = - filter_not_in fn @@ collect_redundant_spill_after_reload fn +let redundant_spill_after_reload changed fn = + filter_not_in changed fn @@ collect_redundant_spill_after_reload fn (* If we have a LEA of the same address as a subsequent load, then use the result of the LEA as the address for the load. @@ -252,8 +258,8 @@ let collect_reuse_lea fn = | _ :: xs -> go acc xs in go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b) -let reuse_lea fn = - map_insns fn @@ collect_reuse_lea fn +let reuse_lea changed fn = + map_insns changed fn @@ collect_reuse_lea fn let collect_and_test fn = Func.blks fn |> Seq.fold ~init:Lset.empty ~f:(fun acc b -> @@ -270,8 +276,8 @@ let collect_and_test fn = | _ :: xs -> go acc xs in go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b) -let and_test fn = - filter_not_in fn @@ collect_and_test fn +let and_test changed fn = + filter_not_in changed fn @@ collect_and_test fn let immty = function | #Type.imm as imm -> imm @@ -293,8 +299,8 @@ let collect_lea_mov fn = | _ :: xs -> go acc xs in go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b) -let lea_mov fn = - map_insns fn @@ collect_lea_mov fn +let lea_mov changed fn = + map_insns changed fn @@ collect_lea_mov fn (* TODO: fill me in *) let combinable_binop = function @@ -334,8 +340,8 @@ let collect_mov_op fn = | _ :: xs -> go acc xs in go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b) -let mov_op fn = - map_insns fn @@ collect_mov_op fn +let mov_op changed fn = + map_insns changed fn @@ collect_mov_op fn let collect_mov_to_store fn = Func.blks fn |> Seq.fold ~init:Ltree.empty ~f:(fun acc b -> @@ -350,20 +356,28 @@ let collect_mov_to_store fn = | _ :: xs -> go acc xs in go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b) -let mov_to_store fn = - map_insns fn @@ collect_mov_to_store fn +let mov_to_store changed fn = + map_insns changed fn @@ collect_mov_to_store fn + +let max_rounds = 5 let run fn = - let fn = jump_threading fn in - let fn = remove_disjoint fn in - let afters = Func.collect_afters fn in - let fn = invert_branches afters fn in - let fn = implicit_fallthroughs afters fn in - let fn = dealloc_stack_before_leave fn in - let fn = redundant_spill_after_reload fn in - let fn = reuse_lea fn in - let fn = and_test fn in - let fn = lea_mov fn in - let fn = mov_op fn in - let fn = mov_to_store fn in - fn + let rec loop i fn = + if i > max_rounds then fn else + let () = Logs.debug (fun m -> + m "%s: peephole round %d%!" __FUNCTION__ i) in + let changed = ref false in + let fn = jump_threading changed fn in + let fn = remove_disjoint changed fn in + let afters = Func.collect_afters fn in + let fn = invert_branches changed afters fn in + let fn = implicit_fallthroughs changed afters fn in + let fn = dealloc_stack_before_leave changed fn in + let fn = redundant_spill_after_reload changed fn in + let fn = reuse_lea changed fn in + let fn = and_test changed fn in + let fn = lea_mov changed fn in + let fn = mov_op changed fn in + let fn = mov_to_store changed fn in + if !changed then loop (i + 1) fn else fn in + loop 1 fn From f367b5d84c55bcabe969a294d058e8d8308efe57 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 23 Nov 2025 13:06:33 -0500 Subject: [PATCH 43/62] Refine `and_test` peephole opt We should be selective about which condition code is being tested --- src/lib/machine/x86/x86_amd64_peephole.ml | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/lib/machine/x86/x86_amd64_peephole.ml b/src/lib/machine/x86/x86_amd64_peephole.ml index 484dbb74..57fbc048 100644 --- a/src/lib/machine/x86/x86_amd64_peephole.ml +++ b/src/lib/machine/x86/x86_amd64_peephole.ml @@ -261,17 +261,33 @@ let collect_reuse_lea fn = let reuse_lea changed fn = map_insns changed fn @@ collect_reuse_lea fn +let and_test_cc = function + | Ce | Cne | Cs | Cns -> true + | _ -> false + +let and_test_act = function + | Jcc (cc, _) + | One (SETcc cc, _) + | Two (CMOVcc cc, _, _) + -> and_test_cc cc + | _ -> false + let collect_and_test fn = Func.blks fn |> Seq.fold ~init:Lset.empty ~f:(fun acc b -> let rec go acc = function | [] | [_] -> acc | (_, Two (AND, Oreg (r1, _), _)) :: (l, Two (TEST_, Oreg (r1', _), Oreg (r2', _))) - :: xs when Rv.(r1 = r1') && Rv.(r1 = r2') -> + :: (_, act) + :: xs when Rv.(r1 = r1') + && Rv.(r1 = r2') + && and_test_act act -> go (Lset.add acc l) xs | (_, Two (AND, Oreg (r1, _), _)) :: (l, Two (CMP, Oreg (r1', _), Oimm (0L, _))) - :: xs when Rv.equal r1 r1' -> + :: (_, act) + :: xs when Rv.equal r1 r1' + && and_test_act act -> go (Lset.add acc l) xs | _ :: xs -> go acc xs in go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b) From 5b5e9ad4e5032709c83c0c588c41a3e75fef408a Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 23 Nov 2025 22:12:39 -0500 Subject: [PATCH 44/62] Block comments and cleanups in lexer --- src/lib/virtual_lexer.mll | 40 +++++++++++++++++++++++--------------- src/lib/virtual_parser.mly | 6 +++--- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/lib/virtual_lexer.mll b/src/lib/virtual_lexer.mll index 6cd8ab2c..8333a289 100644 --- a/src/lib/virtual_lexer.mll +++ b/src/lib/virtual_lexer.mll @@ -2,8 +2,6 @@ open Lexing open Virtual_parser - let string_buff = Buffer.create 256 - let char_for_backslash = function | 'n' -> '\010' | 'r' -> '\013' @@ -56,7 +54,7 @@ let exp = ('E' | 'e') (integer | ninteger) let inf = ("INF" | "inf" | "INFINITY" | "infinity") let nan = ("NAN" | "NaN" | "nan") let flt = ('-'? digit+ '.' digit+ exp?) | ('-'? inf) | ('-'? nan) -let backslash_escapes = ['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] +let backslash_escapes = ['\\' '\'' '"' 'n' 't' 'b' 'r'] let imm = ['w' 'l' 'b' 'h'] let imm_base = ['w' 'l'] let fp = ['s' 'd'] @@ -65,6 +63,7 @@ let special = ['m' 'f'] let typ = (basic | special) rule token = parse + | "(;" { block_comment 0 lexbuf; token lexbuf } | ';' { line_comment lexbuf; token lexbuf } | '\n' { newline lexbuf; token lexbuf } | space { token lexbuf } @@ -72,13 +71,13 @@ rule token = parse | '-' { MINUS } | ':' (ident as id) { TYPENAME id } | ':' { COLON } + | '@' (integer as id) { LABEL id } | '$' (ident as id) { SYM id } | '@' (ident as id) { LABEL id } - | '@' (integer as id) { LABEL id } - | '%' (ident as id) '.' (integer as i) { VAR (id, int_of_string i) } - | '%' (ident as id) { IDENT id } | '%' (integer as id) { TEMP id } | '%' (integer as id) '.' (integer as i) { TEMPI (id, int_of_string i) } + | '%' (ident as id) '.' (integer as i) { VAR (id, int_of_string i) } + | '%' (ident as id) { IDENT id } | "module" space+ (ident as id) { MODULE id } | "align" { ALIGN } | "const" { CONST () } @@ -92,7 +91,7 @@ rule token = parse | ',' { COMMA } | '=' { EQUALS } | "->" { ARROW } - | "..." { ELIPSIS } + | "..." { ELLIPSIS } | "sb" { SB } | "sh" { SH } | "sw" { SW } @@ -184,9 +183,9 @@ rule token = parse | "section" { SECTION } | "noreturn" { NORETURN } | '"' { - Buffer.clear string_buff; - string lexbuf; - STRING (Buffer.contents string_buff) + let buf = Buffer.create 64 in + string buf lexbuf; + STRING (Buffer.contents buf) } | eof { EOF } | (posints as i) '_' (imm as t) { @@ -204,17 +203,26 @@ rule token = parse | "false" { BOOL false } | _ { raise Error } -and string = parse +and string buf = parse | '"' { () } | '\\' (backslash_escapes as c) { - Buffer.add_char string_buff (char_for_backslash c); - string lexbuf + Buffer.add_char buf (char_for_backslash c); + string buf lexbuf } + | eof { raise Error } | _ as c { - Buffer.add_char string_buff c; - string lexbuf + Buffer.add_char buf c; + string buf lexbuf } +and block_comment depth = parse + | "(;" { block_comment (depth + 1) lexbuf } + | ";)" { if depth > 0 then block_comment (depth - 1) lexbuf } + | '\n' { newline lexbuf; block_comment depth lexbuf } + | eof { raise Error } + | _ { block_comment depth lexbuf } + and line_comment = parse - | '\n' { () } + | '\n' { newline lexbuf } + | eof { () } | _ { line_comment lexbuf } diff --git a/src/lib/virtual_parser.mly b/src/lib/virtual_parser.mly index 2e94e8ea..2a91d792 100644 --- a/src/lib/virtual_parser.mly +++ b/src/lib/virtual_parser.mly @@ -120,7 +120,7 @@ %token COMMA %token EQUALS %token ARROW -%token ELIPSIS +%token ELLIPSIS %token SB SH SW W L B H S D Z %token ADD DIV MUL SUB NEG %token REM MULH UMULH UDIV UREM AND OR ASR LSL LSR ROL ROR XOR NOT @@ -268,7 +268,7 @@ func: { make_fn slots blks args l name return true } func_args: - | ELIPSIS { !!([], true) } + | ELLIPSIS { !!([], true) } | t = type_arg x = var { let+ x = x in [x, t], false } | t = type_arg x = var COMMA rest = func_args { @@ -456,7 +456,7 @@ call_args: let+ a = a and+ rest = rest in Arg a :: rest } - | a = operand COMMA ELIPSIS COMMA vargs = separated_nonempty_list(COMMA, operand) + | a = operand COMMA ELLIPSIS COMMA vargs = separated_nonempty_list(COMMA, operand) { let+ a = a and+ vargs = Context.List.all vargs in Arg a :: Core.List.map vargs ~f:(fun a -> Varg a) From 071ea12d326457d918aea36a728d2d867559cf34 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sun, 23 Nov 2025 22:22:40 -0500 Subject: [PATCH 45/62] Use a heuristic for coalescing trivial DU chains --- src/lib/regalloc/regalloc_irc.ml | 129 +++++++++++++----- src/lib/regalloc/regalloc_irc_state.ml | 21 +++ .../analyze_array.vir.opt.sysv.amd64.regalloc | 7 +- ...rime_main_licm.vir.opt.sysv.amd64.regalloc | 8 +- .../opt/qsort.vir.opt.sysv.amd64.regalloc | 26 ++-- ...rt_inline_swap.vir.opt.sysv.amd64.regalloc | 5 +- .../opt/spill1.vir.opt.sysv.amd64.regalloc | 62 ++++----- .../opt/spill2.vir.opt.sysv.amd64.regalloc | 30 ++-- .../opt/vaarg1.vir.opt.sysv.amd64.regalloc | 14 +- 9 files changed, 188 insertions(+), 114 deletions(-) diff --git a/src/lib/regalloc/regalloc_irc.ml b/src/lib/regalloc/regalloc_irc.ml index eabc40ed..dbcbab94 100644 --- a/src/lib/regalloc/regalloc_irc.ml +++ b/src/lib/regalloc/regalloc_irc.ml @@ -50,7 +50,9 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let label = Insn.label i in let insn = Insn.insn i in let use = M.Insn.reads insn in - Set.iter use ~f:(update_cost ~loop_depth t); + Set.iter use ~f:(fun u -> + update_cost ~loop_depth t u; + inc_use t u); let def = M.Insn.writes insn in (* if isMoveInstruction(I) then *) let+ out = match M.Regalloc.is_copy insn with @@ -82,14 +84,16 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct Set.iter def ~f:(fun d -> (* forall l \in live AddEdge(l,d) *) + add_def t d label; Set.iter out ~f:(fun o -> add_edge t o d)); (* live := use(I) U (live\def(I)) *) Set.union use (Set.diff out def) (* Build the interference graph and other initial state for the algorithm. *) - let build t live = + let build t = (* forall b \in blocks in program *) + let live = Option.value_exn t.live in Func.blks t.fn |> C.Seq.iter ~f:(fun b -> let l = Blk.label b in let loop_depth = match Loop.blk t.loop l with @@ -98,12 +102,15 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct (* NB: levels start at 0 *) (Loop.(level (get t.loop lp)) :> int) + 1 in (* live := liveOut(b) *) - let out = Live.outs live l in + let out = ref @@ Live.outs live l in (* forall I \in instructions(b) in reverse order *) - let+ _out = - Blk.insns b ~rev:true |> - C.Seq.fold ~init:out ~f:(build_insn ~loop_depth t) in - ()) + let insns = Blk.insns b ~rev:true |> Seq.to_list in + let ord = ref (List.length insns - 1) in + C.List.iter insns ~f:(fun i -> + Hashtbl.set t.insn_blks ~key:(Insn.label i) ~data:(l, !ord); + let+ out' = build_insn ~loop_depth t !out i in + out := out'; + decr ord)) (* Initialize the worklists. *) let make_worklist t = @@ -127,6 +134,9 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct node_moves t n |> Lset.iter ~f:(fun m -> (* if m \in activeMoves then *) if Lset.mem t.amoves m then begin + Logs.debug (fun m_ -> + m_ "%s: enabling move %a for node %a%!" + __FUNCTION__ Label.pp m Rv.pp n); (* activeMoves := activeMoves \ {m} *) t.amoves <- Lset.remove t.amoves m; (* worklistMoves := worklistMoves U {m} *) @@ -188,12 +198,17 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct end let ok t a r = - (* t \in precolored *) - exclude_from_coloring t a || - (* degree[t] < K *) - degree t a < Regs.node_k a || - (* (a,r) \in adjSet *) - has_edge t a r + let res = + (* t \in precolored *) + exclude_from_coloring t a || + (* degree[t] < K *) + degree t a < Regs.node_k a || + (* (a,r) \in adjSet *) + has_edge t a r in + Logs.debug (fun m -> + m "%s: %a, %a: %b%!" + __FUNCTION__ Rv.pp a Rv.pp r res); + res (* forall t \in Adjacent(v), OK(t,u) *) let all_adjacent_ok t u v = @@ -206,16 +221,20 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct if degree[n] >= k then k := k + 1 return (k < K) *) - let conservative t nk nodes = - let k = Set.fold nodes ~init:0 ~f:(fun k n -> - if degree t n >= Regs.node_k n then k + 1 else k) in - k < nk + let conservative t nodes = + Set.fold nodes ~init:0 ~f:(fun k n -> + if degree t n >= Regs.node_k n then k + 1 else k) (* Conservative(Adjacent(u) U Adjacent(v)) *) let conservative_adj t u v = assert (Regs.same_class_node u v); let nodes = Set.union (adjacent t u) (adjacent t v) in - conservative t (Regs.node_k u) nodes + let nk = Regs.node_k u in + let k = conservative t nodes in + Logs.debug (fun m -> + m "%s: u=%a, v=%a, k=%d, nk=%d%!" + __FUNCTION__ Rv.pp u Rv.pp v k nk); + k < nk (* XXX: the algorithm in the paper does: @@ -269,12 +288,43 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct if (* degree[u] >= K *) degree t u >= Regs.node_k u && (* u \in freezeWorklist *) - Hash_set.mem t.wfreeze u then begin + Hash_set.mem t.wfreeze u + then (* freezeWorklist := freezeWorklist \ {u} *) - Hash_set.remove t.wfreeze u; + let () = Hash_set.remove t.wfreeze u in (* spillWorklist := spillWorklist U {u} *) add_spill t u - end + + (* We can bypass the Briggs conservative heuristic if this + move is trivially coalescable. + + Criteria: + + 1. The source node is not precolored. + 2. This move is its only use. + 3. All definitions dominate the use. + 4. The source node is not live-out. + *) + let is_trivial_du t m v = + can_be_colored t v && num_uses t v = 1 && + match Hashtbl.find t.insn_blks m with + | None -> false + | Some (bm, om) -> + let live = Option.value_exn t.live in + let out = Live.outs live bm in + not (Set.mem out v) && + Hashtbl.find t.defs v |> + Option.value ~default:Lset.empty |> + Lset.to_sequence |> Seq.for_all ~f:(fun d -> + match Hashtbl.find t.insn_blks d with + | None -> false + | Some (bd, od) when Label.(bm = bd) -> + Logs.debug (fun m -> + m "%s: bm=%a, om=%d, bd=%a, od=%d%!" + __FUNCTION__ Label.pp bm om Label.pp bd od); + od < om + | Some (bd, _) -> + Semi_nca.Tree.is_descendant_of t.dom ~parent:bd bm) (* P(u,v) = S(u,v) * (W(u,v) / (1 + D'(u) + D'(v))) @@ -307,6 +357,9 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let dv = effective_degree c.src pv in let w = Float.of_int @@ Int.pow 10 c.loop in let p = w /. Float.of_int (1 + du + dv) in + (* If if this move is trivially coalescable, then bump the weight a + bit so that it can coalesce earlier. *) + let p = if is_trivial_du t m c.src then p *. 2.0 else p in (* If one of the nodes is pre-colored, then this coalesce will be much riskier. If both are pre-colored, then we should avoid it at all costs (see topmost condition). *) @@ -340,38 +393,42 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct (* worklistMoves := worklistMoves \ {m} *) t.wmoves <- Lset.remove t.wmoves m; (* if u = v then *) - if Rv.(u = v) then begin + if Rv.(u = v) then (* coalescedMoves := coalescedMoves U {m} *) - t.cmoves <- Lset.add t.cmoves m; + let () = t.cmoves <- Lset.add t.cmoves m in + Logs.debug (fun m -> m "%s: already coalesced%!" __FUNCTION__); (* AddWorkList(u) *) add_worklist t u - end else if + else if (* v \in precolored *) exclude_from_coloring t v || (* (u,v) \in adjSet *) - has_edge t u v then begin + has_edge t u v + then + (* constrainedMoves := constrainedMoves U {m} *) + let () = t.kmoves <- Lset.add t.kmoves m in Logs.debug (fun m_ -> m_ "%s: constraining %a%!" __FUNCTION__ Label.pp m); - (* constrainedMoves := constrainedMoves U {m} *) - t.kmoves <- Lset.add t.kmoves m; (* addWorkList(u) *) add_worklist t u; (* addWorkList(v) *) add_worklist t v - end else if + else if (* u \in precolored ^ (\forall t \in Adjacent(v), OK(t,u)) *) (exclude_from_coloring t u && all_adjacent_ok t u v) || (* u \notin precolored ^ Conservative(Adjacent(u), Adjacent(v)) *) - (can_be_colored t u && conservative_adj t u v) then begin + (can_be_colored t u && (is_trivial_du t m v || conservative_adj t u v)) + then (* coalescedMoves := coalescedMoves U {m} *) - t.cmoves <- Lset.add t.cmoves m; + let () = t.cmoves <- Lset.add t.cmoves m in (* Combine(u,v) *) combine t u v; (* AddWorkList(u) *) add_worklist t u - end else + else (* activeMoves := activeMoves U {m} *) - t.amoves <- Lset.add t.amoves m + let () = t.amoves <- Lset.add t.amoves m in + Logs.debug (fun m -> m "%s: adding to active moves%!" __FUNCTION__) (* pre: m \in copies @@ -627,10 +684,14 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct (* Clear the relevant state for the next round. *) let new_round t = + t.live <- None; Hashtbl.clear t.adjlist; Hashtbl.clear t.degree; Hashtbl.clear t.copies; + Hashtbl.clear t.nuse; + Hashtbl.clear t.defs; Hashtbl.clear t.moves; + Hashtbl.clear t.insn_blks; Hashtbl.clear t.spill_cost; (* This doesn't seem to happen in the paper, but we should discard the previous coloring since we introduced new spill/reload code. @@ -649,8 +710,8 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct m "%s: $%s: round %d of %d" __FUNCTION__ (Func.name t.fn) round max_rounds); (* Build the interference graph. *) - let live = Live.compute ~keep:t.keep t.fn in - let* () = build t live in + t.live <- Some (Live.compute ~keep:t.keep t.fn); + let* () = build t in make_worklist t; (* Process the worklists. *) let continue = ref true in diff --git a/src/lib/regalloc/regalloc_irc_state.ml b/src/lib/regalloc/regalloc_irc_state.ml index f9adcce7..394bbfc4 100644 --- a/src/lib/regalloc/regalloc_irc_state.ml +++ b/src/lib/regalloc/regalloc_irc_state.ml @@ -48,7 +48,10 @@ module Make(M : Machine_intf.S) = struct adjlist : Rv.Set.t Rv.Table.t; degree : int Rv.Table.t; moves : Lset.t Rv.Table.t; + insn_blks : (Label.t * int) Label.Table.t; copies : copy Label.Table.t; + nuse : int Rv.Table.t; + defs : Lset.t Rv.Table.t; mutable wmoves : Lset.t; (* worklist moves *) mutable amoves : Lset.t; (* active moves *) mutable cmoves : Lset.t; (* coalesced moves *) @@ -72,6 +75,7 @@ module Make(M : Machine_intf.S) = struct loop : Loop.t; spill_cost : int Rv.Table.t; dom : Label.t Semi_nca.tree; + mutable live : Live.t option; } (* Explicit registers and variables that correspond to stack slots @@ -123,7 +127,10 @@ module Make(M : Machine_intf.S) = struct adjlist = Rv.Table.create (); degree; moves = Rv.Table.create (); + insn_blks = Label.Table.create (); copies = Label.Table.create (); + nuse = Rv.Table.create (); + defs = Rv.Table.create (); wmoves = Lset.empty; amoves = Lset.empty; cmoves = Lset.empty; @@ -147,6 +154,7 @@ module Make(M : Machine_intf.S) = struct loop; spill_cost; dom; + live = None; } let add_spill t n = @@ -219,6 +227,19 @@ module Make(M : Machine_intf.S) = struct let move_related t n = not @@ Lset.is_empty @@ node_moves t n + let inc_use t n = + Hashtbl.update t.nuse n ~f:(function + | Some n -> n + 1 + | None -> 1) + + let num_uses t n = + Hashtbl.find t.nuse n |> Option.value ~default:0 + + let add_def t n l = + Hashtbl.update t.defs n ~f:(function + | None -> Lset.singleton l + | Some s -> Lset.add s l) + (* if n \in coalescedNodes then GetAlias(alias[n]) else n *) diff --git a/src/test/data/opt/analyze_array.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/analyze_array.vir.opt.sysv.amd64.regalloc index 7b584c3e..9e6e62a7 100644 --- a/src/test/data/opt/analyze_array.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/analyze_array.vir.opt.sysv.amd64.regalloc @@ -68,11 +68,10 @@ export function $analyze_array { ; returns: rax mov ecx, r13d ; @75 sub ecx, ebx ; @122 add eax, dword ptr [rbp - 0x10] ; @77 - add ecx, eax ; @78 - lea eax, qword ptr [r14 + 0x1] ; @79 - mov dword ptr [rbp - 0x10], ecx ; @186 + add eax, ecx ; @78 + inc r14d ; @79 + mov dword ptr [rbp - 0x10], eax ; @186 mov edi, r13d ; @119 - mov r14d, eax ; @120 jmp @13 ; @121 } diff --git a/src/test/data/opt/prime_main_licm.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/prime_main_licm.vir.opt.sysv.amd64.regalloc index d5905974..f2cbb6ea 100644 --- a/src/test/data/opt/prime_main_licm.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/prime_main_licm.vir.opt.sysv.amd64.regalloc @@ -15,7 +15,7 @@ export function $main { ; returns: rax push rbx ; @69 mov rdi, qword ptr [rsi + 0x8] ; @5 call $atoi ; rdi ; @60 - mov r14d, eax ; @56 + mov r14d, eax ; @61 mov r13d, 0x1_w ; @57 mov eax, 0x1_w ; @58 @3: @@ -38,10 +38,8 @@ export function $main { ; returns: rax mov edx, ebx ; @36 xor al, al ; @37 call $printf ; rax rdi rdx rsi ; @38 - lea ecx, qword ptr [r14 - 0x1] ; @25 - lea eax, qword ptr [r13 + 0x1] ; @26 - mov r14d, ecx ; @31 - mov r13d, eax ; @32 + dec r14d ; @25 + inc r13d ; @26 mov eax, r12d ; @33 jmp @3 ; @34 @9: diff --git a/src/test/data/opt/qsort.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/qsort.vir.opt.sysv.amd64.regalloc index aa502040..2c542c94 100644 --- a/src/test/data/opt/qsort.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/qsort.vir.opt.sysv.amd64.regalloc @@ -20,13 +20,13 @@ function $partition { ; returns: rax push r15 ; @132 push rbx ; @133 mov r12, rdi ; @49 - mov rcx, rsi ; @100 - lea rax, qword ptr [r12 + rdx*4] ; @50 + mov rcx, rdx ; @101 + lea rax, qword ptr [r12 + rcx*4] ; @50 mov qword ptr [rbp - 0x10], rax ; @124 - mov r14d, dword ptr [r12 + rdx*4] ; @11 - lea rax, qword ptr [rcx - 0x1] ; @51 - lea r15, qword ptr [rdx - 0x1] ; @52 - mov rbx, rcx ; @97 + mov r14d, dword ptr [r12 + rcx*4] ; @11 + lea rax, qword ptr [rsi - 0x1] ; @51 + lea r15, qword ptr [rcx - 0x1] ; @52 + mov rbx, rsi ; @97 @8: cmp rbx, r15 ; @92 jle @14 ; @93 @@ -44,18 +44,17 @@ function $partition { ; returns: rax leave ; @139 ret ; @88 @14: - lea rcx, qword ptr [r12 + rbx*4] ; @58 - mov edx, dword ptr [rcx] ; @22 - cmp edx, r14d ; @81 + lea rsi, qword ptr [r12 + rbx*4] ; @58 + mov ecx, dword ptr [rsi] ; @22 + cmp ecx, r14d ; @81 jg @19 ; @82 @18: lea r13, qword ptr [rax + 0x1] ; @61 lea rdi, qword ptr [r12 + r13*4] ; @63 - mov rsi, rcx ; @76 call $swap ; rdi rsi ; @77 mov rax, r13 ; @74 @19: - inc rbx ; @71 + inc rbx ; @60 jmp @8 ; @73 } @@ -68,8 +67,8 @@ export function $qsort { push r14 ; @144 push rbx ; @145 mov r14, rdi ; @119 + mov rbx, rsi ; @120 mov r13, rdx ; @121 - mov rbx, rsi ; @122 @38: cmp rbx, r13 ; @114 jl @39 ; @115 @@ -90,7 +89,6 @@ export function $qsort { mov rdi, r14 ; @44 mov rsi, rbx ; @106 call $qsort ; rdi rdx rsi ; @108 - lea rax, qword ptr [r12 + 0x1] ; @66 - mov rbx, rax ; @104 + lea rbx, qword ptr [r12 + 0x1] ; @66 jmp @38 ; @105 } diff --git a/src/test/data/opt/qsort_inline_swap.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/qsort_inline_swap.vir.opt.sysv.amd64.regalloc index b345194d..ab3f3f24 100644 --- a/src/test/data/opt/qsort_inline_swap.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/qsort_inline_swap.vir.opt.sysv.amd64.regalloc @@ -38,8 +38,8 @@ export function $qsort { push r14 ; @122 push rbx ; @123 mov r14, rdi ; @113 + mov rbx, rsi ; @114 mov r13, rdx ; @115 - mov rbx, rsi ; @116 @39: cmp rbx, r13 ; @108 jl @40 ; @109 @@ -60,7 +60,6 @@ export function $qsort { mov rdi, r14 ; @45 mov rsi, rbx ; @100 call $qsort ; rdi rdx rsi ; @102 - lea rax, qword ptr [r12 + 0x1] ; @67 - mov rbx, rax ; @98 + lea rbx, qword ptr [r12 + 0x1] ; @67 jmp @39 ; @99 } diff --git a/src/test/data/opt/spill1.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/spill1.vir.opt.sysv.amd64.regalloc index 4e696f0a..e5a3c04c 100644 --- a/src/test/data/opt/spill1.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/spill1.vir.opt.sysv.amd64.regalloc @@ -35,48 +35,48 @@ export function $foo { ; returns: rax add eax, 0x3_w ; @59 mov dword ptr [rsp + 0x40], eax ; @233 mov eax, dword ptr [rsp + 0x30] ; @235 - lea r8d, qword ptr [rax + 0x4] ; @60 + lea r10d, qword ptr [rax + 0x4] ; @60 mov eax, dword ptr [rsp + 0x2c] ; @236 - lea esi, qword ptr [rax + 0x5] ; @61 + lea r12d, qword ptr [rax + 0x5] ; @61 mov eax, dword ptr [rsp + 0x28] ; @237 - lea r13d, qword ptr [rax + 0x6] ; @62 + lea esi, qword ptr [rax + 0x6] ; @62 mov eax, dword ptr [rsp + 0x24] ; @238 - lea r15d, qword ptr [rax + 0x7] ; @63 + lea r11d, qword ptr [rax + 0x7] ; @63 mov eax, dword ptr [rsp + 0x20] ; @239 - lea r11d, qword ptr [rax + 0x8] ; @64 + lea edx, qword ptr [rax + 0x8] ; @64 mov eax, dword ptr [rsp + 0x1c] ; @240 - lea ecx, qword ptr [rax + 0x9] ; @65 + lea r9d, qword ptr [rax + 0x9] ; @65 mov eax, dword ptr [rsp + 0x18] ; @241 - lea r12d, qword ptr [rax + 0xa] ; @66 - mov eax, dword ptr [rsp + 0x14] ; @242 - lea r9d, qword ptr [rax + 0xb] ; @67 - mov eax, dword ptr [rsp + 0x10] ; @243 - lea r10d, qword ptr [rax + 0xc] ; @68 - mov eax, dword ptr [rsp + 0xc] ; @244 - lea ebx, qword ptr [rax + 0xd] ; @69 - mov eax, dword ptr [rsp + 0x8] ; @245 - lea edx, qword ptr [rax + 0xe] ; @70 - mov eax, dword ptr [rsp + 0x4] ; @246 - lea r14d, qword ptr [rax + 0xf] ; @71 - mov eax, dword ptr [rsp] ; @247 - add eax, 0x10_w ; @72 + add eax, 0xa_w ; @66 + mov ecx, dword ptr [rsp + 0x14] ; @242 + lea r13d, qword ptr [rcx + 0xb] ; @67 + mov ecx, dword ptr [rsp + 0x10] ; @243 + add ecx, 0xc_w ; @68 + mov r8d, dword ptr [rsp + 0xc] ; @244 + lea ebx, qword ptr [r8 + 0xd] ; @69 + mov r8d, dword ptr [rsp + 0x8] ; @245 + lea r14d, qword ptr [r8 + 0xe] ; @70 + mov r8d, dword ptr [rsp + 0x4] ; @246 + lea r15d, qword ptr [r8 + 0xf] ; @71 + mov r8d, dword ptr [rsp] ; @247 + add r8d, 0x10_w ; @72 dec edi ; @73 test edi, edi ; @189 je @21 ; @190 @168: - mov dword ptr [rsp], eax ; @248 - mov dword ptr [rsp + 0x4], r14d ; @249 - mov dword ptr [rsp + 0x8], edx ; @250 + mov dword ptr [rsp], r8d ; @248 + mov dword ptr [rsp + 0x4], r15d ; @249 + mov dword ptr [rsp + 0x8], r14d ; @250 mov dword ptr [rsp + 0xc], ebx ; @251 - mov dword ptr [rsp + 0x10], r10d ; @252 - mov dword ptr [rsp + 0x14], r9d ; @253 - mov dword ptr [rsp + 0x18], r12d ; @254 - mov dword ptr [rsp + 0x1c], ecx ; @255 - mov dword ptr [rsp + 0x20], r11d ; @256 - mov dword ptr [rsp + 0x24], r15d ; @257 - mov dword ptr [rsp + 0x28], r13d ; @258 - mov dword ptr [rsp + 0x2c], esi ; @259 - mov dword ptr [rsp + 0x30], r8d ; @260 + mov dword ptr [rsp + 0x10], ecx ; @252 + mov dword ptr [rsp + 0x14], r13d ; @253 + mov dword ptr [rsp + 0x18], eax ; @254 + mov dword ptr [rsp + 0x1c], r9d ; @255 + mov dword ptr [rsp + 0x20], edx ; @256 + mov dword ptr [rsp + 0x24], r11d ; @257 + mov dword ptr [rsp + 0x28], esi ; @258 + mov dword ptr [rsp + 0x2c], r12d ; @259 + mov dword ptr [rsp + 0x30], r10d ; @260 mov eax, dword ptr [rsp + 0x40] ; @261 mov dword ptr [rsp + 0x34], eax ; @262 mov eax, dword ptr [rsp + 0x44] ; @263 diff --git a/src/test/data/opt/spill2.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/spill2.vir.opt.sysv.amd64.regalloc index 66ef99ef..2eccda13 100644 --- a/src/test/data/opt/spill2.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/spill2.vir.opt.sysv.amd64.regalloc @@ -12,11 +12,11 @@ export function $foo { ; returns: rax mov qword ptr [rsp], rsi ; @99 mov r15, rdx ; @92 mov r14, rcx ; @93 - mov r10d, r8d ; @94 + mov r11d, r8d ; @94 xor eax, eax ; @95 xor r13d, r13d ; @96 @3: - cmp r13d, r10d ; @85 + cmp r13d, r11d ; @85 jl @6 ; @86 @7: add rsp, 0x10_l ; @108 @@ -37,29 +37,27 @@ export function $foo { ; returns: rax mov r12d, eax ; @79 xor eax, eax ; @80 @9: - cmp eax, r10d ; @74 + cmp eax, r11d ; @74 jl @21 ; @75 @22: lea eax, qword ptr [rsi + rdi*1] ; @48 lea ecx, qword ptr [r8 + r9*1] ; @49 imul eax, ecx ; @73 add eax, r12d ; @51 - inc r13d ; @71 + inc r13d ; @52 jmp @3 ; @72 @21: - lea edx, qword ptr [rsi + rax*1] ; @53 - lea ecx, qword ptr [rdi + rax*1] ; @54 + lea ecx, qword ptr [rsi + rax*1] ; @53 + lea edx, qword ptr [rdi + rax*1] ; @54 lea ebx, qword ptr [r8 + rax*1] ; @55 - lea r11d, qword ptr [r9 + rax*1] ; @56 - imul edx, ecx ; @69 - mov ecx, ebx ; @58 - imul ecx, r11d ; @68 - add edx, ebx ; @59 - add r11d, ecx ; @60 - mov ecx, edx ; @61 - xor ecx, r11d ; @67 - add ecx, r12d ; @62 + lea r10d, qword ptr [r9 + rax*1] ; @56 + imul ecx, edx ; @69 + mov edx, ebx ; @58 + imul edx, r10d ; @68 + add ecx, ebx ; @59 + add edx, r10d ; @60 + xor ecx, edx ; @67 + add r12d, ecx ; @62 inc eax ; @63 - mov r12d, ecx ; @64 jmp @9 ; @66 } diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc index d49b71d7..b05d37c8 100644 --- a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc @@ -26,22 +26,22 @@ export function $foo { ; returns: rax mov dword ptr [rbp - 0xc], 0x30_w ; @55 lea rax, qword ptr [rbp + 0x10] ; @56 @63: - mov edx, dword ptr [rbp - 0x10] ; @70 - cmp edx, 0x28_w ; @107 + mov ecx, dword ptr [rbp - 0x10] ; @70 + cmp ecx, 0x28_w ; @107 jbe @64 ; @108 @65: - lea rcx, qword ptr [rax + 0x8] ; @84 + lea rdx, qword ptr [rax + 0x8] ; @84 jmp @66 ; @106 @64: lea rax, qword ptr [rbp - 0x90] ; @75 - lea rcx, qword ptr [rbp + rdx*1 - 0xc0] ; @77 - add edx, 0x8_w ; @79 + lea rdx, qword ptr [rbp + rcx*1 - 0xc0] ; @77 + add ecx, 0x8_w ; @79 mov dword ptr [rbp - 0xc], 0x40_w ; @80 - mov dword ptr [rbp - 0x10], edx ; @81 + mov dword ptr [rbp - 0x10], ecx ; @81 @66: mov rax, qword ptr [rax] ; @87 mov qword ptr [rbp - 0x10], rax ; @88 - mov rcx, qword ptr [rcx] ; @89 + mov rcx, qword ptr [rdx] ; @89 @61: movsd xmm0, qword ptr [rbp - 0x10] ; @6 addsd xmm0, qword ptr [rip + @97] ; @98 From 048ec9d83b06c4a5b6bbb5225152fd87e31d2569 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Mon, 24 Nov 2025 00:25:05 -0500 Subject: [PATCH 46/62] Rename `Sroa_coalesce_common` to `Scalars_common` --- src/lib/passes/coalesce_slots/coalesce_slots.ml | 3 +-- src/lib/passes/promote_slots/promote_slots.ml | 4 ++-- src/lib/passes/{sroa_coalesce_common.ml => scalars_common.ml} | 0 src/lib/passes/sroa/sroa.ml | 2 +- 4 files changed, 4 insertions(+), 5 deletions(-) rename src/lib/passes/{sroa_coalesce_common.ml => scalars_common.ml} (100%) diff --git a/src/lib/passes/coalesce_slots/coalesce_slots.ml b/src/lib/passes/coalesce_slots/coalesce_slots.ml index 2f504300..a8a510cc 100644 --- a/src/lib/passes/coalesce_slots/coalesce_slots.ml +++ b/src/lib/passes/coalesce_slots/coalesce_slots.ml @@ -4,10 +4,9 @@ open Regular.Std open Virtual open Scalars open Coalesce_slots_impl -open Sroa_coalesce_common +open Scalars_common module E = Monad.Result.Error - module V = Make(VL) module A = Make(AL) diff --git a/src/lib/passes/promote_slots/promote_slots.ml b/src/lib/passes/promote_slots/promote_slots.ml index 17b05255..c8fef41f 100644 --- a/src/lib/passes/promote_slots/promote_slots.ml +++ b/src/lib/passes/promote_slots/promote_slots.ml @@ -50,7 +50,7 @@ open E.Syntax let run fn = if Dict.mem (Func.dict fn) Tags.ssa then - let module S = Slot_initialization.Make(Sroa_coalesce_common.VL) in + let module S = Slot_initialization.Make(Scalars_common.VL) in let slots = S.Analysis.collect_slots fn in let cfg = Cfg.create fn in let blks = Func.map_of_blks fn in @@ -65,7 +65,7 @@ let run fn = let run_abi fn = let open Abi in if Dict.mem (Func.dict fn) Tags.ssa then - let module S = Slot_initialization.Make(Sroa_coalesce_common.AL) in + let module S = Slot_initialization.Make(Scalars_common.AL) in let slots = S.Analysis.collect_slots fn in let cfg = Cfg.create fn in let blks = Func.map_of_blks fn in diff --git a/src/lib/passes/sroa_coalesce_common.ml b/src/lib/passes/scalars_common.ml similarity index 100% rename from src/lib/passes/sroa_coalesce_common.ml rename to src/lib/passes/scalars_common.ml diff --git a/src/lib/passes/sroa/sroa.ml b/src/lib/passes/sroa/sroa.ml index d613f9b9..44f9705b 100644 --- a/src/lib/passes/sroa/sroa.ml +++ b/src/lib/passes/sroa/sroa.ml @@ -1,7 +1,7 @@ open Core open Virtual open Sroa_impl -open Sroa_coalesce_common +open Scalars_common module V = Make(VL) module A = Make(AL) From a053452367f54fe02ce252e3730e1878d603de0d Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Mon, 24 Nov 2025 00:58:08 -0500 Subject: [PATCH 47/62] Use the Allen algebra in SROA --- src/lib/allen_interval_algebra.ml | 9 +- src/lib/passes/sroa/sroa_impl.ml | 214 ++++++++++++++++++------------ 2 files changed, 138 insertions(+), 85 deletions(-) diff --git a/src/lib/allen_interval_algebra.ml b/src/lib/allen_interval_algebra.ml index 711ed8e8..a51e53a0 100644 --- a/src/lib/allen_interval_algebra.ml +++ b/src/lib/allen_interval_algebra.ml @@ -1,10 +1,10 @@ -open Core - (** Allen's Interval Algebra. {:https://en.wikipedia.org/wiki/Allen%27s_interval_algebra} *) +open Core + type t = | Before | Meets @@ -44,7 +44,10 @@ module type S = sig (** A point in the interval. *) type point - (** An inclusive interval. *) + (** An inclusive interval. + + Invariant: [lo t <= hi t] + *) type t (** The lower-bound of the interval. *) diff --git a/src/lib/passes/sroa/sroa_impl.ml b/src/lib/passes/sroa/sroa_impl.ml index 04d6709f..d6a88fc9 100644 --- a/src/lib/passes/sroa/sroa_impl.ml +++ b/src/lib/passes/sroa/sroa_impl.ml @@ -13,6 +13,66 @@ open Regular.Std open Scalars module Slot = Virtual.Slot +module Allen = Allen_interval_algebra + +type range = { + lo : int64; + hi : int64; +} + +module Range = struct + type t = range + let lo r = r.lo [@@inline] + let hi r = r.hi [@@inline] + let size r = Int64.(r.hi - r.lo) [@@inline] + let coverage a b = Int64.(b.hi - a.lo) [@@inline] + let pp ppf r = Format.fprintf ppf "[0x%Lx, 0x%Lx)" r.lo r.hi +end + +module Algebra = Allen.Make(struct + include Range + type point = int64 + include Int64.Replace_polymorphic_compare + end) + +let basic_size ty = Type.sizeof_basic ty / 8 [@@inline] + +(* A partition of elements at a particular offset+size range. *) +type 'a partition = { + off : int64; + size : int64; + mems : 'a list; +} + +type 'a partitions = 'a partition list Var.Map.t + +module Partition = struct + type 'a t = 'a partition + + let cmp a b = Int64.compare a.off b.off + + (* Check if a partition covers the entire slot `s`. *) + let is_entire_slot s p = match p.off with + | 0L -> Int64.(of_int (Slot.size s) = p.size) + | _ -> false + [@@inline] + + let range p = { + lo = p.off; + hi = Int64.(p.off + p.size); + } [@@inline] + + let pp ppa ppf p = + Format.fprintf ppf "0x%Lx:%Ld: %a" + p.off p.size + (Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.fprintf ppf " ") + ppa) p.mems + + let add_member x p = {p with mems = x :: p.mems} [@@inline] + let change off size x p = {off; size; mems = x :: p.mems} [@@inline] + let singleton off size x = {off; size; mems = [x]} [@@inline] +end module Make(M : Scalars.L) : sig val run : M.Func.t -> M.Func.t Context.t @@ -31,29 +91,37 @@ end = struct type accesses = access list Var.Map.t - let basic_size ty = Type.sizeof_basic ty / 8 - let sizeof_access a = basic_size a.ty + module Access = struct + type t = access + + let sizeof a = basic_size a.ty [@@inline] - let cmp_access a b = - match Int64.compare a.off b.off with - | 0 -> Int.compare (sizeof_access a) (sizeof_access b) - | c -> c + let cmp a b = + match Int64.compare a.off b.off with + | 0 -> Int.compare (sizeof a) (sizeof b) + | c -> c - let pp_access ppf a = - let neg = Int64.is_negative a.off in - let pre, off = if neg then '-', Int64.neg a.off else '+', a.off in - Format.fprintf ppf "(%a %a.%a %c0x%Lx)" - Label.pp (Insn.label a.insn) - pp_load_or_store a.ldst - Type.pp_basic a.ty - pre off + let range a = { + lo = a.off; + hi = Int64.(a.off + of_int (sizeof a)); + } [@@inline] + + let pp ppf a = + let neg = Int64.is_negative a.off in + let pre, off = if neg then '-', Int64.neg a.off else '+', a.off in + Format.fprintf ppf "(%a %a.%a %c0x%Lx)" + Label.pp (Insn.label a.insn) + pp_load_or_store a.ldst + Type.pp_basic a.ty + pre off + end let collect_accesses slots fn t : accesses = (* Group all memory accesses by their corresponding slot. *) Func.blks fn |> Seq.fold ~init:Var.Map.empty ~f:(fun init b -> let s = ref @@ get t @@ Blk.label b in - Blk.insns b |> Seq.fold ~init ~f:(fun acc i -> - let op = Insn.op i in + Blk.insns b |> Seq.fold ~init ~f:(fun acc insn -> + let op = Insn.op insn in let acc = match Insn.load_or_store_to op with | None -> acc | Some (ptr, ty, ldst) -> match Map.find !s ptr with @@ -65,77 +133,49 @@ end = struct __FUNCTION__ Var.pp base); acc | Some Offset (base, off) -> - Map.add_multi acc ~key:base ~data:{insn = i; off; ty; ldst} + Map.add_multi acc ~key:base ~data:{insn; off; ty; ldst} | _ -> acc in s := Analysis.transfer_op slots !s op; acc)) |> - Map.map ~f:(List.sort ~compare:cmp_access) - - let overlaps oa sa ob sb = - Int64.(oa < ob + of_int sb && ob < oa + of_int sa) - - let within oa sa ob sb = - Int64.(oa >= ob && oa + of_int sa <= ob + of_int sb) - - (* A partition of memory accesses at a particular offset+size range. *) - type partition = { - off : int64; - size : int; - mems : access list; - } - - type partitions = partition list Var.Map.t - - let cmp_partition a b = Int64.compare a.off b.off - - (* Check if a partition covers the entire slot `s`. *) - let is_entire_slot s p = match p.off with - | 0L -> Slot.size s = p.size - | _ -> false - - let pp_partition ppf p = - Format.fprintf ppf "0x%Lx:%d: %a" - p.off p.size - (Format.pp_print_list - ~pp_sep:(fun ppf () -> Format.fprintf ppf " ") - pp_access) p.mems - - let match_part x c = {c with mems = x :: c.mems} - let grow_part off size x c = {off; size; mems = x :: c.mems} - let new_part off size x = {off; size; mems = [x]} + Map.map ~f:(List.sort ~compare:Access.cmp) (* Sort the memory accesses into self-contained, non-overlapping partitions, which are the fully-or-partially scalarized sub-objects of the aggregate. *) - let partition_acesses : accesses -> partitions = fun m -> + let partition_acesses : accesses -> access partitions = fun m -> let rec merge acc c = function - | [] -> List.sort (c :: acc) ~compare:cmp_partition + | [] -> List.sort (c :: acc) ~compare:Partition.cmp | x :: xs -> - let sx = sizeof_access x in - if Int64.(c.off = x.off) && c.size = sx then - (* Access exactly matches the current partition. *) - let p = match_part x c in - merge acc p xs - else if overlaps c.off c.size x.off sx then - (* Access overlaps with current partition, so the partition - must increase in size. *) - let open Int64 in - let o' = min c.off x.off in - let ec = c.off + of_int c.size in - let ex = x.off + of_int sx in - let e' = max ec ex in - let s' = to_int_exn (e' - o') in - let p = grow_part o' s' x c in - merge acc p xs - else - (* No overlap, so we start a new partition. *) - let p = new_part x.off sx x in - merge (c :: acc) p xs in + let rc = Partition.range c in + let rx = Access.range x in + let a = Algebra.relate rc rx in + let acc, p = match a with + | Equal | Started_by | Finished_by | Contains -> + (* Partition subsumes the access. *) + acc, Partition.add_member x c + | During | Finishes -> + (* Access subsumes the partition *) + acc, Partition.change rx.lo (Range.size rx) x c + | Overlaps | Starts -> + (* Extend the upper bound. *) + acc, Partition.change rc.lo (Range.coverage rc rx) x c + | Overlapped_by -> + (* Extend the lower bound. *) + acc, Partition.change rx.lo (Range.coverage rx rc) x c + | Before | After | Meets | Met_by -> + (* No overlap, so we start a new partition. *) + let sx = Int64.of_int @@ Access.sizeof x in + c :: acc, Partition.singleton x.off sx x in + Logs.debug (fun m -> + m "%s: partition %a, access %a: %a%!" + __FUNCTION__ Range.pp rc Range.pp rx Allen.pp a); + merge acc p xs in (* pre: each access list is sorted *) Map.filter_map m ~f:(function | [] -> None | x :: xs -> - let p = new_part x.off (sizeof_access x) x in + let sx = Int64.of_int @@ Access.sizeof x in + let p = Partition.singleton x.off sx x in Some (merge [] p xs)) (* Turn each partition into a concrete slot. *) @@ -143,25 +183,34 @@ end = struct Map.to_sequence parts |> Seq.filter_map ~f:(fun (base, ps) -> Map.find slots base |> Option.map ~f:(fun s -> base, ps, s)) |> Context.Seq.fold ~init:Scalar.Map.empty ~f:(fun init (base, ps, s) -> - Seq.of_list ps |> Seq.filter ~f:(not @. is_entire_slot s) |> + Seq.of_list ps |> Seq.filter ~f:(not @. Partition.is_entire_slot s) |> Context.Seq.fold ~init ~f:(fun acc p -> let open Context.Syntax in + let size = Int64.to_int_exn p.size in (* TODO: look through `p.mems` and see if there is a store that is larger than other acesses (i.e. `st.l` followed by one or more `ld.w`). If so, this partition could be broken down further if we modify the store instruction(s). *) let* x = Context.Var.fresh in - let*? s = Slot.create x ~size:p.size ~align:p.size in + let*? s = Slot.create x ~size ~align:size in Logs.debug (fun m -> - m "%s: new slot %a, base=%a, off=0x%Lx, size=%d%!" + m "%s: new slot %a, base=%a, off=0x%Lx, size=%Ld%!" __FUNCTION__ Var.pp x Var.pp base p.off p.size); !!(Map.set acc ~key:(base, p.off) ~data:s))) (* Find the corresponding partition for [base+off, base+off+size). *) - let find_partition (parts : partitions) base off size = - Map.find parts base |> - Option.bind ~f:(List.find ~f:(fun p -> - within off size p.off p.size)) + let find_partition (parts : 'a partitions) base off size = + Map.find parts base |> Option.bind ~f:(fun ps -> + let r = {lo = off; hi = Int64.(off + of_int size)} in + List.find ps ~f:(fun (p : 'a partition) -> + let rp = Partition.range p in + let a = Algebra.relate r rp in + Logs.debug (fun m -> + m "%s: relating %a to %a: %a%!" + __FUNCTION__ Range.pp r Range.pp rp Allen.pp a); + match a with + | Starts | During | Finishes | Equal -> true + | _ -> false)) (* Exact cover for a scalar at `base + off`. *) let rewrite_insn_exact (m : scalars) i ~exact ~base ~off = @@ -257,7 +306,8 @@ end = struct Var.pp key (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n") - (fun ppf -> Format.fprintf ppf " %a" pp_partition)) + (fun ppf -> Format.fprintf ppf " %a" + (Partition.pp Access.pp))) data)) (Map.to_alist parts)) From 492de752ca6abc06a661c5fe1cdc356c4a975eb5 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Tue, 25 Nov 2025 01:39:52 -0500 Subject: [PATCH 48/62] Small fixes for jump tables - Unsigned comparisons (need to do this in other places in isel) - the `sub` in jmptbl may be larger than `INT32_MAX` --- src/lib/machine/x86/x86_amd64_isel.ml | 34 ++++++-- src/test/data/opt/switchcaseprop.vir | 19 +++++ src/test/data/opt/switchcaseprop.vir.opt | 21 ++++- .../opt/switchcaseprop.vir.opt.sysv.amd64 | 84 +++++++++++++------ 4 files changed, 120 insertions(+), 38 deletions(-) diff --git a/src/lib/machine/x86/x86_amd64_isel.ml b/src/lib/machine/x86/x86_amd64_isel.ml index 469dcb3d..d8fec5c7 100644 --- a/src/lib/machine/x86/x86_amd64_isel.ml +++ b/src/lib/machine/x86/x86_amd64_isel.ml @@ -58,6 +58,14 @@ let can_lea_ty = function | `i16 | `i32 | `i64 -> true | _ -> false +let compare_u a b = + Int64.(compare (a lxor min_value) (b lxor min_value)) + +let lt_u a b = compare_u a b < 0 [@@inline] [@@ocaml.warning "-32"] +let le_u a b = compare_u a b <= 0 [@@inline] [@@ocaml.warning "-32"] +let gt_u a b = compare_u a b > 0 [@@inline] [@@ocaml.warning "-32"] +let ge_u a b = compare_u a b >= 0 [@@inline] [@@ocaml.warning "-32"] + (* pre: `tbl` is non-empty TODO: @@ -75,9 +83,10 @@ let adjust_table d tbl = let acc = Vec.create () in let _ = List.fold tbl ~init:lowest ~f:(fun p (v, l) -> let diff = Int64.(v - p) in - for _ = 0 to Int64.to_int_trunc diff - 1 do - Vec.push acc d; - done; + let rec loop i = if lt_u i diff then + let () = Vec.push acc d in + loop @@ Int64.succ i in + loop 0L; Vec.push acc l; Int64.succ v) in Vec.to_list acc, lowest, highest @@ -1954,6 +1963,10 @@ end = struct let tbl, lowest, highest = adjust_table d tbl in let highest' = Int64.(highest - lowest) in let diff = Int64.(highest - highest') in + let diff_fits = fits_int32_pos diff in + let* tdiff = if diff_fits then !!None else + let+ r = C.Var.fresh >>| Rv.var GPR in + Some r in let* tl, tladdr = fresh_label_addr in let* tbase = C.Var.fresh >>| Rv.var GPR in let* tidx = C.Var.fresh >>| Rv.var GPR in @@ -1981,11 +1994,16 @@ end = struct I.mov (Oreg (tidx, xt)) (Oreg (x, xt)) ]; (* Subtract the difference from the index if needed. *) - ( if Int64.(diff = 1L) - then [I.dec (Oreg (tidx, `i64))] - else if Int64.(diff > 0L) - then [I.sub (Oreg (tidx, `i64)) (Oimm (diff, `i64))] - else [] + ( match diff with + | 0L -> [] + | 1L -> [I.dec (Oreg (tidx, `i64))] + | _ when diff_fits -> + [I.sub (Oreg (tidx, `i64)) (Oimm (diff, `i64))] + | _ -> + let tdiff = Option.value_exn tdiff in [ + I.mov (Oreg (tdiff, `i64)) (Oimm (diff, `i64)); + I.sub (Oreg (tidx, `i64)) (Oreg (tdiff, `i64)); + ] ); [ (* Compare against highest value. *) I.cmp (Oreg (tidx, `i64)) (Oimm (highest', `i64)); diff --git a/src/test/data/opt/switchcaseprop.vir b/src/test/data/opt/switchcaseprop.vir index 6ef49f61..1c80f737 100644 --- a/src/test/data/opt/switchcaseprop.vir +++ b/src/test/data/opt/switchcaseprop.vir @@ -17,3 +17,22 @@ export function w $foo(w %x) { %x = add.w %x, 1_w ret %x } + +export function w $bar(w %x) { +@start: + switch.w %x, @default [0x2_w -> @one, + 0x3_w -> @two, + 0x4_w -> @three, + 0x5_w -> @three] +@default: + ret %x +@one: + %x = add.w %x, 1_w + ret %x +@two: + %x = add.w %x, 1_w + ret %x +@three: + %x = add.w %x, 1_w + ret %x +} diff --git a/src/test/data/opt/switchcaseprop.vir.opt b/src/test/data/opt/switchcaseprop.vir.opt index e75f970d..249a0cfb 100644 --- a/src/test/data/opt/switchcaseprop.vir.opt +++ b/src/test/data/opt/switchcaseprop.vir.opt @@ -2,9 +2,22 @@ module foo export function w $foo(w %x) { @2: - switch.w %x, @10(%x) [0x1_w -> @10(0x2_w), - 0x2_w -> @10(0x3_w), - 0x3_w -> @10(0x4_w)] -@10(%0): + switch.w %x, @18(%x) [0x1_w -> @18(0x2_w), + 0x2_w -> @18(0x3_w), + 0x3_w -> @18(0x4_w)] +@18(%0): ret %0 } + +export function w $bar(w %x) { +@10: + switch.w %x, @19(%x) [0x2_w -> @19(0x3_w), + 0x3_w -> @19(0x4_w), + 0x4_w -> @14, + 0x5_w -> @14] +@14: + %2 = add.w %x, 0x1_w ; @20 + jmp @19(%2) +@19(%1): + ret %1 +} diff --git a/src/test/data/opt/switchcaseprop.vir.opt.sysv.amd64 b/src/test/data/opt/switchcaseprop.vir.opt.sysv.amd64 index c71df64e..4311036e 100644 --- a/src/test/data/opt/switchcaseprop.vir.opt.sysv.amd64 +++ b/src/test/data/opt/switchcaseprop.vir.opt.sysv.amd64 @@ -2,31 +2,63 @@ module foo export function $foo { ; returns: rax @2: - mov %x:w, edi ; @26 - test %x:w, %x:w ; @27 - je @11 ; @28 - mov %2:w, %x:w ; zx ; @29 - dec %2:l ; @30 - cmp %2:l, 0x2_l ; @31 - ja @11 ; @32 - lea %1:l, qword ptr [rip + @25] ; @33 - movsxd %3:l, dword ptr [%1 + %2*4] ; @34 - add %3:l, %1:l ; @35 - jmp %3:l ; @36 - .tbl @25 [@12, @13, @14] ; @37 -@11: - mov %0:w, %x:w ; @23 - jmp @10 ; @24 -@12: - mov %0:w, 0x2_w ; @21 - jmp @10 ; @22 -@13: - mov %0:w, 0x3_w ; @19 - jmp @10 ; @20 -@14: - mov %0:w, 0x4_w ; @17 - jmp @10 ; @18 + mov %x:w, edi ; @36 + test %x:w, %x:w ; @37 + je @21 ; @38 + mov %4:w, %x:w ; zx ; @39 + dec %4:l ; @40 + cmp %4:l, 0x2_l ; @41 + ja @21 ; @42 + lea %3:l, qword ptr [rip + @35] ; @43 + movsxd %5:l, dword ptr [%3 + %4*4] ; @44 + add %5:l, %3:l ; @45 + jmp %5:l ; @46 + .tbl @35 [@22, @23, @24] ; @47 +@21: + mov %0:w, %x:w ; @33 + jmp @18 ; @34 +@22: + mov %0:w, 0x2_w ; @31 + jmp @18 ; @32 +@23: + mov %0:w, 0x3_w ; @29 + jmp @18 ; @30 +@24: + mov %0:w, 0x4_w ; @27 + jmp @18 ; @28 +@18: + mov eax, %0:w ; @25 + ret ; @26 +} + +export function $bar { ; returns: rax @10: - mov eax, %0:w ; @15 - ret ; @16 + mov %x:w, edi ; @62 + cmp %x:w, 0x2_w ; @63 + jb @48 ; @64 + mov %7:w, %x:w ; zx ; @65 + sub %7:l, 0x2_l ; @66 + cmp %7:l, 0x3_l ; @67 + ja @48 ; @68 + lea %6:l, qword ptr [rip + @61] ; @69 + movsxd %8:l, dword ptr [%6 + %7*4] ; @70 + add %8:l, %6:l ; @71 + jmp %8:l ; @72 + .tbl @61 [@49, @50, @14, @14] ; @73 +@48: + mov %1:w, %x:w ; @59 + jmp @19 ; @60 +@49: + mov %1:w, 0x3_w ; @57 + jmp @19 ; @58 +@50: + mov %1:w, 0x4_w ; @55 + jmp @19 ; @56 +@14: + lea %2:w, qword ptr [%x + 0x1] ; @20 + mov %1:w, %2:w ; @53 + jmp @19 ; @54 +@19: + mov eax, %1:w ; @51 + ret ; @52 } From ca108f0f7af00d672c5300c881cde435a0e55003 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Tue, 25 Nov 2025 23:19:53 -0500 Subject: [PATCH 49/62] Remove `fmt` from test_opt Just compare them raw. We still only produce a diff when there is an actual failure, because it is of course slower than checking equality on strings. --- src/test/test_opt.ml | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/src/test/test_opt.ml b/src/test/test_opt.ml index 3f911c0c..18350f3d 100644 --- a/src/test/test_opt.ml +++ b/src/test/test_opt.ml @@ -2,17 +2,6 @@ open Core open OUnit2 open Cgen -let fmt s = - (* Ignore lines starting with a double comment *) - let s = - String.split_lines s |> List.filter ~f:(fun ln -> - not @@ String.is_prefix ln ~prefix:";;") |> - String.concat in - (* Ignore returns/newlines/tabs/spaces *) - String.filter s ~f:(function - | '\r' | '\n' | '\t' | ' ' -> false - | _ -> true) - let from_file filename = let open Context.Syntax in let* m = Parse.Virtual.from_file filename in @@ -23,14 +12,14 @@ let from_file filename = let overwrite = false let compare_outputs filename' expected p' = - if String.(fmt p' <> fmt expected) then + let expected' = String.chop_suffix_if_exists expected ~suffix:"\n" in + if String.(p' <> expected') then if overwrite then (* Assume we're being tested via `dune test`, which runs with "_build/default/test/" as the CWD. *) Out_channel.write_all ("../../../test/" ^ filename') ~data:(p' ^ "\n") else - let expected = String.chop_suffix_if_exists expected ~suffix:"\n" in - let diff = Odiff.strings_diffs expected p' in + let diff = Odiff.strings_diffs expected' p' in let msg = Format.sprintf "Diff:\n\n%s" (Odiff.string_of_diffs diff) in assert_failure msg From 3b0ccabc8db1debbe174a336f324152ffe6291d1 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Wed, 26 Nov 2025 00:04:21 -0500 Subject: [PATCH 50/62] Peephole rule for merging adjacent blocks --- src/lib/machine/x86/x86_amd64_peephole.ml | 35 +++++++++ .../opt/vaarg1.vir.opt.sysv.amd64.regalloc | 2 - .../opt/vasum.vir.opt.sysv.amd64.regalloc | 72 +++++++++++++++++++ src/test/test_opt.ml | 1 + 4 files changed, 108 insertions(+), 2 deletions(-) create mode 100644 src/test/data/opt/vasum.vir.opt.sysv.amd64.regalloc diff --git a/src/lib/machine/x86/x86_amd64_peephole.ml b/src/lib/machine/x86/x86_amd64_peephole.ml index 57fbc048..cb5f0ae6 100644 --- a/src/lib/machine/x86/x86_amd64_peephole.ml +++ b/src/lib/machine/x86/x86_amd64_peephole.ml @@ -99,6 +99,40 @@ let jump_threading changed fn = | insn -> insn)) else fn +let is_merge_candidate cfg b1 b2 = + match Seq.hd @@ Blk.insns ~rev:true b1 with + | Some i when is_barrier (Insn.insn i) -> false + | Some _ | None -> + let l1 = Blk.label b1 and l2 = Blk.label b2 in Seq.( + equal Label.equal (Cfg.Node.succs l1 cfg) (singleton l2) && + equal Label.equal (Cfg.Node.preds l2 cfg) (singleton l1)) + +(* find m l = None ==> no change + find m l = Some None ==> delete + find m l = Some (Some b) ==> replace *) +let collect_merge_blks fn = + let cfg = Cfg.create ~is_barrier ~dests fn in + let rec go m = function + | [] | [_] -> m + | b1 :: b2 :: rest when is_merge_candidate cfg b1 b2 -> + let label = Blk.label b1 in + let insns = Seq.(to_list @@ append (Blk.insns b1) (Blk.insns b2)) in + let b1' = Blk.create ~label ~insns in + let m = Ltree.add_exn m ~key:label ~data:(Some b1') in + let m = Ltree.add_exn m ~key:(Blk.label b2) ~data:None in + go m rest + | _ :: rest -> go m rest in + Func.blks fn |> Seq.to_list |> go Ltree.empty + +let merge_blks changed fn = + let m = collect_merge_blks fn in + if Ltree.is_empty m then fn else + let () = changed := true in + Func.blks fn |> Seq.filter_map ~f:(fun b -> + match Ltree.find m @@ Blk.label b with + | None -> Some b | Some b' -> b') |> + Seq.to_list |> Func.with_blks fn + (* Remove blocks that are not reachable from the entry block. *) let remove_disjoint changed fn = let reachable = with_return @@ fun {return} -> @@ -385,6 +419,7 @@ let run fn = let changed = ref false in let fn = jump_threading changed fn in let fn = remove_disjoint changed fn in + let fn = merge_blks changed fn in let afters = Func.collect_afters fn in let fn = invert_branches changed afters fn in let fn = implicit_fallthroughs changed afters fn in diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc index b05d37c8..eeafde73 100644 --- a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc @@ -25,7 +25,6 @@ export function $foo { ; returns: rax mov dword ptr [rbp - 0x10], 0x8_w ; @53 mov dword ptr [rbp - 0xc], 0x30_w ; @55 lea rax, qword ptr [rbp + 0x10] ; @56 -@63: mov ecx, dword ptr [rbp - 0x10] ; @70 cmp ecx, 0x28_w ; @107 jbe @64 ; @108 @@ -42,7 +41,6 @@ export function $foo { ; returns: rax mov rax, qword ptr [rax] ; @87 mov qword ptr [rbp - 0x10], rax ; @88 mov rcx, qword ptr [rdx] ; @89 -@61: movsd xmm0, qword ptr [rbp - 0x10] ; @6 addsd xmm0, qword ptr [rip + @97] ; @98 .fp64 @97, 1.234 ; @99 diff --git a/src/test/data/opt/vasum.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/vasum.vir.opt.sysv.amd64.regalloc new file mode 100644 index 00000000..7e857eef --- /dev/null +++ b/src/test/data/opt/vasum.vir.opt.sysv.amd64.regalloc @@ -0,0 +1,72 @@ +module vasum + +export function $sum { ; returns: rax +@19: + push rbp ; @127 + mov rbp, rsp ; @128 + sub rsp, 0xc0_l ; @129 + mov r10d, edi ; @111 + mov qword ptr [rbp - 0xa8], rsi ; @21 + mov qword ptr [rbp - 0xa0], rdx ; @23 + mov qword ptr [rbp - 0x98], rcx ; @25 + mov qword ptr [rbp - 0x90], r8 ; @27 + mov qword ptr [rbp - 0x88], r9 ; @29 + test al, al ; @106 + je @2 ; @107 +@18: + movdqa xmmword ptr [rbp - 0x80], xmm0 ; @32 + movdqa xmmword ptr [rbp - 0x70], xmm1 ; @34 + movdqa xmmword ptr [rbp - 0x60], xmm2 ; @36 + movdqa xmmword ptr [rbp - 0x50], xmm3 ; @38 + movdqa xmmword ptr [rbp - 0x40], xmm4 ; @40 + movdqa xmmword ptr [rbp - 0x30], xmm5 ; @42 + movdqa xmmword ptr [rbp - 0x20], xmm6 ; @44 + movdqa xmmword ptr [rbp - 0x10], xmm7 ; @46 +@2: + lea rax, qword ptr [rbp + 0x10] ; @50 + mov r8d, 0x8_w ; @101 + xor edx, edx ; @102 + xor ecx, ecx ; @103 +@3: + cmp ecx, r10d ; @95 + jb @56 ; @96 +@8: + mov eax, edx ; @93 + leave ; @130 + ret ; @94 +@56: + cmp r8d, 0x28_w ; @87 + jbe @57 ; @88 +@58: + lea rsi, qword ptr [rax + 0x8] ; @70 + mov rdi, rax ; @85 + jmp @59 ; @86 +@57: + lea rdi, qword ptr [rbp + r8*1 - 0xb0] ; @65 + add r8d, 0x8_w ; @66 + mov rsi, rax ; @79 +@59: + add edx, dword ptr [rdi] ; @16 + inc ecx ; @17 + mov rax, rsi ; @73 + jmp @3 ; @77 +} + +export function $twenty_eight { ; returns: rax +@13: + push rbp ; @131 + mov rbp, rsp ; @132 + mov edi, 0x7_w ; @14 + mov esi, 0x1_w ; @115 + mov edx, 0x2_w ; @116 + mov ecx, 0x3_w ; @117 + mov r8d, 0x4_w ; @118 + mov r9d, 0x5_w ; @119 + sub rsp, 0x10_l ; @120 + mov dword ptr [rsp], 0x6_w ; @121 + mov dword ptr [rsp + 0x8], 0x7_w ; @122 + xor al, al ; @123 + call $sum ; r8 r9 rax rcx rdi rdx rsi ; @124 + leave ; @133 + ret ; @114 +} diff --git a/src/test/test_opt.ml b/src/test/test_opt.ml index 18350f3d..e2f8ca99 100644 --- a/src/test/test_opt.ml +++ b/src/test/test_opt.ml @@ -419,6 +419,7 @@ let regalloc_suite = "Test register allocation" >::: [ "Slot promotion 2 (GCD, partial) (SysV AMD64)" >:: test_sysv_amd64_regalloc "promote2-partial"; "Parallel moves (SysV AMD64)" >:: test_sysv_amd64_regalloc "parallel"; "Struct in a block argument (SysV AMD64)" >:: test_sysv_amd64_regalloc "sumphi"; + "Variadic sum (SysV AMD64)" >:: test_sysv_amd64_regalloc "vasum"; ] let native_suite = "Test native code" >::: [ From b685e4c16e76ccce0e6f9f8c6a1d21965fd1017d Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Thu, 27 Nov 2025 01:27:59 -0500 Subject: [PATCH 51/62] Fixes targets not being initialized at exe time --- src/bin/cli.ml | 24 ++++-------------------- src/lib/machine/machine.ml | 10 ++++++++++ src/lib/machine/machine.mli | 3 +++ src/lib/target.ml | 2 +- 4 files changed, 18 insertions(+), 21 deletions(-) diff --git a/src/bin/cli.ml b/src/bin/cli.ml index 0368c51b..cc3f90e1 100644 --- a/src/bin/cli.ml +++ b/src/bin/cli.ml @@ -1,23 +1,6 @@ open Cmdliner -(* Why is this here, when we have `Cgen.Target.find`? - - The targets should be getting declared at the toplevel in their - respective modules, but the evaluation of these modules is not - guaranteed to happen by the time we start parsing command-line - arguments. - - It just so happens that, for the front-end executable, we should - know all of the out-of-the-box targets provided by the library, - so this feels like a tolerable compromise in the design of extending - the supported targets. -*) -let targets = - Core.Map.of_alist_exn (module Core.String) @@ - Core.List.map ~f:(fun t -> Cgen.Target.name t, t) @@ - Cgen.Machine.[ - X86.Amd64_sysv.target; - ] +let () = Cgen.Machine.force_initialization () let bail () = exit Cmd.Exit.ok @@ -100,7 +83,8 @@ let dump_no_comment = let man_targets = `S "TARGET" :: `Pre "Supported target platforms" :: begin - Core.Map.data targets |> + Cgen.Target.enum_targets () |> + Core.Sequence.to_list |> Core.List.map ~f:(fun t -> `P (Format.asprintf "%a" Cgen.Target.pp t)) end @@ -151,7 +135,7 @@ let go f file output dump nc target log_level = let dump = match dump_of_string_opt dump with | None -> fatal "invalid dump option: %s\n%!" dump () | Some d -> d in - let target = match Core.Map.find targets target with + let target = match Cgen.Target.find target with | None -> fatal "invalid target: %s\n%!" target () | Some t -> t in f {file; output; dump; nc; target} diff --git a/src/lib/machine/machine.ml b/src/lib/machine/machine.ml index 65729865..df1d9ca0 100644 --- a/src/lib/machine/machine.ml +++ b/src/lib/machine/machine.ml @@ -1 +1,11 @@ +open Core + module X86 = X86 + +let targets = [ + X86.Amd64_sysv.target; +] + +let force_initialization () = + Logs.debug (fun m -> m "forcing initialization of targets%!"); + ignore (Sys.opaque_identity targets) diff --git a/src/lib/machine/machine.mli b/src/lib/machine/machine.mli index d842c8f8..3a890dbd 100644 --- a/src/lib/machine/machine.mli +++ b/src/lib/machine/machine.mli @@ -7,3 +7,6 @@ module X86 : sig val target : Target.t end end + +(** Ensures that all targets are registered. *) +val force_initialization : unit -> unit diff --git a/src/lib/target.ml b/src/lib/target.ml index 7cb56d66..80864f72 100644 --- a/src/lib/target.ml +++ b/src/lib/target.ml @@ -24,7 +24,7 @@ let declare ~name ~word ~little () = targets := m; t -let find = Map.find !targets +let find name = Map.find !targets name let name t = t.name let word t = t.word From 7b6d90762dd45cd0acaf2d0c70e300d3935d2fec Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Thu, 27 Nov 2025 01:56:03 -0500 Subject: [PATCH 52/62] Adds more debug logging --- src/lib/egraph/egraph.ml | 39 +++++++++++++ src/lib/egraph/egraph.mli | 2 +- src/lib/egraph/egraph_common.ml | 4 +- src/lib/egraph/egraph_rewrite.ml | 16 ++++++ src/lib/egraph/egraph_sched.ml | 57 ++++++++++++++++--- src/lib/egraph/extractor/extractor_cfg.ml | 20 ++++++- src/lib/egraph/extractor/extractor_core.ml | 16 ++++++ src/lib/isel/isel_match.ml | 3 +- src/lib/matcher.ml | 13 ++++- src/lib/matcher.mli | 5 +- src/lib/passes/egraph_opt/egraph_opt.ml | 4 +- src/lib/passes/egraph_opt/egraph_opt_rules.ml | 13 ++++- .../passes/simplify_cfg/simplify_cfg_brsel.ml | 7 +++ .../simplify_cfg/simplify_cfg_contract.ml | 21 +++++-- .../simplify_cfg/simplify_cfg_duplicate_br.ml | 5 ++ .../simplify_cfg/simplify_cfg_merge_blks.ml | 3 + .../simplify_cfg/simplify_cfg_merge_rets.ml | 4 ++ .../simplify_cfg/simplify_cfg_short_circ.ml | 15 ++++- .../simplify_cfg/simplify_cfg_tailrec.ml | 13 +++++ .../simplify_cfg_two_case_switch.ml | 5 ++ 20 files changed, 235 insertions(+), 30 deletions(-) diff --git a/src/lib/egraph/egraph.ml b/src/lib/egraph/egraph.ml index 04d27c6c..d2facdbb 100644 --- a/src/lib/egraph/egraph.ml +++ b/src/lib/egraph/egraph.ml @@ -1,4 +1,5 @@ open Core +open Regular.Std open Virtual include Egraph_common @@ -32,6 +33,43 @@ let check_ssa fn = Input.E.failf "Expected SSA form for function %s" (Func.name fn) () +let debug_dump t = + Logs.debug (fun m -> + let pp_lmoved ppf (l, s) = + Format.fprintf ppf " %a: %s%!" Label.pp l + (Iset.to_list s |> + List.to_string ~f:Id.to_string) in + let pp_sep ppf () = Format.fprintf ppf "\n" in + m "%s: lmoved:\n%a%!" + __FUNCTION__ + (Format.pp_print_list ~pp_sep pp_lmoved) + (Hashtbl.to_alist t.lmoved)); + Logs.debug (fun m -> + let pp_lmoved ppf (id, s) = + Format.fprintf ppf " %d: %s%!" id + (Lset.to_list s |> + List.to_string ~f:Label.to_string) in + let pp_sep ppf () = Format.fprintf ppf "\n" in + m "%s: imoved:\n%a%!" + __FUNCTION__ + (Format.pp_print_list ~pp_sep pp_lmoved) + (Vec.to_sequence_mutable t.imoved |> + Seq.mapi ~f:Tuple2.create |> + Seq.to_list)); + Logs.debug (fun m -> + let pp_ilbl ppf (id, l) = + Format.fprintf ppf " %d: %a%!" id + (Format.pp_print_option + ~none:(fun ppf () -> Format.fprintf ppf "") + Label.pp) (Uopt.to_option l) in + let pp_sep ppf () = Format.fprintf ppf "\n" in + m "%s: ilbl:\n%a%!" + __FUNCTION__ + (Format.pp_print_list ~pp_sep pp_ilbl) + (Vec.to_sequence_mutable t.ilbl |> + Seq.mapi ~f:Tuple2.create |> + Seq.to_list)) + let run ?(depth_limit = 6) ?(match_limit = 20) fn tenv rules = let open Context.Syntax in let*? () = check_ssa fn in @@ -39,4 +77,5 @@ let run ?(depth_limit = 6) ?(match_limit = 20) fn tenv rules = let t = init input depth_limit match_limit rules in let*? () = Builder.run t in let ex = Extractor.init t in + debug_dump t; Extractor.cfg ex diff --git a/src/lib/egraph/egraph.mli b/src/lib/egraph/egraph.mli index 43775154..5d348352 100644 --- a/src/lib/egraph/egraph.mli +++ b/src/lib/egraph/egraph.mli @@ -55,7 +55,7 @@ type rule type rules (** Compiles the rules. *) -val compile : rule list -> rules +val compile : name:string -> rule list -> rules (** [run fn tenv rules ?depth_limit ?match_limit] constructs an e-graph from a function [fn] and applies the [rules] eagerly to produce a diff --git a/src/lib/egraph/egraph_common.ml b/src/lib/egraph/egraph_common.ml index 89b0a4d8..d3fc60df 100644 --- a/src/lib/egraph/egraph_common.ml +++ b/src/lib/egraph/egraph_common.ml @@ -117,11 +117,11 @@ type t = { type egraph = t -let compile rules = +let compile ~name rules = (* XXX: the previous implementation also reversed the rule order, and now the testsuite relies on that behavior. Maybe the real fix is in changing how the extractor breaks ties. *) - Matcher.compile @@ List.rev_map rules + Matcher.compile ~name @@ List.rev_map rules ~f:(fun r -> r.pre, (r.post, r.subsume)) let find t id = Uf.find t.classes id diff --git a/src/lib/egraph/egraph_rewrite.ml b/src/lib/egraph/egraph_rewrite.ml index 0d8b9dc6..822d232a 100644 --- a/src/lib/egraph/egraph_rewrite.ml +++ b/src/lib/egraph/egraph_rewrite.ml @@ -50,6 +50,9 @@ let union ?ty t id oid = (* Called when a duplicate node is inserted. *) let duplicate ?l t id = + Logs.debug (fun m -> + m "%s: inserted already hash-consed term %d:\n %a%!" + __FUNCTION__ id (Enode.pp ~node:(node t)) (node t id)); Option.iter l ~f:(Sched.duplicate t id); id @@ -154,6 +157,14 @@ and rewrite ?ty ?l ~d t rws y = Uf.union t.classes rws.id oid; rws.id <- oid in try + let prev = rws.id in + Logs.debug (fun m -> + let s = Map.to_alist @@ Y.subst y in + m "%s: matched on rule %d for term %d\n pat: %a\n subst: %s%!" + __FUNCTION__ (Y.rule y) prev + Matcher.pp_pat (Y.pat y) + (List.to_string s ~f:(fun (x, id) -> + Format.asprintf "%s=%d" x id))); let env = Map.map (Y.subst y) ~f:(subst_info t) in let action, subsume = Y.payload y in let go env p = check env p; assemble env p in @@ -171,5 +182,10 @@ and rewrite ?ty ?l ~d t rws y = (* Rewrite is OK, integrate with the current e-class. *) let continue = not (subsume || Enode.is_const (node t oid)) in if continue then default oid else optimal oid; + Logs.debug (fun m -> + m "%s: rewrote term %d to %d, continue=%b:\n prev: %a\n curr: %a%!" + __FUNCTION__ prev oid continue + (Enode.pp ~node:(node t)) (node t prev) + (Enode.pp ~node:(node t)) (node t oid)); continue with Mismatch -> true diff --git a/src/lib/egraph/egraph_sched.ml b/src/lib/egraph/egraph_sched.ml index a61984fa..e86bda6c 100644 --- a/src/lib/egraph/egraph_sched.ml +++ b/src/lib/egraph/egraph_sched.ml @@ -49,13 +49,23 @@ let lca t a b = (* Note that `id` must be the canonical e-class. *) let move t old l id = + Logs.debug (fun m -> + let pp_old ppf old = + if List.is_empty old + then Format.fprintf ppf "" + else Format.fprintf ppf "from %s " + (List.to_string ~f:Label.to_string old) in + m "%s: moving term %d %ato %a%!" + __FUNCTION__ id pp_old old Label.pp l); add_moved t id old; set_label t id l; Hashtbl.update t.lmoved l ~f:(function | None -> Iset.singleton id | Some s -> Iset.add s id) -let mark_use t id a = add_moved t id [a] +let mark_use t id a = + Logs.debug (fun m -> m "%s: id=%d, a=%a%!" __FUNCTION__ id Label.pp a); + add_moved t id [a] (* Update when we union two nodes together. Should not be called if both IDs are the same. *) @@ -63,19 +73,30 @@ let merge t a b u = assert (a <> b); let cid = find t a in (* Link the ID to the label, along with the union ID. *) - let link ?p l = + let link ?p dir l = + Logs.debug (fun m -> + m "%s: merge dominated %s: a=%d, b=%d, cid=%d, u=%d, l=%a, p=%a%!" + __FUNCTION__ + (match dir with `left -> "left" | `right -> "right") + a b cid u Label.pp l + (Format.pp_print_option + ~none:(fun ppf () -> Format.fprintf ppf "") + Label.pp) p); Option.iter p ~f:(mark_use t cid); set_label t cid l; set_label t u l in match labelof t a, labelof t b with | None, None -> () - | None, Some pb -> link pb - | Some pa, None -> link pa + | None, Some pb -> link `right pb + | Some pa, None -> link `left pa | Some pa, Some pb when Label.(pa = pb) -> () - | Some pa, Some pb when dominates t ~parent:pb pa -> link pb ~p:pa - | Some pa, Some pb when dominates t ~parent:pa pb -> link pa ~p:pb + | Some pa, Some pb when dominates t ~parent:pb pa -> link `right pb ~p:pa + | Some pa, Some pb when dominates t ~parent:pa pb -> link `left pa ~p:pb | Some pa, Some pb -> let pc = lca t pa pb in + Logs.debug (fun m -> + m "%s: merge LCA: a=%d, b=%d, cid=%d, u=%d, pa=%a, pb=%a, pc=%a%!" + __FUNCTION__ a b cid u Label.pp pa Label.pp pb Label.pp pc); assert (cid = find t b); assert (cid = find t u); clear_label t a; @@ -94,6 +115,10 @@ let rec useof t l : enode -> unit = function useof t l @@ node t c) let default_placement t id l n = + Logs.debug (fun m -> + m "%s: placing term %d at %a:\n node: %a%!" + __FUNCTION__ id Label.pp l + (Enode.pp ~node:(node t)) n); move t [] l id; useof t l n @@ -103,15 +128,28 @@ let duplicate t id a = let cid = find t id in match labelof t cid with | Some b when Label.(b = a) -> () - | Some b when dominates t ~parent:b a -> mark_use t cid a + | Some b when dominates t ~parent:b a -> + Logs.debug (fun m -> + m "%s: %d at %a dominated by previous term %d at %a%!" + __FUNCTION__ id Label.pp a cid Label.pp b); + mark_use t cid a | Some b when dominates t ~parent:a b -> + Logs.debug (fun m -> + m "%s: %d at %a dominates previous term %d at %a%!" + __FUNCTION__ id Label.pp a cid Label.pp b); mark_use t cid b; set_label t id a | Some b -> let c = lca t a b in + Logs.debug (fun m -> + m "%s: %d at %a LCA with previous term %d at %a: %a%!" + __FUNCTION__ id Label.pp a cid Label.pp b Label.pp c); clear_label t cid; move t [a; b] c cid; | None -> + Logs.debug (fun m -> + m "%s: %d at %a has no previous term, cid=%d%!" + __FUNCTION__ id Label.pp a cid); (* This e-class wasn't moved, though it wasn't registered to begin with (even though it was hash-consed). *) default_placement t id a @@ node t id @@ -220,6 +258,11 @@ module Licm = struct let exists_in_loop t parent = List.exists ~f:(is_child_loop ~parent t) let licm_move t l l' lp id lhs = + Logs.debug (fun m -> + m "%s: LICM for term %d: l=%a, l'=%a:\n loop: %a\n node: %a%!" + __FUNCTION__ id Label.pp l Label.pp l' + Loops.pp_data (Loops.get t.input.loop lp) + (Enode.pp ~node:(node t)) (node t id)); let l' = match lhs with | None -> l' | Some x -> match partition_uses t x with diff --git a/src/lib/egraph/extractor/extractor_cfg.ml b/src/lib/egraph/extractor/extractor_cfg.ml index bc89bc02..9d6e0926 100644 --- a/src/lib/egraph/extractor/extractor_cfg.ml +++ b/src/lib/egraph/extractor/extractor_cfg.ml @@ -486,7 +486,10 @@ module Hoisting = struct Context.Seq.iter ~f:(fun (id, cid) -> match extract t id with | None -> extract_fail l id | Some e -> - Context.unless (should_skip t l id cid) @@ fun () -> + let@ () = Context.unless (should_skip t l id cid) in + Logs.debug (fun m -> + m "%s: id=%d, cid=%d was moved to l=%a, OK to hoist%!" + __FUNCTION__ id cid Label.pp l); pure t env e >>| ignore) end @@ -507,10 +510,21 @@ let reify t env l = begin match op, args with | Oset _, [E (Id {canon; _}, _, _)] when Common.is_pinned t.eg canon -> + Logs.debug (fun m -> + m "%s: pinned: id=%d, l=%a%!" + __FUNCTION__ id Label.pp l); exp t env l e - | _ -> !!() + | _ -> + Logs.debug (fun m -> + m "%s: delaying CFG extraction of id=%d, l=%a%!" + __FUNCTION__ id Label.pp l); + !!() end - | Some e -> exp t env l e + | Some e -> + Logs.debug (fun m -> + m "%s: eagerly extracting id=%d to l=%a in CFG%!" + __FUNCTION__ id Label.pp l); + exp t env l e (* Rewrite a single instruction. *) let step_insn t env i = diff --git a/src/lib/egraph/extractor/extractor_core.ml b/src/lib/egraph/extractor/extractor_core.ml index 33bbd9f7..13c84d03 100644 --- a/src/lib/egraph/extractor/extractor_core.ml +++ b/src/lib/egraph/extractor/extractor_core.ml @@ -37,6 +37,8 @@ module Cost : sig val pure : int -> t val incr : t -> t val add : t -> t -> t + val opc : t -> Int63.t + val depth : t -> Int63.t end = struct include Int63 @@ -141,6 +143,19 @@ end = struct | Some _ | None -> term)) end +let debug_dump t = + Logs.debug (fun m -> + let pp ppf (cid, (c, n)) = + Format.fprintf ppf + " %d:\n cost:\n depth: %a\n opc: %a\n node: %a%!" + cid Int63.pp (Cost.depth c) Int63.pp (Cost.opc c) + (Enode.pp ~node:(node t.eg)) n in + m "%s: cost table:\n%a" + __FUNCTION__ + (Format.pp_print_list pp + ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n")) + (Hashtbl.to_alist t.table)) + let init eg = let t = { eg; @@ -149,6 +164,7 @@ let init eg = impure = Z.zero; } in Saturation.go t; + debug_dump t; t let rec must_remain_fixed op args = match (op : Enode.op) with diff --git a/src/lib/isel/isel_match.ml b/src/lib/isel/isel_match.ml index e54d1a23..32703a2c 100644 --- a/src/lib/isel/isel_match.ml +++ b/src/lib/isel/isel_match.ml @@ -66,7 +66,8 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct (* XXX: don't try this at home! The representations are exactly the same, but we need to erase the type constraints on the input. *) let pats : (Matcher.pat * callback list) list = Obj.magic rules in - let prog = Matcher.compile pats ~commute:true in + let name = Format.asprintf "%a-isel" Target.pp M.target in + let prog = Matcher.compile ~name pats ~commute:true in prog, VM.create () (* Translate a substitution we got from the matcher diff --git a/src/lib/matcher.ml b/src/lib/matcher.ml index 71af273e..be56c51d 100644 --- a/src/lib/matcher.ml +++ b/src/lib/matcher.ml @@ -260,12 +260,15 @@ module Make(M : L) = struct end type 'a program = { + name : string; rule : (pat * 'a) array; code : insn Vec.t; root : (op, label) Hashtbl.t; rmin : int array; } + let name p = p.name + let pp_program ppf p = Vec.iteri p.code ~f:(fun l i -> Format.fprintf ppf "@%d: %a\n" l pp_insn i) @@ -490,7 +493,8 @@ module Make(M : L) = struct | [] -> failwithf "compile_tree: empty sequence at rule %d" i () - let compile ?(commute = false) rules = + let compile ?(commute = false) ~name rules = + let[@alert "-deprecated"] t0 = Time.now () in let rule = Array.of_list rules in let code = Vec.create () in let rmin, root = match rules with @@ -506,7 +510,12 @@ module Make(M : L) = struct compile_tree forest i pat); let root = Hashtbl.map forest ~f:(linearize code) in compute_rmin code, root in - {rule; code; root; rmin} + let[@alert "-deprecated"] t = Time.now () in + let[@alert "-deprecated"] elapsed = Time.(Span.to_sec (diff t t0)) in + Logs.debug (fun m -> + m "%s: ruleset %s: compiled %d rules to %d instructions in %g seconds, commute=%b%!" + __FUNCTION__ name (Array.length rule) (Vec.length code) elapsed commute); + {name; rule; code; root; rmin} end let compile = Compiler.compile diff --git a/src/lib/matcher.mli b/src/lib/matcher.mli index 3f78c262..c31cb7b8 100644 --- a/src/lib/matcher.mli +++ b/src/lib/matcher.mli @@ -72,6 +72,9 @@ module Make(M : L) : sig (** A compiled VM program. *) type 'a program + (** The name of the ruleset that produced the program. *) + val name : 'a program -> string + (** Pretty-print the program. *) val pp_program : Format.formatter -> 'a program -> unit @@ -89,7 +92,7 @@ module Make(M : L) : sig and the [VM] (see below) will prioritize yielding matches in this order. *) - val compile : ?commute:bool -> (pat * 'a) list -> 'a program + val compile : ?commute:bool -> name:string -> (pat * 'a) list -> 'a program (** Returns [true] is the program is empty. *) val is_empty : 'a program -> bool diff --git a/src/lib/passes/egraph_opt/egraph_opt.ml b/src/lib/passes/egraph_opt/egraph_opt.ml index 36361e23..39b2f842 100644 --- a/src/lib/passes/egraph_opt/egraph_opt.ml +++ b/src/lib/passes/egraph_opt/egraph_opt.ml @@ -3,5 +3,5 @@ module Rules = Egraph_opt_rules -let run tenv fn = Egraph.run fn tenv Rules.all -let run_no_rules tenv fn = Egraph.run fn tenv Rules.none +let run tenv fn = Egraph.run fn tenv @@ Rules.all () +let run_no_rules tenv fn = Egraph.run fn tenv @@ Rules.none () diff --git a/src/lib/passes/egraph_opt/egraph_opt_rules.ml b/src/lib/passes/egraph_opt/egraph_opt_rules.ml index 24d8d8da..d737a530 100644 --- a/src/lib/passes/egraph_opt/egraph_opt_rules.ml +++ b/src/lib/passes/egraph_opt/egraph_opt_rules.ml @@ -2041,5 +2041,14 @@ module Groups = struct ] end -let all = Egraph.compile Groups.all -let none = Egraph.compile [] +let thunked name g = + let r = ref None in + fun () -> match !r with + | Some p -> p + | None -> + let p = Egraph.compile ~name g in + r := Some p; + p + +let all = thunked "egraph-all" Groups.all +let none = thunked "egraph-none" [] diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_brsel.ml b/src/lib/passes/simplify_cfg/simplify_cfg_brsel.ml index 2f3d9cb1..362f799d 100644 --- a/src/lib/passes/simplify_cfg/simplify_cfg_brsel.ml +++ b/src/lib/passes/simplify_cfg/simplify_cfg_brsel.ml @@ -58,8 +58,15 @@ let collect tenv env fn = let run tenv env fn = let xs = collect tenv env fn in let+ () = Context.List.iter xs ~f:(fun (b, c, l, args) -> + Logs.debug (fun m -> + m "%s: block %a, simplifying br on %a to %a%!" + __FUNCTION__ Label.pp (Blk.label b) + Var.pp c Label.pp l); let+ sels = Context.List.map args ~f:(fun (ty, y, n) -> let+ x, sel = Context.Virtual.sel ty c y n in + Logs.debug (fun m -> + m "%s: inserting %a%!" __FUNCTION__ + Insn.pp_op (Insn.op sel)); Hashtbl.set env.typs ~key:x ~data:(ty :> Type.t); `var x, sel) in let args, sels = List.unzip sels in diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_contract.ml b/src/lib/passes/simplify_cfg/simplify_cfg_contract.ml index 098bca2d..2ac42523 100644 --- a/src/lib/passes/simplify_cfg/simplify_cfg_contract.ml +++ b/src/lib/passes/simplify_cfg/simplify_cfg_contract.ml @@ -84,20 +84,29 @@ let find_loc env s : local -> local option = function | #global -> assert false | #local as l -> l -let map_dst changed env s d = match find_dst env s d with +let map_dst key changed env s d = match find_dst env s d with | Some x when equal_dst d x -> x - | Some x -> changed := true; x + | Some x -> + Logs.debug (fun m -> + m "%s: block %a: contracted %a to %a%!" + __FUNCTION__ Label.pp key pp_dst d pp_dst x); + changed := true; x | None -> d -let map_loc changed env s l = match find_loc env s l with +let map_loc key changed env s l = match find_loc env s l with | Some x when equal_local l x -> x - | Some x -> changed := true; x + | Some x -> + Logs.debug (fun m -> + m "%s: block %a: contracted %a to %a%!" + __FUNCTION__ Label.pp key pp_local l pp_local x); + changed := true; x | None -> l let contract_blks changed env (s : singles) = Hashtbl.map_inplace env.blks ~f:(fun b -> - let dst = map_dst changed env s in - let loc = map_loc changed env s in + let key = Blk.label b in + let dst = map_dst key changed env s in + let loc = map_loc key changed env s in Blk.map_ctrl b ~f:(function | (`hlt | `ret _) as x -> x | `jmp d -> `jmp (dst d) diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_duplicate_br.ml b/src/lib/passes/simplify_cfg/simplify_cfg_duplicate_br.ml index 5b549cc0..cc952caf 100644 --- a/src/lib/passes/simplify_cfg/simplify_cfg_duplicate_br.ml +++ b/src/lib/passes/simplify_cfg/simplify_cfg_duplicate_br.ml @@ -75,6 +75,11 @@ let collect env = | `br (c, y, n) -> Option.merge (true_br env c y n) (false_br env c y n) ~f:merge_br |> Option.value_map ~default:acc ~f:(fun br -> + Logs.debug (fun m -> + m "%s: block %a, simplifying c=%a, y=%a, n=%a to %a%!" + __FUNCTION__ Label.pp (Blk.label b) + Var.pp c pp_dst y pp_dst n + Ctrl.pp (br :> ctrl)); Label.Tree.set acc ~key ~data:br) | _ -> acc) diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_merge_blks.ml b/src/lib/passes/simplify_cfg/simplify_cfg_merge_blks.ml index ce668b8a..45c0b47e 100644 --- a/src/lib/passes/simplify_cfg/simplify_cfg_merge_blks.ml +++ b/src/lib/passes/simplify_cfg/simplify_cfg_merge_blks.ml @@ -49,6 +49,9 @@ and merge subst env l l' b b' = env.cfg <- Cfg.Node.remove l' env.cfg; List.iter es ~f:(fun e -> env.cfg <- Cfg.Edge.insert e env.cfg); + Logs.debug (fun m -> + m "%s: merged block %a into %a%!" + __FUNCTION__ Label.pp l' Label.pp l); try_merge ~child:l' subst env l let run env = diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_merge_rets.ml b/src/lib/passes/simplify_cfg/simplify_cfg_merge_rets.ml index 117bd9bc..90b7828e 100644 --- a/src/lib/passes/simplify_cfg/simplify_cfg_merge_rets.ml +++ b/src/lib/passes/simplify_cfg/simplify_cfg_merge_rets.ml @@ -15,6 +15,10 @@ let map_blk tbl rb b = match Hashtbl.find tbl @@ Blk.label b with let commit env tbl rb fn = let key = Blk.label rb in + Logs.debug (fun m -> + m "%s: merged returns to new block %a: %s%!" + __FUNCTION__ Label.pp key + (Hashtbl.keys tbl |> List.to_string ~f:Label.to_string)); Hashtbl.map_inplace env.blks ~f:(map_blk tbl rb); Hashtbl.set env.blks ~key ~data:rb; env.ret <- Some key; diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_short_circ.ml b/src/lib/passes/simplify_cfg/simplify_cfg_short_circ.ml index eead9500..34bcfd74 100644 --- a/src/lib/passes/simplify_cfg/simplify_cfg_short_circ.ml +++ b/src/lib/passes/simplify_cfg/simplify_cfg_short_circ.ml @@ -98,14 +98,23 @@ let redir changed env cphi = | `br (c, y, n) -> let y' = brcond env cphi c y ~f:(fun y _ -> changed := true; y) in let n' = brcond env cphi c n ~f:(fun _ n -> changed := true; n) in - if phys_equal y y' && phys_equal n n' then b - else Blk.with_ctrl b @@ `br (c, y', n') - | `jmp `label (l, args) -> + if phys_equal y y' && phys_equal n n' then b else + let () = Logs.debug (fun m -> + m "%s: block %a: simplified br c=%a, y=%a, n=%a: y'=%a, n'=%a%!" + __FUNCTION__ Label.pp (Blk.label b) + Var.pp c pp_dst y pp_dst n + pp_dst y' pp_dst n') in + Blk.with_ctrl b @@ `br (c, y', n') + | `jmp (`label (l, args) as loc) -> Option.value ~default:b begin let* y, n, i, ne = Label.Tree.find cphi l in let* x = List.nth args i >>= var_of_operand in let+ c = if ne then Hashtbl.find env.flag x else !!x in changed := true; + Logs.debug (fun m -> + m "%s: block %a: simplified jmp %a: c=%a, y=%a, n=%a%!" + __FUNCTION__ Label.pp (Blk.label b) + pp_local loc Var.pp c pp_dst y pp_dst n); Blk.with_ctrl b @@ `br (c, y, n) end | _ -> b) diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_tailrec.ml b/src/lib/passes/simplify_cfg/simplify_cfg_tailrec.ml index 796c529f..6ff05548 100644 --- a/src/lib/passes/simplify_cfg/simplify_cfg_tailrec.ml +++ b/src/lib/passes/simplify_cfg/simplify_cfg_tailrec.ml @@ -75,4 +75,17 @@ let go env fn = let* h = new_entry fn e in let*? fn = Func.with_blks fn (h :: blks) in env.start <- Blk.label h; + Logs.debug (fun m -> + let pp_call ppf (l, (args, l')) = + Format.fprintf ppf " %a:\n args: %s\n l': %a%!" + Label.pp l + (List.to_string args ~f:(fun a -> + Format.asprintf "%a" pp_operand a)) + Label.pp l' in + m "%s: transformed $%s to tailrec, h=%a:\n%a%!" + __FUNCTION__ (Func.name fn) Label.pp (Blk.label h) + (Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n") + pp_call) + (Label.Tree.to_list calls)); !!fn diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_two_case_switch.ml b/src/lib/passes/simplify_cfg/simplify_cfg_two_case_switch.ml index 2d8ef7f7..77625df6 100644 --- a/src/lib/passes/simplify_cfg/simplify_cfg_two_case_switch.ml +++ b/src/lib/passes/simplify_cfg/simplify_cfg_two_case_switch.ml @@ -17,6 +17,11 @@ let go fn = (i :> operand) (`int (v, t)) in let b = Blk.append_insn b cmp in + Logs.debug (fun m -> + m "%s: switch at %a collapsed: i=%a, c=%a, k=%a, d=%a%!" + __FUNCTION__ Label.pp (Blk.label b) + pp_operand (i :> operand) + Var.pp c pp_dst (k :> dst) pp_dst (d :> dst)); Some (Blk.with_ctrl b @@ `br (c, (k :> dst), (d :> dst))) | _ -> !!None) >>| Seq.to_list in Func.update_blks_exn fn bs From 087285d07441ca0baff433ec7bd8d9379003f5e4 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Thu, 27 Nov 2025 12:48:57 -0500 Subject: [PATCH 53/62] Adds `bsearch` test, fixes some bugs --- src/lib/machine/x86/x86_amd64_common.ml | 2 +- src/lib/machine/x86/x86_amd64_isel.ml | 10 ++- src/lib/machine/x86/x86_amd64_peephole.ml | 16 +++++ src/test/data/opt/bsearch.driver.sysv.amd64.c | 38 +++++++++++ .../data/opt/bsearch.driver.sysv.amd64.output | 13 ++++ src/test/data/opt/bsearch.vir | 40 +++++++++++ src/test/data/opt/bsearch.vir.opt | 34 ++++++++++ src/test/data/opt/bsearch.vir.opt.sysv | 34 ++++++++++ src/test/data/opt/bsearch.vir.opt.sysv.amd64 | 66 +++++++++++++++++++ .../opt/bsearch.vir.opt.sysv.amd64.regalloc | 65 ++++++++++++++++++ src/test/test_opt.ml | 23 +++---- 11 files changed, 327 insertions(+), 14 deletions(-) create mode 100644 src/test/data/opt/bsearch.driver.sysv.amd64.c create mode 100644 src/test/data/opt/bsearch.driver.sysv.amd64.output create mode 100644 src/test/data/opt/bsearch.vir create mode 100644 src/test/data/opt/bsearch.vir.opt create mode 100644 src/test/data/opt/bsearch.vir.opt.sysv create mode 100644 src/test/data/opt/bsearch.vir.opt.sysv.amd64 create mode 100644 src/test/data/opt/bsearch.vir.opt.sysv.amd64.regalloc diff --git a/src/lib/machine/x86/x86_amd64_common.ml b/src/lib/machine/x86/x86_amd64_common.ml index df61f012..828d8008 100644 --- a/src/lib/machine/x86/x86_amd64_common.ml +++ b/src/lib/machine/x86/x86_amd64_common.ml @@ -685,7 +685,7 @@ module Insn = struct let unop_reads op a = match op with | CALL args -> Set.union (rset' [`rsp]) @@ - Set.union (rset_mem [a]) @@ + Set.union (rset [a]) @@ Regvar.Set.of_list args | DEC | INC diff --git a/src/lib/machine/x86/x86_amd64_isel.ml b/src/lib/machine/x86/x86_amd64_isel.ml index d8fec5c7..9ec94c4c 100644 --- a/src/lib/machine/x86/x86_amd64_isel.ml +++ b/src/lib/machine/x86/x86_amd64_isel.ml @@ -1944,11 +1944,16 @@ end = struct ] | _ -> !!None - let call_sym_x env = + let call_sym_x_y env = let*! args = S.callargs env "x" in let*! s, o = S.sym env "y" in !!![I.call args (Osym (s, o))] + let call_r_x_y env = + let*! args = S.callargs env "x" in + let*! y, yt = S.regvar env "y" in + !!![I.call args (Oreg (y, yt))] + let jmp_tbl_x_y env = let*! x, xt = S.regvar env "x" in let*! d, tbl = S.table env "y" in @@ -2336,7 +2341,8 @@ end = struct ] let call = [ - call_sym_x; + call_sym_x_y; + call_r_x_y; ] end diff --git a/src/lib/machine/x86/x86_amd64_peephole.ml b/src/lib/machine/x86/x86_amd64_peephole.ml index cb5f0ae6..af7724dc 100644 --- a/src/lib/machine/x86/x86_amd64_peephole.ml +++ b/src/lib/machine/x86/x86_amd64_peephole.ml @@ -364,6 +364,15 @@ let combinable_binop = function | CMP -> true | _ -> false +(* Combine with unary ops that don't write to their operand. *) +let combinable_unop = function + | CALL _ + | DIV + | IDIV + | IMUL1 + | MUL -> true + | _ -> false + let rset_mem' o l = List.exists l ~f:(Set.mem @@ rset [o]) let collect_mov_op fn = @@ -387,6 +396,13 @@ let collect_mov_op fn = && not (Set.mem (rset [o]) r1) -> let i = Two (op, Oreg (r1', r1t), o) in go (Ltree.set acc ~key:l ~data:i) xs + | (_, Two (MOV, Oreg (r1, _), o)) + :: (l, One (op, Oreg (r2', _))) + :: xs when combinable_unop op + && Rv.(r1 = r2') + && not (Set.mem (rset [o]) r1) -> + let i = One (op, o) in + go (Ltree.set acc ~key:l ~data:i) xs | _ :: xs -> go acc xs in go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b) diff --git a/src/test/data/opt/bsearch.driver.sysv.amd64.c b/src/test/data/opt/bsearch.driver.sysv.amd64.c new file mode 100644 index 00000000..e7725ae2 --- /dev/null +++ b/src/test/data/opt/bsearch.driver.sysv.amd64.c @@ -0,0 +1,38 @@ +#include + +extern void *bsearch(const void *key, const void *base, size_t nel, + size_t width, int (*cmp)(const void *, const void *)); + +extern int intcmp(const void *a, const void *b); + +static void test_bsearch(const int *arr, size_t n, int key) { + const int *res = bsearch(&key, arr, n, sizeof(int), intcmp); + if (res) { + printf("found %d at index %ld, value %d\n", key, (long)(res - arr), *res); + } else { + printf("%d not found\n", key); + } +} + +int main() { + int arr[] = {-10, -3, -1, 0, 1, 2, 4, 7, 10, 25, 100}; + + size_t n = sizeof(arr) / sizeof(arr[0]); + + /* found */ + test_bsearch(arr, n, -10); + test_bsearch(arr, n, -1); + test_bsearch(arr, n, 0); + test_bsearch(arr, n, 4); + test_bsearch(arr, n, 25); + test_bsearch(arr, n, 100); + + /* not found */ + test_bsearch(arr, n, -11); + test_bsearch(arr, n, -2); + test_bsearch(arr, n, 3); + test_bsearch(arr, n, 5); + test_bsearch(arr, n, 8); + test_bsearch(arr, n, 99); + test_bsearch(arr, n, 101); +} diff --git a/src/test/data/opt/bsearch.driver.sysv.amd64.output b/src/test/data/opt/bsearch.driver.sysv.amd64.output new file mode 100644 index 00000000..a80e8202 --- /dev/null +++ b/src/test/data/opt/bsearch.driver.sysv.amd64.output @@ -0,0 +1,13 @@ +found -10 at index 0, value -10 +found -1 at index 2, value -1 +found 0 at index 3, value 0 +found 4 at index 6, value 4 +found 25 at index 9, value 25 +found 100 at index 10, value 100 +-11 not found +-2 not found +3 not found +5 not found +8 not found +99 not found +101 not found diff --git a/src/test/data/opt/bsearch.vir b/src/test/data/opt/bsearch.vir new file mode 100644 index 00000000..2ef348a8 --- /dev/null +++ b/src/test/data/opt/bsearch.vir @@ -0,0 +1,40 @@ +module bsearch + +export function w $intcmp(l %a, l %b) { +@start: + %ia = ld.w %a + %ib = ld.w %b + %k = sub.w %ia, %ib + ret %k +} + +export function l $bsearch(l %key, l %base, l %nel, l %width, l %cmp) { +@start: + jmp @hdr +@hdr: + %c = gt.l %nel, 0_l + br %c, @cmp, @null +@cmp: + %nel2 = udiv.l %nel, 2_l + %off = mul.l %width, %nel2 + %try = add.l %base, %off + %sign = call.w %cmp(%key, %try) + %c = slt.w %sign, 0_w + br %c, @less, @notless +@less: + %nel = udiv.l %nel, 2_l + jmp @hdr +@notless: + %c = sgt.w %sign, 0_w + br %c, @greater, @equal +@greater: + %base = add.l %try, %width + %ne2 = udiv.l %nel, 2_l + %ne2 = add.l %ne2, 1_l + %nel = sub.l %nel, %ne2 + jmp @hdr +@equal: + ret %try +@null: + ret 0_l +} diff --git a/src/test/data/opt/bsearch.vir.opt b/src/test/data/opt/bsearch.vir.opt new file mode 100644 index 00000000..acb1c8f1 --- /dev/null +++ b/src/test/data/opt/bsearch.vir.opt @@ -0,0 +1,34 @@ +module bsearch + +export function w $intcmp(l %a, l %b) { +@2: + %ia.1 = ld.w %a ; @3 + %ib.1 = ld.w %b ; @4 + %1 = sub.w %ia.1, %ib.1 ; @27 + ret %1 +} + +export function l $bsearch(l %key, l %base, l %nel, l %width, l %cmp) { +@6: + jmp @7(%nel, %base) +@7(%nel.1, %base.1): + %2 = ne.l %nel.1, 0x0_l ; @28 + br %2, @8, @26(0x0_l) +@8: + %3 = lsr.l %nel.1, 0x1_l ; @29 + %4 = mul.l %width, %3 ; @30 + %5 = add.l %base.1, %4 ; @31 + %sign.1 = call.w %cmp(%key, %5) ; @16 + %6 = slt.w %sign.1, 0x0_w ; @32 + br %6, @7(%3, %base.1), @12 +@12: + %7 = sgt.w %sign.1, 0x0_w ; @33 + br %7, @19, @26(%5) +@19: + %8 = add.l %5, %width ; @34 + %9 = add.l %3, 0x1_l ; @35 + %10 = sub.l %nel.1, %9 ; @36 + jmp @7(%10, %8) +@26(%0): + ret %0 +} diff --git a/src/test/data/opt/bsearch.vir.opt.sysv b/src/test/data/opt/bsearch.vir.opt.sysv new file mode 100644 index 00000000..1595ba27 --- /dev/null +++ b/src/test/data/opt/bsearch.vir.opt.sysv @@ -0,0 +1,34 @@ +module bsearch + +export function $intcmp(l %a/rdi, l %b/rsi) { +@2: + %ia.1 = ld.w %a ; @3 + %ib.1 = ld.w %b ; @4 + %1 = sub.w %ia.1, %ib.1 ; @27 + ret rax/%1 +} + +export function $bsearch(l %key/rdi, l %base/rsi, l %nel/rdx, l %width/rcx, l %cmp/r8) { +@6: + jmp @7(%nel, %base) +@7(%nel.1, %base.1): + %2 = ne.l %nel.1, 0x0_l ; @28 + br %2, @8, @26(0x0_l) +@8: + %3 = lsr.l %nel.1, 0x1_l ; @29 + %4 = mul.l %width, %3 ; @30 + %5 = add.l %base.1, %4 ; @31 + %sign.1/w/rax = call %cmp(%key/rdi, %5/rsi) ; @16 + %6 = slt.w %sign.1, 0x0_w ; @32 + br %6, @7(%3, %base.1), @12 +@12: + %7 = sgt.w %sign.1, 0x0_w ; @33 + br %7, @19, @26(%5) +@19: + %8 = add.l %5, %width ; @34 + %9 = add.l %3, 0x1_l ; @35 + %10 = sub.l %nel.1, %9 ; @36 + jmp @7(%10, %8) +@26(%0): + ret rax/%0 +} diff --git a/src/test/data/opt/bsearch.vir.opt.sysv.amd64 b/src/test/data/opt/bsearch.vir.opt.sysv.amd64 new file mode 100644 index 00000000..9c5c5a7d --- /dev/null +++ b/src/test/data/opt/bsearch.vir.opt.sysv.amd64 @@ -0,0 +1,66 @@ +module bsearch + +export function $intcmp { ; returns: rax +@2: + mov %a:l, rdi ; @3 + mov %b:l, rsi ; @40 + mov %ia.1:w, dword ptr [%a] ; @41 + mov %ib.1:w, dword ptr [%b] ; @4 + mov %1:w, %ia.1:w ; @27 + sub %1:w, %ib.1:w ; @39 + mov eax, %1:w ; @37 + ret ; @38 +} + +export function $bsearch { ; returns: rax +@6: + mov %key:l, rdi ; @77 + mov %base:l, rsi ; @78 + mov %nel:l, rdx ; @79 + mov %width:l, rcx ; @80 + mov %cmp:l, r8 ; @81 + mov %nel.1:l, %nel:l ; @82 + mov %base.1:l, %base:l ; @83 + jmp @7 ; @84 +@7: + test %nel.1:l, %nel.1:l ; @72 + jne @8 ; @73 + jmp @42 ; @74 +@42: + xor %0:w, %0:w ; @70 + jmp @26 ; @71 +@8: + mov %3:l, %nel.1:l ; @29 + shr %3:l, 0x1_b ; @69 + mov %4:l, %width:l ; @30 + imul %4:l, %3:l ; @68 + lea %5:l, qword ptr [%base.1 + %4*1] ; @31 + mov rdi, %key:l ; @16 + mov rsi, %5:l ; @65 + call %cmp:l ; rdi rsi ; @66 + mov %sign.1:w, eax ; @67 + test %sign.1:w, %sign.1:w ; @60 + js @43 ; @61 + jmp @12 ; @62 +@43: + mov %nel.1:l, %3:l ; @58 + jmp @7 ; @59 +@12: + cmp %sign.1:w, 0x0_w ; @53 + jg @19 ; @54 + jmp @44 ; @55 +@44: + mov %0:l, %5:l ; @51 + jmp @26 ; @52 +@26: + mov rax, %0:l ; @49 + ret ; @50 +@19: + lea %8:l, qword ptr [%5 + %width*1] ; @34 + lea %9:l, qword ptr [%3 + 0x1] ; @35 + mov %10:l, %nel.1:l ; @36 + sub %10:l, %9:l ; @48 + mov %nel.1:l, %10:l ; @45 + mov %base.1:l, %8:l ; @46 + jmp @7 ; @47 +} diff --git a/src/test/data/opt/bsearch.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/bsearch.vir.opt.sysv.amd64.regalloc new file mode 100644 index 00000000..a717a761 --- /dev/null +++ b/src/test/data/opt/bsearch.vir.opt.sysv.amd64.regalloc @@ -0,0 +1,65 @@ +module bsearch + +export function $intcmp { ; returns: rax +@2: + mov eax, dword ptr [rdi] ; @41 + sub eax, dword ptr [rsi] ; @39 + ret ; @38 +} + +export function $bsearch { ; returns: rax +@6: + push rbp ; @89 + mov rbp, rsp ; @90 + sub rsp, 0x18_l ; @91 + push r12 ; @92 + push r13 ; @93 + push r14 ; @94 + push r15 ; @95 + push rbx ; @96 + mov qword ptr [rbp - 0x18], rdi ; @85 + mov r15, rsi ; @78 + mov r13, rdx ; @79 + mov r14, rcx ; @80 + mov qword ptr [rbp - 0x10], r8 ; @86 +@7: + test r13, r13 ; @72 + jne @8 ; @73 +@42: + xor eax, eax ; @70 + jmp @26 ; @71 +@8: + mov rbx, r13 ; @29 + shr rbx, 0x1_b ; @69 + mov rax, r14 ; @30 + imul rax, rbx ; @68 + lea r12, qword ptr [r15 + rax*1] ; @31 + mov rdi, qword ptr [rbp - 0x18] ; @87 + mov rsi, r12 ; @65 + call qword ptr [rbp - 0x10] ; rdi rsi ; @66 + test eax, eax ; @60 + jns @12 ; @61 +@43: + mov r13, rbx ; @58 + jmp @7 ; @59 +@12: + cmp eax, 0x0_w ; @53 + jg @19 ; @54 +@44: + mov rax, r12 ; @51 +@26: + pop rbx ; @97 + pop r15 ; @98 + pop r14 ; @99 + pop r13 ; @100 + pop r12 ; @101 + leave ; @102 + ret ; @50 +@19: + lea r15, qword ptr [r12 + r14*1] ; @34 + lea rcx, qword ptr [rbx + 0x1] ; @35 + mov rax, r13 ; @36 + sub rax, rcx ; @48 + mov r13, rax ; @45 + jmp @7 ; @47 +} diff --git a/src/test/test_opt.ml b/src/test/test_opt.ml index e2f8ca99..c6a57f0e 100644 --- a/src/test/test_opt.ml +++ b/src/test/test_opt.ml @@ -11,8 +11,10 @@ let from_file filename = (* Toggle this to overwrite cases that differ. *) let overwrite = false -let compare_outputs filename' expected p' = - let expected' = String.chop_suffix_if_exists expected ~suffix:"\n" in +let compare_outputs ?(chop_end = true) filename' expected p' = + let expected' = if chop_end + then String.chop_suffix_if_exists expected ~suffix:"\n" + else expected in if String.(p' <> expected') then if overwrite then (* Assume we're being tested via `dune test`, which runs with @@ -20,7 +22,8 @@ let compare_outputs filename' expected p' = Out_channel.write_all ("../../../test/" ^ filename') ~data:(p' ^ "\n") else let diff = Odiff.strings_diffs expected' p' in - let msg = Format.sprintf "Diff:\n\n%s" (Odiff.string_of_diffs diff) in + let msg = Format.sprintf "Diff (%s):\n\n%s" + filename' (Odiff.string_of_diffs diff) in assert_failure msg let from_file_abi filename = @@ -173,14 +176,7 @@ let test_native target abi ext name _ = end; if Shexp_process.(eval @@ file_exists driver_output) then let contents = In_channel.read_all driver_output in - let msg = Format.asprintf - "Unequal output\n\ - ---------------------------\n\ - Got:\n%s\n\ - ---------------------------\n\ - Expected:\n%s\n" - p.stdout contents in - assert_bool msg @@ String.(contents = p.stdout) + compare_outputs ~chop_end:false driver_output contents p.stdout (* Specific ABI lowering tests. *) let test_sysv = test_abi Machine.X86.Amd64_sysv.target "sysv" @@ -312,6 +308,7 @@ let opt_suite = "Test optimizations" >::: [ "Slot coalesce 1 (full opts)" >:: test "coalesce1a"; "Bad load 1" >:: test "badload1"; "Bad load 2" >:: test "badload2"; + "Binary search" >:: test "bsearch"; ] let abi_suite = "Test ABI lowering" >::: [ @@ -344,6 +341,7 @@ let abi_suite = "Test ABI lowering" >::: [ "Collatz recursive (SysV)" >:: test_sysv "collatz_rec"; "Ackermann (SysV)" >:: test_sysv "ackermann"; "Quicksort (SysV)" >:: test_sysv "qsort"; + "Binary search (SysV)" >:: test "bsearch"; ] let isel_suite = "Test instruction selection" >::: [ @@ -376,6 +374,7 @@ let isel_suite = "Test instruction selection" >::: [ "Ackermann (SysV AMD64)" >:: test_sysv_amd64 "ackermann"; "Quicksort (SysV AMD64)" >:: test_sysv_amd64 "qsort"; "Parallel moves (SysV AMD64)" >:: test_sysv_amd64 "parallel"; + "Binary search (SysV AMD64)" >:: test_sysv_amd64 "bsearch"; ] let regalloc_suite = "Test register allocation" >::: [ @@ -420,6 +419,7 @@ let regalloc_suite = "Test register allocation" >::: [ "Parallel moves (SysV AMD64)" >:: test_sysv_amd64_regalloc "parallel"; "Struct in a block argument (SysV AMD64)" >:: test_sysv_amd64_regalloc "sumphi"; "Variadic sum (SysV AMD64)" >:: test_sysv_amd64_regalloc "vasum"; + "Binary search (SysV AMD64)" >:: test_sysv_amd64_regalloc "bsearch"; ] let native_suite = "Test native code" >::: [ @@ -449,6 +449,7 @@ let native_suite = "Test native code" >::: [ "Struct in a block argument (SysV AMD64)" >:: test_sysv_amd64_native "sumphi"; "Returning, passing, and dereferencing a struct (SysV AMD64)" >:: test_sysv_amd64_native "unref"; "Sink 1 (SysV AMD64)" >:: test_sysv_amd64_native "sink1"; + "Binary search (SysV AMD64)" >:: test_sysv_amd64_native "bsearch"; ] let () = run_test_tt_main @@ test_list [ From fee78ada38565ce1c9fc56ee7014f6b6d4021aaf Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Fri, 28 Nov 2025 00:26:28 -0500 Subject: [PATCH 54/62] Change C style --- .clang-format | 179 +++----------- Makefile | 10 +- src/lib/float_stubs.c | 218 ++++++++++++------ .../data/opt/ackermann.driver.sysv.amd64.c | 3 +- .../opt/analyze_array.driver.sysv.amd64.c | 3 +- .../data/opt/and_test.driver.sysv.amd64.c | 3 +- src/test/data/opt/bsearch.driver.sysv.amd64.c | 6 +- .../data/opt/clz_ctz_8.driver.sysv.amd64.c | 4 +- src/test/data/opt/collatz.driver.sysv.amd64.c | 3 +- .../data/opt/collatz_rec.driver.sysv.amd64.c | 6 +- .../data/opt/cpyarray.driver.sysv.amd64.c | 3 +- src/test/data/opt/gcdext.driver.sysv.amd64.c | 6 +- src/test/data/opt/int_pow.driver.sysv.amd64.c | 3 +- src/test/data/opt/nosink.driver.sysv.amd64.c | 3 +- .../data/opt/palindrome.driver.sysv.amd64.c | 3 +- src/test/data/opt/popcnt.driver.sysv.amd64.c | 3 +- src/test/data/opt/prime.driver.sysv.amd64.c | 6 +- .../opt/promote2-partial.driver.sysv.amd64.c | 6 +- src/test/data/opt/qsort.driver.sysv.amd64.c | 3 +- src/test/data/opt/sink1.driver.sysv.amd64.c | 3 +- src/test/data/opt/spill1.driver.sysv.amd64.c | 3 +- src/test/data/opt/spill2.driver.sysv.amd64.c | 3 +- src/test/data/opt/sumphi.driver.sysv.amd64.c | 3 +- src/test/data/opt/unref.driver.sysv.amd64.c | 3 +- src/test/data/opt/uremby7.driver.sysv.amd64.c | 3 +- src/test/data/opt/vaarg1.driver.sysv.amd64.c | 3 +- src/test/data/opt/vaarg2.driver.sysv.amd64.c | 3 +- src/test/data/opt/vasum.driver.sysv.amd64.c | 3 +- tools/clang-format-all.sh | 18 ++ tools/ocp-indent-all.sh | 2 +- 30 files changed, 272 insertions(+), 246 deletions(-) create mode 100755 tools/clang-format-all.sh diff --git a/.clang-format b/.clang-format index 03ce0b43..e3263f0e 100644 --- a/.clang-format +++ b/.clang-format @@ -1,149 +1,40 @@ ---- -Language: Cpp -# BasedOnStyle: LLVM -AccessModifierOffset: -2 -AlignAfterOpenBracket: Align -AlignConsecutiveMacros: false -AlignConsecutiveAssignments: false -AlignConsecutiveBitFields: false -AlignConsecutiveDeclarations: false -AlignEscapedNewlines: Right -AlignOperands: Align -AlignTrailingComments: true -AllowAllArgumentsOnNextLine: true -AllowAllConstructorInitializersOnNextLine: true -AllowAllParametersOfDeclarationOnNextLine: true -AllowShortEnumsOnASingleLine: true -AllowShortBlocksOnASingleLine: Never -AllowShortCaseLabelsOnASingleLine: false -AllowShortFunctionsOnASingleLine: All -AllowShortLambdasOnASingleLine: All -AllowShortIfStatementsOnASingleLine: Never -AllowShortLoopsOnASingleLine: false -AlwaysBreakAfterDefinitionReturnType: None -AlwaysBreakAfterReturnType: None -AlwaysBreakBeforeMultilineStrings: false -AlwaysBreakTemplateDeclarations: MultiLine -BinPackArguments: true -BinPackParameters: true +BasedOnStyle: LLVM +IndentWidth: 2 +ContinuationIndentWidth: 2 +UseTab: Never +TabWidth: 2 + +# Function definitions like: +# static int +# name(...) +# { BraceWrapping: - AfterCaseLabel: false - AfterClass: false - AfterControlStatement: Never - AfterEnum: false - AfterFunction: false - AfterNamespace: false - AfterObjCDeclaration: false - AfterStruct: false - AfterUnion: false - AfterExternBlock: false - BeforeCatch: false - BeforeElse: false - BeforeLambdaBody: false - BeforeWhile: false - IndentBraces: false - SplitEmptyFunction: true - SplitEmptyRecord: true - SplitEmptyNamespace: true -BreakBeforeBinaryOperators: None + AfterFunction: true + AfterStruct: true + AfterClass: true + AfterNamespace: true + AfterExternBlock: true + BeforeCatch: false + BeforeElse: false + +# K&R control-flow braces (same-line) BreakBeforeBraces: Attach -BreakBeforeInheritanceComma: false -BreakInheritanceList: BeforeColon -BreakBeforeTernaryOperators: true -BreakConstructorInitializersBeforeComma: false -BreakConstructorInitializers: BeforeColon -BreakAfterJavaFieldAnnotations: false -BreakStringLiterals: true -ColumnLimit: 80 -CommentPragmas: '^ IWYU pragma:' -CompactNamespaces: false -ConstructorInitializerAllOnOneLineOrOnePerLine: false -ConstructorInitializerIndentWidth: 4 -ContinuationIndentWidth: 4 -Cpp11BracedListStyle: true -DeriveLineEnding: true -DerivePointerAlignment: false -DisableFormat: false -ExperimentalAutoDetectBinPacking: false -FixNamespaceComments: true -ForEachMacros: - - foreach - - Q_FOREACH - - BOOST_FOREACH -IncludeBlocks: Preserve -IncludeCategories: - - Regex: '^"(llvm|llvm-c|clang|clang-c)/' - Priority: 2 - SortPriority: 0 - - Regex: '^(<|"(gtest|gmock|isl|json)/)' - Priority: 3 - SortPriority: 0 - - Regex: '.*' - Priority: 1 - SortPriority: 0 -IncludeIsMainRegex: '(Test)?$' -IncludeIsMainSourceRegex: '' -IndentCaseLabels: false -IndentCaseBlocks: false -IndentGotoLabels: true -IndentPPDirectives: None -IndentExternBlock: AfterExternBlock -IndentWidth: 2 -IndentWrappedFunctionNames: false -InsertTrailingCommas: None -JavaScriptQuotes: Leave -JavaScriptWrapImports: true -KeepEmptyLinesAtTheStartOfBlocks: true -MacroBlockBegin: '' -MacroBlockEnd: '' -MaxEmptyLinesToKeep: 1 -NamespaceIndentation: None -ObjCBinPackProtocolList: Auto -ObjCBlockIndentWidth: 2 -ObjCBreakBeforeNestedBlockParam: true -ObjCSpaceAfterProperty: false -ObjCSpaceBeforeProtocolList: true -PenaltyBreakAssignment: 2 -PenaltyBreakBeforeFirstCallParameter: 19 -PenaltyBreakComment: 300 -PenaltyBreakFirstLessLess: 120 -PenaltyBreakString: 1000 -PenaltyBreakTemplateDeclaration: 10 -PenaltyExcessCharacter: 1000000 -PenaltyReturnTypeOnItsOwnLine: 60 -PointerAlignment: Right -ReflowComments: true -SortIncludes: true -SortUsingDeclarations: true + +# Spacing rules +SpaceBeforeParens: ControlStatements SpaceAfterCStyleCast: false -SpaceAfterLogicalNot: false -SpaceAfterTemplateKeyword: true -SpaceBeforeAssignmentOperators: true SpaceBeforeCpp11BracedList: false -SpaceBeforeCtorInitializerColon: true -SpaceBeforeInheritanceColon: true -SpaceBeforeParens: ControlStatements -SpaceBeforeRangeBasedForLoopColon: true -SpaceInEmptyBlock: false -SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 1 -SpacesInAngles: false -SpacesInConditionalStatement: false -SpacesInContainerLiterals: true -SpacesInCStyleCastParentheses: false -SpacesInParentheses: false -SpacesInSquareBrackets: false -SpaceBeforeSquareBrackets: false -Standard: Latest -StatementMacros: - - Q_UNUSED - - QT_REQUIRE_VERSION -TabWidth: 8 -UseCRLF: false -UseTab: Never -WhitespaceSensitiveMacros: - - STRINGIZE - - PP_STRINGIZE - - BOOST_PP_STRINGIZE -... +SpaceBeforeAssignmentOperators: true + +# Pointer alignment +PointerAlignment: Right +# Do not wrap long lines unless needed +ColumnLimit: 80 + +# Do not cram short functions onto one line +AllowShortFunctionsOnASingleLine: None + +# Keep return type on its own line if desired +AlwaysBreakAfterDefinitionReturnType: true +AlwaysBreakAfterReturnType: None diff --git a/Makefile b/Makefile index 34afe147..180bbd9d 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,7 @@ SRC := ./src/ -.PHONY: build clean install uninstall test doc deps indent status-clean check-style +.PHONY: build clean install uninstall test doc deps +.PHONY: ocaml-indent clang-indent indent status-clean check-style all: install @@ -25,9 +26,14 @@ doc: deps: $(MAKE) deps -C $(SRC) -indent: +ocaml-indent: sh tools/ocp-indent-all.sh +clang-indent: + sh tools/clang-format-all.sh + +indent: ocaml-indent clang-indent + status-clean: git diff --quiet --exit-code diff --git a/src/lib/float_stubs.c b/src/lib/float_stubs.c index d271cb17..9fa568ff 100644 --- a/src/lib/float_stubs.c +++ b/src/lib/float_stubs.c @@ -12,16 +12,19 @@ #define Float_val(v) (*(float *)(Data_custom_val(v))) -intnat cgen_float32_compare_unboxed(float f, float g) { +intnat +cgen_float32_compare_unboxed(float f, float g) { return (intnat)(f > g) - (intnat)(f < g) + (intnat)(f == f) - (intnat)(g == g); } -int cgen_float32_compare_to_untagged(value f, value g) { +int +cgen_float32_compare_to_untagged(value f, value g) { return cgen_float32_compare_unboxed(Float_val(f), Float_val(g)); } -value cgen_float32_compare(value f, value g) { +value +cgen_float32_compare(value f, value g) { return Val_int(cgen_float32_compare_to_untagged(f, g)); } @@ -30,7 +33,8 @@ value cgen_float32_compare(value f, value g) { - ocaml/runtime/hash.c - ocaml/stdlib/float.ml */ -value cgen_float32_hash(value x) { +value +cgen_float32_hash(value x) { uint32_t h = caml_hash_mix_float(0, Float_val(x)); h ^= h >> 16; h *= 0x85ebca6b; @@ -41,22 +45,23 @@ value cgen_float32_hash(value x) { } static struct custom_operations cgen_float32_custom_ops = { - .identifier = (char *)"cgen_float32_custom_ops", - .finalize = custom_finalize_default, - .compare = cgen_float32_compare_to_untagged, - .hash = cgen_float32_hash, - .serialize = custom_serialize_default, - .deserialize = custom_deserialize_default, - .compare_ext = custom_compare_ext_default, + .identifier = (char *)"cgen_float32_custom_ops", + .finalize = custom_finalize_default, + .compare = cgen_float32_compare_to_untagged, + .hash = cgen_float32_hash, + .serialize = custom_serialize_default, + .deserialize = custom_deserialize_default, + .compare_ext = custom_compare_ext_default, #if OCAML_VERSION_MAJOR >= 4 && OCAML_VERSION_MINOR >= 8 - .fixed_length = NULL, + .fixed_length = NULL, #endif }; #define Alloc_float() \ caml_alloc_custom(&cgen_float32_custom_ops, sizeof(float), 0, 1) -value cgen_float32_of_float(value x) { +value +cgen_float32_of_float(value x) { CAMLparam1(x); CAMLlocal1(f); f = Alloc_float(); @@ -64,21 +69,36 @@ value cgen_float32_of_float(value x) { CAMLreturn(f); } -value cgen_float_of_float32(value x) { return caml_copy_double(Float_val(x)); } -value cgen_float32_is_zero(value x) { return Val_bool(Float_val(x) == 0.0f); } -value cgen_float32_is_inf(value x) { return Val_bool(isinf(Float_val(x))); } +value +cgen_float_of_float32(value x) { + return caml_copy_double(Float_val(x)); +} +value +cgen_float32_is_zero(value x) { + return Val_bool(Float_val(x) == 0.0f); +} +value +cgen_float32_is_inf(value x) { + return Val_bool(isinf(Float_val(x))); +} -value cgen_float32_is_negative(value x) { +value +cgen_float32_is_negative(value x) { return Val_bool(signbit(Float_val(x))); } -value cgen_float32_is_nan(value x) { return Val_bool(isnan(Float_val(x))); } +value +cgen_float32_is_nan(value x) { + return Val_bool(isnan(Float_val(x))); +} -value cgen_float32_is_unordered(value x, value y) { +value +cgen_float32_is_unordered(value x, value y) { return Val_bool(isunordered(Float_val(x), Float_val(y))); } -value cgen_float32_add(value x, value y) { +value +cgen_float32_add(value x, value y) { CAMLparam2(x, y); CAMLlocal1(f); f = Alloc_float(); @@ -86,7 +106,8 @@ value cgen_float32_add(value x, value y) { CAMLreturn(f); } -value cgen_float32_div(value x, value y) { +value +cgen_float32_div(value x, value y) { CAMLparam2(x, y); CAMLlocal1(f); f = Alloc_float(); @@ -94,7 +115,8 @@ value cgen_float32_div(value x, value y) { CAMLreturn(f); } -value cgen_float32_mul(value x, value y) { +value +cgen_float32_mul(value x, value y) { CAMLparam2(x, y); CAMLlocal1(f); f = Alloc_float(); @@ -102,7 +124,8 @@ value cgen_float32_mul(value x, value y) { CAMLreturn(f); } -value cgen_float32_neg(value x) { +value +cgen_float32_neg(value x) { CAMLparam1(x); CAMLlocal1(f); f = Alloc_float(); @@ -110,7 +133,8 @@ value cgen_float32_neg(value x) { CAMLreturn(f); } -value cgen_float32_sub(value x, value y) { +value +cgen_float32_sub(value x, value y) { CAMLparam2(x, y); CAMLlocal1(f); f = Alloc_float(); @@ -118,7 +142,8 @@ value cgen_float32_sub(value x, value y) { CAMLreturn(f); } -value cgen_bits_of_float32(value x) { +value +cgen_bits_of_float32(value x) { CAMLparam1(x); CAMLlocal1(i); float f = Float_val(x); @@ -126,7 +151,8 @@ value cgen_bits_of_float32(value x) { CAMLreturn(i); } -value cgen_float32_of_bits(value x) { +value +cgen_float32_of_bits(value x) { CAMLparam1(x); CAMLlocal1(f); uint32_t i = Int32_val(x); @@ -135,32 +161,47 @@ value cgen_float32_of_bits(value x) { CAMLreturn(f); } -value cgen_int8_of_float32(value x) { return Val_int((int8_t)Float_val(x)); } -value cgen_int16_of_float32(value x) { return Val_int((int16_t)Float_val(x)); } +value +cgen_int8_of_float32(value x) { + return Val_int((int8_t)Float_val(x)); +} +value +cgen_int16_of_float32(value x) { + return Val_int((int16_t)Float_val(x)); +} -value cgen_int32_of_float32(value x) { +value +cgen_int32_of_float32(value x) { return caml_copy_int32((int32_t)Float_val(x)); } -value cgen_int64_of_float32(value x) { +value +cgen_int64_of_float32(value x) { return caml_copy_int64((int64_t)Float_val(x)); } -value cgen_uint8_of_float32(value x) { return Val_int((uint8_t)Float_val(x)); } +value +cgen_uint8_of_float32(value x) { + return Val_int((uint8_t)Float_val(x)); +} -value cgen_uint16_of_float32(value x) { +value +cgen_uint16_of_float32(value x) { return Val_int((uint16_t)Float_val(x)); } -value cgen_uint32_of_float32(value x) { +value +cgen_uint32_of_float32(value x) { return caml_copy_int32((uint32_t)Float_val(x)); } -value cgen_uint64_of_float32(value x) { +value +cgen_uint64_of_float32(value x) { return caml_copy_int64((uint64_t)Float_val(x)); } -value cgen_float32_of_int8(value x) { +value +cgen_float32_of_int8(value x) { CAMLparam1(x); CAMLlocal1(f); f = Alloc_float(); @@ -168,7 +209,8 @@ value cgen_float32_of_int8(value x) { CAMLreturn(f); } -value cgen_float32_of_int16(value x) { +value +cgen_float32_of_int16(value x) { CAMLparam1(x); CAMLlocal1(f); f = Alloc_float(); @@ -176,7 +218,8 @@ value cgen_float32_of_int16(value x) { CAMLreturn(f); } -value cgen_float32_of_int32(value x) { +value +cgen_float32_of_int32(value x) { CAMLparam1(x); CAMLlocal1(f); f = Alloc_float(); @@ -184,7 +227,8 @@ value cgen_float32_of_int32(value x) { CAMLreturn(f); } -value cgen_float32_of_int64(value x) { +value +cgen_float32_of_int64(value x) { CAMLparam1(x); CAMLlocal1(f); f = Alloc_float(); @@ -192,7 +236,8 @@ value cgen_float32_of_int64(value x) { CAMLreturn(f); } -value cgen_float32_of_uint8(value x) { +value +cgen_float32_of_uint8(value x) { CAMLparam1(x); CAMLlocal1(f); f = Alloc_float(); @@ -200,7 +245,8 @@ value cgen_float32_of_uint8(value x) { CAMLreturn(f); } -value cgen_float32_of_uint16(value x) { +value +cgen_float32_of_uint16(value x) { CAMLparam1(x); CAMLlocal1(f); f = Alloc_float(); @@ -208,7 +254,8 @@ value cgen_float32_of_uint16(value x) { CAMLreturn(f); } -value cgen_float32_of_uint32(value x) { +value +cgen_float32_of_uint32(value x) { CAMLparam1(x); CAMLlocal1(f); f = Alloc_float(); @@ -216,7 +263,8 @@ value cgen_float32_of_uint32(value x) { CAMLreturn(f); } -value cgen_float32_of_uint64(value x) { +value +cgen_float32_of_uint64(value x) { CAMLparam1(x); CAMLlocal1(f); f = Alloc_float(); @@ -224,7 +272,8 @@ value cgen_float32_of_uint64(value x) { CAMLreturn(f); } -value cgen_string_of_float32(value x) { +value +cgen_string_of_float32(value x) { CAMLparam1(x); CAMLlocal1(s); char buf[MAX_FLOAT_DIGITS] = {0}; @@ -233,27 +282,33 @@ value cgen_string_of_float32(value x) { CAMLreturn(s); } -value cgen_float32_equal(value x, value y) { +value +cgen_float32_equal(value x, value y) { return Val_bool(Float_val(x) == Float_val(y)); } -value cgen_float32_ge(value x, value y) { +value +cgen_float32_ge(value x, value y) { return Val_bool(Float_val(x) >= Float_val(y)); } -value cgen_float32_gt(value x, value y) { +value +cgen_float32_gt(value x, value y) { return Val_bool(Float_val(x) > Float_val(y)); } -value cgen_float32_le(value x, value y) { +value +cgen_float32_le(value x, value y) { return Val_bool(Float_val(x) <= Float_val(y)); } -value cgen_float32_lt(value x, value y) { +value +cgen_float32_lt(value x, value y) { return Val_bool(Float_val(x) < Float_val(y)); } -value cgen_bits_of_float(value x) { +value +cgen_bits_of_float(value x) { CAMLparam1(x); CAMLlocal1(d); double f = Double_val(x); @@ -261,7 +316,8 @@ value cgen_bits_of_float(value x) { CAMLreturn(d); } -value cgen_float_of_bits(value x) { +value +cgen_float_of_bits(value x) { CAMLparam1(x); CAMLlocal1(f); uint64_t i = Int64_val(x); @@ -269,88 +325,114 @@ value cgen_float_of_bits(value x) { CAMLreturn(f); } -value cgen_int8_of_float(value x) { return Val_int((int8_t)Double_val(x)); } -value cgen_int16_of_float(value x) { return Val_int((int16_t)Double_val(x)); } +value +cgen_int8_of_float(value x) { + return Val_int((int8_t)Double_val(x)); +} +value +cgen_int16_of_float(value x) { + return Val_int((int16_t)Double_val(x)); +} -value cgen_int32_of_float(value x) { +value +cgen_int32_of_float(value x) { return caml_copy_int32((int32_t)Double_val(x)); } -value cgen_int64_of_float(value x) { +value +cgen_int64_of_float(value x) { return caml_copy_int64((int64_t)Double_val(x)); } -value cgen_uint8_of_float(value x) { return Val_int((uint8_t)Double_val(x)); } -value cgen_uint16_of_float(value x) { return Val_int((uint16_t)Double_val(x)); } +value +cgen_uint8_of_float(value x) { + return Val_int((uint8_t)Double_val(x)); +} +value +cgen_uint16_of_float(value x) { + return Val_int((uint16_t)Double_val(x)); +} -value cgen_uint32_of_float(value x) { +value +cgen_uint32_of_float(value x) { return caml_copy_int32((uint32_t)Double_val(x)); } -value cgen_uint64_of_float(value x) { +value +cgen_uint64_of_float(value x) { return caml_copy_int64((uint64_t)Double_val(x)); } -value cgen_float_of_int8(value x) { +value +cgen_float_of_int8(value x) { CAMLparam1(x); CAMLlocal1(f); f = caml_copy_double((double)((int8_t)Int_val(x))); CAMLreturn(f); } -value cgen_float_of_int16(value x) { +value +cgen_float_of_int16(value x) { CAMLparam1(x); CAMLlocal1(f); f = caml_copy_double((double)((int16_t)Int_val(x))); CAMLreturn(f); } -value cgen_float_of_int32(value x) { +value +cgen_float_of_int32(value x) { CAMLparam1(x); CAMLlocal1(f); f = caml_copy_double((double)((int32_t)Int32_val(x))); CAMLreturn(f); } -value cgen_float_of_int64(value x) { +value +cgen_float_of_int64(value x) { CAMLparam1(x); CAMLlocal1(f); f = caml_copy_double((double)((int64_t)Int64_val(x))); CAMLreturn(f); } -value cgen_float_of_uint8(value x) { +value +cgen_float_of_uint8(value x) { CAMLparam1(x); CAMLlocal1(f); f = caml_copy_double((double)((uint8_t)Int_val(x))); CAMLreturn(f); } -value cgen_float_of_uint16(value x) { +value +cgen_float_of_uint16(value x) { CAMLparam1(x); CAMLlocal1(f); f = caml_copy_double((double)((uint16_t)Int_val(x))); CAMLreturn(f); } -value cgen_float_of_uint32(value x) { +value +cgen_float_of_uint32(value x) { CAMLparam1(x); CAMLlocal1(f); f = caml_copy_double((double)((uint32_t)Int32_val(x))); CAMLreturn(f); } -value cgen_float_of_uint64(value x) { +value +cgen_float_of_uint64(value x) { CAMLparam1(x); CAMLlocal1(f); f = caml_copy_double((double)((uint64_t)Int64_val(x))); CAMLreturn(f); } -value cgen_float_is_unordered(value x, value y) { +value +cgen_float_is_unordered(value x, value y) { return Val_bool(isunordered(Double_val(x), Double_val(y))); } -value cgen_float_is_ordered(value x, value y) { +value +cgen_float_is_ordered(value x, value y) { return Val_bool(!isunordered(Double_val(x), Double_val(y))); } diff --git a/src/test/data/opt/ackermann.driver.sysv.amd64.c b/src/test/data/opt/ackermann.driver.sysv.amd64.c index a984836e..2edb4408 100644 --- a/src/test/data/opt/ackermann.driver.sysv.amd64.c +++ b/src/test/data/opt/ackermann.driver.sysv.amd64.c @@ -2,7 +2,8 @@ extern int foo(int m, int n); -int main() { +int +main() { assert(foo(0, 0) == 1); assert(foo(1, 3) == 5); assert(foo(2, 2) == 7); diff --git a/src/test/data/opt/analyze_array.driver.sysv.amd64.c b/src/test/data/opt/analyze_array.driver.sysv.amd64.c index 2548db80..7bbb43a4 100644 --- a/src/test/data/opt/analyze_array.driver.sysv.amd64.c +++ b/src/test/data/opt/analyze_array.driver.sysv.amd64.c @@ -3,7 +3,8 @@ extern int analyze_array(int *arr, int n); extern int analyze_array_inl(int *arr, int n); -int main() { +int +main() { int arr1[] = {3, 1, 4, 1, 5, 9, 2}; assert(analyze_array(arr1, 7) == 229); assert(analyze_array_inl(arr1, 7) == 229); diff --git a/src/test/data/opt/and_test.driver.sysv.amd64.c b/src/test/data/opt/and_test.driver.sysv.amd64.c index 306ee73a..2bb8ea11 100644 --- a/src/test/data/opt/and_test.driver.sysv.amd64.c +++ b/src/test/data/opt/and_test.driver.sysv.amd64.c @@ -4,7 +4,8 @@ extern int foo(int x, int y); extern int bar(int x, int y); extern long baz(long x); -int main() { +int +main() { assert(foo(3, 4) == 3); assert(foo(4, 4) == 4); assert(foo(1, 4) == 1); diff --git a/src/test/data/opt/bsearch.driver.sysv.amd64.c b/src/test/data/opt/bsearch.driver.sysv.amd64.c index e7725ae2..a4d5efa6 100644 --- a/src/test/data/opt/bsearch.driver.sysv.amd64.c +++ b/src/test/data/opt/bsearch.driver.sysv.amd64.c @@ -5,7 +5,8 @@ extern void *bsearch(const void *key, const void *base, size_t nel, extern int intcmp(const void *a, const void *b); -static void test_bsearch(const int *arr, size_t n, int key) { +static void +test_bsearch(const int *arr, size_t n, int key) { const int *res = bsearch(&key, arr, n, sizeof(int), intcmp); if (res) { printf("found %d at index %ld, value %d\n", key, (long)(res - arr), *res); @@ -14,7 +15,8 @@ static void test_bsearch(const int *arr, size_t n, int key) { } } -int main() { +int +main() { int arr[] = {-10, -3, -1, 0, 1, 2, 4, 7, 10, 25, 100}; size_t n = sizeof(arr) / sizeof(arr[0]); diff --git a/src/test/data/opt/clz_ctz_8.driver.sysv.amd64.c b/src/test/data/opt/clz_ctz_8.driver.sysv.amd64.c index 1532619b..8bdd5ca2 100644 --- a/src/test/data/opt/clz_ctz_8.driver.sysv.amd64.c +++ b/src/test/data/opt/clz_ctz_8.driver.sysv.amd64.c @@ -1,10 +1,10 @@ -#include #include extern unsigned char clz8(unsigned char n); extern unsigned char ctz8(unsigned char n); -int main() { +int +main() { assert(clz8(0b00001111) == 4); assert(ctz8(0b00001111) == 0); diff --git a/src/test/data/opt/collatz.driver.sysv.amd64.c b/src/test/data/opt/collatz.driver.sysv.amd64.c index 3dd398dd..3a1e3450 100644 --- a/src/test/data/opt/collatz.driver.sysv.amd64.c +++ b/src/test/data/opt/collatz.driver.sysv.amd64.c @@ -2,7 +2,8 @@ extern int foo(int n); -int main() { +int +main() { assert(foo(1) == 0); assert(foo(2) == 1); assert(foo(3) == 7); diff --git a/src/test/data/opt/collatz_rec.driver.sysv.amd64.c b/src/test/data/opt/collatz_rec.driver.sysv.amd64.c index 61ce7be7..ed1030e0 100644 --- a/src/test/data/opt/collatz_rec.driver.sysv.amd64.c +++ b/src/test/data/opt/collatz_rec.driver.sysv.amd64.c @@ -2,11 +2,13 @@ extern int foo(int n, int count); -int bar(int n) { +int +bar(int n) { return foo(n, 0); } -int main() { +int +main() { assert(bar(1) == 0); assert(bar(2) == 1); assert(bar(3) == 7); diff --git a/src/test/data/opt/cpyarray.driver.sysv.amd64.c b/src/test/data/opt/cpyarray.driver.sysv.amd64.c index b5e44d25..49735158 100644 --- a/src/test/data/opt/cpyarray.driver.sysv.amd64.c +++ b/src/test/data/opt/cpyarray.driver.sysv.amd64.c @@ -2,7 +2,8 @@ extern void foo(int *dst, int *src, unsigned int n); -int main() { +int +main() { int dst[3]; int src[3] = {1, 2, 3}; foo(dst, src, 3); diff --git a/src/test/data/opt/gcdext.driver.sysv.amd64.c b/src/test/data/opt/gcdext.driver.sysv.amd64.c index ed6172a4..1cd1b678 100644 --- a/src/test/data/opt/gcdext.driver.sysv.amd64.c +++ b/src/test/data/opt/gcdext.driver.sysv.amd64.c @@ -8,13 +8,15 @@ struct S { extern struct S gcd(int a, int b); -void test(int a, int b, int x) { +void +test(int a, int b, int x) { struct S s = gcd(a, b); assert(s.gcd == x); assert(a * s.x + b * s.y == s.gcd); } -int main() { +int +main() { test(12, 18, 6); test(30, 20, 10); test(101, 103, 1); diff --git a/src/test/data/opt/int_pow.driver.sysv.amd64.c b/src/test/data/opt/int_pow.driver.sysv.amd64.c index 764a9398..2a37a2cb 100644 --- a/src/test/data/opt/int_pow.driver.sysv.amd64.c +++ b/src/test/data/opt/int_pow.driver.sysv.amd64.c @@ -4,7 +4,8 @@ extern int64_t int_pow(int64_t base, int64_t exponent); extern int64_t int_pow_alt(int64_t base, int64_t exponent); -int main() { +int +main() { assert(int_pow(2, 10) == 1024); assert(int_pow(5, 3) == 125); assert(int_pow(7, 0) == 1); diff --git a/src/test/data/opt/nosink.driver.sysv.amd64.c b/src/test/data/opt/nosink.driver.sysv.amd64.c index c272d3f2..bed0c02d 100644 --- a/src/test/data/opt/nosink.driver.sysv.amd64.c +++ b/src/test/data/opt/nosink.driver.sysv.amd64.c @@ -2,7 +2,8 @@ extern int foo(int x); -int main() { +int +main() { assert(foo(0) == 0); assert(foo(1) == 9); assert(foo(2) == 15); diff --git a/src/test/data/opt/palindrome.driver.sysv.amd64.c b/src/test/data/opt/palindrome.driver.sysv.amd64.c index 3e1ec13e..80cef3f2 100644 --- a/src/test/data/opt/palindrome.driver.sysv.amd64.c +++ b/src/test/data/opt/palindrome.driver.sysv.amd64.c @@ -3,7 +3,8 @@ bool palindrome(int n); -int main() { +int +main() { assert(palindrome(0)); assert(palindrome(1)); assert(palindrome(12321)); diff --git a/src/test/data/opt/popcnt.driver.sysv.amd64.c b/src/test/data/opt/popcnt.driver.sysv.amd64.c index f2cc0f4f..65b3711b 100644 --- a/src/test/data/opt/popcnt.driver.sysv.amd64.c +++ b/src/test/data/opt/popcnt.driver.sysv.amd64.c @@ -5,7 +5,8 @@ extern unsigned short popcnt16(unsigned short n); extern unsigned int popcnt32(unsigned int n); extern unsigned long popcnt64(unsigned long n); -int main() { +int +main() { assert(popcnt8(0xFF) == 8); assert(popcnt8(0x0F) == 4); diff --git a/src/test/data/opt/prime.driver.sysv.amd64.c b/src/test/data/opt/prime.driver.sysv.amd64.c index 4ca232e7..35abd58a 100644 --- a/src/test/data/opt/prime.driver.sysv.amd64.c +++ b/src/test/data/opt/prime.driver.sysv.amd64.c @@ -2,7 +2,8 @@ extern int foo(unsigned int x); -unsigned int nth(int n) { +unsigned int +nth(int n) { unsigned int x = 1; unsigned int i = 0; while (1) { @@ -15,7 +16,8 @@ unsigned int nth(int n) { } } -int main() { +int +main() { for (unsigned int i = 1; i <= 20; ++i) { printf("%d\n", nth(i)); } diff --git a/src/test/data/opt/promote2-partial.driver.sysv.amd64.c b/src/test/data/opt/promote2-partial.driver.sysv.amd64.c index b0c7ebcd..c15d4734 100644 --- a/src/test/data/opt/promote2-partial.driver.sysv.amd64.c +++ b/src/test/data/opt/promote2-partial.driver.sysv.amd64.c @@ -2,12 +2,14 @@ extern int foo(int a, int b); -void test(int a, int b, int x) { +void +test(int a, int b, int x) { int gcd = foo(a, b); assert(gcd == x); } -int main() { +int +main() { test(12, 18, 6); test(30, 20, 10); test(101, 103, 1); diff --git a/src/test/data/opt/qsort.driver.sysv.amd64.c b/src/test/data/opt/qsort.driver.sysv.amd64.c index b02d498e..13e3c46d 100644 --- a/src/test/data/opt/qsort.driver.sysv.amd64.c +++ b/src/test/data/opt/qsort.driver.sysv.amd64.c @@ -3,7 +3,8 @@ extern void qsort(int *arr, long low, long high); -int main() { +int +main() { int arr[] = {10, 7, 8, 9, 1, 5, 22, 59, 6, 17, 54}; int n = sizeof(arr) / sizeof(arr[0]); qsort(arr, 0, n - 1); diff --git a/src/test/data/opt/sink1.driver.sysv.amd64.c b/src/test/data/opt/sink1.driver.sysv.amd64.c index 4bfa3fd1..a8ba4927 100644 --- a/src/test/data/opt/sink1.driver.sysv.amd64.c +++ b/src/test/data/opt/sink1.driver.sysv.amd64.c @@ -2,7 +2,8 @@ extern int foo(int x); -int main() { +int +main() { assert(foo(0) == 1); assert(foo(1) == 0); assert(foo(-1) == 0); diff --git a/src/test/data/opt/spill1.driver.sysv.amd64.c b/src/test/data/opt/spill1.driver.sysv.amd64.c index cb7ec92d..3bf27f70 100644 --- a/src/test/data/opt/spill1.driver.sysv.amd64.c +++ b/src/test/data/opt/spill1.driver.sysv.amd64.c @@ -3,7 +3,8 @@ // NB: do not call this with 0 extern int foo(int n); -int main() { +int +main() { assert(foo(1) == 136); assert(foo(2) == 272); assert(foo(3) == 408); diff --git a/src/test/data/opt/spill2.driver.sysv.amd64.c b/src/test/data/opt/spill2.driver.sysv.amd64.c index 2f8984d0..02451abf 100644 --- a/src/test/data/opt/spill2.driver.sysv.amd64.c +++ b/src/test/data/opt/spill2.driver.sysv.amd64.c @@ -2,7 +2,8 @@ extern int foo(int *a, int *b, int *c, int *d, int n); -int main() { +int +main() { int a0[] = {}; int b0[] = {}; int c0[] = {}; diff --git a/src/test/data/opt/sumphi.driver.sysv.amd64.c b/src/test/data/opt/sumphi.driver.sysv.amd64.c index 89c0499e..4fb55013 100644 --- a/src/test/data/opt/sumphi.driver.sysv.amd64.c +++ b/src/test/data/opt/sumphi.driver.sysv.amd64.c @@ -7,7 +7,8 @@ struct t { int sumphi(struct t a, struct t b, int x); -int main() { +int +main() { struct t s1 = {3, 4}; struct t s2 = {10, -2}; int res1 = sumphi(s1, s2, -1); diff --git a/src/test/data/opt/unref.driver.sysv.amd64.c b/src/test/data/opt/unref.driver.sysv.amd64.c index 60aec190..75604f07 100644 --- a/src/test/data/opt/unref.driver.sysv.amd64.c +++ b/src/test/data/opt/unref.driver.sysv.amd64.c @@ -9,7 +9,8 @@ extern int sump(struct t *); extern struct t mkt(int, int); extern int sumt(int, int); -int main() { +int +main() { struct t t1 = mkt(1, 2); struct t t2 = mkt(3, 4); struct t t3 = mkt(5, 6); diff --git a/src/test/data/opt/uremby7.driver.sysv.amd64.c b/src/test/data/opt/uremby7.driver.sysv.amd64.c index 9c1e081a..a63d8329 100644 --- a/src/test/data/opt/uremby7.driver.sysv.amd64.c +++ b/src/test/data/opt/uremby7.driver.sysv.amd64.c @@ -2,7 +2,8 @@ extern unsigned int foo(unsigned int x); -int main() { +int +main() { assert(foo(0) == 0); assert(foo(1) == 1); assert(foo(2) == 2); diff --git a/src/test/data/opt/vaarg1.driver.sysv.amd64.c b/src/test/data/opt/vaarg1.driver.sysv.amd64.c index 9d9482f7..305ffc92 100644 --- a/src/test/data/opt/vaarg1.driver.sysv.amd64.c +++ b/src/test/data/opt/vaarg1.driver.sysv.amd64.c @@ -7,7 +7,8 @@ struct S { extern long foo(long i, ...); -int main() { +int +main() { struct S s1 = {0.0, 1}; assert(foo(1, s1) == 3); diff --git a/src/test/data/opt/vaarg2.driver.sysv.amd64.c b/src/test/data/opt/vaarg2.driver.sysv.amd64.c index 4abf0674..54bb762d 100644 --- a/src/test/data/opt/vaarg2.driver.sysv.amd64.c +++ b/src/test/data/opt/vaarg2.driver.sysv.amd64.c @@ -8,7 +8,8 @@ struct t { long l2; }; -int main() { +int +main() { struct t s = {10, 32}; assert(foo(5, s) == 47); assert(!bar(0)); diff --git a/src/test/data/opt/vasum.driver.sysv.amd64.c b/src/test/data/opt/vasum.driver.sysv.amd64.c index 74579f54..e2ee8244 100644 --- a/src/test/data/opt/vasum.driver.sysv.amd64.c +++ b/src/test/data/opt/vasum.driver.sysv.amd64.c @@ -2,6 +2,7 @@ extern int twenty_eight(void); -int main() { +int +main() { assert(twenty_eight() == 28); } diff --git a/tools/clang-format-all.sh b/tools/clang-format-all.sh new file mode 100755 index 00000000..516180d1 --- /dev/null +++ b/tools/clang-format-all.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +# C/C++ sources +files_to_indent='.*\.\(c\|h\|cc\|cpp\|cxx\|hpp\)$' + +VERSION=$(clang-format --version 2>/dev/null) + +if [ -z "$VERSION" ]; then + echo "Please install clang-format" + exit 1 +else + echo "Reindenting with $VERSION" +fi + +git ls-files | grep -e "$files_to_indent" | while read file +do + clang-format -i "$file" +done diff --git a/tools/ocp-indent-all.sh b/tools/ocp-indent-all.sh index 1020d842..a3564f73 100755 --- a/tools/ocp-indent-all.sh +++ b/tools/ocp-indent-all.sh @@ -1,6 +1,6 @@ #!/bin/sh -# a regex for files to indent +# OCaml sources files_to_indent='.*\.\(ml\|mli\)$' VERSION=$(ocp-indent --version) From 29e81b896986829e9dc90e96587df032260be32a Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Fri, 28 Nov 2025 01:05:07 -0500 Subject: [PATCH 55/62] silence warnings --- src/lib/machine/x86/x86_amd64_regalloc.ml | 8 ++++---- src/lib/passes/coalesce_slots/coalesce_slots_impl.ml | 2 +- src/lib/passes/sroa/sroa_impl.ml | 2 +- src/lib/scalars.ml | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/lib/machine/x86/x86_amd64_regalloc.ml b/src/lib/machine/x86/x86_amd64_regalloc.ml index 9c4c75d7..ac44aad1 100644 --- a/src/lib/machine/x86/x86_amd64_regalloc.ml +++ b/src/lib/machine/x86/x86_amd64_regalloc.ml @@ -438,21 +438,21 @@ module Post_assign_slots = struct | Some o -> Abd (base, Int32.of_int_exn o) | None -> a - let assign_abd find base a b _d = match find b with + let assign_abd find _base a b _d = match find b with | None -> a | Some _ -> assert false - let assign_abis find base a b i _s = match find b, find i with + let assign_abis find _base a b i _s = match find b, find i with | None, None -> a | Some _, None -> assert false | None, Some _ -> assert false | Some _, Some _ -> assert false - let assign_aisd find base a i _s _d = match find i with + let assign_aisd find _base a i _s _d = match find i with | None -> a | Some _ -> assert false - let assign_abisd find base a b i _s _d = match find b, find i with + let assign_abisd find _base a b i _s _d = match find b, find i with | None, None -> a | None, Some _ -> assert false | Some _, Some _ -> assert false diff --git a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml index b2ae86ae..38d9c75e 100644 --- a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml +++ b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml @@ -174,7 +174,7 @@ let partition slots rs = (* invariant: a group is never empty *) let canon_elt g = List.max_elt g ~compare:size_priority |> Option.value_exn -let make_subst slots p = +let make_subst _slots p = List.fold p ~init:Var.Map.empty ~f:(fun init -> function | [] | [_] -> init diff --git a/src/lib/passes/sroa/sroa_impl.ml b/src/lib/passes/sroa/sroa_impl.ml index d6a88fc9..a0178239 100644 --- a/src/lib/passes/sroa/sroa_impl.ml +++ b/src/lib/passes/sroa/sroa_impl.ml @@ -92,7 +92,7 @@ end = struct type accesses = access list Var.Map.t module Access = struct - type t = access + type t = access [@@ocaml.warning "-34"] let sizeof a = basic_size a.ty [@@inline] diff --git a/src/lib/scalars.ml b/src/lib/scalars.ml index a8910ebb..5d5f720e 100644 --- a/src/lib/scalars.ml +++ b/src/lib/scalars.ml @@ -241,7 +241,7 @@ module Make(M : L) = struct [@@specialise] (* Initial constraints. *) - let initialize slots blks = + let initialize slots _blks = (* Set all slots to point to their own base address. *) let init = Map.mapi slots ~f:(fun ~key ~data:_ -> Offset (key, 0L)) in Label.Map.singleton Label.pseudoentry init |> From a61b39628f69b769c4397fb7994ab04b3dfd2f4a Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Fri, 28 Nov 2025 01:09:46 -0500 Subject: [PATCH 56/62] interval tree --- src/lib/dune | 2 + src/lib/interval_tree.ml | 270 ++++++++++++++++++++++++++++++++++ src/lib/interval_tree.mli | 5 + src/lib/interval_tree_intf.ml | 40 +++++ 4 files changed, 317 insertions(+) create mode 100644 src/lib/interval_tree.ml create mode 100644 src/lib/interval_tree.mli create mode 100644 src/lib/interval_tree_intf.ml diff --git a/src/lib/dune b/src/lib/dune index 38d6ce89..4e58fd24 100644 --- a/src/lib/dune +++ b/src/lib/dune @@ -31,6 +31,8 @@ hashmap hashmap_intf isel_internal + interval_tree + interval_tree_intf last_stores live_intf loops diff --git a/src/lib/interval_tree.ml b/src/lib/interval_tree.ml new file mode 100644 index 00000000..e02e08e2 --- /dev/null +++ b/src/lib/interval_tree.ml @@ -0,0 +1,270 @@ +(* Implementation borrowed from BAP *) + +open Core +open Regular.Std +open Interval_tree_intf + +module Make(I : Interval) = struct + type key = I.t [@@deriving compare, sexp] + type point = I.point [@@deriving compare, sexp] + + module P = Comparable.Make_plain(struct + type t = point [@@deriving compare, sexp] + end) + + type +'a node = { + rhs : 'a node option; + lhs : 'a node option; + key : key; + data : 'a; + height : int; + greatest : point; + least : point; + } [@@deriving fields, sexp] + + type +'a t = 'a node option [@@deriving sexp] + + let height = Option.value_map ~default:0 ~f:height + let least = Option.map ~f:least + let greatest = Option.map ~f:greatest + let empty = None + + let bound f lhs top rhs = + Option.merge lhs rhs ~f |> + Option.value_map ~default:top ~f:(f top) + + let create lhs key data rhs = + let hl = height lhs and hr = height rhs in + let mn = I.lower key and mx = I.upper key in + let ll = least lhs and lr = least rhs in + let gl = greatest lhs and gr = greatest rhs in + Some { + lhs; rhs; key; data; + height = max hl hr + 1; + least = bound P.min ll mn lr; + greatest = bound P.max gl mx gr; + } + + let singleton key data = create None key data None + + let rec min_binding = function + | Some {lhs = None; key; data; _} -> Some (key, data) + | Some {lhs; _} -> min_binding lhs + | None -> None + + let rec max_binding = function + | Some {rhs = None; key; data; _} -> Some (key, data) + | Some {rhs; _} -> max_binding rhs + | None -> None + + let bal l x d r = + let hl, hr = height l, height r in + if hl > hr + 2 then match l with + | None -> assert false + | Some t when height t.lhs >= height t.rhs -> + create t.lhs t.key t.data (create t.rhs x d r) + | Some t -> match t.rhs with + | None -> assert false + | Some rhs -> + create (create t.lhs t.key t.data rhs.lhs) + rhs.key rhs.data (create rhs.rhs x d r) + else if hr > hl + 2 then match r with + | None -> assert false + | Some t when height t.rhs >= height t.lhs -> + create (create l x d t.lhs) t.key t.data t.rhs + | Some t -> match t.lhs with + | None -> assert false + | Some lhs -> + create (create l x d lhs.lhs) lhs.key lhs.data + (create lhs.rhs t.key t.data t.rhs) + else create l x d r + + let rec add map key data = match map with + | None -> singleton key data + | Some t -> + let c = I.compare key t.key in + if c = 0 then bal map key data None + else if c < 0 + then bal (add t.lhs key data) t.key t.data t.rhs + else bal t.lhs t.key t.data (add t.rhs key data) + + let is_inside key p = + let low = I.lower key and high = I.upper key in + P.between ~low ~high p + + let lookup start a = + let open Seq.Generator in + let rec go = function + | None -> return () + | Some t when P.(a < t.least || a > t.greatest) -> return () + | Some t -> + if is_inside t.key a then + go t.lhs >>= fun () -> + yield (t.key, t.data) >>= fun () -> + go t.rhs + else + go t.lhs >>= fun () -> + go t.rhs in + start |> go |> run + + let is_dominated t r = + let open I in + P.(lower r >= lower t.key && upper r <= upper t.key) + + let has_intersections t r = + let open I in + let open P in + let p, q = lower t.key, upper t.key + and x, y = lower r, upper r in + if p <= x + then x <= q && p <= y + else p <= y && x <= q + + let can't_be_in_tree t r = + let open I in + P.(lower r > t.greatest || upper r < t.least) + + let can't_be_dominated t r = + let open I in + P.(lower r < t.least || upper r > t.greatest) + + let query ~skip_if ~take_if start r = + let open Seq.Generator in + let rec go = function + | None -> return () + | Some t when skip_if t r -> return () + | Some t when take_if t r -> + go t.lhs >>= fun () -> + yield (t.key, t.data) >>= fun () -> + go t.rhs + | Some t -> + go t.lhs >>= fun () -> + go t.rhs in + start |> go |> run + + let dominators m r = query m r + ~skip_if:can't_be_dominated + ~take_if:is_dominated + + let intersections m r = query m r + ~skip_if:can't_be_in_tree + ~take_if:has_intersections + + let dominates m r = not (Seq.is_empty (dominators m r)) + let intersects m r = not (Seq.is_empty (intersections m r)) + let contains m a = not (Seq.is_empty (lookup m a)) + + let[@tail_mod_cons] rec map m ~f = match m with + | None -> None + | Some m -> Some { + m with + lhs = (map[@tailcall false]) m.lhs ~f; + data = f m.data; + rhs = (map[@tailcall]) m.rhs ~f; + } + + let[@tail_mod_cons] rec mapi m ~f = match m with + | None -> None + | Some m -> Some { + m with + lhs = (mapi[@tailcall false]) m.lhs ~f; + data = f m.key m.data; + rhs = (mapi[@tailcall]) m.rhs ~f; + } + + let rec remove_min_binding = function + | Some {lhs = None; rhs; _} -> rhs + | Some t -> bal (remove_min_binding t.lhs) t.key t.data t.rhs + | None -> assert false + + let splice t1 t2 = match t1,t2 with + | None, t | t, None -> t + | _ -> match min_binding t2 with + | Some (key, data) -> bal t1 key data (remove_min_binding t2) + | None -> assert false + + let mem_equal x y = + let open I in + P.(lower x = lower y && upper x = upper y) + + let rec remove map mem = match map with + | None -> None + | Some t when mem_equal t.key mem -> + splice (remove t.lhs mem) (remove t.rhs mem) + | Some t -> match I.compare mem t.key with + | 1 -> bal t.lhs t.key t.data (remove t.rhs mem) + | _ -> bal (remove t.lhs mem) t.key t.data t.rhs + + let remove_if ~leave_if ~remove_if map mem = + let rec remove = function + | None -> None + | Some t when leave_if t mem -> Some t + | Some t when remove_if t mem -> + splice (remove t.lhs) (remove t.rhs) + | Some t -> + bal (remove t.lhs) t.key t.data (remove t.rhs) in + remove map + + let remove_intersections map mem = remove_if map mem + ~leave_if:can't_be_in_tree + ~remove_if:has_intersections + + let remove_dominators map mem = remove_if map mem + ~leave_if:can't_be_dominated + ~remove_if:is_dominated + + let filter_mapi map ~f = + let rec fmap = function + | None -> None + | Some t -> match f t.key t.data with + | None -> splice (fmap t.lhs) (fmap t.rhs) + | Some data -> + bal (fmap t.lhs) t.key data (fmap t.rhs) in + fmap map + + let filter_map map ~f = filter_mapi map ~f:(fun _ x -> f x) + let filter map ~f : _ t = filter_map map ~f:(fun x -> Option.some_if (f x) x) + + let to_sequence start = + let open Seq.Generator in + let rec go = function + | None -> return () + | Some t -> + go t.lhs >>= fun () -> + yield (t.key, t.data) >>= fun () -> + go t.rhs in + start |> go |> run + + module C = Container.Make(struct + type 'a t = 'a node option + + let rec fold m ~init ~f = Option.fold m ~init:init ~f:(fun acc m -> + fold m.rhs ~init:(f (fold m.lhs ~init:acc ~f) m.data) ~f) + + let rec iter m ~f = Option.iter m ~f:(fun m -> + iter m.lhs ~f; + f m.data; + iter m.rhs ~f) + + let iter = `Custom iter + let length = `Define_using_fold + end) + + let fold = C.fold + let count = C.count + let sum = C.sum + let iter = C.iter + let length = C.length + let is_empty = C.is_empty + let exists = C.exists + let mem = C.mem + let for_all = C.for_all + let find_map = C.find_map + let find = C.find + let to_list = C.to_list + let to_array = C.to_array + let min_elt = C.min_elt + let max_elt = C.max_elt + let fold_until = C.fold_until + let fold_result = C.fold_result +end diff --git a/src/lib/interval_tree.mli b/src/lib/interval_tree.mli new file mode 100644 index 00000000..7fde5076 --- /dev/null +++ b/src/lib/interval_tree.mli @@ -0,0 +1,5 @@ +open Interval_tree_intf + +module Make(I : Interval) : S + with type key := I.t + and type point := I.point diff --git a/src/lib/interval_tree_intf.ml b/src/lib/interval_tree_intf.ml new file mode 100644 index 00000000..c59e7701 --- /dev/null +++ b/src/lib/interval_tree_intf.ml @@ -0,0 +1,40 @@ +open Core +open Regular.Std + +module type Interval = sig + type t [@@deriving compare, sexp] + type point [@@deriving compare, sexp] + val lower : t -> point + val upper : t -> point +end + +module type S = sig + type 'a t [@@deriving sexp_of] + type key + type point + + val empty : 'a t + val singleton : key -> 'a -> 'a t + val least : 'a t -> point option + val greatest : 'a t -> point option + val min_binding : 'a t -> (key * 'a) option + val max_binding : 'a t -> (key * 'a) option + val add : 'a t -> key -> 'a -> 'a t + val dominators : 'a t -> key -> (key * 'a) seq + val intersections : 'a t -> key -> (key * 'a) seq + val intersects : 'a t -> key -> bool + val dominates : 'a t -> key -> bool + val contains : 'a t -> point -> bool + val lookup : 'a t -> point -> (key * 'a) seq + val map : 'a t -> f:('a -> 'b) -> 'b t + val mapi : 'a t -> f:(key -> 'a -> 'b) -> 'b t + val filter : 'a t -> f:('a -> bool) -> 'a t + val filter_map : 'a t -> f:('a -> 'b option) -> 'b t + val filter_mapi : 'a t -> f:(key -> 'a -> 'b option) -> 'b t + val remove : 'a t -> key -> 'a t + val remove_intersections : 'a t -> key -> 'a t + val remove_dominators : 'a t -> key -> 'a t + val to_sequence : 'a t -> (key * 'a) seq + + include Container.S1 with type 'a t := 'a t +end From f7a8cdc61c0a8356ed89f1b434b519a89dd53f07 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Fri, 28 Nov 2025 15:04:40 -0500 Subject: [PATCH 57/62] Use Quickcheck for float32 and bv_interval Yielded some fixes --- src/lib/bv_interval.ml | 157 ++++++------ src/lib/float_stubs.c | 4 + src/test/dune | 21 +- src/test/test_bv_interval.ml | 465 +++++++++++++++++++++++++++++++++-- src/test/test_float32.ml | 136 ++++++++++ 5 files changed, 683 insertions(+), 100 deletions(-) create mode 100644 src/test/test_float32.ml diff --git a/src/lib/bv_interval.ml b/src/lib/bv_interval.ml index 542cf621..325bee54 100644 --- a/src/lib/bv_interval.ml +++ b/src/lib/bv_interval.ml @@ -347,43 +347,36 @@ let sext t ~size = else create ~lo:(sext t.lo) ~hi:(sext t.hi) ~size let trunc t ~size = - (* This semantics is relaxed. A truncate to the same size should - just be a no-op. *) + (* Same width: no-op. *) if t.size = size then t else if t.size < size then - invalid_arg "trunc: `size` is not strictly less than `t.size`" + invalid_argf + "trunc: `size` %d is not strictly less than `size t` %d" + size t.size () + else if is_empty t then + create_empty ~size + else if is_full t then + create_full ~size + else if is_wrapped_hi t then + (* XXX: this could be more precise *) + create_full ~size else - let module B = (val Bitvec.modular t.size) in - let default un lower_div upper_div = - let lower_div, upper_div = - let lz = bv_clz lower_div t.size in - if t.size - lz > size then - let adj = B.(lower_div land pred (one lsl int Int.(size - 1))) in - B.(lower_div - adj), B.(upper_div - adj) - else lower_div, upper_div in - let w = t.size - bv_clz upper_div t.size in - if w <= size then union (create ~lo:lower_div ~hi:upper_div ~size) un - else if w = size + 1 then - let upper_div = B.(upper_div land (lnot (one lsl int size))) in - if Bv.(upper_div < lower_div) - then union (create ~lo:lower_div ~hi:upper_div ~size) un - else create_full ~size - else create_full ~size in - if is_empty t then create_empty ~size - else if is_full t then create_full ~size - else if is_wrapped_hi t then - let lz = bv_clz t.hi t.size in - if t.size - lz > size - || bv_cto t.hi t.size = size - then create_full ~size - else - let un = create - ~lo:Bv.(max_unsigned_value size) - ~hi:(Bv.extract ~hi:(size - 1) ~lo:0 t.hi) - ~size in - let upper_div = Bv.max_unsigned_value t.size in - if Bv.(t.lo = upper_div) then un else default un t.lo upper_div - else default (create_empty ~size) t.lo t.hi + let lo_big = Bv.to_bigint t.lo in + let hi_big = Bv.to_bigint t.hi in + let modulus_dst = Z.shift_left Z.one size in + let span = Z.sub hi_big lo_big in + if Z.geq span modulus_dst then + create_full ~size + else + let block_lo = Z.div lo_big modulus_dst in + let block_hi_minus1 = Z.div (Z.pred hi_big) modulus_dst in + if Z.equal block_lo block_hi_minus1 then + let lo_mod = Bv.extract ~hi:(size - 1) ~lo:0 t.lo in + let hi_mod = Bv.extract ~hi:(size - 1) ~lo:0 t.hi in + if Bv.(lo_mod = hi_mod) + then create_full ~size + else create ~lo:lo_mod ~hi:hi_mod ~size + else create_full ~size let add t1 t2 = if t1.size <> t2.size then @@ -417,9 +410,9 @@ let sub t1 t2 = else if is_full t1 || is_full t2 then create_full ~size else - let m = Bv.modulus size in - let lo = Bv.(succ ((t1.lo - t2.hi) mod m) mod m) in - let hi = Bv.((t1.hi - t2.lo) mod m) in + let module B = (val Bv.modular size) in + let lo = B.(t1.lo - t2.hi + one) in + let hi = B.(t1.hi - t2.lo) in if Bv.(lo = hi) then create_full ~size else @@ -429,6 +422,14 @@ let sub t1 t2 = then create_full ~size else x +let mul_single t1 t2 = + let size = t1.size in + match single_of t1 with + | Some s when Bv.(s = one) -> Some t2 + | Some s when Bv.(s = (ones mod modulus size)) -> + Some (sub (create_single ~value:Bv.zero ~size) t2) + | _ -> None + let mul t1 t2 = if t1.size <> t2.size then invalid_arg "mul: sizes must be equal" @@ -436,42 +437,46 @@ let mul t1 t2 = let size = t1.size in if is_empty t1 || is_empty t2 then create_empty ~size - else - let min1 = unsigned_min t1 in - let max1 = unsigned_max t1 in - let min2 = unsigned_min t2 in - let max2 = unsigned_max t2 in - let m2 = Bv.modulus (size * 2) in - let result = create - ~lo:Bv.(min1 * min2 mod m2) - ~hi:Bv.(succ (max1 * max2 mod m2) mod m2) - ~size:(size * 2) in - let ur = trunc result ~size in - let m = Bv.modulus size in - if not (is_wrapped_hi ur) - && (Bv.(signed_compare ur.hi zero m) >= 0 || - Bv.(ur.hi = min_signed_value size)) - then ur - else - let min1 = signed_min t1 in - let max1 = signed_max t1 in - let min2 = signed_min t2 in - let max2 = signed_max t2 in - let p = [| - Bv.(min1 * min2 mod m2); - Bv.(min1 * max2 mod m2); - Bv.(max1 * min2 mod m2); - Bv.(max1 * max2 mod m2); - |] in - let compare x y = Bv.signed_compare x y m2 in - let lo = Array.min_elt p ~compare in - let hi = Array.max_elt p ~compare in - let lo, hi = match lo, hi with - | None, _ | _, None -> assert false - | Some lo, Some hi -> (lo, Bv.(succ hi mod m2)) in - let result = create ~lo ~hi ~size:(size * 2) in - let sr = trunc result ~size in - if is_strictly_smaller_than ur sr then ur else sr + else match mul_single t1 t2 with + | Some t -> t + | None -> match mul_single t2 t1 with + | Some t -> t + | None -> + let min1 = unsigned_min t1 in + let max1 = unsigned_max t1 in + let min2 = unsigned_min t2 in + let max2 = unsigned_max t2 in + let m2 = Bv.modulus (size * 2) in + let result = create + ~lo:Bv.(min1 * min2 mod m2) + ~hi:Bv.(succ (max1 * max2 mod m2) mod m2) + ~size:(size * 2) in + let ur = trunc result ~size in + let m = Bv.modulus size in + if not (is_wrapped_hi ur) + && (Bv.(signed_compare ur.hi zero m) >= 0 || + Bv.(ur.hi = min_signed_value size)) + then ur + else + let min1 = signed_min t1 in + let max1 = signed_max t1 in + let min2 = signed_min t2 in + let max2 = signed_max t2 in + let p = [| + Bv.(min1 * min2 mod m2); + Bv.(min1 * max2 mod m2); + Bv.(max1 * min2 mod m2); + Bv.(max1 * max2 mod m2); + |] in + let compare x y = Bv.signed_compare x y m2 in + let lo = Array.min_elt p ~compare in + let hi = Array.max_elt p ~compare in + let lo, hi = match lo, hi with + | None, _ | _, None -> assert false + | Some lo, Some hi -> (lo, Bv.(succ hi mod m2)) in + let result = create ~lo ~hi ~size:(size * 2) in + let sr = trunc result ~size in + if is_strictly_smaller_than ur sr then ur else sr let smax t1 t2 = if t1.size <> t2.size then @@ -848,8 +853,8 @@ let logical_shift_right t1 t2 = let max2 = unsigned_max t2 in let min1 = unsigned_min t1 in let min2 = unsigned_min t2 in - let lo = Bv.(succ ((max1 lsr min2) mod m) mod m) in - let hi = Bv.((min1 lsr max2) mod m) in + let hi = Bv.(succ ((max1 lsr min2) mod m) mod m) in + let lo = Bv.((min1 lsr max2) mod m) in create ~lo ~hi ~size let arithmetic_shift_right t1 t2 = @@ -982,7 +987,7 @@ let ctz_range lo hi size = else if Bv.(lo = zero) then create ~lo:Bv.zero ~hi:(B.int (size + 1)) ~size else - let len = bv_clz B.(lo lxor succ hi) size in + let len = bv_clz B.(lo lxor pred hi) size in let hi = B.int (max (size - len - 1) (bv_ctz lo size) + 1) in create ~size ~lo:Bv.zero ~hi diff --git a/src/lib/float_stubs.c b/src/lib/float_stubs.c index 9fa568ff..f42c7d7f 100644 --- a/src/lib/float_stubs.c +++ b/src/lib/float_stubs.c @@ -73,10 +73,12 @@ value cgen_float_of_float32(value x) { return caml_copy_double(Float_val(x)); } + value cgen_float32_is_zero(value x) { return Val_bool(Float_val(x) == 0.0f); } + value cgen_float32_is_inf(value x) { return Val_bool(isinf(Float_val(x))); @@ -165,6 +167,7 @@ value cgen_int8_of_float32(value x) { return Val_int((int8_t)Float_val(x)); } + value cgen_int16_of_float32(value x) { return Val_int((int16_t)Float_val(x)); @@ -329,6 +332,7 @@ value cgen_int8_of_float(value x) { return Val_int((int8_t)Double_val(x)); } + value cgen_int16_of_float(value x) { return Val_int((int16_t)Double_val(x)); diff --git a/src/test/dune b/src/test/dune index 4ab88436..b69b7d54 100644 --- a/src/test/dune +++ b/src/test/dune @@ -1,6 +1,25 @@ (tests - (names test_allen test_bv_interval test_opt test_type) + (names test_allen test_opt test_type) (libraries ocamldiff ounit2 cgen shexp.process) + (modules test_allen test_opt test_type) (ocamlopt_flags -O2) (deps (glob_files_rec data/*))) + +(library + (name test_float32) + (modules test_float32) + (libraries cgen) + (ocamlopt_flags -O2) + (inline_tests) + (preprocess + (pps ppx_jane))) + +(library + (name test_bv_interval) + (modules test_bv_interval) + (libraries cgen) + (ocamlopt_flags -O2) + (inline_tests) + (preprocess + (pps ppx_jane))) diff --git a/src/test/test_bv_interval.ml b/src/test/test_bv_interval.ml index 1dcb6363..313817bc 100644 --- a/src/test/test_bv_interval.ml +++ b/src/test/test_bv_interval.ml @@ -1,28 +1,447 @@ open Core -open OUnit2 open Cgen module I = Bv_interval +module Q = Quickcheck -let expect x i = - let size = I.size i in - let i' = match x with - | `empty -> I.create_empty ~size - | `full -> I.create_full ~size - | `def (lo, hi) -> I.create ~lo ~hi ~size in - let msg = Format.asprintf "Expected %a, got %a" I.pp i' I.pp i in - assert_bool msg (I.equal i i') - -let test_1 _ = - let size = 32 in - let module B = (val Bv.modular size) in - let i1 = I.create ~lo:Bv.one ~hi:B.(int 5) ~size in - let i2 = I.create ~lo:B.(int 3) ~hi:B.(int 7) ~size in - expect (`def (B.int 1, B.int 7)) @@ I.union i1 i2; - expect (`def (B.int 3, B.int 5)) @@ I.intersect i1 i2 - -let suite = "Test BV intervals" >::: [ - "Test 1" >:: test_1; - ] - -let () = run_test_tt_main suite +let gen_size = + Q.Generator.small_positive_int |> + Q.Generator.filter ~f:(fun n -> n > 0 && n <= 64) + +let gen_small_size = + Q.Generator.small_positive_int |> + Q.Generator.filter ~f:(fun n -> n > 0 && n <= 8) + +let gen_bv size = + Q.Generator.map Int64.quickcheck_generator ~f:(fun x -> + Bv.(int64 x mod modulus size)) + +let gen_interval size = + Q.Generator.map (gen_bv size) ~f:(fun bv -> + I.create_single ~value:bv ~size) + +let gen_sized_interval = + Q.Generator.bind gen_size ~f:(fun size -> + gen_interval size |> Q.Generator.map + ~f:(fun t -> size, t)) + +let gen_interval_pair = + Q.Generator.bind gen_size ~f:(fun size -> + Q.Generator.both (gen_interval size) (gen_interval size) |> + Q.Generator.map ~f:(fun p -> size, p)) + +let gen_small_sized_interval = + Q.Generator.bind gen_small_size ~f:(fun size -> + gen_interval size |> Q.Generator.map + ~f:(fun t -> size, t)) + +let gen_small_interval_pair = + Q.Generator.bind gen_small_size ~f:(fun size -> + Q.Generator.both (gen_interval size) (gen_interval size) |> + Q.Generator.map ~f:(fun p -> size, p)) + +let all_bvs size = + List.init (1 lsl size) ~f:(fun n -> Bv.(int n mod modulus size)) + +let concrete_values size t = + all_bvs size |> List.filter ~f:(I.contains_value t) + +let oracle_sound_binop ~size ~interval_op ~concrete_op t1 t2 = + let vs1 = concrete_values size t1 in + let vs2 = concrete_values size t2 in + let results = List.bind vs1 ~f:(fun a -> + let ia = Bv.to_int a in + List.filter_map vs2 ~f:(fun b -> + let ib = Bv.to_int b in + concrete_op size ia ib |> + Option.map ~f:(fun n -> + Bv.(int n mod modulus size)))) in + List.is_empty results || + let r_int = interval_op t1 t2 in + List.for_all results ~f:(I.contains_value r_int) + +let oracle_sound_unop ~size ~interval_op ~concrete_op t = + let vs = concrete_values size t in + let results = List.filter_map vs ~f:(fun v -> + let i = Bv.to_int v in + concrete_op size i |> Option.map ~f:(fun r -> + Bv.(int r mod modulus size))) in + List.is_empty results || + let t_int = interval_op t in + List.for_all results ~f:(I.contains_value t_int) + +let prop_size_preserved (size, t) = Int.equal size (I.size t) + +let prop_bounds_valid (_size, t) = + let lo = I.lower t in + let hi = I.upper t in + I.is_full t || I.is_empty t || not (Bv.equal lo hi) + +let prop_empty_singleton_consistency (_size, t) = + not (I.is_empty t) || begin + Option.is_none (I.single_of t) && + not (I.is_single t) + end + +let prop_full_contains_all size = + let t = I.create_full ~size in + Q.test (gen_bv size) + ~trials:100 + ~f:(fun bv -> assert (I.contains_value t bv)); + true + +let prop_single_roundtrip size = + Q.test (gen_bv size) + ~trials:50 + ~f:(fun bv -> + let t = I.create_single ~value:bv ~size in + match I.single_of t with + | Some bv' -> assert (Bv.equal bv bv') + | None -> assert false); + true + +let prop_negative_nonnegative_disjoint (_size, t) = + not (I.is_all_negative t && I.is_all_non_negative t) + +let prop_binop_size_stable op (size, (t1, t2)) = + try + let r = op t1 t2 in + Int.equal (I.size r) size + with _ -> true + +let prop_singleton_contains (_size, t) = + match I.single_of t with + | Some v -> I.contains_value t v + | None -> true + +let prop_contains_implies_single_contains (_size, (t1, t2)) = + not (I.contains t1 t2) || match I.single_of t2 with + | None -> true + | Some v -> I.contains_value t1 v + +let prop_intersect_comm (_size, (t1, t2)) = + let r1 = I.intersect t1 t2 in + let r2 = I.intersect t2 t1 in + I.equal r1 r2 + +let prop_union_sound (_size, (t1, t2)) = + let u1 = I.union t1 t2 in + let u2 = I.union t2 t1 in + I.contains u1 t1 && + I.contains u1 t2 && + I.contains u2 t1 && + I.contains u2 t2 + +let prop_inverse_involutive (_size, t) = + I.equal (I.inverse (I.inverse t)) t + +let prop_extract_shrinks (size,t) = + if size > 1 then + let hi = size - 1 in + let lo = size / 2 in + let t' = I.extract t ~hi ~lo in + I.size t' = hi - lo + 1 + else true + +let prop_concat_size ((size1, t1), (size2, t2)) = + let t = I.concat t1 t2 in + Int.equal (I.size t) (size1 + size2) + +let mask_of_size size = (1 lsl size) - 1 +let add_mod size a b = (a + b) land mask_of_size size +let sub_mod size a b = (a - b) land mask_of_size size +let mul_mod size a b = (a * b) land mask_of_size size +let udiv_opt _size a b = if b = 0 then None else Some (a / b) +let urem_opt _size a b = if b = 0 then None else Some (a mod b) + +let to_signed size n = + let sign_bit = 1 lsl (size - 1) in + if (n land sign_bit) = 0 then n + else n - (1 lsl size) + +let sdiv_opt size a b = + if b = 0 then None else + let sa = to_signed size a in + let sb = to_signed size b in + if sb = 0 then None else + let q = sa / sb in + (* re-wrap as unsigned *) + let res = q land mask_of_size size in + Some res + +let srem_opt size a b = + if b = 0 then None else + let sa = to_signed size a in + let sb = to_signed size b in + if sb = 0 then None else + let m = Bv.modulus size in + let ba = Bv.(int sa mod m) in + let bb = Bv.(int sb mod m) in + let r = Bv.(srem ba bb mod m) in + Some (Bv.to_int r) + +let logand_mod _size a b = a land b +let logor_mod _size a b = a lor b +let logxor_mod _size a b = a lxor b + +let shl_opt size a b = + if b >= size then None else Some ((a lsl b) land mask_of_size size) + +let lshr_opt size a b = + if b >= size then None else Some (a lsr b) + +let ashr_opt size a b = + if b >= size then None else + let sa = to_signed size a in + let shifted = sa asr b in + Some (shifted land mask_of_size size) + +let popcount size a = + let a = a land mask_of_size size in + let rec loop x acc = if x = 0 then acc + else loop (x land (x - 1)) (acc + 1) in + loop a 0 + +let bv_clz x n = + let x = Bv.to_bigint x in + let rec aux i = + if i < 0 then n + else if Z.testbit x i then n - i - 1 + else aux (i - 1) in + aux (n - 1) + +let bv_ctz x n = + let x = Bv.to_bigint x in + let rec aux i = + if i >= n then n + else if Z.testbit x i then i + 1 + else aux (i + 1) in + aux 0 + +let clz size a = bv_clz Bv.(int a mod modulus size) size +let ctz size a = bv_ctz Bv.(int a mod modulus size) size + +let prop_add_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.add + ~concrete_op:(fun size a b -> Some (add_mod size a b)) + t1 t2 + +let prop_sub_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.sub + ~concrete_op:(fun size a b -> Some (sub_mod size a b)) + t1 t2 + +let prop_mul_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.mul + ~concrete_op:(fun size a b -> Some (mul_mod size a b)) + t1 t2 + +let prop_udiv_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.udiv + ~concrete_op:udiv_opt + t1 t2 + +let prop_urem_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.urem + ~concrete_op:urem_opt + t1 t2 + +let prop_sdiv_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.sdiv + ~concrete_op:sdiv_opt + t1 t2 + +let prop_srem_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.srem + ~concrete_op:srem_opt + t1 t2 + +let prop_logand_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.logand + ~concrete_op:(fun size a b -> Some (logand_mod size a b)) + t1 t2 + +let prop_logor_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.logor + ~concrete_op:(fun size a b -> Some (logor_mod size a b)) + t1 t2 + +let prop_logxor_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.logxor + ~concrete_op:(fun size a b -> Some (logxor_mod size a b)) + t1 t2 + +let prop_lsl_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.logical_shift_left + ~concrete_op:shl_opt + t1 t2 + +let prop_lsr_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.logical_shift_right + ~concrete_op:lshr_opt + t1 t2 + +let prop_asr_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.arithmetic_shift_right + ~concrete_op:ashr_opt + t1 t2 + +let prop_lnot_sound (size, t) = + oracle_sound_unop + ~size + ~interval_op:I.lnot + ~concrete_op:(fun size a -> Some ((lnot a) land mask_of_size size)) + t + +let prop_neg_sound (size, t) = + oracle_sound_unop + ~size + ~interval_op:I.neg + ~concrete_op:(fun size a -> Some ((0 - a) land ((1 lsl size) - 1))) + t + +let prop_clz_sound (size, t) = + oracle_sound_unop + ~size + ~interval_op:(I.clz ~zero_is_poison:false) + ~concrete_op:(fun size a -> Some (clz size a)) + t + +let prop_ctz_sound (size, t) = + oracle_sound_unop + ~size + ~interval_op:(I.ctz ~zero_is_poison:false) + ~concrete_op:(fun size a -> Some (ctz size a)) + t + +let prop_popcnt_sound (size, t) = + oracle_sound_unop + ~size + ~interval_op:I.popcnt + ~concrete_op:(fun size a -> Some (popcount size a)) + t + +let qc1 sexp name gen prop = + match + Q.test_or_error gen + ~seed:(`Deterministic name) + ~trials:3000 + ~f:(fun x -> if prop x then Ok () else + Or_error.error_s Sexp.(List [Atom "failed"; sexp x])) + with Ok () -> () | Error e -> Error.raise e + +let sexp_binop x = [%sexp (x : int * (I.t * I.t))] +let sexp_unop x = [%sexp (x : int * I.t)] + +let%test_unit "add preserves size" = + qc1 sexp_binop "add-size" gen_interval_pair (prop_binop_size_stable I.add) + +let%test_unit "sub preserves size" = + qc1 sexp_binop "sub-size" gen_interval_pair (prop_binop_size_stable I.sub) + +let%test_unit "mul preserves size" = + qc1 sexp_binop "mul-size" gen_interval_pair (prop_binop_size_stable I.mul) + +let%test_unit "size preserved" = + qc1 sexp_unop "size" gen_sized_interval prop_size_preserved + +let%test_unit "bounds valid" = + qc1 sexp_unop "bounds" gen_sized_interval prop_bounds_valid + +let%test_unit "singleton roundtrip" = + qc1 sexp_of_int "single" gen_size prop_single_roundtrip + +let%test_unit "empty singleton consistency" = + qc1 sexp_unop "empty-single" gen_sized_interval prop_empty_singleton_consistency + +let%test_unit "intersect commutative" = + qc1 sexp_binop "intersect-comm" gen_interval_pair prop_intersect_comm + +let%test_unit "union commutative" = + qc1 sexp_binop "union-comm" gen_interval_pair prop_union_sound + +let%test_unit "inverse involutive" = + qc1 sexp_unop "inverse" gen_sized_interval prop_inverse_involutive + +let%test_unit "contains → contains single" = + qc1 sexp_binop "contains-single" gen_interval_pair prop_contains_implies_single_contains + +let%test_unit "add sound" = + qc1 sexp_binop "bv-add" gen_small_interval_pair prop_add_sound + +let%test_unit "sub sound" = + qc1 sexp_binop "bv-sub" gen_small_interval_pair prop_sub_sound + +let%test_unit "mul sound" = + qc1 sexp_binop "bv-mul" gen_small_interval_pair prop_mul_sound + +let%test_unit "udiv sound" = + qc1 sexp_binop "bv-udiv" gen_small_interval_pair prop_udiv_sound + +let%test_unit "urem sound" = + qc1 sexp_binop "bv-urem" gen_small_interval_pair prop_urem_sound + +let%test_unit "sdiv sound" = + qc1 sexp_binop "bv-sdiv" gen_small_interval_pair prop_sdiv_sound + +let%test_unit "srem sound" = + qc1 sexp_binop "bv-srem" gen_small_interval_pair prop_srem_sound + +let%test_unit "logand sound" = + qc1 sexp_binop "bv-logand" gen_small_interval_pair prop_logand_sound + +let%test_unit "logor sound" = + qc1 sexp_binop "bv-logor" gen_small_interval_pair prop_logor_sound + +let%test_unit "logxor sound" = + qc1 sexp_binop "bv-logxor" gen_small_interval_pair prop_logxor_sound + +let%test_unit "lsl sound" = + qc1 sexp_binop "bv-lsl" gen_small_interval_pair prop_lsl_sound + +let%test_unit "lsr sound" = + qc1 sexp_binop "bv-lsr" gen_small_interval_pair prop_lsr_sound + +let%test_unit "asr sound" = + qc1 sexp_binop "bv-asr" gen_small_interval_pair prop_asr_sound + +let%test_unit "neg sound" = + qc1 sexp_unop "bv-neg" gen_small_sized_interval prop_neg_sound + +let%test_unit "lnot sound" = + qc1 sexp_unop "bv-lnot" gen_small_sized_interval prop_lnot_sound + +let%test_unit "clz sound" = + qc1 sexp_unop "bv-clz" gen_small_sized_interval prop_clz_sound + +let%test_unit "ctz sound" = + qc1 sexp_unop "bv-ctz" gen_small_sized_interval prop_ctz_sound + +let%test_unit "popcnt sound" = + qc1 sexp_unop "bv-popcnt" gen_small_sized_interval prop_popcnt_sound diff --git a/src/test/test_float32.ml b/src/test/test_float32.ml new file mode 100644 index 00000000..00051b95 --- /dev/null +++ b/src/test/test_float32.ml @@ -0,0 +1,136 @@ +open Core +open Cgen.Float32 + +module Q = Quickcheck + +let gen_float32 = Q.Generator.map Int32.quickcheck_generator ~f:of_bits +let gen_pair = Q.Generator.both gen_float32 gen_float32 + +let f64 x = to_float x +let near ?(eps = 1e-5) a b = Float.(abs (f64 a - f64 b) <= eps) + +let prop_add_approx a b = + is_nan a || is_nan b || is_inf a || is_inf b || + let r = add a b in + let e = of_float Float.(f64 a + f64 b) in + is_nan r || is_inf r || is_nan e || is_inf e || near r e + +let prop_sub_approx a b = + is_nan a || is_nan b || is_inf a || is_inf b || + let r = sub a b in + let e = of_float Float.(f64 a - f64 b) in + is_nan r || is_inf r || is_nan e || is_inf e || near r e + +let prop_mul_approx a b = + is_nan a || is_nan b || is_inf a || is_inf b || + let r = mul a b in + let e = of_float Float.(f64 a * f64 b) in + is_nan r || is_inf r || is_nan e || is_inf e || near r e + +let prop_div_approx a b = + is_nan a || is_nan b || is_inf a || is_inf b || Float.(f64 b = 0.0) || + let r = div a b in + let e = of_float Float.(f64 a / f64 b) in + is_nan r || is_inf r || is_nan e || is_inf e || near r e + +let prop_neg_approx x = + let r = neg x in + let fx = to_float x in + let e = of_float Float.(-fx) in + if is_nan x then is_nan r + else if is_inf x + then is_inf r && Bool.equal (is_negative r) (not @@ is_negative x) + else near r e + +let prop_compare_consistent a b = + is_unordered a b || + let c = compare a b in + let eq = Int.(c = 0) in + let lt = Int.(c < 0) in + let gt = Int.(c > 0) in + Bool.equal eq (a = b) && + Bool.equal (not eq) (a <> b) && + Bool.equal lt (a < b) && + Bool.equal (lt || eq) (a <= b) && + Bool.equal gt (a > b) && + Bool.equal (gt || eq) (a >= b) + +let prop_is_zero x = Bool.equal (is_zero x) Float.(f64 x = 0.0) + +let prop_is_negative x = + let sign_bit = Int32.(bits x land 0x8000_0000l <> 0l) in + Bool.equal (is_negative x) sign_bit + +let prop_is_inf x = Bool.equal (is_inf x) (Float.is_inf @@ f64 x) +let prop_is_nan x = Bool.equal (is_nan x) (Float.is_nan @@ f64 x) + +let prop_bits_roundtrip x = + let b = bits x in + let x' = of_bits b in + Int32.equal b (bits x') + +let prop_string_roundtrip x = + let s = to_string x in + let x' = of_string s in + if is_nan x then is_nan x' + else if is_inf x + then is_inf x' && Bool.(is_negative x = is_negative x') + else not @@ is_nan x' + +let prop_int_conversions x = + ignore (to_int8 x : int); + ignore (to_int16 x : int); + ignore (to_int32 x : int32); + ignore (to_int64 x : int64); + ignore (to_uint8 x : int); + ignore (to_uint16 x : int); + ignore (to_uint32 x : int32); + ignore (to_uint64 x : int64); + ignore (of_int8 0 : t); + ignore (of_int16 0 : t); + ignore (of_int32 0l : t); + ignore (of_int64 0L : t); + ignore (of_uint8 0 : t); + ignore (of_uint16 0 : t); + ignore (of_uint32 0l : t); + ignore (of_uint64 0L : t); + true + +(* prints failing inputs! *) +let qc1 name gen prop = + match + Q.test_or_error gen + ~seed:(`Deterministic name) + ~trials:5000 + ~f:(fun x -> + if prop x then Ok () + else Or_error.error_s [%sexp "failed", (x : t)]) + with + | Ok () -> () + | Error e -> Error.raise e + +let qc2 name gen prop = + match + Q.test_or_error gen + ~seed:(`Deterministic name) + ~trials:5000 + ~f:(fun (a, b) -> + if prop a b then Ok () + else Or_error.error_s [%sexp "failed", (a : t), (b : t)]) + with + | Ok () -> () + | Error e -> Error.raise e + +let%test_unit "bits roundtrip" = qc1 "bits" gen_float32 prop_bits_roundtrip +let%test_unit "add approx" = qc2 "add" gen_pair prop_add_approx +let%test_unit "sub approx" = qc2 "sub" gen_pair prop_sub_approx +let%test_unit "mul approx" = qc2 "mul" gen_pair prop_mul_approx +let%test_unit "div approx" = qc2 "div" gen_pair prop_div_approx +let%test_unit "neg approx" = qc1 "neg" gen_float32 prop_neg_approx +let%test_unit "compare consistency" = qc2 "cmp" gen_pair prop_compare_consistent +let%test_unit "is_zero" = qc1 "zero" gen_float32 prop_is_zero +let%test_unit "is_negative" = qc1 "neg" gen_float32 prop_is_negative +let%test_unit "is_inf" = qc1 "inf" gen_float32 prop_is_inf +let%test_unit "is_nan" = qc1 "nan" gen_float32 prop_is_nan +let%test_unit "int conversions" = qc1 "int" gen_float32 prop_int_conversions +let%test_unit "string roundtrip" = qc1 "str" gen_float32 prop_string_roundtrip From 358274735b85d404292813a6770dce39776b808c Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Fri, 28 Nov 2025 18:08:45 -0500 Subject: [PATCH 58/62] Use a more precise domain for `Slot_initialization` We can track the relative byte offsets of each slot that was initialized using the interval tree. Also, do some cleanup of the users of this analysis --- src/lib/interval_tree.ml | 6 +- .../coalesce_slots/coalesce_slots_impl.ml | 12 +- src/lib/passes/promote_slots/promote_slots.ml | 28 ++-- src/lib/slot_initialization.ml | 158 ++++++++++++++---- src/test/data/opt/gcdext.vir.opt.sysv | 33 ++-- 5 files changed, 165 insertions(+), 72 deletions(-) diff --git a/src/lib/interval_tree.ml b/src/lib/interval_tree.ml index e02e08e2..ac48b0b9 100644 --- a/src/lib/interval_tree.ml +++ b/src/lib/interval_tree.ml @@ -13,8 +13,8 @@ module Make(I : Interval) = struct end) type +'a node = { - rhs : 'a node option; - lhs : 'a node option; + rhs : 'a t; + lhs : 'a t; key : key; data : 'a; height : int; @@ -22,7 +22,7 @@ module Make(I : Interval) = struct least : point; } [@@deriving fields, sexp] - type +'a t = 'a node option [@@deriving sexp] + and +'a t = 'a node option [@@deriving sexp] let height = Option.value_map ~default:0 ~f:height let least = Option.map ~f:least diff --git a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml index 38d9c75e..533e77b3 100644 --- a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml +++ b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml @@ -201,7 +201,7 @@ let is_empty t = module Make(M : Scalars.L) = struct open M - module S = Slot_initialization.Make(M) + module Sinit = Slot_initialization.Make(M) let mkdef s x n = Map.update s x ~f:(function | None -> Range.singleton n @@ -259,7 +259,7 @@ module Make(M : Scalars.L) = struct let op = Insn.op i in let acc = liveness_insn si acc !s !ip i in Vec.push nums (Insn.label i); - s := S.Analysis.transfer_op slots !s op; + s := Sinit.S.transfer_op slots !s op; incr ip; acc) in let acc = liveness_ctrl si acc !s !ip l @@ Blk.ctrl b in @@ -288,7 +288,7 @@ module Make(M : Scalars.L) = struct | _ -> acc end | _ -> acc in - s := S.Analysis.transfer_op slots !s op; + s := Sinit.S.transfer_op slots !s op; acc)) let debug_show slots rs nums deads p subst = @@ -323,12 +323,12 @@ module Make(M : Scalars.L) = struct __FUNCTION__ Var.pp key Virtual.pp_operand data)) let run fn = - let slots = S.Analysis.collect_slots fn in + let slots = Sinit.S.collect_slots fn in if Map.is_empty slots then empty else let cfg = Cfg.create fn in let blks = Func.map_of_blks fn in - let t = S.Analysis.analyze cfg blks slots in - let si = S.analyze' t cfg blks slots in + let t = Sinit.S.analyze cfg blks slots in + let si = Sinit.analyze' t cfg blks slots in let rs, nums = liveness cfg blks slots t si in let p = partition slots rs in let deads = collect_deads blks slots rs t in diff --git a/src/lib/passes/promote_slots/promote_slots.ml b/src/lib/passes/promote_slots/promote_slots.ml index c8fef41f..4f89e36f 100644 --- a/src/lib/passes/promote_slots/promote_slots.ml +++ b/src/lib/passes/promote_slots/promote_slots.ml @@ -48,15 +48,21 @@ module A = Make(struct open E.Syntax +let apply + (type fn) + (module M : Scalars.L with type Func.t = fn) + run ssa (fn : fn) = + let module Sinit = Slot_initialization.Make(M) in + let slots = Sinit.S.collect_slots fn in + let cfg = M.Cfg.create fn in + let blks = M.Func.map_of_blks fn in + let s = Sinit.analyze cfg blks slots in + let undef l = Hash_set.mem s.bad l in + run fn ~undef >>= ssa + let run fn = if Dict.mem (Func.dict fn) Tags.ssa then - let module S = Slot_initialization.Make(Scalars_common.VL) in - let slots = S.Analysis.collect_slots fn in - let cfg = Cfg.create fn in - let blks = Func.map_of_blks fn in - let s = S.analyze cfg blks slots in - let undef l = Hash_set.mem s.bad l in - V.run fn ~undef >>= Ssa.run + apply (module Scalars_common.VL) V.run Ssa.run fn else E.failf "In Promote_slots: expected SSA form for function $%s" @@ -65,13 +71,7 @@ let run fn = let run_abi fn = let open Abi in if Dict.mem (Func.dict fn) Tags.ssa then - let module S = Slot_initialization.Make(Scalars_common.AL) in - let slots = S.Analysis.collect_slots fn in - let cfg = Cfg.create fn in - let blks = Func.map_of_blks fn in - let s = S.analyze cfg blks slots in - let undef l = Hash_set.mem s.bad l in - A.run fn ~undef >>= Ssa.run_abi + apply (module Scalars_common.AL) A.run Ssa.run_abi fn else E.failf "In Promote_slots (ABI): expected SSA form for function $%s" diff --git a/src/lib/slot_initialization.ml b/src/lib/slot_initialization.ml index 54e80d41..a8656095 100644 --- a/src/lib/slot_initialization.ml +++ b/src/lib/slot_initialization.ml @@ -2,9 +2,48 @@ open Core open Regular.Std open Graphlib.Std -type state = Var.Set.t [@@deriving equal] +module Slot = Virtual.Slot -let empty_state : state = Var.Set.empty +type interval = { + lo : int; + hi : int; +} [@@deriving compare, sexp] + +module Interval = struct + type point = int [@@deriving compare, sexp] + type t = interval [@@deriving compare, sexp] + + let lower t = t.lo + let upper t = t.hi + + let pp ppf t = Format.fprintf ppf "[%d,%d]" t.lo t.hi + + let from_access off ty = + let lo = Int64.to_int_exn off in + let sz = Type.sizeof_basic ty / 8 in + {lo; hi = lo + sz - 1} + + let extended t = { + lo = t.lo - 1; + hi = t.hi + 1; + } +end + +module Tree = Interval_tree.Make(Interval) + +(* For each slot, we have a set of intervals corresponding to + relative byte offsets that were initialized by a store. *) +type state = unit Tree.t Var.Map.t + +let equal_state s1 s2 = + Map.equal (fun t1 t2 -> + Seq.equal (fun (i1, _) (i2, _) -> + Interval.compare i1 i2 = 0) + (Tree.to_sequence t1) + (Tree.to_sequence t2)) + s1 s2 + +let empty_state : state = Var.Map.empty (* Starting constraint has the entry block with no incoming initializations. *) @@ -13,11 +52,34 @@ let init_constraints : state Label.Map.t = (* Our top element, which is every slot having been initialized. *) let top_state slots : state = - Var.Set.of_list @@ Map.keys slots + Map.map slots ~f:(fun s -> + let i = {lo = 0; hi = Slot.size s - 1} in + Tree.singleton i ()) + +(* Coalesce `i` with any overlapping or adjacent intervals in `t`. *) +let normalize_add t i = + let lo, hi, t = + Interval.extended i |> Tree.intersections t |> Seq.map ~f:fst |> + Seq.fold ~init:(i.lo, i.hi, t) ~f:(fun (lo, hi, t) i -> + min lo i.lo, max hi i.hi, Tree.remove t i) in + Tree.add t {lo; hi} () + +(* Intersect the intervals (and also normalize them). *) +let merge_tree t1 t2 = + Tree.to_sequence t1 |> Seq.map ~f:fst |> + Seq.fold ~init:Tree.empty ~f:(fun init i1 -> + Tree.intersections t2 i1 |> Seq.map ~f:fst |> + Seq.fold ~init ~f:(fun acc i2 -> + let lo = max i1.lo i2.lo in + let hi = min i1.hi i2.hi in + if lo <= hi then normalize_add acc {lo; hi} else acc)) (* Since this is a "must" forward-flow analysis, incoming predecessor states must intersect. *) -let merge_state = Set.inter +let merge_state s1 s2 = + Map.merge s1 s2 ~f:(fun ~key:_ -> function + | `Both (t1, t2) -> Some (merge_tree t1 t2) + | `Left _ | `Right _ -> None) type solution = (Label.t, state) Solution.t @@ -26,24 +88,63 @@ type t = { bad : Label.Hash_set.t; } -module Make(M : Scalars.L) = struct - open M +(* If the slot is not always initialized by the + time we reach the load, then we have UB. *) +let is_uninitialized acc base off ty = + match Map.find acc base with + | None -> true + | Some t -> + let i = Interval.from_access off ty in + not (Tree.dominates t i) + +let transfer_store esc acc ptr ty (s : Scalars.state) = + match Map.find s ptr with + | Some Offset (base, off) -> + (* If `base` ever escaped, then don't ever consider + it initialized. *) + if Hash_set.mem esc base then + let () = Logs.debug (fun m -> + m "%s: ignoring escaped slot %a%!" + __FUNCTION__ Var.pp base) in + acc + else + let i = Interval.from_access off ty in + Map.update acc base ~f:(function + | None -> Tree.singleton i () + | Some t when Tree.dominates t i -> t + | Some t -> normalize_add t i) + | _ -> acc - module Analysis = Scalars.Make(M) +let transfer_load bad acc l ptr ty (s : Scalars.state) = + match Map.find s ptr with + | Some Offset (base, off) -> + if is_uninitialized acc base off ty then + Hash_set.add bad l; + acc + | _ -> acc - let transfer_store acc ptr (s : Scalars.state) = - match Map.find s ptr with - | Some Offset (base, _) -> Set.add acc base - | _ -> acc +let debug_dump blks bad s = + Logs.debug (fun m -> + Label.Tree.iter blks ~f:(fun ~key ~data:_ -> + let s = Solution.get s key in + let pp_tree ppf (x, t) = + Tree.to_sequence t |> Seq.to_list |> + List.to_string ~f:(fun (i, ()) -> + Format.asprintf "%a" Interval.pp i) |> + Format.fprintf ppf "%a:%s" Var.pp x in + m "%s: %a: incoming must-initialize: %s%!" + __FUNCTION__ Label.pp key + (Map.to_alist s |> + List.to_string ~f:(Format.asprintf "%a" pp_tree)))); + Logs.debug (fun m -> + Hash_set.iter bad ~f:(fun l -> + m "%s: load at %a is potentially uninitialized%!" + __FUNCTION__ Label.pp l)) - let transfer_load bad acc l ptr (s : Scalars.state) = - match Map.find s ptr with - | Some Offset (base, _) -> - (* If the slot is not always initialized by the - time we reach the load, then we have UB. *) - if not @@ Set.mem acc base then Hash_set.add bad l; - acc - | _ -> acc +module Make(M : Scalars.L) = struct + open M + + module S = Scalars.Make(M) let transfer bad t blks slots l st = match Label.Tree.find blks l with @@ -53,10 +154,10 @@ module Make(M : Scalars.L) = struct Blk.insns b |> Seq.fold ~init:st ~f:(fun acc i -> let op = Insn.op i and l = Insn.label i in let acc = match Insn.load_or_store_to op with - | Some (ptr, _, Store) -> transfer_store acc ptr !s - | Some (ptr, _, Load) -> transfer_load bad acc l ptr !s + | Some (ptr, ty, Store) -> transfer_store t.esc acc ptr ty !s + | Some (ptr, ty, Load) -> transfer_load bad acc l ptr ty !s | _ -> acc in - s := Analysis.transfer_op slots !s op; + s := S.transfer_op slots !s op; acc) let analyze' t cfg blks slots = @@ -67,19 +168,10 @@ module Make(M : Scalars.L) = struct ~equal:equal_state ~merge:merge_state ~f:(transfer bad t blks slots) in - Logs.debug (fun m -> - Label.Tree.iter blks ~f:(fun ~key ~data:_ -> - let s = Solution.get s key in - m "%s: %a: incoming must-initialize: %s%!" - __FUNCTION__ Label.pp key - (Set.to_list s |> List.to_string ~f:Var.to_string))); - Logs.debug (fun m -> - Hash_set.iter bad ~f:(fun l -> - m "%s: load at %a is potentially uninitialized%!" - __FUNCTION__ Label.pp l)); + debug_dump blks bad s; {soln = s; bad} let analyze cfg blks slots = - let t = Analysis.analyze cfg blks slots in + let t = S.analyze cfg blks slots in analyze' t cfg blks slots end diff --git a/src/test/data/opt/gcdext.vir.opt.sysv b/src/test/data/opt/gcdext.vir.opt.sysv index 7b3659db..8a62f120 100644 --- a/src/test/data/opt/gcdext.vir.opt.sysv +++ b/src/test/data/opt/gcdext.vir.opt.sysv @@ -1,36 +1,37 @@ module gcdext export function $gcd(w %a/rdi, w %b/rsi) { - %31 = slot 8, align 8 %32 = slot 8, align 8 + %33 = slot 8, align 8 + %34 = slot 8, align 8 @2: %1.1 = eq.w %a, 0x0_w ; @30 br %1.1, @3, @4 @3: - st.w %b, %31 ; @6 - %33.1 = add.l %31, 0x4_l ; @61 - st.w 0x0_w, %33.1 ; @8 - st.w 0x1_w, %32 ; @10 - %19.1 = ld.l %31 ; @49 - %21.1 = ld.l %32 ; @52 + st.w %b, %33 ; @6 + %35.1 = add.l %33, 0x4_l ; @61 + st.w 0x0_w, %35.1 ; @8 + st.w 0x1_w, %34 ; @10 + %19.1 = ld.l %33 ; @49 + %21.1 = ld.l %34 ; @52 jmp @29(%21.1, %19.1) @4: %m.1 = rem.w %b, %a ; @12 %27.1/l/rax, %28.1/l/rdx = call $gcd(%m.1/rdi, %a/rsi) ; @13 - st.l %27.1, %31 ; @38 + st.l %27.1, %33 ; @38 st.l %28.1, %32 ; @42 - %rg.1 = ld.w %31 ; @15 - %34.1 = add.l %31, 0x4_l ; @62 - %rx.1 = ld.w %34.1 ; @17 + %rg.1 = ld.w %33 ; @15 + %36.1 = add.l %33, 0x4_l ; @62 + %rx.1 = ld.w %36.1 ; @17 %ry.1 = ld.w %32 ; @19 - st.w %rg.1, %31 ; @20 + st.w %rg.1, %33 ; @20 %nx.1 = div.w %b, %a ; @21 %6.1 = mul.w %nx.1, %rx.1 ; @35 %7.1 = sub.w %ry.1, %6.1 ; @36 - st.w %7.1, %34.1 ; @25 - st.w %rx.1, %32 ; @27 - %14.1 = ld.l %31 ; @43 - %16.1 = ld.l %32 ; @46 + st.w %7.1, %36.1 ; @25 + st.w %rx.1, %34 ; @27 + %14.1 = ld.l %33 ; @43 + %16.1 = ld.l %34 ; @46 jmp @29(%16.1, %14.1) @29(%30.1, %29.1): ret rax/%29.1, rdx/%30.1 From f35366a9f65e61d4bbf0bf1d73f2ddd5b06a04b8 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Fri, 28 Nov 2025 18:48:49 -0500 Subject: [PATCH 59/62] More compound type tests --- src/test/test_type.ml | 75 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/src/test/test_type.ml b/src/test/test_type.ml index a2e230ba..93c1c75d 100644 --- a/src/test/test_type.ml +++ b/src/test/test_type.ml @@ -47,6 +47,45 @@ let c9 : Type.compound = `compound ("c9", None, [ `elt (`i32, 1); ]) +let c10 : Type.compound = `compound ("c10", None, [ + `name ("c1", 2); + ]) + +let c11 : Type.compound = `compound ("c11", None, [ + `elt (`i16, 3); + `elt (`i32, 1); + ]) + +let c12 : Type.compound = `compound ("c12", Some 8, [ + `elt (`i32, 1); + ]) + +let c13 : Type.compound = `compound ("c13", None, [ + `elt (`i8, 3); + ]) + +let c14 : Type.compound = `compound ("c14", Some 8, [ + `elt (`i16, 1); + ]) + +let c15 : Type.compound = `compound ("c15", Some 8, [ + `elt (`i8, 1); + `elt (`i16, 1); + `elt (`i32, 1); + `elt (`i8, 1); + ]) + +let opaque4 : Type.compound = `opaque ("opaque4", 4, 4) + +let c16 : Type.compound = `compound ("c16", Some 4, [ + `name ("opaque4", 1); + `name ("opaque4", 1); + ]) + +let c1arr : Type.compound = `compound ("c1arr", None, [ + `name ("c1", 3); + ]) + let sexp = Fn.compose Type.Layout.t_of_sexp Sexp.of_string let l1 = sexp "((align 4) (data (i32 i32 i32 i32 i8 i8 (pad 2))))" @@ -58,6 +97,15 @@ let l6 = sexp "((align 8) (data ()))" let l7 = sexp "((align 8) (data (i32 (pad 4) i32 (pad 4))))" let l8 = sexp "((align 1) (data ()))" let l9 = sexp "((align 4) (data (f32 i8 i8 (pad 2) i32)))" +let l10 = sexp "((align 4) (data (i32 i32 i32 i32 i8 i8 (pad 2) i32 i32 i32 i32 i8 i8 (pad 2))))" +let l11 = sexp "((align 4) (data (i16 i16 i16 (pad 2) i32)))" +let l12 = sexp "((align 8) (data (i32 (pad 4))))" +let l13 = sexp "((align 1) (data (i8 i8 i8)))" +let l14 = sexp "((align 8) (data (i16 (pad 6))))" +let l15 = sexp "((align 8) (data (i8 (pad 1) i16 i32 i8 (pad 7))))" +let lopaque4 = sexp "((align 4) (data ((opaque 4))))" +let l16 = sexp "((align 4) (data ((opaque 8))))" +let l1arr = sexp "((align 4) (data (i32 i32 i32 i32 i8 i8 (pad 2) i32 i32 i32 i32 i8 i8 (pad 2) i32 i32 i32 i32 i8 i8 (pad 2))))" let gamma = function | "c1" -> l1 @@ -69,6 +117,15 @@ let gamma = function | "c7" -> l7 | "c8" -> l8 | "c9" -> l9 + | "c10" -> l10 + | "c11" -> l11 + | "c12" -> l12 + | "c13" -> l13 + | "c14" -> l14 + | "c15" -> l15 + | "opaque4" -> lopaque4 + | "c16" -> l16 + | "c1arr" -> l1arr | s -> failwithf "gamma: %s is undefined" s () let _sexp_of_layout l = Sexp.List (List.map l ~f:Type.sexp_of_datum) @@ -97,6 +154,15 @@ let test_c6 _ = test_sizeof_compound c6 ~expected:0 let test_c7 _ = test_sizeof_compound c7 ~expected:16 let test_c8 _ = test_sizeof_compound c8 ~expected:0 let test_c9 _ = test_sizeof_compound c9 ~expected:12 +let test_c10 _ = test_sizeof_compound c10 ~expected:40 +let test_c11 _ = test_sizeof_compound c11 ~expected:12 +let test_c12 _ = test_sizeof_compound c12 ~expected:8 +let test_c13 _ = test_sizeof_compound c13 ~expected:3 +let test_c14 _ = test_sizeof_compound c14 ~expected:8 +let test_c15 _ = test_sizeof_compound c15 ~expected:16 +let test_opaque4 _ = test_sizeof_compound opaque4 ~expected:4 +let test_c16 _ = test_sizeof_compound c16 ~expected:8 +let test_c1arr _ = test_sizeof_compound c1arr ~expected:60 let suite = "Test types" >::: [ "Test sizeof compound 1" >:: test_c1; @@ -108,6 +174,15 @@ let suite = "Test types" >::: [ "Test sizeof compound 7" >:: test_c7; "Test sizeof compound 8" >:: test_c8; "Test sizeof compound 9" >:: test_c9; + "Test sizeof compound 10" >:: test_c10; + "Test sizeof compound 11" >:: test_c11; + "Test sizeof compound 12" >:: test_c12; + "Test sizeof compound 13" >:: test_c13; + "Test sizeof compound 14" >:: test_c14; + "Test sizeof compound 15" >:: test_c15; + "Test sizeof compound opaque4" >:: test_opaque4; + "Test sizeof compound 16" >:: test_c16; + "Test sizeof compound 1 (array)" >:: test_c1arr; ] let () = run_test_tt_main suite From 1e38a3721ddeadff4870683a120ca1fd63055f7b Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sat, 29 Nov 2025 10:20:20 -0500 Subject: [PATCH 60/62] Tweak printer for SROA accesses --- src/lib/passes/sroa/sroa_impl.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/passes/sroa/sroa_impl.ml b/src/lib/passes/sroa/sroa_impl.ml index a0178239..45820419 100644 --- a/src/lib/passes/sroa/sroa_impl.ml +++ b/src/lib/passes/sroa/sroa_impl.ml @@ -109,7 +109,7 @@ end = struct let pp ppf a = let neg = Int64.is_negative a.off in let pre, off = if neg then '-', Int64.neg a.off else '+', a.off in - Format.fprintf ppf "(%a %a.%a %c0x%Lx)" + Format.fprintf ppf "%a[%a.%a,%c0x%Lx]" Label.pp (Insn.label a.insn) pp_load_or_store a.ldst Type.pp_basic a.ty From 9b3c3f7ef4759b18588768b59e174af6c85b9288 Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sat, 29 Nov 2025 10:21:08 -0500 Subject: [PATCH 61/62] Compare egraph cost unsigned, debug print function name --- src/lib/egraph/egraph.ml | 16 ++++++------ src/lib/egraph/extractor/extractor_core.ml | 29 +++++++++++++++------- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/src/lib/egraph/egraph.ml b/src/lib/egraph/egraph.ml index d2facdbb..db0351f2 100644 --- a/src/lib/egraph/egraph.ml +++ b/src/lib/egraph/egraph.ml @@ -30,7 +30,7 @@ let check_ssa fn = if Dict.mem (Func.dict fn) Tags.ssa then Ok () else - Input.E.failf "Expected SSA form for function %s" + Input.E.failf "Expected SSA form for function $%s" (Func.name fn) () let debug_dump t = @@ -40,8 +40,8 @@ let debug_dump t = (Iset.to_list s |> List.to_string ~f:Id.to_string) in let pp_sep ppf () = Format.fprintf ppf "\n" in - m "%s: lmoved:\n%a%!" - __FUNCTION__ + m "%s: $%s lmoved:\n%a%!" + __FUNCTION__ (Func.name t.input.fn) (Format.pp_print_list ~pp_sep pp_lmoved) (Hashtbl.to_alist t.lmoved)); Logs.debug (fun m -> @@ -50,8 +50,8 @@ let debug_dump t = (Lset.to_list s |> List.to_string ~f:Label.to_string) in let pp_sep ppf () = Format.fprintf ppf "\n" in - m "%s: imoved:\n%a%!" - __FUNCTION__ + m "%s: $%s imoved:\n%a%!" + __FUNCTION__ (Func.name t.input.fn) (Format.pp_print_list ~pp_sep pp_lmoved) (Vec.to_sequence_mutable t.imoved |> Seq.mapi ~f:Tuple2.create |> @@ -63,8 +63,8 @@ let debug_dump t = ~none:(fun ppf () -> Format.fprintf ppf "") Label.pp) (Uopt.to_option l) in let pp_sep ppf () = Format.fprintf ppf "\n" in - m "%s: ilbl:\n%a%!" - __FUNCTION__ + m "%s: $%s ilbl:\n%a%!" + __FUNCTION__ (Func.name t.input.fn) (Format.pp_print_list ~pp_sep pp_ilbl) (Vec.to_sequence_mutable t.ilbl |> Seq.mapi ~f:Tuple2.create |> @@ -76,6 +76,6 @@ let run ?(depth_limit = 6) ?(match_limit = 20) fn tenv rules = let*? input = Input.init fn tenv in let t = init input depth_limit match_limit rules in let*? () = Builder.run t in - let ex = Extractor.init t in debug_dump t; + let ex = Extractor.init t in Extractor.cfg ex diff --git a/src/lib/egraph/extractor/extractor_core.ml b/src/lib/egraph/extractor/extractor_core.ml index 13c84d03..f9adb990 100644 --- a/src/lib/egraph/extractor/extractor_core.ml +++ b/src/lib/egraph/extractor/extractor_core.ml @@ -39,28 +39,39 @@ module Cost : sig val add : t -> t -> t val opc : t -> Int63.t val depth : t -> Int63.t + val pp : Format.formatter -> t -> unit end = struct - include Int63 + open Int63 + + type nonrec t = t + + (* We want an unsigned comparison *) + let compare_u a b = compare (a lxor min_value) (b lxor min_value) [@@inline] + let (<) a b = Int.(compare_u a b < 0) [@@inline] let depth_bits = 12 let depth_mask = pred (one lsl depth_bits) let opc_mask = lnot depth_mask - let opc c = c lsr depth_bits - let depth c = c land depth_mask - let create o d = (o lsl depth_bits) lor (d land depth_mask) - let pure o = of_int o lsl depth_bits + let opc c = c lsr depth_bits [@@inline] + let depth c = c land depth_mask [@@inline] + let create o d = (o lsl depth_bits) lor (d land depth_mask) [@@inline] + let pure o = of_int o lsl depth_bits [@@inline] (* Make sure the increment doesn't wrap around. *) let incr c = let d = depth c in (c land opc_mask) lor (if d = depth_mask then d else succ d) + [@@inline] let add x y = let o = opc x + opc y in let d = max (depth x) (depth y) in create o d + [@@inline] + + let pp = pp end type cost = Cost.t @@ -147,11 +158,11 @@ let debug_dump t = Logs.debug (fun m -> let pp ppf (cid, (c, n)) = Format.fprintf ppf - " %d:\n cost:\n depth: %a\n opc: %a\n node: %a%!" - cid Int63.pp (Cost.depth c) Int63.pp (Cost.opc c) + " %d:\n cost: %a\n depth: %a\n opc: %a\n node: %a%!" + cid Cost.pp c Int63.pp (Cost.depth c) Int63.pp (Cost.opc c) (Enode.pp ~node:(node t.eg)) n in - m "%s: cost table:\n%a" - __FUNCTION__ + m "%s: $%s cost table:\n%a" + __FUNCTION__ (Func.name t.eg.input.fn) (Format.pp_print_list pp ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n")) (Hashtbl.to_alist t.table)) From a07ac1c5f1ee1d15d13a637538f1cadb20df4c6f Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Sat, 29 Nov 2025 11:09:54 -0500 Subject: [PATCH 62/62] Adds a `Bitset` abstraction --- src/lib/bitset.ml | 45 +++++++++ src/lib/bitset.mli | 4 + src/lib/bitset_intf.ml | 59 +++++++++++ src/lib/egraph/egraph.ml | 2 +- src/lib/egraph/egraph_common.ml | 6 +- src/lib/egraph/extractor/extractor_cfg.ml | 12 +-- src/lib/egraph/extractor/extractor_core.ml | 10 +- src/lib/regalloc/regalloc_irc.ml | 12 +-- src/test/dune | 9 ++ src/test/test_bitset.ml | 112 +++++++++++++++++++++ 10 files changed, 250 insertions(+), 21 deletions(-) create mode 100644 src/lib/bitset.ml create mode 100644 src/lib/bitset.mli create mode 100644 src/lib/bitset_intf.ml create mode 100644 src/test/test_bitset.ml diff --git a/src/lib/bitset.ml b/src/lib/bitset.ml new file mode 100644 index 00000000..6fb43afc --- /dev/null +++ b/src/lib/bitset.ml @@ -0,0 +1,45 @@ +open Core +open Regular.Std +open Bitset_intf + +module Make(K : Key) = struct + type t = Z.t [@@deriving compare, equal] + + let sexp_of_t t = Sexp.Atom (Z.to_string t) + + let t_of_sexp = function + | Sexp.Atom s -> Z.of_string s + | x -> + let exn = Failure "Bitset.t_of_sexp: atom needed" in + raise @@ Sexplib.Conv.Of_sexp_error (exn, x) + + let empty = Z.zero + let is_empty t = Z.equal t empty [@@inline] + let singleton k = Z.(one lsl K.to_int k) [@@inline] + let union = Z.logor + let inter = Z.logand + let set t k = union t @@ singleton k [@@inline] + let clear t k = inter t @@ Z.lognot @@ singleton k [@@inline] + let mem t k = Z.testbit t @@ K.to_int k [@@inline] + let init n = Z.(pred (one lsl n)) [@@inline] + + let min_elt_exn t = + if is_empty t then + invalid_arg "Bitset.min_elt_exn: empty set"; + K.of_int @@ Z.trailing_zeros t + + let min_elt t = if is_empty t then None + else Some (K.of_int @@ Z.trailing_zeros t) + + let enum = + let open Seq.Generator in + let pop_min t = Z.(t land pred t) in + let rec go t = + if is_empty t then return () else + let i = min_elt_exn t in + yield i >>= fun () -> + go (pop_min t) in + fun t -> run @@ go t +end + +module Id = Make(Int) diff --git a/src/lib/bitset.mli b/src/lib/bitset.mli new file mode 100644 index 00000000..4b8cec7c --- /dev/null +++ b/src/lib/bitset.mli @@ -0,0 +1,4 @@ +open Bitset_intf + +module Make(K : Key) : S with type key := K.t +module Id : S with type key := int diff --git a/src/lib/bitset_intf.ml b/src/lib/bitset_intf.ml new file mode 100644 index 00000000..08d16fab --- /dev/null +++ b/src/lib/bitset_intf.ml @@ -0,0 +1,59 @@ +open Regular.Std + +module type Key = sig + (** The key. *) + type t + + val to_int : t -> int + val of_int : int -> t +end + +module type S = sig + (** The key for indexing into the set. *) + type key + + (** The bitset. *) + type t = private Z.t [@@deriving compare, equal, sexp] + + (** The empty set. *) + val empty : t + + (** Returns [true] if the set is empty. *) + val is_empty : t -> bool + + (** Returns the singleton set of a key. *) + val singleton : key -> t + + (** Set union. *) + val union : t -> t -> t + + (** Set intersection. *) + val inter : t -> t -> t + + (** Add an element to the set. *) + val set : t -> key -> t + + (** Remove an element from the set. *) + val clear : t -> key -> t + + (** Returns [true] if the element is in the set. *) + val mem : t -> key -> bool + + (** Constructs the set where the first [n] elements + are set. *) + val init : int -> t + + (** Returns the least element of the set. + + @raise Invalid_argument if the set is empty + *) + val min_elt_exn : t -> key + + (** Same as [min_elt_exn], but returns [None] if + the set is empty. *) + val min_elt : t -> key option + + (** Produces the sequence of elements in the set, + from lowest to highest. *) + val enum : t -> key seq +end diff --git a/src/lib/egraph/egraph.ml b/src/lib/egraph/egraph.ml index db0351f2..10dc6589 100644 --- a/src/lib/egraph/egraph.ml +++ b/src/lib/egraph/egraph.ml @@ -18,7 +18,7 @@ let init input depth_limit match_limit rules = { typs = Vec.create (); lmoved = Label.Table.create (); imoved = Vec.create (); - pinned = Z.zero; + pinned = Bitset.Id.empty; ilbl = Vec.create (); lval = Label.Table.create (); depth_limit; diff --git a/src/lib/egraph/egraph_common.ml b/src/lib/egraph/egraph_common.ml index d3fc60df..539c1270 100644 --- a/src/lib/egraph/egraph_common.ml +++ b/src/lib/egraph/egraph_common.ml @@ -107,7 +107,7 @@ type t = { memo : (enode, id) Hashtbl.t; (* The hash-cons for optimized terms. *) lmoved : Iset.t Label.Table.t; (* Set of IDs that were moved to a given label. *) imoved : Lset.t Vec.t; (* Set of labels that were moved for a given ID. *) - mutable pinned : Z.t; (* IDs that should not be rescheduled. *) + mutable pinned : Bitset.Id.t; (* IDs that should not be rescheduled. *) ilbl : Label.t Uopt.t Vec.t; (* Maps IDs to labels. *) lval : id Label.Table.t; (* Maps labels to IDs. *) depth_limit : int; (* Maximum rewrite depth. *) @@ -140,8 +140,8 @@ let add_moved t id = function let init = Vec.get_exn t.imoved id in Vec.set_exn t.imoved id @@ List.fold ls ~init ~f:Lset.add -let set_pinned t id = t.pinned <- Z.(t.pinned lor (one lsl id)) -let is_pinned t id = Z.testbit t.pinned id +let set_pinned t id = t.pinned <- Bitset.Id.set t.pinned id +let is_pinned t id = Bitset.Id.mem t.pinned id let typeof_var t x = Typecheck.Env.typeof_var t.input.fn x t.input.tenv |> Or_error.ok diff --git a/src/lib/egraph/extractor/extractor_cfg.ml b/src/lib/egraph/extractor/extractor_cfg.ml index 9d6e0926..2159b57a 100644 --- a/src/lib/egraph/extractor/extractor_cfg.ml +++ b/src/lib/egraph/extractor/extractor_cfg.ml @@ -22,18 +22,18 @@ let empty_scope : scope = Id.Tree.empty type placed = { seq : Label.t Ftree.t; - ids : Z.t; + ids : Bitset.Id.t; } let create_placed id l = { seq = Ftree.singleton l; - ids = Z.(one lsl id); + ids = Bitset.Id.singleton id; } let add_placed p id l = - assert (not @@ Z.testbit p.ids id); { + assert (not @@ Bitset.Id.mem p.ids id); { seq = Ftree.snoc p.seq l; - ids = Z.(p.ids lor (one lsl id)); + ids = Bitset.Id.set p.ids id; } type env = { @@ -463,7 +463,7 @@ module Hoisting = struct end let should_skip t l id cid = - Z.testbit t.impure cid || + Bitset.Id.mem t.impure cid || is_partial_redundancy t l id cid (* If any nodes got moved up to this label, then we should check @@ -501,7 +501,7 @@ let reify t env l = | Some id -> match extract t id with | None -> extract_fail l @@ Common.find t.eg id | Some (E (Id {canon; _}, op, args) as e) - when not @@ Z.testbit t.impure canon -> + when not @@ Bitset.Id.mem t.impure canon -> (* There may be an opportunity to "sink" this instruction, which is the dual of the "hoisting" optimization below. Since this is a pure operation, we can wait until it is diff --git a/src/lib/egraph/extractor/extractor_core.ml b/src/lib/egraph/extractor/extractor_core.ml index f9adb990..596cbf76 100644 --- a/src/lib/egraph/extractor/extractor_core.ml +++ b/src/lib/egraph/extractor/extractor_core.ml @@ -80,7 +80,7 @@ type t = { eg : egraph; table : (cost * enode) Id.Table.t; memo : ext Id.Table.t; - mutable impure : Z.t; + mutable impure : Bitset.Id.t; } let rec pp_ext ppf = function @@ -172,7 +172,7 @@ let init eg = eg; table = Id.Table.create (); memo = Id.Table.create (); - impure = Z.zero; + impure = Bitset.Id.empty; } in Saturation.go t; debug_dump t; @@ -211,15 +211,15 @@ let rec must_remain_fixed op args = match (op : Enode.op) with | _ -> false let prov t cid id op args = - if must_remain_fixed op args then begin - t.impure <- Z.(t.impure lor (one lsl cid)); + if must_remain_fixed op args then + let () = t.impure <- Bitset.Id.set t.impure cid in match labelof t.eg cid with | Some l -> Label l | None when id = cid -> Id {canon = cid; real = id} | None -> match labelof t.eg id with | None -> Id {canon = cid; real = id} | Some l -> Label l - end else Id {canon = cid; real = id} + else Id {canon = cid; real = id} module O = Monad.Option diff --git a/src/lib/regalloc/regalloc_irc.ml b/src/lib/regalloc/regalloc_irc.ml index dbcbab94..641fddcd 100644 --- a/src/lib/regalloc/regalloc_irc.ml +++ b/src/lib/regalloc/regalloc_irc.ml @@ -498,7 +498,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct Seq.filter ~f:(fun w -> Rv.is_reg w || Hash_set.mem t.colored w) |> (* okColors := okColors \ {color[GetAlias(w)]} *) Seq.filter_map ~f:(color t) |> - Seq.iter ~f:(fun c -> cs := Z.(!cs land ~!(one lsl c))) + Seq.iter ~f:(fun c -> cs := Bitset.Id.clear !cs c) let assign_colors t = (* while SelectStack is not empty @@ -507,19 +507,19 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct assert (can_be_colored t n); (* okColors := {0,...,K-1} *) let k = Regs.node_k n in - let cs = ref Z.(pred (one lsl k)) in + let cs = ref @@ Bitset.Id.init k in eliminate_colors t n cs; (* if okColors = {} then *) - if Z.(equal !cs zero) then + match Bitset.Id.min_elt !cs with + | None -> (* spilledNodes := spilledNodes U {n} *) t.spilled <- Set.add t.spilled n - else begin + | Some c -> (* coloredNodes := coloredNodes U {n} *) Hash_set.add t.colored n; (* let c \in okColors color[n] := c *) - set_color t n @@ Z.trailing_zeros !cs - end); + set_color t n c); (* forall n \in coalescedNodes *) Hash_set.iter t.coalesced ~f:(fun n -> (* color[n] := color[GetAlias(n)] *) diff --git a/src/test/dune b/src/test/dune index b69b7d54..a2d5bace 100644 --- a/src/test/dune +++ b/src/test/dune @@ -23,3 +23,12 @@ (inline_tests) (preprocess (pps ppx_jane))) + +(library + (name test_bitset) + (modules test_bitset) + (libraries cgen) + (ocamlopt_flags -O2) + (inline_tests) + (preprocess + (pps ppx_jane))) diff --git a/src/test/test_bitset.ml b/src/test/test_bitset.ml new file mode 100644 index 00000000..a7121fc3 --- /dev/null +++ b/src/test/test_bitset.ml @@ -0,0 +1,112 @@ +open Core +open Cgen + +module Seq = Sequence +module B = Bitset.Id +module Q = Quickcheck +module G = Q.Generator +module S = Q.Shrinker +module O = Q.Observer +module T = Base_quickcheck.Test + +(* Keys must be positive, but if we include really large ones + in our tests then we might OOM. *) +let key_ok i = i >= 0 && i <= 2000 +let gen_key = Int.quickcheck_generator |> G.filter ~f:key_ok + +let gen_set = + G.(list (tuple2 gen_key bool)) |> G.map ~f:(fun bits -> + List.fold bits ~init:B.empty ~f:(fun acc (k, on) -> + if on then B.set acc k else acc)) + +let shr_set = S.create @@ fun s -> + B.enum s |> Seq.map ~f:(B.clear s) + +let obs_set = O.unmap [%observer : int list] + ~f:(Fn.compose Seq.to_list B.enum) + +module Single = struct + type t = B.t [@@deriving sexp] + let quickcheck_generator = gen_set + let quickcheck_observer = obs_set + let quickcheck_shrinker = shr_set +end + +module Pair = struct + type t = B.t * B.t [@@deriving sexp] + let quickcheck_generator = G.tuple2 gen_set gen_set + let quickcheck_observer = O.tuple2 obs_set obs_set + let quickcheck_shrinker = S.tuple2 shr_set shr_set +end + +module Triple = struct + type t = B.t * B.t * B.t [@@deriving sexp] + let quickcheck_generator = G.tuple3 gen_set gen_set gen_set + let quickcheck_observer = O.tuple3 obs_set obs_set obs_set + let quickcheck_shrinker = S.tuple3 shr_set shr_set shr_set +end + +module Key = struct + type t = int [@@deriving sexp] + let quickcheck_generator = gen_key + let quickcheck_observer = [%observer : int] + let quickcheck_shrinker = S.filter Int.quickcheck_shrinker ~f:key_ok +end + +module With_key = struct + type t = B.t * int [@@deriving sexp] + let quickcheck_generator = G.tuple2 gen_set gen_key + let quickcheck_observer = O.tuple2 obs_set [%observer : int] + let quickcheck_shrinker = S.tuple2 shr_set [%shrinker : int] +end + +let%test_unit "union is commutative" = + T.run_exn (module Pair) ~f:(fun (a, b) -> + [%test_eq : B.t] (B.union a b) (B.union b a)) + +let%test_unit "union is associative" = + T.run_exn (module Triple) ~f:(fun (a, b, c) -> + [%test_eq : B.t] + (B.union a (B.union b c)) + (B.union (B.union a b) c)) + +let%test_unit "inter is commutative" = + T.run_exn (module Pair) ~f:(fun (a, b) -> + [%test_eq : B.t] (B.inter a b) (B.inter b a)) + +let%test_unit "inter is associative" = + T.run_exn (module Triple) ~f:(fun (a, b, c) -> + [%test_eq : B.t] + (B.inter a (B.inter b c)) + (B.inter (B.inter a b) c)) + +let%test_unit "set / mem consistency" = + T.run_exn (module With_key) ~f:(fun (s, k) -> + let s' = B.set s k in + assert (B.mem s' k)) + +let%test_unit "clear / mem consistency" = + T.run_exn (module With_key) ~f:(fun (s, k) -> + let s' = B.clear s k in + assert (not (B.mem s' k))) + +let%test_unit "singleton has exactly one element" = + T.run_exn (module Key) ~f:(fun k -> + [%test_result : int list] + (Seq.to_list (B.enum (B.singleton k))) + ~expect:[k]) + +let%test_unit "init n = [0..n-1]" = + T.run_exn (module Key) ~f:(fun n -> + [%test_result : int list] + (Seq.to_list (B.enum (B.init n))) + ~expect:(List.init n ~f:Fn.id)) + +let%test_unit "min_elt matches smallest element in enum" = + T.run_exn (module Single) ~f:(fun s -> + match B.min_elt s with + | None -> + [%test_eq : int list] (Seq.to_list (B.enum s)) [] + | Some k -> + let hd = List.hd_exn (Seq.to_list (B.enum s)) in + [%test_eq : int] k hd)