diff --git a/.clang-format b/.clang-format index 03ce0b43..e3263f0e 100644 --- a/.clang-format +++ b/.clang-format @@ -1,149 +1,40 @@ ---- -Language: Cpp -# BasedOnStyle: LLVM -AccessModifierOffset: -2 -AlignAfterOpenBracket: Align -AlignConsecutiveMacros: false -AlignConsecutiveAssignments: false -AlignConsecutiveBitFields: false -AlignConsecutiveDeclarations: false -AlignEscapedNewlines: Right -AlignOperands: Align -AlignTrailingComments: true -AllowAllArgumentsOnNextLine: true -AllowAllConstructorInitializersOnNextLine: true -AllowAllParametersOfDeclarationOnNextLine: true -AllowShortEnumsOnASingleLine: true -AllowShortBlocksOnASingleLine: Never -AllowShortCaseLabelsOnASingleLine: false -AllowShortFunctionsOnASingleLine: All -AllowShortLambdasOnASingleLine: All -AllowShortIfStatementsOnASingleLine: Never -AllowShortLoopsOnASingleLine: false -AlwaysBreakAfterDefinitionReturnType: None -AlwaysBreakAfterReturnType: None -AlwaysBreakBeforeMultilineStrings: false -AlwaysBreakTemplateDeclarations: MultiLine -BinPackArguments: true -BinPackParameters: true +BasedOnStyle: LLVM +IndentWidth: 2 +ContinuationIndentWidth: 2 +UseTab: Never +TabWidth: 2 + +# Function definitions like: +# static int +# name(...) +# { BraceWrapping: - AfterCaseLabel: false - AfterClass: false - AfterControlStatement: Never - AfterEnum: false - AfterFunction: false - AfterNamespace: false - AfterObjCDeclaration: false - AfterStruct: false - AfterUnion: false - AfterExternBlock: false - BeforeCatch: false - BeforeElse: false - BeforeLambdaBody: false - BeforeWhile: false - IndentBraces: false - SplitEmptyFunction: true - SplitEmptyRecord: true - SplitEmptyNamespace: true -BreakBeforeBinaryOperators: None + AfterFunction: true + AfterStruct: true + AfterClass: true + AfterNamespace: true + AfterExternBlock: true + BeforeCatch: false + BeforeElse: false + +# K&R control-flow braces (same-line) BreakBeforeBraces: Attach -BreakBeforeInheritanceComma: false -BreakInheritanceList: BeforeColon -BreakBeforeTernaryOperators: true -BreakConstructorInitializersBeforeComma: false -BreakConstructorInitializers: BeforeColon -BreakAfterJavaFieldAnnotations: false -BreakStringLiterals: true -ColumnLimit: 80 -CommentPragmas: '^ IWYU pragma:' -CompactNamespaces: false -ConstructorInitializerAllOnOneLineOrOnePerLine: false -ConstructorInitializerIndentWidth: 4 -ContinuationIndentWidth: 4 -Cpp11BracedListStyle: true -DeriveLineEnding: true -DerivePointerAlignment: false -DisableFormat: false -ExperimentalAutoDetectBinPacking: false -FixNamespaceComments: true -ForEachMacros: - - foreach - - Q_FOREACH - - BOOST_FOREACH -IncludeBlocks: Preserve -IncludeCategories: - - Regex: '^"(llvm|llvm-c|clang|clang-c)/' - Priority: 2 - SortPriority: 0 - - Regex: '^(<|"(gtest|gmock|isl|json)/)' - Priority: 3 - SortPriority: 0 - - Regex: '.*' - Priority: 1 - SortPriority: 0 -IncludeIsMainRegex: '(Test)?$' -IncludeIsMainSourceRegex: '' -IndentCaseLabels: false -IndentCaseBlocks: false -IndentGotoLabels: true -IndentPPDirectives: None -IndentExternBlock: AfterExternBlock -IndentWidth: 2 -IndentWrappedFunctionNames: false -InsertTrailingCommas: None -JavaScriptQuotes: Leave -JavaScriptWrapImports: true -KeepEmptyLinesAtTheStartOfBlocks: true -MacroBlockBegin: '' -MacroBlockEnd: '' -MaxEmptyLinesToKeep: 1 -NamespaceIndentation: None -ObjCBinPackProtocolList: Auto -ObjCBlockIndentWidth: 2 -ObjCBreakBeforeNestedBlockParam: true -ObjCSpaceAfterProperty: false -ObjCSpaceBeforeProtocolList: true -PenaltyBreakAssignment: 2 -PenaltyBreakBeforeFirstCallParameter: 19 -PenaltyBreakComment: 300 -PenaltyBreakFirstLessLess: 120 -PenaltyBreakString: 1000 -PenaltyBreakTemplateDeclaration: 10 -PenaltyExcessCharacter: 1000000 -PenaltyReturnTypeOnItsOwnLine: 60 -PointerAlignment: Right -ReflowComments: true -SortIncludes: true -SortUsingDeclarations: true + +# Spacing rules +SpaceBeforeParens: ControlStatements SpaceAfterCStyleCast: false -SpaceAfterLogicalNot: false -SpaceAfterTemplateKeyword: true -SpaceBeforeAssignmentOperators: true SpaceBeforeCpp11BracedList: false -SpaceBeforeCtorInitializerColon: true -SpaceBeforeInheritanceColon: true -SpaceBeforeParens: ControlStatements -SpaceBeforeRangeBasedForLoopColon: true -SpaceInEmptyBlock: false -SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 1 -SpacesInAngles: false -SpacesInConditionalStatement: false -SpacesInContainerLiterals: true -SpacesInCStyleCastParentheses: false -SpacesInParentheses: false -SpacesInSquareBrackets: false -SpaceBeforeSquareBrackets: false -Standard: Latest -StatementMacros: - - Q_UNUSED - - QT_REQUIRE_VERSION -TabWidth: 8 -UseCRLF: false -UseTab: Never -WhitespaceSensitiveMacros: - - STRINGIZE - - PP_STRINGIZE - - BOOST_PP_STRINGIZE -... +SpaceBeforeAssignmentOperators: true + +# Pointer alignment +PointerAlignment: Right +# Do not wrap long lines unless needed +ColumnLimit: 80 + +# Do not cram short functions onto one line +AllowShortFunctionsOnASingleLine: None + +# Keep return type on its own line if desired +AlwaysBreakAfterDefinitionReturnType: true +AlwaysBreakAfterReturnType: None diff --git a/Makefile b/Makefile index 34afe147..180bbd9d 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,7 @@ SRC := ./src/ -.PHONY: build clean install uninstall test doc deps indent status-clean check-style +.PHONY: build clean install uninstall test doc deps +.PHONY: ocaml-indent clang-indent indent status-clean check-style all: install @@ -25,9 +26,14 @@ doc: deps: $(MAKE) deps -C $(SRC) -indent: +ocaml-indent: sh tools/ocp-indent-all.sh +clang-indent: + sh tools/clang-format-all.sh + +indent: ocaml-indent clang-indent + status-clean: git diff --quiet --exit-code diff --git a/src/Makefile b/src/Makefile index 0708372c..daf688b1 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,5 +1,10 @@ .PHONY: clean install uninstall test doc deps bap-deps +PROFILE = release +ifeq ($(DEBUG),1) + PROFILE = dev +endif + BAP_DEPS = \ bitvec \ bitvec-binprot \ @@ -12,7 +17,7 @@ BAP_DEPS = \ BAP_REPO := git+https://github.com/BinaryAnalysisPlatform/opam-repository\#testing all: - dune build --profile=release + dune build --profile=$(PROFILE) clean: dune clean @@ -24,7 +29,7 @@ uninstall: dune uninstall test: - dune test --profile=release + dune test --profile=$(PROFILE) doc: dune build @doc diff --git a/src/bin/cli.ml b/src/bin/cli.ml index 0c8510b7..cc3f90e1 100644 --- a/src/bin/cli.ml +++ b/src/bin/cli.ml @@ -1,23 +1,6 @@ open Cmdliner -(* Why is this here, when we have `Cgen.Target.find`? - - The targets should be getting declared at the toplevel in their - respective modules, but the evaluation of these modules is not - guaranteed to happen by the time we start parsing command-line - arguments. - - It just so happens that, for the front-end executable, we should - know all of the out-of-the-box targets provided by the library, - so this feels like a tolerable compromise in the design of extending - the supported targets. -*) -let targets = - Core.Map.of_alist_exn (module Core.String) @@ - Core.List.map ~f:(fun t -> Cgen.Target.name t, t) @@ - Cgen.Machine.[ - X86.Amd64_sysv.target; - ] +let () = Cgen.Machine.force_initialization () let bail () = exit Cmd.Exit.ok @@ -100,7 +83,8 @@ let dump_no_comment = let man_targets = `S "TARGET" :: `Pre "Supported target platforms" :: begin - Core.Map.data targets |> + Cgen.Target.enum_targets () |> + Core.Sequence.to_list |> Core.List.map ~f:(fun t -> `P (Format.asprintf "%a" Cgen.Target.pp t)) end @@ -134,7 +118,14 @@ type t = { target : Cgen.Target.t; } -let go f file output dump nc target = +let log_env = Cmd.Env.info "CGEN_LOG" + +let setup_log level = + Logs.set_level level; + Logs.set_reporter @@ Logs_fmt.reporter () + +let go f file output dump nc target log_level = + setup_log log_level; let file = match file with | "" -> Istdin | _ -> Ifile file in @@ -144,12 +135,20 @@ let go f file output dump nc target = let dump = match dump_of_string_opt dump with | None -> fatal "invalid dump option: %s\n%!" dump () | Some d -> d in - let target = match Core.Map.find targets target with + let target = match Cgen.Target.find target with | None -> fatal "invalid target: %s\n%!" target () | Some t -> t in f {file; output; dump; nc; target} -let t f = Term.(const (go f) $ file $ output $ dump $ dump_no_comment $ target) +let t f = + let open Term in + const (go f) $ + file $ + output $ + dump $ + dump_no_comment $ + target $ + Logs_cli.level ~env:log_env () let man = List.concat [ man_dump; diff --git a/src/bin/dune b/src/bin/dune index 879350de..625e4056 100644 --- a/src/bin/dune +++ b/src/bin/dune @@ -2,7 +2,7 @@ (name cgen_main) (public_name cgen) (package cgen) - (libraries cgen cmdliner) + (libraries cgen cmdliner logs.cli logs.fmt) (modules cgen_main cli) (flags -w -58) (ocamlopt_flags -O2) diff --git a/src/cgen.opam b/src/cgen.opam index 06808bff..58624a0e 100644 --- a/src/cgen.opam +++ b/src/cgen.opam @@ -17,7 +17,9 @@ depends: [ "core" {>= "v0.15"} "core_kernel" {>= "v0.15"} "dune" {>= "3.15"} + "fmt" "graphlib" + "logs" "menhir" "monads" "ocamldiff" diff --git a/src/dune-project b/src/dune-project index d54bc4bb..6fa0f2f1 100644 --- a/src/dune-project +++ b/src/dune-project @@ -23,7 +23,9 @@ (core (>= v0.15)) (core_kernel (>= v0.15)) dune + fmt graphlib + logs menhir monads ocamldiff diff --git a/src/lib/allen_interval_algebra.ml b/src/lib/allen_interval_algebra.ml new file mode 100644 index 00000000..a51e53a0 --- /dev/null +++ b/src/lib/allen_interval_algebra.ml @@ -0,0 +1,95 @@ +(** Allen's Interval Algebra. + + {:https://en.wikipedia.org/wiki/Allen%27s_interval_algebra} +*) + +open Core + +type t = + | Before + | Meets + | Overlaps + | Finished_by + | Contains + | Starts + | Equal + | Started_by + | During + | Finishes + | Overlapped_by + | Met_by + | After +[@@deriving compare, equal, sexp] + +let pp ppf t = Format.fprintf ppf "%a" Sexp.pp @@ sexp_of_t t + +(** Returns the converse of the relation. *) +let converse = function + | Before -> After + | After -> Before + | Meets -> Met_by + | Met_by -> Meets + | Overlaps -> Overlapped_by + | Overlapped_by -> Overlaps + | Starts -> Started_by + | Started_by -> Starts + | Finishes -> Finished_by + | Finished_by -> Finishes + | During -> Contains + | Contains -> During + | Equal -> Equal + +(** The input interval type. *) +module type S = sig + (** A point in the interval. *) + type point + + (** An inclusive interval. + + Invariant: [lo t <= hi t] + *) + type t + + (** The lower-bound of the interval. *) + val lo : t -> point + + (** The upper-bound of the interval. *) + val hi : t -> point + + include Base.Comparisons.Infix with type t := point +end + +(** Create the relations for an interval type. *) +module Make(M : S) = struct + open M + + let before a b = hi a < lo b [@@inline] + let meets a b = hi a = lo b [@@inline] + let overlaps a b = lo a < lo b && hi a > lo b && hi a < hi b [@@inline] + let finished_by a b = lo a < lo b && hi a = hi b [@@inline] + let contains a b = lo a < lo b && hi a > hi b [@@inline] + let starts a b = lo a = lo b && hi a < hi b [@@inline] + let equal a b = lo a = lo b && hi a = hi b [@@inline] + let started_by a b = lo a = lo b && hi a > hi b [@@inline] + let during a b = lo a > lo b && hi a < hi b [@@inline] + let finishes a b = lo a > lo b && hi a = hi b [@@inline] + let overlapped_by a b = lo a > lo b && lo a < hi b && hi a > hi b [@@inline] + let met_by a b = lo a = hi b [@@inline] + let after a b = lo a > hi b [@@inline] + + (** Relates two intervals. *) + let relate a b = + if before a b then Before + else if meets a b then Meets + else if overlaps a b then Overlaps + else if finished_by a b then Finished_by + else if contains a b then Contains + else if starts a b then Starts + else if equal a b then Equal + else if started_by a b then Started_by + else if during a b then During + else if finishes a b then Finishes + else if overlapped_by a b then Overlapped_by + else if met_by a b then Met_by + else After +end diff --git a/src/lib/bitset.ml b/src/lib/bitset.ml new file mode 100644 index 00000000..6fb43afc --- /dev/null +++ b/src/lib/bitset.ml @@ -0,0 +1,45 @@ +open Core +open Regular.Std +open Bitset_intf + +module Make(K : Key) = struct + type t = Z.t [@@deriving compare, equal] + + let sexp_of_t t = Sexp.Atom (Z.to_string t) + + let t_of_sexp = function + | Sexp.Atom s -> Z.of_string s + | x -> + let exn = Failure "Bitset.t_of_sexp: atom needed" in + raise @@ Sexplib.Conv.Of_sexp_error (exn, x) + + let empty = Z.zero + let is_empty t = Z.equal t empty [@@inline] + let singleton k = Z.(one lsl K.to_int k) [@@inline] + let union = Z.logor + let inter = Z.logand + let set t k = union t @@ singleton k [@@inline] + let clear t k = inter t @@ Z.lognot @@ singleton k [@@inline] + let mem t k = Z.testbit t @@ K.to_int k [@@inline] + let init n = Z.(pred (one lsl n)) [@@inline] + + let min_elt_exn t = + if is_empty t then + invalid_arg "Bitset.min_elt_exn: empty set"; + K.of_int @@ Z.trailing_zeros t + + let min_elt t = if is_empty t then None + else Some (K.of_int @@ Z.trailing_zeros t) + + let enum = + let open Seq.Generator in + let pop_min t = Z.(t land pred t) in + let rec go t = + if is_empty t then return () else + let i = min_elt_exn t in + yield i >>= fun () -> + go (pop_min t) in + fun t -> run @@ go t +end + +module Id = Make(Int) diff --git a/src/lib/bitset.mli b/src/lib/bitset.mli new file mode 100644 index 00000000..4b8cec7c --- /dev/null +++ b/src/lib/bitset.mli @@ -0,0 +1,4 @@ +open Bitset_intf + +module Make(K : Key) : S with type key := K.t +module Id : S with type key := int diff --git a/src/lib/bitset_intf.ml b/src/lib/bitset_intf.ml new file mode 100644 index 00000000..08d16fab --- /dev/null +++ b/src/lib/bitset_intf.ml @@ -0,0 +1,59 @@ +open Regular.Std + +module type Key = sig + (** The key. *) + type t + + val to_int : t -> int + val of_int : int -> t +end + +module type S = sig + (** The key for indexing into the set. *) + type key + + (** The bitset. *) + type t = private Z.t [@@deriving compare, equal, sexp] + + (** The empty set. *) + val empty : t + + (** Returns [true] if the set is empty. *) + val is_empty : t -> bool + + (** Returns the singleton set of a key. *) + val singleton : key -> t + + (** Set union. *) + val union : t -> t -> t + + (** Set intersection. *) + val inter : t -> t -> t + + (** Add an element to the set. *) + val set : t -> key -> t + + (** Remove an element from the set. *) + val clear : t -> key -> t + + (** Returns [true] if the element is in the set. *) + val mem : t -> key -> bool + + (** Constructs the set where the first [n] elements + are set. *) + val init : int -> t + + (** Returns the least element of the set. + + @raise Invalid_argument if the set is empty + *) + val min_elt_exn : t -> key + + (** Same as [min_elt_exn], but returns [None] if + the set is empty. *) + val min_elt : t -> key option + + (** Produces the sequence of elements in the set, + from lowest to highest. *) + val enum : t -> key seq +end diff --git a/src/lib/bv_interval.ml b/src/lib/bv_interval.ml index 542cf621..325bee54 100644 --- a/src/lib/bv_interval.ml +++ b/src/lib/bv_interval.ml @@ -347,43 +347,36 @@ let sext t ~size = else create ~lo:(sext t.lo) ~hi:(sext t.hi) ~size let trunc t ~size = - (* This semantics is relaxed. A truncate to the same size should - just be a no-op. *) + (* Same width: no-op. *) if t.size = size then t else if t.size < size then - invalid_arg "trunc: `size` is not strictly less than `t.size`" + invalid_argf + "trunc: `size` %d is not strictly less than `size t` %d" + size t.size () + else if is_empty t then + create_empty ~size + else if is_full t then + create_full ~size + else if is_wrapped_hi t then + (* XXX: this could be more precise *) + create_full ~size else - let module B = (val Bitvec.modular t.size) in - let default un lower_div upper_div = - let lower_div, upper_div = - let lz = bv_clz lower_div t.size in - if t.size - lz > size then - let adj = B.(lower_div land pred (one lsl int Int.(size - 1))) in - B.(lower_div - adj), B.(upper_div - adj) - else lower_div, upper_div in - let w = t.size - bv_clz upper_div t.size in - if w <= size then union (create ~lo:lower_div ~hi:upper_div ~size) un - else if w = size + 1 then - let upper_div = B.(upper_div land (lnot (one lsl int size))) in - if Bv.(upper_div < lower_div) - then union (create ~lo:lower_div ~hi:upper_div ~size) un - else create_full ~size - else create_full ~size in - if is_empty t then create_empty ~size - else if is_full t then create_full ~size - else if is_wrapped_hi t then - let lz = bv_clz t.hi t.size in - if t.size - lz > size - || bv_cto t.hi t.size = size - then create_full ~size - else - let un = create - ~lo:Bv.(max_unsigned_value size) - ~hi:(Bv.extract ~hi:(size - 1) ~lo:0 t.hi) - ~size in - let upper_div = Bv.max_unsigned_value t.size in - if Bv.(t.lo = upper_div) then un else default un t.lo upper_div - else default (create_empty ~size) t.lo t.hi + let lo_big = Bv.to_bigint t.lo in + let hi_big = Bv.to_bigint t.hi in + let modulus_dst = Z.shift_left Z.one size in + let span = Z.sub hi_big lo_big in + if Z.geq span modulus_dst then + create_full ~size + else + let block_lo = Z.div lo_big modulus_dst in + let block_hi_minus1 = Z.div (Z.pred hi_big) modulus_dst in + if Z.equal block_lo block_hi_minus1 then + let lo_mod = Bv.extract ~hi:(size - 1) ~lo:0 t.lo in + let hi_mod = Bv.extract ~hi:(size - 1) ~lo:0 t.hi in + if Bv.(lo_mod = hi_mod) + then create_full ~size + else create ~lo:lo_mod ~hi:hi_mod ~size + else create_full ~size let add t1 t2 = if t1.size <> t2.size then @@ -417,9 +410,9 @@ let sub t1 t2 = else if is_full t1 || is_full t2 then create_full ~size else - let m = Bv.modulus size in - let lo = Bv.(succ ((t1.lo - t2.hi) mod m) mod m) in - let hi = Bv.((t1.hi - t2.lo) mod m) in + let module B = (val Bv.modular size) in + let lo = B.(t1.lo - t2.hi + one) in + let hi = B.(t1.hi - t2.lo) in if Bv.(lo = hi) then create_full ~size else @@ -429,6 +422,14 @@ let sub t1 t2 = then create_full ~size else x +let mul_single t1 t2 = + let size = t1.size in + match single_of t1 with + | Some s when Bv.(s = one) -> Some t2 + | Some s when Bv.(s = (ones mod modulus size)) -> + Some (sub (create_single ~value:Bv.zero ~size) t2) + | _ -> None + let mul t1 t2 = if t1.size <> t2.size then invalid_arg "mul: sizes must be equal" @@ -436,42 +437,46 @@ let mul t1 t2 = let size = t1.size in if is_empty t1 || is_empty t2 then create_empty ~size - else - let min1 = unsigned_min t1 in - let max1 = unsigned_max t1 in - let min2 = unsigned_min t2 in - let max2 = unsigned_max t2 in - let m2 = Bv.modulus (size * 2) in - let result = create - ~lo:Bv.(min1 * min2 mod m2) - ~hi:Bv.(succ (max1 * max2 mod m2) mod m2) - ~size:(size * 2) in - let ur = trunc result ~size in - let m = Bv.modulus size in - if not (is_wrapped_hi ur) - && (Bv.(signed_compare ur.hi zero m) >= 0 || - Bv.(ur.hi = min_signed_value size)) - then ur - else - let min1 = signed_min t1 in - let max1 = signed_max t1 in - let min2 = signed_min t2 in - let max2 = signed_max t2 in - let p = [| - Bv.(min1 * min2 mod m2); - Bv.(min1 * max2 mod m2); - Bv.(max1 * min2 mod m2); - Bv.(max1 * max2 mod m2); - |] in - let compare x y = Bv.signed_compare x y m2 in - let lo = Array.min_elt p ~compare in - let hi = Array.max_elt p ~compare in - let lo, hi = match lo, hi with - | None, _ | _, None -> assert false - | Some lo, Some hi -> (lo, Bv.(succ hi mod m2)) in - let result = create ~lo ~hi ~size:(size * 2) in - let sr = trunc result ~size in - if is_strictly_smaller_than ur sr then ur else sr + else match mul_single t1 t2 with + | Some t -> t + | None -> match mul_single t2 t1 with + | Some t -> t + | None -> + let min1 = unsigned_min t1 in + let max1 = unsigned_max t1 in + let min2 = unsigned_min t2 in + let max2 = unsigned_max t2 in + let m2 = Bv.modulus (size * 2) in + let result = create + ~lo:Bv.(min1 * min2 mod m2) + ~hi:Bv.(succ (max1 * max2 mod m2) mod m2) + ~size:(size * 2) in + let ur = trunc result ~size in + let m = Bv.modulus size in + if not (is_wrapped_hi ur) + && (Bv.(signed_compare ur.hi zero m) >= 0 || + Bv.(ur.hi = min_signed_value size)) + then ur + else + let min1 = signed_min t1 in + let max1 = signed_max t1 in + let min2 = signed_min t2 in + let max2 = signed_max t2 in + let p = [| + Bv.(min1 * min2 mod m2); + Bv.(min1 * max2 mod m2); + Bv.(max1 * min2 mod m2); + Bv.(max1 * max2 mod m2); + |] in + let compare x y = Bv.signed_compare x y m2 in + let lo = Array.min_elt p ~compare in + let hi = Array.max_elt p ~compare in + let lo, hi = match lo, hi with + | None, _ | _, None -> assert false + | Some lo, Some hi -> (lo, Bv.(succ hi mod m2)) in + let result = create ~lo ~hi ~size:(size * 2) in + let sr = trunc result ~size in + if is_strictly_smaller_than ur sr then ur else sr let smax t1 t2 = if t1.size <> t2.size then @@ -848,8 +853,8 @@ let logical_shift_right t1 t2 = let max2 = unsigned_max t2 in let min1 = unsigned_min t1 in let min2 = unsigned_min t2 in - let lo = Bv.(succ ((max1 lsr min2) mod m) mod m) in - let hi = Bv.((min1 lsr max2) mod m) in + let hi = Bv.(succ ((max1 lsr min2) mod m) mod m) in + let lo = Bv.((min1 lsr max2) mod m) in create ~lo ~hi ~size let arithmetic_shift_right t1 t2 = @@ -982,7 +987,7 @@ let ctz_range lo hi size = else if Bv.(lo = zero) then create ~lo:Bv.zero ~hi:(B.int (size + 1)) ~size else - let len = bv_clz B.(lo lxor succ hi) size in + let len = bv_clz B.(lo lxor pred hi) size in let hi = B.int (max (size - len - 1) (bv_ctz lo size) + 1) in create ~size ~lo:Bv.zero ~hi diff --git a/src/lib/dune b/src/lib/dune index 539cebce..4e58fd24 100644 --- a/src/lib/dune +++ b/src/lib/dune @@ -20,6 +20,7 @@ core_kernel.pairing_heap core_kernel.uopt graphlib + logs monads regular zarith) @@ -30,6 +31,8 @@ hashmap hashmap_intf isel_internal + interval_tree + interval_tree_intf last_stores live_intf loops @@ -39,8 +42,11 @@ patricia_tree_intf phi_values resolver_intf + scalars + slot_initialization sm subst_mapper + subst_mapper_abi tags var_internal virtual_lexer diff --git a/src/lib/egraph/egraph.ml b/src/lib/egraph/egraph.ml index 04d27c6c..10dc6589 100644 --- a/src/lib/egraph/egraph.ml +++ b/src/lib/egraph/egraph.ml @@ -1,4 +1,5 @@ open Core +open Regular.Std open Virtual include Egraph_common @@ -17,7 +18,7 @@ let init input depth_limit match_limit rules = { typs = Vec.create (); lmoved = Label.Table.create (); imoved = Vec.create (); - pinned = Z.zero; + pinned = Bitset.Id.empty; ilbl = Vec.create (); lval = Label.Table.create (); depth_limit; @@ -29,14 +30,52 @@ let check_ssa fn = if Dict.mem (Func.dict fn) Tags.ssa then Ok () else - Input.E.failf "Expected SSA form for function %s" + Input.E.failf "Expected SSA form for function $%s" (Func.name fn) () +let debug_dump t = + Logs.debug (fun m -> + let pp_lmoved ppf (l, s) = + Format.fprintf ppf " %a: %s%!" Label.pp l + (Iset.to_list s |> + List.to_string ~f:Id.to_string) in + let pp_sep ppf () = Format.fprintf ppf "\n" in + m "%s: $%s lmoved:\n%a%!" + __FUNCTION__ (Func.name t.input.fn) + (Format.pp_print_list ~pp_sep pp_lmoved) + (Hashtbl.to_alist t.lmoved)); + Logs.debug (fun m -> + let pp_lmoved ppf (id, s) = + Format.fprintf ppf " %d: %s%!" id + (Lset.to_list s |> + List.to_string ~f:Label.to_string) in + let pp_sep ppf () = Format.fprintf ppf "\n" in + m "%s: $%s imoved:\n%a%!" + __FUNCTION__ (Func.name t.input.fn) + (Format.pp_print_list ~pp_sep pp_lmoved) + (Vec.to_sequence_mutable t.imoved |> + Seq.mapi ~f:Tuple2.create |> + Seq.to_list)); + Logs.debug (fun m -> + let pp_ilbl ppf (id, l) = + Format.fprintf ppf " %d: %a%!" id + (Format.pp_print_option + ~none:(fun ppf () -> Format.fprintf ppf "") + Label.pp) (Uopt.to_option l) in + let pp_sep ppf () = Format.fprintf ppf "\n" in + m "%s: $%s ilbl:\n%a%!" + __FUNCTION__ (Func.name t.input.fn) + (Format.pp_print_list ~pp_sep pp_ilbl) + (Vec.to_sequence_mutable t.ilbl |> + Seq.mapi ~f:Tuple2.create |> + Seq.to_list)) + let run ?(depth_limit = 6) ?(match_limit = 20) fn tenv rules = let open Context.Syntax in let*? () = check_ssa fn in let*? input = Input.init fn tenv in let t = init input depth_limit match_limit rules in let*? () = Builder.run t in + debug_dump t; let ex = Extractor.init t in Extractor.cfg ex diff --git a/src/lib/egraph/egraph.mli b/src/lib/egraph/egraph.mli index 43775154..5d348352 100644 --- a/src/lib/egraph/egraph.mli +++ b/src/lib/egraph/egraph.mli @@ -55,7 +55,7 @@ type rule type rules (** Compiles the rules. *) -val compile : rule list -> rules +val compile : name:string -> rule list -> rules (** [run fn tenv rules ?depth_limit ?match_limit] constructs an e-graph from a function [fn] and applies the [rules] eagerly to produce a diff --git a/src/lib/egraph/egraph_common.ml b/src/lib/egraph/egraph_common.ml index 89b0a4d8..539c1270 100644 --- a/src/lib/egraph/egraph_common.ml +++ b/src/lib/egraph/egraph_common.ml @@ -107,7 +107,7 @@ type t = { memo : (enode, id) Hashtbl.t; (* The hash-cons for optimized terms. *) lmoved : Iset.t Label.Table.t; (* Set of IDs that were moved to a given label. *) imoved : Lset.t Vec.t; (* Set of labels that were moved for a given ID. *) - mutable pinned : Z.t; (* IDs that should not be rescheduled. *) + mutable pinned : Bitset.Id.t; (* IDs that should not be rescheduled. *) ilbl : Label.t Uopt.t Vec.t; (* Maps IDs to labels. *) lval : id Label.Table.t; (* Maps labels to IDs. *) depth_limit : int; (* Maximum rewrite depth. *) @@ -117,11 +117,11 @@ type t = { type egraph = t -let compile rules = +let compile ~name rules = (* XXX: the previous implementation also reversed the rule order, and now the testsuite relies on that behavior. Maybe the real fix is in changing how the extractor breaks ties. *) - Matcher.compile @@ List.rev_map rules + Matcher.compile ~name @@ List.rev_map rules ~f:(fun r -> r.pre, (r.post, r.subsume)) let find t id = Uf.find t.classes id @@ -140,8 +140,8 @@ let add_moved t id = function let init = Vec.get_exn t.imoved id in Vec.set_exn t.imoved id @@ List.fold ls ~init ~f:Lset.add -let set_pinned t id = t.pinned <- Z.(t.pinned lor (one lsl id)) -let is_pinned t id = Z.testbit t.pinned id +let set_pinned t id = t.pinned <- Bitset.Id.set t.pinned id +let is_pinned t id = Bitset.Id.mem t.pinned id let typeof_var t x = Typecheck.Env.typeof_var t.input.fn x t.input.tenv |> Or_error.ok diff --git a/src/lib/egraph/egraph_input.ml b/src/lib/egraph/egraph_input.ml index 6c479930..20ac64d7 100644 --- a/src/lib/egraph/egraph_input.ml +++ b/src/lib/egraph/egraph_input.ml @@ -12,27 +12,38 @@ module Operands = Set.Make(struct type t = operand [@@deriving compare, sexp] end) -module Phis = Phi_values.Make(struct - type t = Operands.t [@@deriving equal] - let one = Operands.singleton - let join = Set.union - end) +module Phis_lang = struct + module Ctrl = struct + type t = ctrl + let locals = (Phi_values.locals Ctrl.Table.enum :> t -> _) + end + module Blk = Blk + module Func = Func + module Cfg = Cfg +end + +module Phis_domain = struct + type t = Operands.t [@@deriving equal] + let one = Operands.singleton + let join = Set.union +end + +module Phis = Phi_values.Make(Phis_lang)(Phis_domain) (* General information about the function we're translating. *) type t = { - fn : func; (* The function itself. *) - loop : loops; (* Loops analysis. *) - reso : resolver; (* Labels to blocks/insns. *) - cfg : Cfg.t; (* The CFG. *) - dom : Label.t Semi_nca.tree; (* Dominator tree. *) - domd : int Label.Table.t; (* Dominator tree depths. *) - pdom : Label.t Semi_nca.tree; (* Post-dominator tree. *) - rdom : Dominance.t; (* Fine-grained dominance relation. *) - df : Label.t Semi_nca.frontier; (* Dominance frontiers. *) - lst : Last_stores.t; (* Last stores analysis. *) - tenv : Typecheck.env; (* Typing environment. *) - phis : Phis.state; (* Block argument value sets. *) - rpo : Label.t -> int; (* Reverse post-order number. *) + fn : func; (* The function itself. *) + loop : loops; (* Loops analysis. *) + reso : resolver; (* Labels to blocks/insns. *) + cfg : Cfg.t; (* The CFG. *) + dom : Label.t Semi_nca.tree; (* Dominator tree. *) + domd : int Label.Table.t; (* Dominator tree depths. *) + pdom : Label.t Semi_nca.tree; (* Post-dominator tree. *) + rdom : Dominance.t; (* Fine-grained dominance relation. *) + lst : Last_stores.t; (* Last stores analysis. *) + tenv : Typecheck.env; (* Typing environment. *) + phis : Phis.state; (* Block argument value sets. *) + rpo : Label.t -> int; (* Reverse post-order number. *) } let init_dom_relation reso dom = @@ -83,10 +94,9 @@ let init fn tenv = let loop = Loops.analyze ~name:(Func.name fn) cfg in let dom = Semi_nca.compute (module Cfg) cfg Label.pseudoentry in let pdom = Semi_nca.compute (module Cfg) cfg Label.pseudoexit ~rev:true in - let df = Semi_nca.frontier (module Cfg) cfg dom in let rdom = init_dom_relation reso dom in let lst = init_last_stores cfg reso in let domd = dom_depths dom in let phis = Phis.analyze ~blk:(resolve_blk reso) cfg in let rpo = init_rpo cfg in - {fn; loop; reso; cfg; dom; domd; pdom; rdom; df; lst; tenv; phis; rpo} + {fn; loop; reso; cfg; dom; domd; pdom; rdom; lst; tenv; phis; rpo} diff --git a/src/lib/egraph/egraph_rewrite.ml b/src/lib/egraph/egraph_rewrite.ml index bd4df29b..822d232a 100644 --- a/src/lib/egraph/egraph_rewrite.ml +++ b/src/lib/egraph/egraph_rewrite.ml @@ -50,6 +50,9 @@ let union ?ty t id oid = (* Called when a duplicate node is inserted. *) let duplicate ?l t id = + Logs.debug (fun m -> + m "%s: inserted already hash-consed term %d:\n %a%!" + __FUNCTION__ id (Enode.pp ~node:(node t)) (node t id)); Option.iter l ~f:(Sched.duplicate t id); id @@ -109,7 +112,7 @@ and optimize ?ty ?l ~d t n id = | U _ -> assert false | N _ when d <= 0 -> id | N _ when Matcher.is_empty t.rules -> id - | N (o, cs) -> + | N _ -> let vm = VM.create () in let rws = {id; budget = t.match_limit} in if VM.init ~lookup:(node t) vm t.rules id then begin @@ -154,6 +157,14 @@ and rewrite ?ty ?l ~d t rws y = Uf.union t.classes rws.id oid; rws.id <- oid in try + let prev = rws.id in + Logs.debug (fun m -> + let s = Map.to_alist @@ Y.subst y in + m "%s: matched on rule %d for term %d\n pat: %a\n subst: %s%!" + __FUNCTION__ (Y.rule y) prev + Matcher.pp_pat (Y.pat y) + (List.to_string s ~f:(fun (x, id) -> + Format.asprintf "%s=%d" x id))); let env = Map.map (Y.subst y) ~f:(subst_info t) in let action, subsume = Y.payload y in let go env p = check env p; assemble env p in @@ -171,5 +182,10 @@ and rewrite ?ty ?l ~d t rws y = (* Rewrite is OK, integrate with the current e-class. *) let continue = not (subsume || Enode.is_const (node t oid)) in if continue then default oid else optimal oid; + Logs.debug (fun m -> + m "%s: rewrote term %d to %d, continue=%b:\n prev: %a\n curr: %a%!" + __FUNCTION__ prev oid continue + (Enode.pp ~node:(node t)) (node t prev) + (Enode.pp ~node:(node t)) (node t oid)); continue with Mismatch -> true diff --git a/src/lib/egraph/egraph_sched.ml b/src/lib/egraph/egraph_sched.ml index a61984fa..e86bda6c 100644 --- a/src/lib/egraph/egraph_sched.ml +++ b/src/lib/egraph/egraph_sched.ml @@ -49,13 +49,23 @@ let lca t a b = (* Note that `id` must be the canonical e-class. *) let move t old l id = + Logs.debug (fun m -> + let pp_old ppf old = + if List.is_empty old + then Format.fprintf ppf "" + else Format.fprintf ppf "from %s " + (List.to_string ~f:Label.to_string old) in + m "%s: moving term %d %ato %a%!" + __FUNCTION__ id pp_old old Label.pp l); add_moved t id old; set_label t id l; Hashtbl.update t.lmoved l ~f:(function | None -> Iset.singleton id | Some s -> Iset.add s id) -let mark_use t id a = add_moved t id [a] +let mark_use t id a = + Logs.debug (fun m -> m "%s: id=%d, a=%a%!" __FUNCTION__ id Label.pp a); + add_moved t id [a] (* Update when we union two nodes together. Should not be called if both IDs are the same. *) @@ -63,19 +73,30 @@ let merge t a b u = assert (a <> b); let cid = find t a in (* Link the ID to the label, along with the union ID. *) - let link ?p l = + let link ?p dir l = + Logs.debug (fun m -> + m "%s: merge dominated %s: a=%d, b=%d, cid=%d, u=%d, l=%a, p=%a%!" + __FUNCTION__ + (match dir with `left -> "left" | `right -> "right") + a b cid u Label.pp l + (Format.pp_print_option + ~none:(fun ppf () -> Format.fprintf ppf "") + Label.pp) p); Option.iter p ~f:(mark_use t cid); set_label t cid l; set_label t u l in match labelof t a, labelof t b with | None, None -> () - | None, Some pb -> link pb - | Some pa, None -> link pa + | None, Some pb -> link `right pb + | Some pa, None -> link `left pa | Some pa, Some pb when Label.(pa = pb) -> () - | Some pa, Some pb when dominates t ~parent:pb pa -> link pb ~p:pa - | Some pa, Some pb when dominates t ~parent:pa pb -> link pa ~p:pb + | Some pa, Some pb when dominates t ~parent:pb pa -> link `right pb ~p:pa + | Some pa, Some pb when dominates t ~parent:pa pb -> link `left pa ~p:pb | Some pa, Some pb -> let pc = lca t pa pb in + Logs.debug (fun m -> + m "%s: merge LCA: a=%d, b=%d, cid=%d, u=%d, pa=%a, pb=%a, pc=%a%!" + __FUNCTION__ a b cid u Label.pp pa Label.pp pb Label.pp pc); assert (cid = find t b); assert (cid = find t u); clear_label t a; @@ -94,6 +115,10 @@ let rec useof t l : enode -> unit = function useof t l @@ node t c) let default_placement t id l n = + Logs.debug (fun m -> + m "%s: placing term %d at %a:\n node: %a%!" + __FUNCTION__ id Label.pp l + (Enode.pp ~node:(node t)) n); move t [] l id; useof t l n @@ -103,15 +128,28 @@ let duplicate t id a = let cid = find t id in match labelof t cid with | Some b when Label.(b = a) -> () - | Some b when dominates t ~parent:b a -> mark_use t cid a + | Some b when dominates t ~parent:b a -> + Logs.debug (fun m -> + m "%s: %d at %a dominated by previous term %d at %a%!" + __FUNCTION__ id Label.pp a cid Label.pp b); + mark_use t cid a | Some b when dominates t ~parent:a b -> + Logs.debug (fun m -> + m "%s: %d at %a dominates previous term %d at %a%!" + __FUNCTION__ id Label.pp a cid Label.pp b); mark_use t cid b; set_label t id a | Some b -> let c = lca t a b in + Logs.debug (fun m -> + m "%s: %d at %a LCA with previous term %d at %a: %a%!" + __FUNCTION__ id Label.pp a cid Label.pp b Label.pp c); clear_label t cid; move t [a; b] c cid; | None -> + Logs.debug (fun m -> + m "%s: %d at %a has no previous term, cid=%d%!" + __FUNCTION__ id Label.pp a cid); (* This e-class wasn't moved, though it wasn't registered to begin with (even though it was hash-consed). *) default_placement t id a @@ node t id @@ -220,6 +258,11 @@ module Licm = struct let exists_in_loop t parent = List.exists ~f:(is_child_loop ~parent t) let licm_move t l l' lp id lhs = + Logs.debug (fun m -> + m "%s: LICM for term %d: l=%a, l'=%a:\n loop: %a\n node: %a%!" + __FUNCTION__ id Label.pp l Label.pp l' + Loops.pp_data (Loops.get t.input.loop lp) + (Enode.pp ~node:(node t)) (node t id)); let l' = match lhs with | None -> l' | Some x -> match partition_uses t x with diff --git a/src/lib/egraph/extractor/extractor_cfg.ml b/src/lib/egraph/extractor/extractor_cfg.ml index 57c4af4e..2159b57a 100644 --- a/src/lib/egraph/extractor/extractor_cfg.ml +++ b/src/lib/egraph/extractor/extractor_cfg.ml @@ -22,25 +22,24 @@ let empty_scope : scope = Id.Tree.empty type placed = { seq : Label.t Ftree.t; - ids : Z.t; + ids : Bitset.Id.t; } let create_placed id l = { seq = Ftree.singleton l; - ids = Z.(one lsl id); + ids = Bitset.Id.singleton id; } let add_placed p id l = - assert (not @@ Z.testbit p.ids id); { + assert (not @@ Bitset.Id.mem p.ids id); { seq = Ftree.snoc p.seq l; - ids = Z.(p.ids lor (one lsl id)); + ids = Bitset.Id.set p.ids id; } type env = { insn : Insn.op Label.Table.t; ctrl : ctrl Label.Table.t; news : placed Label.Table.t; - closure : Lset.t Label.Table.t; mutable cur : Label.t; mutable scp : scope; } @@ -49,7 +48,6 @@ let init () = { insn = Label.Table.create (); ctrl = Label.Table.create (); news = Label.Table.create(); - closure = Label.Table.create (); cur = Label.pseudoentry; scp = empty_scope; } @@ -93,6 +91,9 @@ let fresh env canon real = let* x = Context.Var.fresh in let+ l = Context.Label.fresh in env.scp <- Id.Tree.set env.scp ~key:canon ~data:(x, l); + Logs.debug (fun m -> + m "%s: placing fresh %a at %a: env.cur=%a canon=%d, real=%d%!" + __FUNCTION__ Var.pp x Label.pp l Label.pp env.cur canon real); Hashtbl.update env.news env.cur ~f:(function | Some p -> add_placed p real l | None -> create_placed real l); @@ -107,14 +108,17 @@ let insn t env a f = let+ op = f x in upd env.insn l op; `var x +[@@specialise] let insn' env l f = let+ op = f () in upd env.insn l op +[@@specialise] let ctrl env l f = let+ c = f () in upd env.ctrl l c +[@@specialise] let sel e x ty c y n = match c with | `var c -> !!(`sel (x, ty, c, y, n)) @@ -122,6 +126,8 @@ let sel e x ty c y n = match c with | `bool false -> !!(`uop (x, `copy ty, n)) | _ -> invalid_pure e +let (let@) x f = x f [@@specialise] [@@inline] + let rec pure t env e : operand Context.t = let pure = pure t env in let insn = insn t env in @@ -130,7 +136,7 @@ let rec pure t env e : operand Context.t = | E (a, Obinop b, [l; r]) -> let* l = pure l in let* r = pure r in - insn a @@ fun x -> + let@ x = insn a in !!(`bop (x, b, l, r)) | E (_, Obool b, []) -> !!(`bool b) | E (_, Ocall (x, _), _) -> !!(`var x) @@ -141,13 +147,13 @@ let rec pure t env e : operand Context.t = let* c = pure c in let* y = pure y in let* n = pure n in - insn a @@ fun x -> + let@ x = insn a in sel e x ty c y n | E (_, Osingle s, []) -> !!(`float s) | E (_, Osym (s, n), []) -> !!(`sym (s, n)) | E (a, Ounop u, [y]) -> let* y = pure y in - insn a @@ fun x -> + let@ x = insn a in !!(`uop (x, u, y)) | E (_, Ovaarg (x, _), [_]) -> !!(`var x) | E (_, Ovar x, []) -> !!(`var x) @@ -257,51 +263,51 @@ let exp t env l e = let* c = pure c in let* y = dst y in let* n = dst n in - ctrl @@ fun () -> + let@ () = ctrl in br l e c y n | E (_, Ocall0 _, [f; args; vargs]) -> let* f = global t env f in let* args = callargs t env args in let* vargs = callargs t env vargs in - insn @@ fun () -> + let@ () = insn in !!(`call (None, f, args, vargs)) | E (_, Ocall (x, ty), [f; args; vargs]) -> let* f = global t env f in let* args = callargs t env args in let* vargs = callargs t env vargs in - insn @@ fun () -> + let@ () = insn in !!(`call (Some (x, ty), f, args, vargs)) | E (_, Oload (x, t), [y]) -> let* y = pure y in - insn @@ fun () -> + let@ () = insn in !!(`load (x, t, y)) | E (_, Ojmp, [d]) -> let* d = dst d in - ctrl @@ fun () -> + let@ () = ctrl in !!(`jmp d) | E (_, Oret, [x]) -> let* x = pure x in - ctrl @@ fun () -> + let@ () = ctrl in !!(`ret (Some x)) | E (_, Oset _, [y]) -> pure y >>| ignore | E (_, Ostore (t, _), [v; x]) -> let* v = pure v in let* x = pure x in - insn @@ fun () -> + let@ () = insn in !!(`store (t, v, x)) | E (_, Osw ty, i :: d :: tbl) -> let* i = pure i in let* d = sw_default t env l d in let* tbl = table t env tbl ty in - ctrl @@ fun () -> + let@ () = ctrl in sw l e ty i d tbl | E (_, Ovaarg (x, t), [a]) -> let* a = pure a in - insn @@ fun () -> + let@ () = insn in vaarg l e x t a | E (_, Ovastart _, [a]) -> let* a = pure a in - insn @@ fun () -> + let@ () = insn in vastart l e a (* The rest are rejected. *) | E (_, Oaddr _, _) @@ -330,38 +336,6 @@ let exp t env l e = | E (_, Ovastart _, _) -> invalid l e module Hoisting = struct - let (++) = Lset.union - let not_pseudo = Fn.non Label.is_pseudo - let descendants t = Semi_nca.Tree.descendants t.eg.input.dom - let dominates t = Semi_nca.Tree.is_descendant_of t.eg.input.dom - let frontier t = Semi_nca.Frontier.enum t.eg.input.df - let to_set = Fn.compose Lset.of_sequence @@ Seq.filter ~f:not_pseudo - - (* Effectively, we are computing the dominator-subtree closure - over the iterated dominance frontier of `l`. - - This helps answer the question of where the computation of a - given instruction is "available" in the CFG. - *) - let rec closure ?(self = true) t env l = - let c = match Hashtbl.find env.closure l with - | Some c -> c - | None -> - let c = - frontier t l |> Seq.filter ~f:not_pseudo |> - (* A block can be in its own dominance frontier, so - we need to avoid an infinite loop. *) - Seq.filter ~f:(Fn.non @@ Label.equal l) |> - (* Additionally, we don't want to follow back-edges. This - can happen when a node in our frontier is, for example, - a loop header. *) - Seq.filter ~f:(fun parent -> not @@ dominates t ~parent l) |> - Seq.map ~f:(closure t env) |> - Seq.fold ~init:(to_set @@ descendants t l) ~f:(++) in - Hashtbl.set env.closure ~key:l ~data:c; - c in - if self then Lset.add c l else Lset.remove c l - (* Try the real ID first before moving on to the canonical ID. This could happen if we rescheduled a newer term before we unioned it with an older term. *) @@ -371,12 +345,14 @@ module Hoisting = struct if id = cid then s else Common.movedof t.eg cid else s + let resolve_label t l = + match Resolver.resolve t.eg.input.reso l with + | Some `insn (_, b, _, _) -> Blk.label b + | Some `blk b -> Blk.label b + | None -> assert false + let moved_blks t id cid = - find_moved t id cid |> Lset.map ~f:(fun l -> - match Resolver.resolve t.eg.input.reso l with - | Some `insn (_, b, _, _) -> Blk.label b - | Some `blk b -> Blk.label b - | None -> assert false) + find_moved t id cid |> Lset.map ~f:(resolve_label t) let rec post_dominated t l bs = match Semi_nca.Tree.parent t.eg.input.pdom l with @@ -385,7 +361,7 @@ module Hoisting = struct (* See if there exists a cycle starting from `l` that does not intersect with `bs`. *) - let exists_bypass t l bs = + let exists_bypass t l id cid bs = let rec loop q = match Stack.pop q with | None -> false | Some (n, vis) when Lset.mem vis n -> @@ -395,28 +371,77 @@ module Hoisting = struct Cfg.Node.succs n t.eg.input.cfg |> Seq.iter ~f:(fun s -> Stack.push q (s, vis)); loop q in - Loops.mem t.eg.input.loop l && - loop (Stack.singleton (l, Lset.empty)) + let res = + Loops.mem t.eg.input.loop l && + loop (Stack.singleton (l, Lset.empty)) in + Logs.debug (fun m -> + m "%s: %a is post-dominated: bs=%s, id=%d, cid=%d, res=%b%!" + __FUNCTION__ Label.pp l + (Lset.to_list bs |> List.to_string ~f:Label.to_string) + id cid res); + res + + (* Given all of the uses we know of, compute the points + where the definition will be killed: + + kills = {k | ∃ u ∈ uses s.t. k ∈ succs(u) ∧ k ∉ uses} + *) + let compute_kills t uses = + Lset.fold uses ~init:Lset.empty ~f:(fun init u -> + Cfg.Node.succs u t.eg.input.cfg |> + Seq.filter ~f:(Fn.non @@ Lset.mem uses) |> + Seq.fold ~init ~f:Lset.add) + + (* See if there exists a path, starting from `l`, that avoids + touching any block in `uses` and either reaches the end of + the function, or one of the blocks in `kills`. + + `kills` is the set of blocks where, if we were to place the + instruction at `l`, its live range would end. + + pre: `kills` and `uses` are disjoint + *) + let is_partial_redundancy_pathwise t l id cid ~uses = + let kills = compute_kills t uses in + let rec loop q = match Stack.pop q with + | None -> false + | Some (n, vis) when Lset.mem vis n -> loop q + | Some (n, _) when Lset.mem uses n -> loop q + | Some (n, _) when Lset.mem kills n -> true + | Some (n, _) when Label.(n = pseudoexit) -> true + | Some (n, vis) -> + let vis = Lset.add vis n in + Cfg.Node.succs n t.eg.input.cfg |> + Seq.iter ~f:(fun s -> Stack.push q (s, vis)); + loop q in + let res = loop @@ Stack.singleton (l, Lset.empty) in + Logs.debug (fun m -> + m "%s: l=%a, id=%d cid=%d, uses=%s, kills=%s, res=%b%!" + __FUNCTION__ Label.pp l id cid + (Lset.to_list uses |> List.to_string ~f:Label.to_string) + (Lset.to_list kills |> List.to_string ~f:Label.to_string) + res); + res (* When we "move" duplicate nodes up to the LCA (lowest common ancestor) in the dominator tree, we might be introducing a partial redundancy. This means that, at the LCA, the node is not going to be used on all paths that are dominated by it, so we need to do a simple analysis to see if this is the case. *) - let is_partial_redundancy t env l id cid = + let is_partial_redundancy t l id cid = (* If this node is deliberately placed here then allow it. *) not (Common.is_pinned t.eg id) && begin + Logs.debug (fun m -> + m "%s: checking %a, id=%d, cid=%d%!" + __FUNCTION__ Label.pp l id cid); (* Get the blocks associated with the labels that were "moved" for this node. *) - let l = match Resolver.resolve t.eg.input.reso l with - | Some `insn (_, b, _, _) -> Blk.label b - | Some `blk _ -> l - | None -> assert false in let bs = moved_blks t id cid in (* An empty set means that nobody uses this value. *) Lset.is_empty bs || begin (* If we're being used in the candidate block then this is trivially not a partial redundancy. *) + let l = resolve_label t l in not (Lset.mem bs l) && begin (* If one of these blocks post-dominates the block that we're moving to, then it is safe to allow the move to happen, @@ -428,28 +453,18 @@ module Hoisting = struct if post_dominated t l bs then (* Check if we can reach the target block via a backedge without visiting any of the blocks we moved from. *) - exists_bypass t l bs + exists_bypass t l id cid bs else - (* For each of these blocks, get its reflexive transitive - closure in the dominator tree, and union them together. *) - let a = Lset.fold bs ~init:Lset.empty - ~f:(fun acc l -> acc ++ closure t env l) in - (* Get the non-reflexive transitive closure of the block - that we moved to. *) - let b = closure t env l ~self:false in - (* If these sets are not equal, then we have a partial - redundancy, and thus need to duplicate code. In the - case where the closure includes our target block `l`, - we want to exclude it, since the closure for `l` will - exclude itself. *) - not @@ Lset.equal (Lset.remove a l) b + (* Check if `bs` intersects on all paths where `id` can + be used. *) + is_partial_redundancy_pathwise t l id cid ~uses:bs end end end - let should_skip t env l id cid = - Z.testbit t.impure cid || - is_partial_redundancy t env l id cid + let should_skip t l id cid = + Bitset.Id.mem t.impure cid || + is_partial_redundancy t l id cid (* If any nodes got moved up to this label, then we should check to see if it is eligible for this code motion optimization. @@ -471,7 +486,10 @@ module Hoisting = struct Context.Seq.iter ~f:(fun (id, cid) -> match extract t id with | None -> extract_fail l id | Some e -> - Context.unless (should_skip t env l id cid) @@ fun () -> + let@ () = Context.unless (should_skip t l id cid) in + Logs.debug (fun m -> + m "%s: id=%d, cid=%d was moved to l=%a, OK to hoist%!" + __FUNCTION__ id cid Label.pp l); pure t env e >>| ignore) end @@ -483,7 +501,7 @@ let reify t env l = | Some id -> match extract t id with | None -> extract_fail l @@ Common.find t.eg id | Some (E (Id {canon; _}, op, args) as e) - when not @@ Z.testbit t.impure canon -> + when not @@ Bitset.Id.mem t.impure canon -> (* There may be an opportunity to "sink" this instruction, which is the dual of the "hoisting" optimization below. Since this is a pure operation, we can wait until it is @@ -492,10 +510,21 @@ let reify t env l = begin match op, args with | Oset _, [E (Id {canon; _}, _, _)] when Common.is_pinned t.eg canon -> + Logs.debug (fun m -> + m "%s: pinned: id=%d, l=%a%!" + __FUNCTION__ id Label.pp l); exp t env l e - | _ -> !!() + | _ -> + Logs.debug (fun m -> + m "%s: delaying CFG extraction of id=%d, l=%a%!" + __FUNCTION__ id Label.pp l); + !!() end - | Some e -> exp t env l e + | Some e -> + Logs.debug (fun m -> + m "%s: eagerly extracting id=%d to l=%a in CFG%!" + __FUNCTION__ id Label.pp l); + exp t env l e (* Rewrite a single instruction. *) let step_insn t env i = @@ -533,8 +562,7 @@ let find_insn env l = Hashtbl.find env.insn l |> Option.map ~f:(Insn.create ~label:l) (* Find any new instructions to be prepended directly before label `l`. - Note that the ordering is, by default, from oldest to newest ID, but - this can be reversed for an efficient `rev_append` as seen below. *) + The order can be reversed for an efficient `rev_append` as seen below. *) let find_news ?(rev = false) env l = Hashtbl.find env.news l |> Option.value_map ~default:[] ~f:(fun p -> diff --git a/src/lib/egraph/extractor/extractor_core.ml b/src/lib/egraph/extractor/extractor_core.ml index 00e8bf55..596cbf76 100644 --- a/src/lib/egraph/extractor/extractor_core.ml +++ b/src/lib/egraph/extractor/extractor_core.ml @@ -3,14 +3,14 @@ open Egraph_common open Monads.Std open Virtual -(* We store the canonical and real IDs to help us determine - the ordering when reifying back to the CFG representation. - - Canonical IDs help us extract the best term, but the real - ID of the term determines the ordering; in particular, we - order from oldest to newest. This makes sense since the way - we build terms in the e-graph will be such that terms with - a newer ID will always depend on an ID that is older. +(* Keep track of the provenance of extracted nodes. + + We were previously using the real IDs for determining + the order by which we place new instructions, but that + is no longer guaranteed. In principle, lower IDs never + depend on higher IDs to compute their results, but when + we reschedule instructions in the CFG this is no longer + useful. *) type prov = | Label of Label.t @@ -37,28 +37,41 @@ module Cost : sig val pure : int -> t val incr : t -> t val add : t -> t -> t + val opc : t -> Int63.t + val depth : t -> Int63.t + val pp : Format.formatter -> t -> unit end = struct - include Int63 + open Int63 + + type nonrec t = t + + (* We want an unsigned comparison *) + let compare_u a b = compare (a lxor min_value) (b lxor min_value) [@@inline] + let (<) a b = Int.(compare_u a b < 0) [@@inline] let depth_bits = 12 let depth_mask = pred (one lsl depth_bits) let opc_mask = lnot depth_mask - let opc c = c lsr depth_bits - let depth c = c land depth_mask - let create o d = (o lsl depth_bits) lor (d land depth_mask) - let pure o = of_int o lsl depth_bits + let opc c = c lsr depth_bits [@@inline] + let depth c = c land depth_mask [@@inline] + let create o d = (o lsl depth_bits) lor (d land depth_mask) [@@inline] + let pure o = of_int o lsl depth_bits [@@inline] (* Make sure the increment doesn't wrap around. *) let incr c = let d = depth c in (c land opc_mask) lor (if d = depth_mask then d else succ d) + [@@inline] let add x y = let o = opc x + opc y in let d = max (depth x) (depth y) in create o d + [@@inline] + + let pp = pp end type cost = Cost.t @@ -67,7 +80,7 @@ type t = { eg : egraph; table : (cost * enode) Id.Table.t; memo : ext Id.Table.t; - mutable impure : Z.t; + mutable impure : Bitset.Id.t; } let rec pp_ext ppf = function @@ -141,14 +154,28 @@ end = struct | Some _ | None -> term)) end +let debug_dump t = + Logs.debug (fun m -> + let pp ppf (cid, (c, n)) = + Format.fprintf ppf + " %d:\n cost: %a\n depth: %a\n opc: %a\n node: %a%!" + cid Cost.pp c Int63.pp (Cost.depth c) Int63.pp (Cost.opc c) + (Enode.pp ~node:(node t.eg)) n in + m "%s: $%s cost table:\n%a" + __FUNCTION__ (Func.name t.eg.input.fn) + (Format.pp_print_list pp + ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n")) + (Hashtbl.to_alist t.table)) + let init eg = let t = { eg; table = Id.Table.create (); memo = Id.Table.create (); - impure = Z.zero; + impure = Bitset.Id.empty; } in Saturation.go t; + debug_dump t; t let rec must_remain_fixed op args = match (op : Enode.op) with @@ -184,15 +211,15 @@ let rec must_remain_fixed op args = match (op : Enode.op) with | _ -> false let prov t cid id op args = - if must_remain_fixed op args then begin - t.impure <- Z.(t.impure lor (one lsl cid)); + if must_remain_fixed op args then + let () = t.impure <- Bitset.Id.set t.impure cid in match labelof t.eg cid with | Some l -> Label l | None when id = cid -> Id {canon = cid; real = id} | None -> match labelof t.eg id with | None -> Id {canon = cid; real = id} | Some l -> Label l - end else Id {canon = cid; real = id} + else Id {canon = cid; real = id} module O = Monad.Option diff --git a/src/lib/float_stubs.c b/src/lib/float_stubs.c index d271cb17..f42c7d7f 100644 --- a/src/lib/float_stubs.c +++ b/src/lib/float_stubs.c @@ -12,16 +12,19 @@ #define Float_val(v) (*(float *)(Data_custom_val(v))) -intnat cgen_float32_compare_unboxed(float f, float g) { +intnat +cgen_float32_compare_unboxed(float f, float g) { return (intnat)(f > g) - (intnat)(f < g) + (intnat)(f == f) - (intnat)(g == g); } -int cgen_float32_compare_to_untagged(value f, value g) { +int +cgen_float32_compare_to_untagged(value f, value g) { return cgen_float32_compare_unboxed(Float_val(f), Float_val(g)); } -value cgen_float32_compare(value f, value g) { +value +cgen_float32_compare(value f, value g) { return Val_int(cgen_float32_compare_to_untagged(f, g)); } @@ -30,7 +33,8 @@ value cgen_float32_compare(value f, value g) { - ocaml/runtime/hash.c - ocaml/stdlib/float.ml */ -value cgen_float32_hash(value x) { +value +cgen_float32_hash(value x) { uint32_t h = caml_hash_mix_float(0, Float_val(x)); h ^= h >> 16; h *= 0x85ebca6b; @@ -41,22 +45,23 @@ value cgen_float32_hash(value x) { } static struct custom_operations cgen_float32_custom_ops = { - .identifier = (char *)"cgen_float32_custom_ops", - .finalize = custom_finalize_default, - .compare = cgen_float32_compare_to_untagged, - .hash = cgen_float32_hash, - .serialize = custom_serialize_default, - .deserialize = custom_deserialize_default, - .compare_ext = custom_compare_ext_default, + .identifier = (char *)"cgen_float32_custom_ops", + .finalize = custom_finalize_default, + .compare = cgen_float32_compare_to_untagged, + .hash = cgen_float32_hash, + .serialize = custom_serialize_default, + .deserialize = custom_deserialize_default, + .compare_ext = custom_compare_ext_default, #if OCAML_VERSION_MAJOR >= 4 && OCAML_VERSION_MINOR >= 8 - .fixed_length = NULL, + .fixed_length = NULL, #endif }; #define Alloc_float() \ caml_alloc_custom(&cgen_float32_custom_ops, sizeof(float), 0, 1) -value cgen_float32_of_float(value x) { +value +cgen_float32_of_float(value x) { CAMLparam1(x); CAMLlocal1(f); f = Alloc_float(); @@ -64,21 +69,38 @@ value cgen_float32_of_float(value x) { CAMLreturn(f); } -value cgen_float_of_float32(value x) { return caml_copy_double(Float_val(x)); } -value cgen_float32_is_zero(value x) { return Val_bool(Float_val(x) == 0.0f); } -value cgen_float32_is_inf(value x) { return Val_bool(isinf(Float_val(x))); } +value +cgen_float_of_float32(value x) { + return caml_copy_double(Float_val(x)); +} + +value +cgen_float32_is_zero(value x) { + return Val_bool(Float_val(x) == 0.0f); +} + +value +cgen_float32_is_inf(value x) { + return Val_bool(isinf(Float_val(x))); +} -value cgen_float32_is_negative(value x) { +value +cgen_float32_is_negative(value x) { return Val_bool(signbit(Float_val(x))); } -value cgen_float32_is_nan(value x) { return Val_bool(isnan(Float_val(x))); } +value +cgen_float32_is_nan(value x) { + return Val_bool(isnan(Float_val(x))); +} -value cgen_float32_is_unordered(value x, value y) { +value +cgen_float32_is_unordered(value x, value y) { return Val_bool(isunordered(Float_val(x), Float_val(y))); } -value cgen_float32_add(value x, value y) { +value +cgen_float32_add(value x, value y) { CAMLparam2(x, y); CAMLlocal1(f); f = Alloc_float(); @@ -86,7 +108,8 @@ value cgen_float32_add(value x, value y) { CAMLreturn(f); } -value cgen_float32_div(value x, value y) { +value +cgen_float32_div(value x, value y) { CAMLparam2(x, y); CAMLlocal1(f); f = Alloc_float(); @@ -94,7 +117,8 @@ value cgen_float32_div(value x, value y) { CAMLreturn(f); } -value cgen_float32_mul(value x, value y) { +value +cgen_float32_mul(value x, value y) { CAMLparam2(x, y); CAMLlocal1(f); f = Alloc_float(); @@ -102,7 +126,8 @@ value cgen_float32_mul(value x, value y) { CAMLreturn(f); } -value cgen_float32_neg(value x) { +value +cgen_float32_neg(value x) { CAMLparam1(x); CAMLlocal1(f); f = Alloc_float(); @@ -110,7 +135,8 @@ value cgen_float32_neg(value x) { CAMLreturn(f); } -value cgen_float32_sub(value x, value y) { +value +cgen_float32_sub(value x, value y) { CAMLparam2(x, y); CAMLlocal1(f); f = Alloc_float(); @@ -118,7 +144,8 @@ value cgen_float32_sub(value x, value y) { CAMLreturn(f); } -value cgen_bits_of_float32(value x) { +value +cgen_bits_of_float32(value x) { CAMLparam1(x); CAMLlocal1(i); float f = Float_val(x); @@ -126,7 +153,8 @@ value cgen_bits_of_float32(value x) { CAMLreturn(i); } -value cgen_float32_of_bits(value x) { +value +cgen_float32_of_bits(value x) { CAMLparam1(x); CAMLlocal1(f); uint32_t i = Int32_val(x); @@ -135,32 +163,48 @@ value cgen_float32_of_bits(value x) { CAMLreturn(f); } -value cgen_int8_of_float32(value x) { return Val_int((int8_t)Float_val(x)); } -value cgen_int16_of_float32(value x) { return Val_int((int16_t)Float_val(x)); } +value +cgen_int8_of_float32(value x) { + return Val_int((int8_t)Float_val(x)); +} -value cgen_int32_of_float32(value x) { +value +cgen_int16_of_float32(value x) { + return Val_int((int16_t)Float_val(x)); +} + +value +cgen_int32_of_float32(value x) { return caml_copy_int32((int32_t)Float_val(x)); } -value cgen_int64_of_float32(value x) { +value +cgen_int64_of_float32(value x) { return caml_copy_int64((int64_t)Float_val(x)); } -value cgen_uint8_of_float32(value x) { return Val_int((uint8_t)Float_val(x)); } +value +cgen_uint8_of_float32(value x) { + return Val_int((uint8_t)Float_val(x)); +} -value cgen_uint16_of_float32(value x) { +value +cgen_uint16_of_float32(value x) { return Val_int((uint16_t)Float_val(x)); } -value cgen_uint32_of_float32(value x) { +value +cgen_uint32_of_float32(value x) { return caml_copy_int32((uint32_t)Float_val(x)); } -value cgen_uint64_of_float32(value x) { +value +cgen_uint64_of_float32(value x) { return caml_copy_int64((uint64_t)Float_val(x)); } -value cgen_float32_of_int8(value x) { +value +cgen_float32_of_int8(value x) { CAMLparam1(x); CAMLlocal1(f); f = Alloc_float(); @@ -168,7 +212,8 @@ value cgen_float32_of_int8(value x) { CAMLreturn(f); } -value cgen_float32_of_int16(value x) { +value +cgen_float32_of_int16(value x) { CAMLparam1(x); CAMLlocal1(f); f = Alloc_float(); @@ -176,7 +221,8 @@ value cgen_float32_of_int16(value x) { CAMLreturn(f); } -value cgen_float32_of_int32(value x) { +value +cgen_float32_of_int32(value x) { CAMLparam1(x); CAMLlocal1(f); f = Alloc_float(); @@ -184,7 +230,8 @@ value cgen_float32_of_int32(value x) { CAMLreturn(f); } -value cgen_float32_of_int64(value x) { +value +cgen_float32_of_int64(value x) { CAMLparam1(x); CAMLlocal1(f); f = Alloc_float(); @@ -192,7 +239,8 @@ value cgen_float32_of_int64(value x) { CAMLreturn(f); } -value cgen_float32_of_uint8(value x) { +value +cgen_float32_of_uint8(value x) { CAMLparam1(x); CAMLlocal1(f); f = Alloc_float(); @@ -200,7 +248,8 @@ value cgen_float32_of_uint8(value x) { CAMLreturn(f); } -value cgen_float32_of_uint16(value x) { +value +cgen_float32_of_uint16(value x) { CAMLparam1(x); CAMLlocal1(f); f = Alloc_float(); @@ -208,7 +257,8 @@ value cgen_float32_of_uint16(value x) { CAMLreturn(f); } -value cgen_float32_of_uint32(value x) { +value +cgen_float32_of_uint32(value x) { CAMLparam1(x); CAMLlocal1(f); f = Alloc_float(); @@ -216,7 +266,8 @@ value cgen_float32_of_uint32(value x) { CAMLreturn(f); } -value cgen_float32_of_uint64(value x) { +value +cgen_float32_of_uint64(value x) { CAMLparam1(x); CAMLlocal1(f); f = Alloc_float(); @@ -224,7 +275,8 @@ value cgen_float32_of_uint64(value x) { CAMLreturn(f); } -value cgen_string_of_float32(value x) { +value +cgen_string_of_float32(value x) { CAMLparam1(x); CAMLlocal1(s); char buf[MAX_FLOAT_DIGITS] = {0}; @@ -233,27 +285,33 @@ value cgen_string_of_float32(value x) { CAMLreturn(s); } -value cgen_float32_equal(value x, value y) { +value +cgen_float32_equal(value x, value y) { return Val_bool(Float_val(x) == Float_val(y)); } -value cgen_float32_ge(value x, value y) { +value +cgen_float32_ge(value x, value y) { return Val_bool(Float_val(x) >= Float_val(y)); } -value cgen_float32_gt(value x, value y) { +value +cgen_float32_gt(value x, value y) { return Val_bool(Float_val(x) > Float_val(y)); } -value cgen_float32_le(value x, value y) { +value +cgen_float32_le(value x, value y) { return Val_bool(Float_val(x) <= Float_val(y)); } -value cgen_float32_lt(value x, value y) { +value +cgen_float32_lt(value x, value y) { return Val_bool(Float_val(x) < Float_val(y)); } -value cgen_bits_of_float(value x) { +value +cgen_bits_of_float(value x) { CAMLparam1(x); CAMLlocal1(d); double f = Double_val(x); @@ -261,7 +319,8 @@ value cgen_bits_of_float(value x) { CAMLreturn(d); } -value cgen_float_of_bits(value x) { +value +cgen_float_of_bits(value x) { CAMLparam1(x); CAMLlocal1(f); uint64_t i = Int64_val(x); @@ -269,88 +328,115 @@ value cgen_float_of_bits(value x) { CAMLreturn(f); } -value cgen_int8_of_float(value x) { return Val_int((int8_t)Double_val(x)); } -value cgen_int16_of_float(value x) { return Val_int((int16_t)Double_val(x)); } +value +cgen_int8_of_float(value x) { + return Val_int((int8_t)Double_val(x)); +} -value cgen_int32_of_float(value x) { +value +cgen_int16_of_float(value x) { + return Val_int((int16_t)Double_val(x)); +} + +value +cgen_int32_of_float(value x) { return caml_copy_int32((int32_t)Double_val(x)); } -value cgen_int64_of_float(value x) { +value +cgen_int64_of_float(value x) { return caml_copy_int64((int64_t)Double_val(x)); } -value cgen_uint8_of_float(value x) { return Val_int((uint8_t)Double_val(x)); } -value cgen_uint16_of_float(value x) { return Val_int((uint16_t)Double_val(x)); } +value +cgen_uint8_of_float(value x) { + return Val_int((uint8_t)Double_val(x)); +} +value +cgen_uint16_of_float(value x) { + return Val_int((uint16_t)Double_val(x)); +} -value cgen_uint32_of_float(value x) { +value +cgen_uint32_of_float(value x) { return caml_copy_int32((uint32_t)Double_val(x)); } -value cgen_uint64_of_float(value x) { +value +cgen_uint64_of_float(value x) { return caml_copy_int64((uint64_t)Double_val(x)); } -value cgen_float_of_int8(value x) { +value +cgen_float_of_int8(value x) { CAMLparam1(x); CAMLlocal1(f); f = caml_copy_double((double)((int8_t)Int_val(x))); CAMLreturn(f); } -value cgen_float_of_int16(value x) { +value +cgen_float_of_int16(value x) { CAMLparam1(x); CAMLlocal1(f); f = caml_copy_double((double)((int16_t)Int_val(x))); CAMLreturn(f); } -value cgen_float_of_int32(value x) { +value +cgen_float_of_int32(value x) { CAMLparam1(x); CAMLlocal1(f); f = caml_copy_double((double)((int32_t)Int32_val(x))); CAMLreturn(f); } -value cgen_float_of_int64(value x) { +value +cgen_float_of_int64(value x) { CAMLparam1(x); CAMLlocal1(f); f = caml_copy_double((double)((int64_t)Int64_val(x))); CAMLreturn(f); } -value cgen_float_of_uint8(value x) { +value +cgen_float_of_uint8(value x) { CAMLparam1(x); CAMLlocal1(f); f = caml_copy_double((double)((uint8_t)Int_val(x))); CAMLreturn(f); } -value cgen_float_of_uint16(value x) { +value +cgen_float_of_uint16(value x) { CAMLparam1(x); CAMLlocal1(f); f = caml_copy_double((double)((uint16_t)Int_val(x))); CAMLreturn(f); } -value cgen_float_of_uint32(value x) { +value +cgen_float_of_uint32(value x) { CAMLparam1(x); CAMLlocal1(f); f = caml_copy_double((double)((uint32_t)Int32_val(x))); CAMLreturn(f); } -value cgen_float_of_uint64(value x) { +value +cgen_float_of_uint64(value x) { CAMLparam1(x); CAMLlocal1(f); f = caml_copy_double((double)((uint64_t)Int64_val(x))); CAMLreturn(f); } -value cgen_float_is_unordered(value x, value y) { +value +cgen_float_is_unordered(value x, value y) { return Val_bool(isunordered(Double_val(x), Double_val(y))); } -value cgen_float_is_ordered(value x, value y) { +value +cgen_float_is_ordered(value x, value y) { return Val_bool(!isunordered(Double_val(x), Double_val(y))); } diff --git a/src/lib/hashmap.ml b/src/lib/hashmap.ml index 297f6267..b069c846 100644 --- a/src/lib/hashmap.ml +++ b/src/lib/hashmap.ml @@ -4,8 +4,8 @@ open Regular.Std module Make(K : Hashtbl.Key) = struct module M = Map.Make(K) module P = Patricia_tree.Make(struct - include Int63 - let size = 63 + include Int + let size = Sys.int_size_in_bits end) type +'a t = 'a M.t P.t @@ -21,10 +21,8 @@ module Make(K : Hashtbl.Key) = struct let empty = P.empty let is_empty = P.is_empty - let hash' k = Int63.of_int @@ K.hash k - let find_exn t k = - hash' k |> P.find_exn t |> + K.hash k |> P.find_exn t |> Fn.flip Map.find k |> function | None -> raise Not_found | Some v -> v @@ -35,17 +33,17 @@ module Make(K : Hashtbl.Key) = struct let mem t k = try ignore (find_exn t k); true with | Not_found -> false - let singleton k v = P.singleton (hash' k) (M.singleton k v) + let singleton k v = P.singleton (K.hash k) (M.singleton k v) - let set t ~key ~data = P.update t (hash' key) ~f:(function + let set t ~key ~data = P.update t (K.hash key) ~f:(function | Some m -> Map.set m ~key ~data | None -> M.singleton key data) - let add_multi t ~key ~data = P.update t (hash' key) ~f:(function + let add_multi t ~key ~data = P.update t (K.hash key) ~f:(function | Some m -> Map.add_multi m ~key ~data | None -> M.singleton key [data]) - let add_exn t ~key ~data = P.update t (hash' key) ~f:(function + let add_exn t ~key ~data = P.update t (K.hash key) ~f:(function | None -> M.singleton key data | Some m -> match Map.add m ~key ~data with | `Duplicate -> raise Duplicate @@ -54,23 +52,23 @@ module Make(K : Hashtbl.Key) = struct let add t ~key ~data = try `Ok (add_exn t ~key ~data) with | Duplicate -> `Duplicate - let remove t k = P.change t (hash' k) + let remove t k = P.change t (K.hash k) ~f:(Option.bind ~f:(fun m -> let m' = Map.remove m k in Option.some_if (not @@ Map.is_empty m') m')) - let update t k ~f = P.update t (hash' k) ~f:(function + let update t k ~f = P.update t (K.hash k) ~f:(function | None -> M.singleton k @@ f None | Some m -> Map.update m k ~f) let update_with t k ~has ~nil = - P.update_with t (hash' k) + P.update_with t (K.hash k) ~nil:(fun () -> M.singleton k @@ nil ()) ~has:(fun m -> Map.update m k ~f:(function | Some v -> has v | None -> nil ())) - let change t k ~f = P.change t (hash' k) ~f:(function + let change t k ~f = P.change t (K.hash k) ~f:(function | None -> f None |> Option.map ~f:(fun v -> M.singleton k v) | Some m -> let m' = Map.change m k ~f in diff --git a/src/lib/interval_tree.ml b/src/lib/interval_tree.ml new file mode 100644 index 00000000..ac48b0b9 --- /dev/null +++ b/src/lib/interval_tree.ml @@ -0,0 +1,270 @@ +(* Implementation borrowed from BAP *) + +open Core +open Regular.Std +open Interval_tree_intf + +module Make(I : Interval) = struct + type key = I.t [@@deriving compare, sexp] + type point = I.point [@@deriving compare, sexp] + + module P = Comparable.Make_plain(struct + type t = point [@@deriving compare, sexp] + end) + + type +'a node = { + rhs : 'a t; + lhs : 'a t; + key : key; + data : 'a; + height : int; + greatest : point; + least : point; + } [@@deriving fields, sexp] + + and +'a t = 'a node option [@@deriving sexp] + + let height = Option.value_map ~default:0 ~f:height + let least = Option.map ~f:least + let greatest = Option.map ~f:greatest + let empty = None + + let bound f lhs top rhs = + Option.merge lhs rhs ~f |> + Option.value_map ~default:top ~f:(f top) + + let create lhs key data rhs = + let hl = height lhs and hr = height rhs in + let mn = I.lower key and mx = I.upper key in + let ll = least lhs and lr = least rhs in + let gl = greatest lhs and gr = greatest rhs in + Some { + lhs; rhs; key; data; + height = max hl hr + 1; + least = bound P.min ll mn lr; + greatest = bound P.max gl mx gr; + } + + let singleton key data = create None key data None + + let rec min_binding = function + | Some {lhs = None; key; data; _} -> Some (key, data) + | Some {lhs; _} -> min_binding lhs + | None -> None + + let rec max_binding = function + | Some {rhs = None; key; data; _} -> Some (key, data) + | Some {rhs; _} -> max_binding rhs + | None -> None + + let bal l x d r = + let hl, hr = height l, height r in + if hl > hr + 2 then match l with + | None -> assert false + | Some t when height t.lhs >= height t.rhs -> + create t.lhs t.key t.data (create t.rhs x d r) + | Some t -> match t.rhs with + | None -> assert false + | Some rhs -> + create (create t.lhs t.key t.data rhs.lhs) + rhs.key rhs.data (create rhs.rhs x d r) + else if hr > hl + 2 then match r with + | None -> assert false + | Some t when height t.rhs >= height t.lhs -> + create (create l x d t.lhs) t.key t.data t.rhs + | Some t -> match t.lhs with + | None -> assert false + | Some lhs -> + create (create l x d lhs.lhs) lhs.key lhs.data + (create lhs.rhs t.key t.data t.rhs) + else create l x d r + + let rec add map key data = match map with + | None -> singleton key data + | Some t -> + let c = I.compare key t.key in + if c = 0 then bal map key data None + else if c < 0 + then bal (add t.lhs key data) t.key t.data t.rhs + else bal t.lhs t.key t.data (add t.rhs key data) + + let is_inside key p = + let low = I.lower key and high = I.upper key in + P.between ~low ~high p + + let lookup start a = + let open Seq.Generator in + let rec go = function + | None -> return () + | Some t when P.(a < t.least || a > t.greatest) -> return () + | Some t -> + if is_inside t.key a then + go t.lhs >>= fun () -> + yield (t.key, t.data) >>= fun () -> + go t.rhs + else + go t.lhs >>= fun () -> + go t.rhs in + start |> go |> run + + let is_dominated t r = + let open I in + P.(lower r >= lower t.key && upper r <= upper t.key) + + let has_intersections t r = + let open I in + let open P in + let p, q = lower t.key, upper t.key + and x, y = lower r, upper r in + if p <= x + then x <= q && p <= y + else p <= y && x <= q + + let can't_be_in_tree t r = + let open I in + P.(lower r > t.greatest || upper r < t.least) + + let can't_be_dominated t r = + let open I in + P.(lower r < t.least || upper r > t.greatest) + + let query ~skip_if ~take_if start r = + let open Seq.Generator in + let rec go = function + | None -> return () + | Some t when skip_if t r -> return () + | Some t when take_if t r -> + go t.lhs >>= fun () -> + yield (t.key, t.data) >>= fun () -> + go t.rhs + | Some t -> + go t.lhs >>= fun () -> + go t.rhs in + start |> go |> run + + let dominators m r = query m r + ~skip_if:can't_be_dominated + ~take_if:is_dominated + + let intersections m r = query m r + ~skip_if:can't_be_in_tree + ~take_if:has_intersections + + let dominates m r = not (Seq.is_empty (dominators m r)) + let intersects m r = not (Seq.is_empty (intersections m r)) + let contains m a = not (Seq.is_empty (lookup m a)) + + let[@tail_mod_cons] rec map m ~f = match m with + | None -> None + | Some m -> Some { + m with + lhs = (map[@tailcall false]) m.lhs ~f; + data = f m.data; + rhs = (map[@tailcall]) m.rhs ~f; + } + + let[@tail_mod_cons] rec mapi m ~f = match m with + | None -> None + | Some m -> Some { + m with + lhs = (mapi[@tailcall false]) m.lhs ~f; + data = f m.key m.data; + rhs = (mapi[@tailcall]) m.rhs ~f; + } + + let rec remove_min_binding = function + | Some {lhs = None; rhs; _} -> rhs + | Some t -> bal (remove_min_binding t.lhs) t.key t.data t.rhs + | None -> assert false + + let splice t1 t2 = match t1,t2 with + | None, t | t, None -> t + | _ -> match min_binding t2 with + | Some (key, data) -> bal t1 key data (remove_min_binding t2) + | None -> assert false + + let mem_equal x y = + let open I in + P.(lower x = lower y && upper x = upper y) + + let rec remove map mem = match map with + | None -> None + | Some t when mem_equal t.key mem -> + splice (remove t.lhs mem) (remove t.rhs mem) + | Some t -> match I.compare mem t.key with + | 1 -> bal t.lhs t.key t.data (remove t.rhs mem) + | _ -> bal (remove t.lhs mem) t.key t.data t.rhs + + let remove_if ~leave_if ~remove_if map mem = + let rec remove = function + | None -> None + | Some t when leave_if t mem -> Some t + | Some t when remove_if t mem -> + splice (remove t.lhs) (remove t.rhs) + | Some t -> + bal (remove t.lhs) t.key t.data (remove t.rhs) in + remove map + + let remove_intersections map mem = remove_if map mem + ~leave_if:can't_be_in_tree + ~remove_if:has_intersections + + let remove_dominators map mem = remove_if map mem + ~leave_if:can't_be_dominated + ~remove_if:is_dominated + + let filter_mapi map ~f = + let rec fmap = function + | None -> None + | Some t -> match f t.key t.data with + | None -> splice (fmap t.lhs) (fmap t.rhs) + | Some data -> + bal (fmap t.lhs) t.key data (fmap t.rhs) in + fmap map + + let filter_map map ~f = filter_mapi map ~f:(fun _ x -> f x) + let filter map ~f : _ t = filter_map map ~f:(fun x -> Option.some_if (f x) x) + + let to_sequence start = + let open Seq.Generator in + let rec go = function + | None -> return () + | Some t -> + go t.lhs >>= fun () -> + yield (t.key, t.data) >>= fun () -> + go t.rhs in + start |> go |> run + + module C = Container.Make(struct + type 'a t = 'a node option + + let rec fold m ~init ~f = Option.fold m ~init:init ~f:(fun acc m -> + fold m.rhs ~init:(f (fold m.lhs ~init:acc ~f) m.data) ~f) + + let rec iter m ~f = Option.iter m ~f:(fun m -> + iter m.lhs ~f; + f m.data; + iter m.rhs ~f) + + let iter = `Custom iter + let length = `Define_using_fold + end) + + let fold = C.fold + let count = C.count + let sum = C.sum + let iter = C.iter + let length = C.length + let is_empty = C.is_empty + let exists = C.exists + let mem = C.mem + let for_all = C.for_all + let find_map = C.find_map + let find = C.find + let to_list = C.to_list + let to_array = C.to_array + let min_elt = C.min_elt + let max_elt = C.max_elt + let fold_until = C.fold_until + let fold_result = C.fold_result +end diff --git a/src/lib/interval_tree.mli b/src/lib/interval_tree.mli new file mode 100644 index 00000000..7fde5076 --- /dev/null +++ b/src/lib/interval_tree.mli @@ -0,0 +1,5 @@ +open Interval_tree_intf + +module Make(I : Interval) : S + with type key := I.t + and type point := I.point diff --git a/src/lib/interval_tree_intf.ml b/src/lib/interval_tree_intf.ml new file mode 100644 index 00000000..c59e7701 --- /dev/null +++ b/src/lib/interval_tree_intf.ml @@ -0,0 +1,40 @@ +open Core +open Regular.Std + +module type Interval = sig + type t [@@deriving compare, sexp] + type point [@@deriving compare, sexp] + val lower : t -> point + val upper : t -> point +end + +module type S = sig + type 'a t [@@deriving sexp_of] + type key + type point + + val empty : 'a t + val singleton : key -> 'a -> 'a t + val least : 'a t -> point option + val greatest : 'a t -> point option + val min_binding : 'a t -> (key * 'a) option + val max_binding : 'a t -> (key * 'a) option + val add : 'a t -> key -> 'a -> 'a t + val dominators : 'a t -> key -> (key * 'a) seq + val intersections : 'a t -> key -> (key * 'a) seq + val intersects : 'a t -> key -> bool + val dominates : 'a t -> key -> bool + val contains : 'a t -> point -> bool + val lookup : 'a t -> point -> (key * 'a) seq + val map : 'a t -> f:('a -> 'b) -> 'b t + val mapi : 'a t -> f:(key -> 'a -> 'b) -> 'b t + val filter : 'a t -> f:('a -> bool) -> 'a t + val filter_map : 'a t -> f:('a -> 'b option) -> 'b t + val filter_mapi : 'a t -> f:(key -> 'a -> 'b option) -> 'b t + val remove : 'a t -> key -> 'a t + val remove_intersections : 'a t -> key -> 'a t + val remove_dominators : 'a t -> key -> 'a t + val to_sequence : 'a t -> (key * 'a) seq + + include Container.S1 with type 'a t := 'a t +end diff --git a/src/lib/isel/isel.ml b/src/lib/isel/isel.ml index 15b45f8c..93a169ec 100644 --- a/src/lib/isel/isel.ml +++ b/src/lib/isel/isel.ml @@ -34,12 +34,12 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct fn; node = Vec.create (); typs = Vec.create (); + id2r = Vec.create (); cfg; dom = Semi_nca.compute (module Cfg) cfg Label.pseudoentry; rpo = init_rpo cfg; blks = Func.map_of_blks fn; v2id = Var.Table.create (); - id2r = Id.Table.create (); insn = Label.Table.create (); extra = Label.Table.create (); frame = needs_stack_frame fn; diff --git a/src/lib/isel/isel_builder.ml b/src/lib/isel/isel_builder.ml index d56bf05b..4b7512ff 100644 --- a/src/lib/isel/isel_builder.ml +++ b/src/lib/isel/isel_builder.ml @@ -21,20 +21,14 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct "In Isel_builder.reg: invalid register %s in function $%s" r (Func.name t.fn) () - let var t x = match Hashtbl.find t.v2id x with + let var t x = match getvar t x with | Some id -> !!id | None -> C.failf "In Isel_builder.var: unbound variable %a in function $%s" Var.pp x (Func.name t.fn) () - let new_var t x ty = Hashtbl.find_or_add t.v2id x ~default:(fun () -> - let v = Rv.var (regcls ty) x in - let id = new_node ~ty t @@ Rv v in - Hashtbl.set t.v2id ~key:x ~data:id; - Hashtbl.set t.id2r ~key:id ~data:v; - id) - + let newvar = newvar ~f:Rv.var let word = (Target.word M.target :> ty) let typeof_operand t : Virtual.operand -> ty C.t = function @@ -122,7 +116,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct | src -> src in let emit ?i dst src = let+ ty, id = operand' t @@ rewrite src in - let n = N (Omove, [new_var t dst ty; id]) in + let n = N (Omove, [newvar t dst ty; id]) in ignore @@ new_node ~l t n; Option.iter i ~f:(fun i -> status.(i) <- `moved) in let rec move_one i = @@ -168,7 +162,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct moves to be inserted into. *) let* l', ld' = if br then let+ l' = C.Label.fresh in - Hashtbl.add_multi t.extra ~key:l ~data:l'; + addextra t l l'; l', l' else !!(l, ld) in let+ () = windmill t l' moves in @@ -189,17 +183,38 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct | #Virtual.global as g -> global t g | #Virtual.local as loc -> local ~br t l loc >>| snd + let eval_binop o a b = match a, b with + | `int (a, _), `int (b, _) -> + (Eval.binop_int o a b :> Virtual.const option) + | `float a, `float b -> + (Eval.binop_single o a b :> Virtual.const option) + | `double a, `double b -> + (Eval.binop_double o a b :> Virtual.const option) + | _ -> None + + let eval_unop o = function + | `int (a, ty) -> + (Eval.unop_int o a ty :> Virtual.const option) + | `float a -> + (Eval.unop_single o a :> Virtual.const option) + | `double a -> + (Eval.unop_double o a :> Virtual.const option) + | _ -> None + let binop t l x o a b = - let* a = operand t a in - let+ b = operand t b in - let n = N (Obinop o, [a; b]) in let ty = infer_ty_binop o in - let id = new_node ~ty t n in + let+ id = match eval_binop o a b with + | Some c -> !!(constant t c) + | None -> + let* a = operand t a in + let+ b = operand t b in + let n = N (Obinop o, [a; b]) in + new_node ~ty t n in let r = Rv.var (regcls ty) x in let rid = new_node ~ty t @@ Rv r in ignore @@ new_node ~l t @@ N (Omove, [rid; id]); - Hashtbl.set t.v2id ~key:x ~data:id; - Hashtbl.set t.id2r ~key:id ~data:r + setvar t x id; + setrv t id r let unop t l x o a = let* () = match o with @@ -208,19 +223,20 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct "In Isel_builder.unop: uitof is not supported by target %a" Target.pp M.target () | _ -> !!() in - let+ a = operand t a in let ty = infer_ty_unop o in - (* Copy propagation *) - let id = match o with - | `copy _ -> a - | _ -> - let n = N (Ounop o, [a]) in - new_node ~ty t n in + let+ id = match eval_unop o a with + | Some c -> !!(constant t c) + | None -> match o with + | `copy _ -> operand t a (* copy propagation *) + | _ -> + let+ a = operand t a in + let n = N (Ounop o, [a]) in + new_node ~ty t n in let r = Rv.var (regcls ty) x in let rid = new_node ~ty t @@ Rv r in ignore @@ new_node ~l t @@ N (Omove, [rid; id]); - Hashtbl.set t.v2id ~key:x ~data:id; - Hashtbl.set t.id2r ~key:id ~data:r + setvar t x id; + setrv t id r let sel t l x ty c y n = let* c = var t c in @@ -232,8 +248,8 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let r = Rv.var (regcls ty) x in let rid = new_node ~ty t @@ Rv r in ignore @@ new_node ~l t @@ N (Omove, [rid; id]); - Hashtbl.set t.v2id ~key:x ~data:id; - Hashtbl.set t.id2r ~key:id ~data:r + setvar t x id; + setrv t id r let call_args_stack_size t l f args = C.List.fold args ~init:0 ~f:(fun sz -> function @@ -282,7 +298,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let+ r = reg t r in let ty = (ty :> ty) in let rid = new_node ~ty t @@ Rv (Rv.reg r) in - let xid = new_var t x ty in + let xid = newvar t x ty in let n = N (Omove, [xid; rid]) in ignore @@ new_node ~l t n @@ -317,7 +333,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let ty' = (ty :> ty) in let n = N (Oload ty, [a]) in let lid = new_node ~ty:ty' t n in - let vid = new_var t x ty' in + let vid = newvar t x ty' in (* TODO: see if we can do a pessimistic alias analysis to forward the `Oload` node where this var appears, where possible. *) ignore @@ new_node ~l t @@ N (Omove, [vid; lid]) @@ -333,7 +349,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let+ r = reg t r in let ty = (ty :> ty) in let rid = new_node ~ty t @@ Rv (Rv.reg r) in - let xid = new_var t x ty in + let xid = newvar t x ty in let n = N (Omove, [xid; rid]) in ignore @@ new_node ~l t n @@ -356,7 +372,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let stkargs t l x = assert t.frame; let rid = new_node ~ty:word t @@ Rv (Rv.reg R.fp) in - let xid = new_var t x word in + let xid = newvar t x word in let w = Target.word M.target in let off = Bv.(int M.stack_args_offset mod modulus (Type.sizeof_imm_base w)) in let oid = new_node ~ty:word t @@ N (Oint (off, (w :> Type.imm)), []) in @@ -402,7 +418,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let sw t l ty i d tbl = let ty' = (ty :> ty) in let i = match i with - | `var x -> new_var t x ty' + | `var x -> newvar t x ty' | `sym (s, o) -> new_node ~ty:ty' t @@ N (Osym (s, o), []) in let* d, _ = local ~br:true t l d in let+ tbl = @@ -423,7 +439,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let+ r = reg t r in let ty = (ty :> ty) in let rid = new_node ~ty t @@ Rv (Rv.reg r) in - let xid = new_var t x ty in + let xid = newvar t x ty in ignore @@ new_node ~l t @@ N (Omove, [xid; rid]) let stkparam t l (x, o, ty) = @@ -431,7 +447,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let w = Target.word M.target in let wi = (w :> Type.imm) in let wb = (w :> Type.basic) in - let xid = new_var t x ty' in + let xid = newvar t x ty' in (* Use the frame pointer. It will make our lives much easier. *) let rid = new_node ~ty:word t @@ Rv (Rv.reg R.fp) in let o' = o + M.stack_args_offset in @@ -449,7 +465,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct at that point we will have more information on how to lay out the stack. *) let slot t _l s = - let _sid = new_var t (Slot.var s) word in + let _sid = newvar t (Slot.var s) word in () let step t l = match Label.Tree.find t.blks l with diff --git a/src/lib/isel/isel_common.ml b/src/lib/isel/isel_common.ml index 55db348d..654617dc 100644 --- a/src/lib/isel/isel_common.ml +++ b/src/lib/isel/isel_common.ml @@ -23,12 +23,12 @@ type 'r t = { fn : func; node : 'r node Vec.t; typs : ty Uopt.t Vec.t; + id2r : 'r Uopt.t Vec.t; cfg : Cfg.t; dom : Label.t Semi_nca.tree; rpo : Label.t -> int; blks : blk Label.Tree.t; v2id : Id.t Var.Table.t; - id2r : 'r Id.Table.t; insn : Id.t list Label.Table.t; extra : Label.t list Label.Table.t; frame : bool; @@ -41,12 +41,24 @@ let new_node ?l ?ty t n : Id.t = assert (id = Vec.length t.typs); Vec.push t.node n; Vec.push t.typs @@ Uopt.of_option ty; + Vec.push t.id2r Uopt.none; Option.iter l ~f:(fun key -> Hashtbl.add_multi t.insn ~key ~data:id); id let node t id = Vec.get_exn t.node id let typeof t id = Uopt.to_option @@ Vec.get_exn t.typs id +let setvar t x id = Hashtbl.set t.v2id ~key:x ~data:id +let getvar t x = Hashtbl.find t.v2id x +let setrv t id r = Vec.set_exn t.id2r id @@ Uopt.some r +let getrv t id = Uopt.to_option @@ Vec.get_exn t.id2r id +let addextra t key data = Hashtbl.add_multi t.extra ~key ~data + +let newvar ~f t x ty = Hashtbl.find_or_add t.v2id x ~default:(fun () -> + let v = f (regcls ty) x in + let id = new_node ~ty t @@ Rv v in + setrv t id v; + id) let rec pp_node t ppr ppf id = match node t id with | Rv r -> Format.fprintf ppf "%a" ppr r diff --git a/src/lib/isel/isel_match.ml b/src/lib/isel/isel_match.ml index 2984d7a0..32703a2c 100644 --- a/src/lib/isel/isel_match.ml +++ b/src/lib/isel/isel_match.ml @@ -16,6 +16,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let wordi = (word :> Type.imm) let wordb = (word :> Type.basic) + type subst = Rv.t S.t type rule = (Rv.t, M.Insn.t) R.t type callback = (Rv.t, M.Insn.t) R.callback @@ -65,18 +66,23 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct (* XXX: don't try this at home! The representations are exactly the same, but we need to erase the type constraints on the input. *) let pats : (Matcher.pat * callback list) list = Obj.magic rules in - let prog = Matcher.compile pats ~commute:true in + let name = Format.asprintf "%a-isel" Target.pp M.target in + let prog = Matcher.compile ~name pats ~commute:true in prog, VM.create () (* Translate a substitution we got from the matcher into one that our rule callbacks can understand. *) - let map_subst_terms t (s : Matcher.subst) : Rv.t S.t = + let map_subst_terms t (s : Matcher.subst) : subst = let open S in let regvar id r = match typeof t id with | Some (#Type.basic as ty) -> Regvar (r, ty) | Some `v128 -> Regvar_v r | Some `flag -> Regvar (r, wordb) - | None -> raise_notrace Mismatch in + | None -> + Logs.debug (fun m -> + m "%s: no regvar for term %d: %a%!" + __FUNCTION__ id (pp_node t) id); + raise_notrace Mismatch in Map.map s ~f:(fun id -> let tm = match node t id with | N (Oaddr a, []) -> Imm (a, wordi) @@ -89,9 +95,14 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct | Callargs rs -> Callargs rs | Tbl (d, tbl) -> Table (d, tbl) | Rv r -> regvar id r - | N _ -> match Hashtbl.find t.id2r id with - | None -> raise_notrace Mismatch - | Some r -> regvar id r in + | N _ -> match getrv t id with + | Some r -> regvar id r + | None -> + Logs.debug (fun m -> + m "%s: no regvar for term %d: %a%!" + __FUNCTION__ id (pp_node t) id); + raise_notrace Mismatch + in S.{id; tm}) let fail_init_matcher t l id = @@ -112,13 +123,19 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct result of computing ?y because future instructions may want to match on it. *) - let check_blank_move s n (p : Matcher.pat) = match n, p with + let check_blank_move t (s : subst) id (p : Matcher.pat) = + match node t id, p with | N (Omove, [_; _]), P (Omove, [V x; V y]) -> begin match Map.(find s x, find s y) with | Some x, Some y - when x.S.id = y.S.id + when x.id = y.id || S.equal_term Rv.equal x.tm y.tm -> + Logs.debug (fun m -> + m "%s: term %d: blank move to x=%d: %a from y=%d: %a%!" + __FUNCTION__ id + x.id (pp_node t) x.id + y.id (pp_node t) y.id); raise_notrace Mismatch | _ -> () end @@ -129,13 +146,23 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let* () = C.unless init @@ fail_init_matcher t l id in let rec loop () = match VM.one vm prog with | None -> !!None - | Some y -> - try + | Some y -> try + Logs.debug (fun m -> + let s = Map.to_alist @@ Y.subst y in + m "%s: insn %a, term %d, yielded:\n rule: %d\n subst: %s\n pat: %a%!" + __FUNCTION__ Label.pp l id (Y.rule y) + (List.to_string s ~f:(fun (x, id) -> + Format.asprintf "%s=%d" x id)) + Matcher.pp_pat (Y.pat y)); let s = map_subst_terms t @@ Y.subst y in - check_blank_move s (node t id) @@ Y.pat y; + check_blank_move t s id @@ Y.pat y; R.try_ s (Y.payload y) >>= function | Some _ as is -> !!is - | None -> loop () + | None -> + Logs.debug (fun m -> + m "%s: no callbacks succeeded, looping again%!" + __FUNCTION__); + loop () with Mismatch -> loop () in loop () >>= function | None -> fail_match t l id () @@ -200,8 +227,8 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct >>| List.dedup_and_sort ~compare:M.Reg.compare let run t = - let* blks = transl_blks t in let* rets = transl_rets t in + let* blks = transl_blks t in let dict = Func.dict t.fn in let dict = if not t.frame then dict else Dict.set dict Pseudo.Func.Tag.needs_stack_frame () in diff --git a/src/lib/machine/machine.ml b/src/lib/machine/machine.ml index 65729865..df1d9ca0 100644 --- a/src/lib/machine/machine.ml +++ b/src/lib/machine/machine.ml @@ -1 +1,11 @@ +open Core + module X86 = X86 + +let targets = [ + X86.Amd64_sysv.target; +] + +let force_initialization () = + Logs.debug (fun m -> m "forcing initialization of targets%!"); + ignore (Sys.opaque_identity targets) diff --git a/src/lib/machine/machine.mli b/src/lib/machine/machine.mli index d842c8f8..3a890dbd 100644 --- a/src/lib/machine/machine.mli +++ b/src/lib/machine/machine.mli @@ -7,3 +7,6 @@ module X86 : sig val target : Target.t end end + +(** Ensures that all targets are registered. *) +val force_initialization : unit -> unit diff --git a/src/lib/machine/x86/x86_amd64_common.ml b/src/lib/machine/x86/x86_amd64_common.ml index 643228ab..828d8008 100644 --- a/src/lib/machine/x86/x86_amd64_common.ml +++ b/src/lib/machine/x86/x86_amd64_common.ml @@ -412,11 +412,13 @@ module Insn = struct | CMOVcc of cc | CMP | CVTSD2SI + | CVTTSD2SI | CVTSD2SS | CVTSI2SD | CVTSI2SS | CVTSS2SD | CVTSS2SI + | CVTTSS2SI | DIVSD | DIVSS | IMUL2 @@ -463,11 +465,13 @@ module Insn = struct | CMOVcc cc -> Format.fprintf ppf "cmov%a" pp_cc cc | CMP -> Format.fprintf ppf "cmp" | CVTSD2SI -> Format.fprintf ppf "cvtsd2si" + | CVTTSD2SI -> Format.fprintf ppf "cvttsd2si" | CVTSD2SS -> Format.fprintf ppf "cvtsd2ss" | CVTSI2SD -> Format.fprintf ppf "cvtsi2sd" | CVTSI2SS -> Format.fprintf ppf "cvtsi2ss" | CVTSS2SD -> Format.fprintf ppf "cvtss2sd" | CVTSS2SI -> Format.fprintf ppf "cvtss2si" + | CVTTSS2SI -> Format.fprintf ppf "cvttss2si" | DIVSD -> Format.fprintf ppf "divsd" | DIVSS -> Format.fprintf ppf "divss" | IMUL2 -> Format.fprintf ppf "imul" @@ -608,107 +612,109 @@ module Insn = struct Regvar.Set.of_list @@ List.map regs ~f:Regvar.reg - (* Registers read by an instruction. *) - let reads = function - | Two (op, a, b) -> - begin match op with - | ADD - | ADDSD - | ADDSS - | AND - | CMP - | DIVSD - | DIVSS - | IMUL2 - | MULSD - | MULSS - | OR - | ROL - | ROR - | SAR - | SHL - | SHR - | SUB - | SUBSD - | SUBSS - | TEST_ - | UCOMISD - | UCOMISS - -> rset [a; b] - | CMOVcc _ - (* We introduce a dependency on `a` so that any previous - writes to it will be preserved. - - This is morally equivalent to: - - `x := cc ? b : a` - *) - -> - Set.union (rset' [`rflags]) - (Set.union (rset_reg [a]) (rset [b])) - | BSF - | BSR - | CVTSD2SI - | CVTSD2SS - | CVTSI2SD - | CVTSI2SS - | CVTSS2SD - | CVTSS2SI - | LEA - | MOVSX - | MOVSXD - | MOVZX - | POPCNT - -> rset [b] - | MOV - | MOV_ - | MOVD - | MOVDQA - | MOVQ - | MOVSD - | MOVSS - -> Set.union (rset_mem [a]) (rset [b]) - | XOR - | XORPD - | XORPS -> - begin match a, b with - | Oreg (a, _), Oreg (b, _) when Regvar.(a = b) - (* Special case for XORing with self: this isn't really a use - of the register, since we're just assigning 0. *) - -> Regvar.Set.empty - | _ -> rset [a; b] - end + let binop_reads op a b = match op with + | ADD + | ADDSD + | ADDSS + | AND + | CMP + | DIVSD + | DIVSS + | IMUL2 + | MULSD + | MULSS + | OR + | ROL + | ROR + | SAR + | SHL + | SHR + | SUB + | SUBSD + | SUBSS + | TEST_ + | UCOMISD + | UCOMISS + -> rset [a; b] + | CMOVcc _ + (* We introduce a dependency on `a` so that any previous + writes to it will be preserved. + + This is morally equivalent to: + + `x := cc ? b : a` + *) + -> + Set.union (rset' [`rflags]) + (Set.union (rset_reg [a]) (rset [b])) + | BSF + | BSR + | CVTSD2SI + | CVTTSD2SI + | CVTSD2SS + | CVTSI2SD + | CVTSI2SS + | CVTSS2SD + | CVTSS2SI + | CVTTSS2SI + | LEA + | MOVSX + | MOVSXD + | MOVZX + | POPCNT + -> rset [b] + | MOV + | MOV_ + | MOVD + | MOVDQA + | MOVQ + | MOVSD + | MOVSS + -> Set.union (rset_mem [a]) (rset [b]) + | XOR + | XORPD + | XORPS -> + begin match a, b with + | Oreg (a, _), Oreg (b, _) when Regvar.(a = b) + (* Special case for XORing with self: this isn't really a use + of the register, since we're just assigning 0. *) + -> Regvar.Set.empty + | _ -> rset [a; b] end - | One (op, a) -> - begin match op with - | CALL args -> - Set.union (rset' [`rsp]) @@ - Set.union (rset_mem [a]) @@ - Regvar.Set.of_list args - | DEC - | INC - | NEG - | NOT - -> rset [a] - | DIV - | IDIV -> - begin match a with - | Oreg (_, `i8) - -> Set.union (rset' [`rax]) (rset [a]) - | _ - -> Set.union (rset' [`rax; `rdx]) (rset [a]) - end - | IMUL1 - | MUL + + let unop_reads op a = match op with + | CALL args -> + Set.union (rset' [`rsp]) @@ + Set.union (rset [a]) @@ + Regvar.Set.of_list args + | DEC + | INC + | NEG + | NOT + -> rset [a] + | DIV + | IDIV -> + begin match a with + | Oreg (_, `i8) -> Set.union (rset' [`rax]) (rset [a]) - | SETcc _ - (* SETcc will "read" the destination in the sense that - only the lower 8 bits are changed. *) - -> Set.union (rset' [`rflags]) (rset [a]) - | POP - | PUSH - -> Set.union (rset' [`rsp]) (rset_mem [a]) + | _ + -> Set.union (rset' [`rax; `rdx]) (rset [a]) end + | IMUL1 + | MUL + -> Set.union (rset' [`rax]) (rset [a]) + | SETcc _ + (* SETcc will "read" the destination in the sense that + only the lower 8 bits are changed. *) + -> Set.union (rset' [`rflags]) (rset [a]) + | POP + | PUSH + -> Set.union (rset' [`rsp]) (rset_mem [a]) + + (* Registers read by an instruction. *) + let reads = function + | Two (op, a, b) -> binop_reads op a b + | One (op, a) -> unop_reads op a | JMP (Jind a) -> rset [a] | CDQ | CQO @@ -730,86 +736,88 @@ module Insn = struct | FP64 _ -> Regvar.Set.empty + let binop_writes op a _b = match op with + | ADD + | AND + | BSF + | BSR + | IMUL2 + | OR + | POPCNT + | ROL + | ROR + | SAR + | SHL + | SHR + | SUB + | XOR + -> Set.union (rset' [`rflags]) (rset_reg [a]) + | ADDSD + | ADDSS + | CMOVcc _ + | CVTSD2SI + | CVTTSD2SI + | CVTSD2SS + | CVTSI2SD + | CVTSI2SS + | CVTSS2SD + | CVTSS2SI + | CVTTSS2SI + | DIVSD + | DIVSS + | LEA + | MOV + | MOV_ + | MOVD + | MOVDQA + | MOVQ + | MOVSD + | MOVSS + | MOVSX + | MOVSXD + | MOVZX + | MULSD + | MULSS + | SUBSD + | SUBSS + | XORPD + | XORPS + -> rset_reg [a] + | CMP + | TEST_ + | UCOMISD + | UCOMISS + -> rset' [`rflags] + + let unop_writes call op a = match op with + | DEC + | INC + | NEG + | NOT + -> Set.union (rset' [`rflags]) (rset_reg [a]) + | SETcc _ + -> rset_reg [a] + | CALL _ + -> Set.union (rset' [`rsp; `rflags]) call + | PUSH + -> rset' [`rsp] + | DIV + | IDIV + | IMUL1 + | MUL -> + begin match a with + | Oreg (_, `i8) + -> rset' [`rax; `rflags] + | _ + -> rset' [`rax; `rdx; `rflags] + end + | POP + -> Set.union (rset' [`rsp]) (rset_reg [a]) + (* Registers written to by an instruction. *) let writes call = function - | Two (op, a, _) -> - begin match op with - | ADD - | AND - | BSF - | BSR - | IMUL2 - | OR - | POPCNT - | ROL - | ROR - | SAR - | SHL - | SHR - | SUB - | XOR - -> Set.union (rset' [`rflags]) (rset_reg [a]) - | ADDSD - | ADDSS - | CMOVcc _ - | CVTSD2SI - | CVTSD2SS - | CVTSI2SD - | CVTSI2SS - | CVTSS2SD - | CVTSS2SI - | DIVSD - | DIVSS - | LEA - | MOV - | MOV_ - | MOVD - | MOVDQA - | MOVQ - | MOVSD - | MOVSS - | MOVSX - | MOVSXD - | MOVZX - | MULSD - | MULSS - | SUBSD - | SUBSS - | XORPD - | XORPS - -> rset_reg [a] - | CMP - | TEST_ - | UCOMISD - | UCOMISS - -> rset' [`rflags] - end - | One (op, a) -> - begin match op with - | DEC - | INC - | NEG - | NOT - -> Set.union (rset' [`rflags]) (rset_reg [a]) - | SETcc _ - -> rset_reg [a] - | CALL _ - -> Set.union (rset' [`rsp; `rflags]) call - | PUSH - -> rset' [`rsp] - | DIV - | IDIV - | IMUL1 - | MUL -> - begin match a with - | Oreg (_, `i8) - -> rset' [`rax; `rflags] - | _ - -> rset' [`rax; `rdx; `rflags] - end - | POP - -> Set.union (rset' [`rsp]) (rset_reg [a]) - end + | Two (op, a, b) -> binop_writes op a b + | One (op, a) -> unop_writes call op a | IMUL3 (a, _, _) -> Set.union (rset' [`rflags]) (rset_reg [a]) | RET @@ -837,78 +845,80 @@ module Insn = struct | Omem _ -> true | _ -> false + let binop_writes_to_memory op a _b = match op with + | ADD + | ADDSD + | ADDSS + | AND + | DIVSD + | DIVSS + | MOV + | MOV_ + | MOVD + | MOVDQA + | MOVQ + | MOVSD + | MOVSS + | MULSD + | MULSS + | OR + | ROL + | ROR + | SAR + | SHL + | SHR + | SUB + | SUBSD + | SUBSS + | XOR + -> is_mem a + | BSF (* illegal *) + | BSR (* illegal *) + | CMOVcc _ (* illegal *) + | CMP + | CVTSD2SI (* illegal *) + | CVTTSD2SI (* illegal *) + | CVTSD2SS (* illegal *) + | CVTSI2SD (* illegal *) + | CVTSI2SS (* illegal *) + | CVTSS2SI (* illegal *) + | CVTTSS2SI (* illegal *) + | CVTSS2SD (* illegal *) + | IMUL2 + | LEA (* illegal *) + | MOVSX (* illegal *) + | MOVZX (* illegal *) + | MOVSXD (* illegal *) + | POPCNT (* illegal *) + | TEST_ + | UCOMISD + | UCOMISS + | XORPD (* illegal *) + | XORPS (* illegal *) + -> false + + let unop_writes_to_memory op a = match op with + | DEC + | INC + | NEG + | NOT + | POP + | SETcc _ + -> is_mem a + | CALL _ (* writes to stack *) + | PUSH (* writes to stack *) + -> true + | DIV + | IDIV + | IMUL1 + | MUL + -> false + (* "illegal" here means that it is illegal to have a memory operand in the destination. *) let writes_to_memory = function - | Two (op, a, _) -> - begin match op with - | ADD - | ADDSD - | ADDSS - | AND - | DIVSD - | DIVSS - | MOV - | MOV_ - | MOVD - | MOVDQA - | MOVQ - | MOVSD - | MOVSS - | MULSD - | MULSS - | OR - | ROL - | ROR - | SAR - | SHL - | SHR - | SUB - | SUBSD - | SUBSS - | XOR - -> is_mem a - | BSF (* illegal *) - | BSR (* illegal *) - | CMOVcc _ (* illegal *) - | CMP - | CVTSD2SI (* illegal *) - | CVTSD2SS (* illegal *) - | CVTSI2SD (* illegal *) - | CVTSI2SS (* illegal *) - | CVTSS2SI (* illegal *) - | CVTSS2SD (* illegal *) - | IMUL2 - | LEA (* illegal *) - | MOVSX (* illegal *) - | MOVZX (* illegal *) - | MOVSXD (* illegal *) - | POPCNT (* illegal *) - | TEST_ - | UCOMISD - | UCOMISS - | XORPD (* illegal *) - | XORPS (* illegal *) - -> false - end - | One (op, a) -> - begin match op with - | DEC - | INC - | NEG - | NOT - | POP - | SETcc _ - -> is_mem a - | CALL _ (* writes to stack *) - | PUSH (* writes to stack *) - -> true - | DIV - | IDIV - | IMUL1 - | MUL - -> false - end + | Two (op, a, b) -> binop_writes_to_memory op a b + | One (op, a) -> unop_writes_to_memory op a | CDQ | CQO | CWD @@ -963,11 +973,13 @@ module Insn = struct let cmov cc a b = Two (CMOVcc cc, a, b) let cmp a b = Two (CMP, a, b) let cvtsd2si a b = Two (CVTSD2SI, a, b) + let cvttsd2si a b = Two (CVTTSD2SI, a, b) let cvtsd2ss a b = Two (CVTSD2SS, a, b) let cvtsi2sd a b = Two (CVTSI2SD, a, b) let cvtsi2ss a b = Two (CVTSI2SS, a, b) let cvtss2sd a b = Two (CVTSS2SD, a, b) let cvtss2si a b = Two (CVTSS2SI, a, b) + let cvttss2si a b = Two (CVTTSS2SI, a, b) let divsd a b = Two (DIVSD, a, b) let divss a b = Two (DIVSS, a, b) let imul2 a b = Two (IMUL2, a, b) diff --git a/src/lib/machine/x86/x86_amd64_emit.ml b/src/lib/machine/x86/x86_amd64_emit.ml index 6182031f..e5fa631c 100644 --- a/src/lib/machine/x86/x86_amd64_emit.ml +++ b/src/lib/machine/x86/x86_amd64_emit.ml @@ -66,36 +66,35 @@ let emit_func ppf (name, lnk) = Format.fprintf ppf ".section %s\n" section; if Linkage.export lnk then global ppf name; Format.fprintf ppf ".p2align 4\n"; - Format.fprintf ppf "%s:\n" name; - Format.fprintf ppf " endbr64\n" + Format.fprintf ppf "%s:\n" name let emit_blk ppf (l : Label.t) = Format.fprintf ppf "%a:\n" label l +let emit_reg t ppf r = match r, t with + | (#Reg.sse as r), (#Type.fp | `v128) -> + Format.fprintf ppf "%a" Reg.pp_sse r + | (#Reg.gpr as r), `i8 -> + Format.fprintf ppf "%a" Reg.pp_gpr8 r + | (#Reg.gpr as r), `i16 -> + Format.fprintf ppf "%a" Reg.pp_gpr16 r + | (#Reg.gpr as r), `i32 -> + Format.fprintf ppf "%a" Reg.pp_gpr32 r + | (#Reg.gpr as r), `i64 -> + Format.fprintf ppf "%a" Reg.pp_gpr r + | `rip, `i64 -> + Format.fprintf ppf "rip" + | _ -> + invalid_argf "invalid register/type combo: %s/%s" + (Format.asprintf "%a" Reg.pp r) + (match t with + | `v128 -> "v128" + | #Type.basic as t -> + Format.asprintf "%a" Type.pp_basic t) + () + let emit_regvar t ppf rv = match Regvar.which rv with - | First r -> - begin match r, t with - | (#Reg.sse as r), (#Type.fp | `v128) -> - Format.fprintf ppf "%a" Reg.pp_sse r - | (#Reg.gpr as r), `i8 -> - Format.fprintf ppf "%a" Reg.pp_gpr8 r - | (#Reg.gpr as r), `i16 -> - Format.fprintf ppf "%a" Reg.pp_gpr16 r - | (#Reg.gpr as r), `i32 -> - Format.fprintf ppf "%a" Reg.pp_gpr32 r - | (#Reg.gpr as r), `i64 -> - Format.fprintf ppf "%a" Reg.pp_gpr r - | `rip, `i64 -> - Format.fprintf ppf "rip" - | _ -> - invalid_argf "invalid register/type combo: %s/%s" - (Format.asprintf "%a" Reg.pp r) - (match t with - | `v128 -> "v128" - | #Type.basic as t -> - Format.asprintf "%a" Type.pp_basic t) - () - end + | First r -> emit_reg t ppf r | Second (x, _) -> invalid_argf "tried to emit a variable %s" (Format.asprintf "%a" Var.pp x) () diff --git a/src/lib/machine/x86/x86_amd64_isel.ml b/src/lib/machine/x86/x86_amd64_isel.ml index 898abb80..9ec94c4c 100644 --- a/src/lib/machine/x86/x86_amd64_isel.ml +++ b/src/lib/machine/x86/x86_amd64_isel.ml @@ -58,6 +58,14 @@ let can_lea_ty = function | `i16 | `i32 | `i64 -> true | _ -> false +let compare_u a b = + Int64.(compare (a lxor min_value) (b lxor min_value)) + +let lt_u a b = compare_u a b < 0 [@@inline] [@@ocaml.warning "-32"] +let le_u a b = compare_u a b <= 0 [@@inline] [@@ocaml.warning "-32"] +let gt_u a b = compare_u a b > 0 [@@inline] [@@ocaml.warning "-32"] +let ge_u a b = compare_u a b >= 0 [@@inline] [@@ocaml.warning "-32"] + (* pre: `tbl` is non-empty TODO: @@ -75,9 +83,10 @@ let adjust_table d tbl = let acc = Vec.create () in let _ = List.fold tbl ~init:lowest ~f:(fun p (v, l) -> let diff = Int64.(v - p) in - for _ = 0 to Int64.to_int_trunc diff - 1 do - Vec.push acc d; - done; + let rec loop i = if lt_u i diff then + let () = Vec.push acc d in + loop @@ Int64.succ i in + loop 0L; Vec.push acc l; Int64.succ v) in Vec.to_list acc, lowest, highest @@ -128,10 +137,10 @@ end = struct !!![I.movsd (Oreg (x, xt)) (Oreg (y, yt))] | _ -> !!None - let move_ri_x_y env = + let move_ri_x_y ?(zx = false) env = let*! x, xt = S.regvar env "x" in let*! y, yt = S.imm env "y" in - let*! () = guard @@ Type.equal_basic xt (bty yt) in + let*! () = guard (zx || Type.equal_basic xt (bty yt)) in if Bv.(y = zero) then !!![xor_gpr_self x xt] else @@ -1738,9 +1747,9 @@ end = struct let xt' = ftosi_ty xt in match tf with | `f32 -> - !!![I.cvtss2si (Oreg (x, xt')) (Oreg (y, yt))] + !!![I.cvttss2si (Oreg (x, xt')) (Oreg (y, yt))] | `f64 -> - !!![I.cvtsd2si (Oreg (x, xt')) (Oreg (y, yt))] + !!![I.cvttsd2si (Oreg (x, xt')) (Oreg (y, yt))] | _ -> !!None let ftosi_rf32_x_y ti env = @@ -1769,9 +1778,9 @@ end = struct let xt' = ftoui_ty xt in match tf with | `f32 -> - !!![I.cvtss2si (Oreg (x, xt')) (Oreg (y, yt))] + !!![I.cvttss2si (Oreg (x, xt')) (Oreg (y, yt))] | `f64 -> - !!![I.cvtsd2si (Oreg (x, xt')) (Oreg (y, yt))] + !!![I.cvttsd2si (Oreg (x, xt')) (Oreg (y, yt))] | _ -> !!None let ftoui_rf32_x_y ti env = @@ -1935,11 +1944,16 @@ end = struct ] | _ -> !!None - let call_sym_x env = + let call_sym_x_y env = let*! args = S.callargs env "x" in let*! s, o = S.sym env "y" in !!![I.call args (Osym (s, o))] + let call_r_x_y env = + let*! args = S.callargs env "x" in + let*! y, yt = S.regvar env "y" in + !!![I.call args (Oreg (y, yt))] + let jmp_tbl_x_y env = let*! x, xt = S.regvar env "x" in let*! d, tbl = S.table env "y" in @@ -1954,6 +1968,10 @@ end = struct let tbl, lowest, highest = adjust_table d tbl in let highest' = Int64.(highest - lowest) in let diff = Int64.(highest - highest') in + let diff_fits = fits_int32_pos diff in + let* tdiff = if diff_fits then !!None else + let+ r = C.Var.fresh >>| Rv.var GPR in + Some r in let* tl, tladdr = fresh_label_addr in let* tbase = C.Var.fresh >>| Rv.var GPR in let* tidx = C.Var.fresh >>| Rv.var GPR in @@ -1981,11 +1999,16 @@ end = struct I.mov (Oreg (tidx, xt)) (Oreg (x, xt)) ]; (* Subtract the difference from the index if needed. *) - ( if Int64.(diff = 1L) - then [I.dec (Oreg (tidx, `i64))] - else if Int64.(diff > 0L) - then [I.sub (Oreg (tidx, `i64)) (Oimm (diff, `i64))] - else [] + ( match diff with + | 0L -> [] + | 1L -> [I.dec (Oreg (tidx, `i64))] + | _ when diff_fits -> + [I.sub (Oreg (tidx, `i64)) (Oimm (diff, `i64))] + | _ -> + let tdiff = Option.value_exn tdiff in [ + I.mov (Oreg (tdiff, `i64)) (Oimm (diff, `i64)); + I.sub (Oreg (tidx, `i64)) (Oreg (tdiff, `i64)); + ] ); [ (* Compare against highest value. *) I.cmp (Oreg (tidx, `i64)) (Oimm (highest', `i64)); @@ -2217,7 +2240,7 @@ end = struct let zext = [ move_rr_x_y ~zx:true; - move_ri_x_y; + move_ri_x_y ~zx:true; ] let fext ty = [ @@ -2318,7 +2341,8 @@ end = struct ] let call = [ - call_sym_x; + call_sym_x_y; + call_r_x_y; ] end diff --git a/src/lib/machine/x86/x86_amd64_peephole.ml b/src/lib/machine/x86/x86_amd64_peephole.ml index d351eb7b..af7724dc 100644 --- a/src/lib/machine/x86/x86_amd64_peephole.ml +++ b/src/lib/machine/x86/x86_amd64_peephole.ml @@ -10,20 +10,22 @@ module Lset = Label.Tree_set let decomp i = Insn.label i, Insn.insn i -let map_insns fn t = - if Ltree.is_empty t then fn - else Func.map_blks fn ~f:(fun b -> - Blk.map_insns b ~f:(fun i -> - match Ltree.find t @@ Insn.label i with - | Some insn -> Insn.with_insn i insn - | None -> i)) - -let filter_not_in fn t = - if Lset.is_empty t then fn - else Func.map_blks fn ~f:(fun b -> - Blk.insns b |> Seq.filter ~f:(fun i -> - not @@ Lset.mem t @@ Insn.label i) |> - Seq.to_list |> Blk.with_insns b) +let map_insns changed fn t = + if Ltree.is_empty t then fn else + let () = changed := true in + Func.map_blks fn ~f:(fun b -> + Blk.map_insns b ~f:(fun i -> + match Ltree.find t @@ Insn.label i with + | Some insn -> Insn.with_insn i insn + | None -> i)) + +let filter_not_in changed fn t = + if Lset.is_empty t then fn else + let () = changed := true in + Func.map_blks fn ~f:(fun b -> + Blk.insns b |> Seq.filter ~f:(fun i -> + not @@ Lset.mem t @@ Insn.label i) |> + Seq.to_list |> Blk.with_insns b) (* Blocks that consist of a single instruction of the form: @@ -46,10 +48,10 @@ let collect_singles fn = | _ -> acc) (* Union-find with path compression. *) -let find_with_compression m l = +let find_with_compression changed m l = let parent l = Ltree.find !m l |> Option.value ~default:l in - let l = ref l in - let p = ref @@ parent !l in + let l = ref l and orig = l in + let p = ref @@ parent orig in while Label.(!l <> !p) do let g = parent !p in if Label.(g <> !p) then begin @@ -58,6 +60,7 @@ let find_with_compression m l = end; l := g done; + if Label.(!p <> orig) then changed := true; !p (* For blocks collected in the above analysis, thread them through to @@ -83,10 +86,10 @@ let find_with_compression m l = @3: ... *) -let jump_threading fn = +let jump_threading changed fn = let singles = collect_singles fn in if not @@ Label.Tree.is_empty singles then - let find = find_with_compression @@ ref singles in + let find = find_with_compression changed @@ ref singles in Func.map_blks fn ~f:(fun b -> Blk.map_insns b ~f:(fun i -> Insn.with_insn i @@ match Insn.insn i with @@ -96,8 +99,42 @@ let jump_threading fn = | insn -> insn)) else fn +let is_merge_candidate cfg b1 b2 = + match Seq.hd @@ Blk.insns ~rev:true b1 with + | Some i when is_barrier (Insn.insn i) -> false + | Some _ | None -> + let l1 = Blk.label b1 and l2 = Blk.label b2 in Seq.( + equal Label.equal (Cfg.Node.succs l1 cfg) (singleton l2) && + equal Label.equal (Cfg.Node.preds l2 cfg) (singleton l1)) + +(* find m l = None ==> no change + find m l = Some None ==> delete + find m l = Some (Some b) ==> replace *) +let collect_merge_blks fn = + let cfg = Cfg.create ~is_barrier ~dests fn in + let rec go m = function + | [] | [_] -> m + | b1 :: b2 :: rest when is_merge_candidate cfg b1 b2 -> + let label = Blk.label b1 in + let insns = Seq.(to_list @@ append (Blk.insns b1) (Blk.insns b2)) in + let b1' = Blk.create ~label ~insns in + let m = Ltree.add_exn m ~key:label ~data:(Some b1') in + let m = Ltree.add_exn m ~key:(Blk.label b2) ~data:None in + go m rest + | _ :: rest -> go m rest in + Func.blks fn |> Seq.to_list |> go Ltree.empty + +let merge_blks changed fn = + let m = collect_merge_blks fn in + if Ltree.is_empty m then fn else + let () = changed := true in + Func.blks fn |> Seq.filter_map ~f:(fun b -> + match Ltree.find m @@ Blk.label b with + | None -> Some b | Some b' -> b') |> + Seq.to_list |> Func.with_blks fn + (* Remove blocks that are not reachable from the entry block. *) -let remove_disjoint fn = +let remove_disjoint changed fn = let reachable = with_return @@ fun {return} -> let cfg = Cfg.create ~is_barrier ~dests fn in let start = Func.entry fn in @@ -106,9 +143,12 @@ let remove_disjoint fn = ~start_tree:(fun n s -> if Label.(n = start) then s else return s) ~enter_node:(fun _ n s -> Lset.add s n) in - Func.blks fn |> Seq.map ~f:Blk.label |> - Seq.filter ~f:(Fn.non @@ Lset.mem reachable) |> - Seq.to_list |> Func.remove_blks_exn fn + let dead = + Func.blks fn |> Seq.map ~f:Blk.label |> + Seq.filter ~f:(Fn.non @@ Lset.mem reachable) |> + Seq.to_list in + if not @@ List.is_empty dead then changed := true; + Func.remove_blks_exn fn dead (* Invert conditional branches based on the block layout. @@ -146,8 +186,8 @@ let collect_invert_branches afters fn = ~key:lb ~data:(JMP (Jlbl a)) | _ -> acc)) -let invert_branches afters fn = - map_insns fn @@ collect_invert_branches afters fn +let invert_branches changed afters fn = + map_insns changed fn @@ collect_invert_branches afters fn (* Eliminate useless unconditional jumps where a fallthrough would suffice. @@ -176,8 +216,8 @@ let collect_implicit_fallthroughs afters fn = Lset.add acc la | _ -> acc)) -let implicit_fallthroughs afters fn = - filter_not_in fn @@ collect_implicit_fallthroughs afters fn +let implicit_fallthroughs changed afters fn = + filter_not_in changed fn @@ collect_implicit_fallthroughs afters fn (* Deallocating the stack pointer followed by a LEAVE instruction is redundant. @@ -212,8 +252,8 @@ let collect_dealloc_stack_before_leave fn = | _ :: xs -> go acc xs in go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b) -let dealloc_stack_before_leave fn = - filter_not_in fn @@ collect_dealloc_stack_before_leave fn +let dealloc_stack_before_leave changed fn = + filter_not_in changed fn @@ collect_dealloc_stack_before_leave fn let collect_redundant_spill_after_reload fn = Func.blks fn |> Seq.fold ~init:Lset.empty ~f:(fun acc b -> @@ -229,8 +269,8 @@ let collect_redundant_spill_after_reload fn = | _ :: xs -> go acc xs in go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b) -let redundant_spill_after_reload fn = - filter_not_in fn @@ collect_redundant_spill_after_reload fn +let redundant_spill_after_reload changed fn = + filter_not_in changed fn @@ collect_redundant_spill_after_reload fn (* If we have a LEA of the same address as a subsequent load, then use the result of the LEA as the address for the load. @@ -252,8 +292,19 @@ let collect_reuse_lea fn = | _ :: xs -> go acc xs in go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b) -let reuse_lea fn = - map_insns fn @@ collect_reuse_lea fn +let reuse_lea changed fn = + map_insns changed fn @@ collect_reuse_lea fn + +let and_test_cc = function + | Ce | Cne | Cs | Cns -> true + | _ -> false + +let and_test_act = function + | Jcc (cc, _) + | One (SETcc cc, _) + | Two (CMOVcc cc, _, _) + -> and_test_cc cc + | _ -> false let collect_and_test fn = Func.blks fn |> Seq.fold ~init:Lset.empty ~f:(fun acc b -> @@ -261,17 +312,22 @@ let collect_and_test fn = | [] | [_] -> acc | (_, Two (AND, Oreg (r1, _), _)) :: (l, Two (TEST_, Oreg (r1', _), Oreg (r2', _))) - :: xs when Rv.(r1 = r1') && Rv.(r1 = r2') -> + :: (_, act) + :: xs when Rv.(r1 = r1') + && Rv.(r1 = r2') + && and_test_act act -> go (Lset.add acc l) xs | (_, Two (AND, Oreg (r1, _), _)) :: (l, Two (CMP, Oreg (r1', _), Oimm (0L, _))) - :: xs when Rv.equal r1 r1' -> + :: (_, act) + :: xs when Rv.equal r1 r1' + && and_test_act act -> go (Lset.add acc l) xs | _ :: xs -> go acc xs in go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b) -let and_test fn = - filter_not_in fn @@ collect_and_test fn +let and_test changed fn = + filter_not_in changed fn @@ collect_and_test fn let immty = function | #Type.imm as imm -> imm @@ -293,8 +349,8 @@ let collect_lea_mov fn = | _ :: xs -> go acc xs in go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b) -let lea_mov fn = - map_insns fn @@ collect_lea_mov fn +let lea_mov changed fn = + map_insns changed fn @@ collect_lea_mov fn (* TODO: fill me in *) let combinable_binop = function @@ -308,6 +364,15 @@ let combinable_binop = function | CMP -> true | _ -> false +(* Combine with unary ops that don't write to their operand. *) +let combinable_unop = function + | CALL _ + | DIV + | IDIV + | IMUL1 + | MUL -> true + | _ -> false + let rset_mem' o l = List.exists l ~f:(Set.mem @@ rset [o]) let collect_mov_op fn = @@ -331,11 +396,18 @@ let collect_mov_op fn = && not (Set.mem (rset [o]) r1) -> let i = Two (op, Oreg (r1', r1t), o) in go (Ltree.set acc ~key:l ~data:i) xs + | (_, Two (MOV, Oreg (r1, _), o)) + :: (l, One (op, Oreg (r2', _))) + :: xs when combinable_unop op + && Rv.(r1 = r2') + && not (Set.mem (rset [o]) r1) -> + let i = One (op, o) in + go (Ltree.set acc ~key:l ~data:i) xs | _ :: xs -> go acc xs in go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b) -let mov_op fn = - map_insns fn @@ collect_mov_op fn +let mov_op changed fn = + map_insns changed fn @@ collect_mov_op fn let collect_mov_to_store fn = Func.blks fn |> Seq.fold ~init:Ltree.empty ~f:(fun acc b -> @@ -350,20 +422,29 @@ let collect_mov_to_store fn = | _ :: xs -> go acc xs in go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b) -let mov_to_store fn = - map_insns fn @@ collect_mov_to_store fn +let mov_to_store changed fn = + map_insns changed fn @@ collect_mov_to_store fn + +let max_rounds = 5 let run fn = - let fn = jump_threading fn in - let fn = remove_disjoint fn in - let afters = Func.collect_afters fn in - let fn = invert_branches afters fn in - let fn = implicit_fallthroughs afters fn in - let fn = dealloc_stack_before_leave fn in - let fn = redundant_spill_after_reload fn in - let fn = reuse_lea fn in - let fn = and_test fn in - let fn = lea_mov fn in - let fn = mov_op fn in - let fn = mov_to_store fn in - fn + let rec loop i fn = + if i > max_rounds then fn else + let () = Logs.debug (fun m -> + m "%s: peephole round %d%!" __FUNCTION__ i) in + let changed = ref false in + let fn = jump_threading changed fn in + let fn = remove_disjoint changed fn in + let fn = merge_blks changed fn in + let afters = Func.collect_afters fn in + let fn = invert_branches changed afters fn in + let fn = implicit_fallthroughs changed afters fn in + let fn = dealloc_stack_before_leave changed fn in + let fn = redundant_spill_after_reload changed fn in + let fn = reuse_lea changed fn in + let fn = and_test changed fn in + let fn = lea_mov changed fn in + let fn = mov_op changed fn in + let fn = mov_to_store changed fn in + if !changed then loop (i + 1) fn else fn in + loop 1 fn diff --git a/src/lib/machine/x86/x86_amd64_regalloc.ml b/src/lib/machine/x86/x86_amd64_regalloc.ml index a0998ab4..ac44aad1 100644 --- a/src/lib/machine/x86/x86_amd64_regalloc.ml +++ b/src/lib/machine/x86/x86_amd64_regalloc.ml @@ -100,46 +100,46 @@ let substitute_operand f = function | Osym _ as s -> s | Oah -> Oah +(* Attempt some peephole optimizations after the substitution. + XXX: what if the FLAGS register is live? *) +let substitute_binop' o a b op = match o, op a, op b with + | LEA, Oreg (x, ty), Omem (Abd (y, d), _) when Regvar.(x = y) -> + (* lea x, [x+d] => add x, d or sub x, -d *) + if Int32.(d < 0l) then + let d = Int32.neg d in + let d' = Int64.(of_int32 d land 0xFFFFFFFFL) in + if Int64.(d' = 1L) then + One (DEC, Oreg (x, ty)) + else + Two (SUB, Oreg (x, ty), Oimm (d', immty ty)) + else + let d' = Int64.(of_int32 d land 0xFFFFFFFFL) in + if Int64.(d' = 1L) then + One (INC, Oreg (x, ty)) + else + Two (ADD, Oreg (x, ty), Oimm (d', immty ty)) + | LEA, Oreg (x, ty), Omem (Abis (y, z, S1), _) when Regvar.(x = y) -> + (* lea x, [x+y*1] => add x, y *) + Two (ADD, Oreg (x, ty), Oreg (z, ty)) + | LEA, Oreg (x, ty), Omem (Abis (y, z, S1), _) when Regvar.(x = z) -> + (* lea x, [y+x*1] => add x, y *) + Two (ADD, Oreg (x, ty), Oreg (y, ty)) + | LEA, Oreg (x, ty), Omem (Aisd (y, S2, 0l), _) when Regvar.(x = y) -> + (* lea x, [x*2] => shl x, 1 *) + Two (SHL, Oreg (x, ty), Oimm (1L, `i8)) + | LEA, Oreg (x, ty), Omem (Aisd (y, S4, 0l), _) when Regvar.(x = y) -> + (* lea x, [x*4] => shl x, 2 *) + Two (SHL, Oreg (x, ty), Oimm (2L, `i8)) + | LEA, Oreg (x, ty), Omem (Aisd (y, S8, 0l), _) when Regvar.(x = y) -> + (* lea x, [x*8] => shl x, 3 *) + Two (SHL, Oreg (x, ty), Oimm (3L, `i8)) + | o, a, b -> + (* Default case. *) + Two (o, a, b) + let substitute' i op = match i with | One (o, a) -> One (o, op a) - | Two (o, a, b) -> - (* Attempt some peephole optimizations after the substitution. - XXX: what if the FLAGS register is live? *) - begin match o, op a, op b with - | LEA, Oreg (x, ty), Omem (Abd (y, d), _) when Regvar.(x = y) -> - (* lea x, [x+d] => add x, d or sub x, -d *) - if Int32.(d < 0l) then - let d = Int32.neg d in - let d' = Int64.(of_int32 d land 0xFFFFFFFFL) in - if Int64.(d' = 1L) then - One (DEC, Oreg (x, ty)) - else - Two (SUB, Oreg (x, ty), Oimm (d', immty ty)) - else - let d' = Int64.(of_int32 d land 0xFFFFFFFFL) in - if Int64.(d' = 1L) then - One (INC, Oreg (x, ty)) - else - Two (ADD, Oreg (x, ty), Oimm (d', immty ty)) - | LEA, Oreg (x, ty), Omem (Abis (y, z, S1), _) when Regvar.(x = y) -> - (* lea x, [x+y*1] => add x, y *) - Two (ADD, Oreg (x, ty), Oreg (z, ty)) - | LEA, Oreg (x, ty), Omem (Abis (y, z, S1), _) when Regvar.(x = z) -> - (* lea x, [y+x*1] => add x, y *) - Two (ADD, Oreg (x, ty), Oreg (y, ty)) - | LEA, Oreg (x, ty), Omem (Aisd (y, S2, 0l), _) when Regvar.(x = y) -> - (* lea x, [x*2] => shl x, 1 *) - Two (SHL, Oreg (x, ty), Oimm (1L, `i8)) - | LEA, Oreg (x, ty), Omem (Aisd (y, S4, 0l), _) when Regvar.(x = y) -> - (* lea x, [x*4] => shl x, 2 *) - Two (SHL, Oreg (x, ty), Oimm (2L, `i8)) - | LEA, Oreg (x, ty), Omem (Aisd (y, S8, 0l), _) when Regvar.(x = y) -> - (* lea x, [x*8] => shl x, 3 *) - Two (SHL, Oreg (x, ty), Oimm (3L, `i8)) - | o, a, b -> - (* Default case. *) - Two (o, a, b) - end + | Two (o, a, b) -> substitute_binop' o a b op | IMUL3 (a, b, c) -> IMUL3 (op a, op b, c) | JMP (Jind a) -> JMP (Jind (op a)) | CDQ @@ -203,88 +203,90 @@ module Typed_writes = struct Regvar.Map.of_alist_reduce ~f:reduce @@ List.map l ~f:(fun (r, t) -> Regvar.reg r, t) - (* Registers written to by an instruction. *) - let writes call = function - | Two (o, a, _) -> - begin match o with - | ADD - | ADDSD - | ADDSS - | AND - | BSF - | BSR - | CMOVcc _ - | CVTSD2SI - | CVTSD2SS - | CVTSI2SD - | CVTSI2SS - | CVTSS2SD - | CVTSS2SI - | DIVSD - | DIVSS - | IMUL2 - | LEA - | MOV - | MOV_ - | MOVD - | MOVDQA - | MOVQ - | MOVSD - | MOVSS - | MOVSX - | MOVSXD - | MOVZX - | MULSD - | MULSS - | OR - | POPCNT - | ROL - | ROR - | SAR - | SHL - | SHR - | SUB - | SUBSD - | SUBSS - | XOR - | XORPD - | XORPS - -> rmap_reg [a] - | CMP - | TEST_ - | UCOMISD - | UCOMISS - -> Regvar.Map.empty - end - | One (o, a) -> - begin match o with - | CALL _ - -> call - | DEC - | INC - | NEG - | NOT - | SETcc _ - | POP - -> rmap_reg [a] - | PUSH + let binop_writes o a _b = match o with + | ADD + | ADDSD + | ADDSS + | AND + | BSF + | BSR + | CMOVcc _ + | CVTSD2SI + | CVTTSD2SI + | CVTSD2SS + | CVTSI2SD + | CVTSI2SS + | CVTSS2SD + | CVTSS2SI + | CVTTSS2SI + | DIVSD + | DIVSS + | IMUL2 + | LEA + | MOV + | MOV_ + | MOVD + | MOVDQA + | MOVQ + | MOVSD + | MOVSS + | MOVSX + | MOVSXD + | MOVZX + | MULSD + | MULSS + | OR + | POPCNT + | ROL + | ROR + | SAR + | SHL + | SHR + | SUB + | SUBSD + | SUBSS + | XOR + | XORPD + | XORPS + -> rmap_reg [a] + | CMP + | TEST_ + | UCOMISD + | UCOMISS + -> Regvar.Map.empty + + let unop_writes call o a = match o with + | CALL _ + -> call + | DEC + | INC + | NEG + | NOT + | SETcc _ + | POP + -> rmap_reg [a] + | PUSH + -> Regvar.Map.empty + | DIV + | IDIV + | IMUL1 + | MUL -> + begin match a with + | Oreg (_, `i8) + -> rmap' [`rax, `i8] + | Oreg (_, t) + -> rmap' [`rax, wty t; `rdx, wty t] + | Omem (_, t) + -> rmap' [`rax, wty t; `rdx, wty t] + | _ + (* invalid forms *) -> Regvar.Map.empty - | DIV - | IDIV - | IMUL1 - | MUL -> - begin match a with - | Oreg (_, `i8) - -> rmap' [`rax, `i8] - | Oreg (_, t) - -> rmap' [`rax, wty t; `rdx, wty t] - | Omem (_, t) - -> rmap' [`rax, wty t; `rdx, wty t] - | _ - (* invalid forms *) - -> Regvar.Map.empty - end end + + (* Registers written to by an instruction. *) + let writes call = function + | Two (o, a, b) -> binop_writes o a b + | One (o, a) -> unop_writes call o a | IMUL3 (a, _, _) -> rmap_reg [a] | Jcc _ @@ -318,65 +320,65 @@ module Pre_assign_slots(C : Context_intf.S) = struct let off' = Int32.of_int_exn off in let d' = Int32.(d + off') in if (off > 0 && Int32.(d' < d)) || - (off < 0 && Int32.(d' > d)) then begin + (off < 0 && Int32.(d' > d)) then let+ x, is = freshen base off' in Second (x, is) - end else !!(First d') + else !!(First d') + + let assign_ab find base a b = match find b with + | Some 0 -> !!(Ab base, []) + | Some o -> !!(Abd (base, Int32.of_int_exn o), []) + | None -> !!(a, []) + + let assign_abd find base a b d = match find b with + | None -> !!(a, []) + | Some o -> add_disp base o d >>| function + | First d' -> Abd (base, d'), [] + | Second (b', bi) -> Abd (b', d), bi + + let assign_abis find base a b i s = match find b, find i with + | None, None -> !!(a, []) + | Some 0, None -> !!(Abis (base, i, s), []) + | Some o, None -> !!(Abisd (base, i, s, Int32.of_int_exn o), []) + | None, Some o -> + let+ i', ii = freshen base (Int32.of_int_exn o) in + Abis (b, i', s), ii + | Some ob, Some oi -> + let+ i', ii = freshen base (Int32.of_int_exn oi) in + Abisd (base, i', s, Int32.of_int_exn ob), ii + + let assign_aisd find base a i s d = match find i with + | None -> !!(a, []) + | Some o -> + let+ i', ii = freshen base (Int32.of_int_exn o) in + Aisd (i', s, d), ii let rec assign_amode find base a = match a with | Albl _ | Asym _ -> !!(a, []) - | Ab b -> - begin match find b with - | Some 0 -> !!(Ab base, []) - | Some o -> !!(Abd (base, Int32.of_int_exn o), []) - | None -> !!(a, []) - end - | Abd (b, d) -> - begin match find b with - | None -> !!(a, []) - | Some o -> add_disp base o d >>| function - | First d' -> Abd (base, d'), [] - | Second (b', bi) -> Abd (b', d), bi - end - | Abis (b, i, s) -> - begin match find b, find i with - | None, None -> !!(a, []) - | Some 0, None -> !!(Abis (base, i, s), []) - | Some o, None -> !!(Abisd (base, i, s, Int32.of_int_exn o), []) - | None, Some o -> - let+ i', ii = freshen base (Int32.of_int_exn o) in - Abis (b, i', s), ii - | Some ob, Some oi -> - let+ i', ii = freshen base (Int32.of_int_exn oi) in - Abisd (base, i', s, Int32.of_int_exn ob), ii - end + | Ab b -> assign_ab find base a b + | Abd (b, d) -> assign_abd find base a b d + | Abis (b, i, s) -> assign_abis find base a b i s | Aisd (i, S1, d) -> assign_amode find base @@ Abd (i, d) - | Aisd (i, s, d) -> - begin match find i with - | None -> !!(a, []) - | Some o -> - let+ i', ii = freshen base (Int32.of_int_exn o) in - Aisd (i', s, d), ii - end - | Abisd (b, i, s, d) -> - begin match find b, find i with - | None, None -> !!(a, []) - | None, Some _ when equal_scale s S1 -> - assign_amode find base @@ Abisd (i, b, S1, d) - | None, Some o -> - let+ i', ii = freshen base (Int32.of_int_exn o) in - Abisd (b, i', s, d), ii - | Some o, None -> - begin add_disp base o d >>| function - | First d' -> Abisd (base, i, s, d'), [] - | Second (b', bi) -> Abisd (b', i, s, d), bi - end - | Some ob, Some oi -> - let* i', ii = freshen base (Int32.of_int_exn oi) in - add_disp base ob d >>| function - | First d' -> Abisd (base, i', s, d'), ii - | Second (b', bi) -> Abisd (b', i', s, d), bi @ ii + | Aisd (i, s, d) -> assign_aisd find base a i s d + | Abisd (b, i, s, d) -> assign_abisd find base a b i s d + + and assign_abisd find base a b i s d = match find b, find i with + | None, None -> !!(a, []) + | None, Some _ when equal_scale s S1 -> + assign_amode find base @@ Abisd (i, b, S1, d) + | None, Some o -> + let+ i', ii = freshen base (Int32.of_int_exn o) in + Abisd (b, i', s, d), ii + | Some o, None -> + begin add_disp base o d >>| function + | First d' -> Abisd (base, i, s, d'), [] + | Second (b', bi) -> Abisd (b', i, s, d), bi end + | Some ob, Some oi -> + let* i', ii = freshen base (Int32.of_int_exn oi) in + add_disp base ob d >>| function + | First d' -> Abisd (base, i', s, d'), ii + | Second (b', bi) -> Abisd (b', i', s, d), bi @ ii let assign_operand find base op = match op with | Oreg (r, t) -> @@ -428,41 +430,41 @@ module Pre_assign_slots(C : Context_intf.S) = struct | FP64 _ -> !![i] end +(* NB: this makes assumptions based on the results of `Pre_assign_slots`. *) module Post_assign_slots = struct - (* NB: this makes assumptions based on the results of `Pre_assign_slots`. *) + (* All spills/reloads should be using this form. *) + let assign_ab find base a b = match find b with + | Some 0 -> Ab base + | Some o -> Abd (base, Int32.of_int_exn o) + | None -> a + + let assign_abd find _base a b _d = match find b with + | None -> a + | Some _ -> assert false + + let assign_abis find _base a b i _s = match find b, find i with + | None, None -> a + | Some _, None -> assert false + | None, Some _ -> assert false + | Some _, Some _ -> assert false + + let assign_aisd find _base a i _s _d = match find i with + | None -> a + | Some _ -> assert false + + let assign_abisd find _base a b i _s _d = match find b, find i with + | None, None -> a + | None, Some _ -> assert false + | Some _, Some _ -> assert false + | Some _, None -> assert false + let assign_amode find base a = match a with | Albl _ | Asym _ -> a - | Ab b -> - (* All spills/reloads should be using this form. *) - begin match find b with - | Some 0 -> Ab base - | Some o -> Abd (base, Int32.of_int_exn o) - | None -> a - end - | Abd (b, _d) -> - begin match find b with - | None -> a - | Some _ -> assert false - end - | Abis (b, i, _s) -> - begin match find b, find i with - | None, None -> a - | Some _, None -> assert false - | None, Some _ -> assert false - | Some _, Some _ -> assert false - end - | Aisd (i, _s, _d) -> - begin match find i with - | None -> a - | Some _ -> assert false - end - | Abisd (b, i, _s, _d) -> - begin match find b, find i with - | None, None -> a - | None, Some _ -> assert false - | Some _, Some _ -> assert false - | Some _, None -> assert false - end + | Ab b -> assign_ab find base a b + | Abd (b, d) -> assign_abd find base a b d + | Abis (b, i, s) -> assign_abis find base a b i s + | Aisd (i, s, d) -> assign_aisd find base a i s d + | Abisd (b, i, s, d) -> assign_abisd find base a b i s d let assign_operand find base op = match op with | Oreg (r, _) -> diff --git a/src/lib/matcher.ml b/src/lib/matcher.ml index 71af273e..be56c51d 100644 --- a/src/lib/matcher.ml +++ b/src/lib/matcher.ml @@ -260,12 +260,15 @@ module Make(M : L) = struct end type 'a program = { + name : string; rule : (pat * 'a) array; code : insn Vec.t; root : (op, label) Hashtbl.t; rmin : int array; } + let name p = p.name + let pp_program ppf p = Vec.iteri p.code ~f:(fun l i -> Format.fprintf ppf "@%d: %a\n" l pp_insn i) @@ -490,7 +493,8 @@ module Make(M : L) = struct | [] -> failwithf "compile_tree: empty sequence at rule %d" i () - let compile ?(commute = false) rules = + let compile ?(commute = false) ~name rules = + let[@alert "-deprecated"] t0 = Time.now () in let rule = Array.of_list rules in let code = Vec.create () in let rmin, root = match rules with @@ -506,7 +510,12 @@ module Make(M : L) = struct compile_tree forest i pat); let root = Hashtbl.map forest ~f:(linearize code) in compute_rmin code, root in - {rule; code; root; rmin} + let[@alert "-deprecated"] t = Time.now () in + let[@alert "-deprecated"] elapsed = Time.(Span.to_sec (diff t t0)) in + Logs.debug (fun m -> + m "%s: ruleset %s: compiled %d rules to %d instructions in %g seconds, commute=%b%!" + __FUNCTION__ name (Array.length rule) (Vec.length code) elapsed commute); + {name; rule; code; root; rmin} end let compile = Compiler.compile diff --git a/src/lib/matcher.mli b/src/lib/matcher.mli index 3f78c262..c31cb7b8 100644 --- a/src/lib/matcher.mli +++ b/src/lib/matcher.mli @@ -72,6 +72,9 @@ module Make(M : L) : sig (** A compiled VM program. *) type 'a program + (** The name of the ruleset that produced the program. *) + val name : 'a program -> string + (** Pretty-print the program. *) val pp_program : Format.formatter -> 'a program -> unit @@ -89,7 +92,7 @@ module Make(M : L) : sig and the [VM] (see below) will prioritize yielding matches in this order. *) - val compile : ?commute:bool -> (pat * 'a) list -> 'a program + val compile : ?commute:bool -> name:string -> (pat * 'a) list -> 'a program (** Returns [true] is the program is empty. *) val is_empty : 'a program -> bool diff --git a/src/lib/passes/abi_loadopt.ml b/src/lib/passes/abi_loadopt.ml index 8c6041b9..58bf4b09 100644 --- a/src/lib/passes/abi_loadopt.ml +++ b/src/lib/passes/abi_loadopt.ml @@ -73,6 +73,7 @@ type t = { blks : Abi.blk Label.Table.t; mems : store Mem.Table.t; vars : operand Var.Table.t; + nop : Label.Hash_set.t; mutable mem : Label.t option; mutable memo : operand Hashcons.t; } @@ -107,8 +108,9 @@ let init fn = let blks = Label.Table.create () in let mems = Mem.Table.create () in let vars = Var.Table.create () in + let nop = Label.Hash_set.create () in let mem = None and memo = Hashcons.empty in - {reso; dom; rdom; lst; blks; mems; mem; vars; memo} + {reso; dom; rdom; lst; blks; mems; mem; vars; memo; nop} module Optimize = struct let var t x = match Hashtbl.find t.vars x with @@ -151,10 +153,20 @@ module Optimize = struct | _ -> `var x in t.memo <- Hashcons.set t.memo ~key:op ~data + let same_store t v v' l l' = + equal_operand v v' && t.rdom ~parent:l' l + let store t l ty v a = let v = operand t v in let a = operand t a in let key = {label = l; addr = a; ty} in + (* Mark redundant stores only if we can prove that + it is storing the same value to the same address. *) + Option.iter t.mem ~f:(fun m -> + match Hashtbl.find t.mems {key with label = m} with + | Some Value (v', l') when same_store t v v' l l' -> + Hash_set.add t.nop l + | Some _ | None -> ()); Hashtbl.set t.mems ~key ~data:(Value (v, l)); t.mem <- Some l; `store (ty, v, a) @@ -203,17 +215,55 @@ module Optimize = struct t.mem <- Some l; `call (xs, f, args) + let eval_bop o a b = match a, b with + | `int (a, _), `int (b, _) -> + (Abi.Eval.binop_int o a b :> const option) + | `float a, `float b -> + (Abi.Eval.binop_single o a b :> const option) + | `double a, `double b -> + (Abi.Eval.binop_double o a b :> const option) + | _ -> None + + let eval_uop o = function + | `int (a, ty) -> + (Abi.Eval.unop_int o a ty :> const option) + | `float a -> + (Abi.Eval.unop_single o a :> const option) + | `double a -> + (Abi.Eval.unop_double o a :> const option) + | _ -> None + + let bop t l x o a b = + let a' = operand t a in + let b' = operand t b in + let op = `bop (x, o, a', b') in + begin match eval_bop o a' b' with + | None -> + let k = Op.of_insn op in + canonicalize t x k; + Op.commute k |> Option.iter ~f:(canonicalize t x) + | Some c -> + Hashtbl.set t.vars ~key:x ~data:(c :> operand); + Hash_set.add t.nop l + end; + op + + let uop t l x o a = + let a' = operand t a in + let op = `uop (x, o, a') in + begin match eval_uop o a' with + | None -> + let k = Op.of_insn op in + canonicalize t x k + | Some c -> + Hashtbl.set t.vars ~key:x ~data:(c :> operand); + Hash_set.add t.nop l + end; + op + let insn t l : Abi.Insn.op -> Abi.Insn.op = function - | `bop (x, o, a, b) -> - let op = `bop (x, o, operand t a, operand t b) in - let k = Op.of_insn op in - canonicalize t x k; - Op.commute k |> Option.iter ~f:(canonicalize t x); - op - | `uop (x, o, a) -> - let op = `uop (x, o, operand t a) in - canonicalize t x @@ Op.of_insn op; - op + | `bop (x, o, a, b) -> bop t l x o a b + | `uop (x, o, a) -> uop t l x o a | `sel (x, ty, c, y, n) -> sel t x ty c y n | `call (xs, f, args) -> call t l xs f args | `store (ty, v, a) -> store t l ty v a @@ -279,8 +329,13 @@ let run fn = Semi_nca.Tree.children t.dom l |> Seq.iter ~f:(fun l -> Stack.push q (l, t.mem, t.memo))); Abi.Func.map_blks fn ~f:(fun b -> - Abi.Blk.label b |> Hashtbl.find t.blks |> - Option.value ~default:b) + let b = + Abi.Blk.label b |> Hashtbl.find t.blks |> + Option.value ~default:b in + if Hash_set.is_empty t.nop then b else + Abi.Blk.insns b |> Seq.filter ~f:(fun i -> + not @@ Hash_set.mem t.nop @@ Abi.Insn.label i) |> + Seq.to_list |> Abi.Blk.with_insns b) else E.failf "In Abi_loadopt: function $%s is not in SSA form" (Abi.Func.name fn) () diff --git a/src/lib/passes/coalesce_slots/coalesce_slots.ml b/src/lib/passes/coalesce_slots/coalesce_slots.ml new file mode 100644 index 00000000..a8a510cc --- /dev/null +++ b/src/lib/passes/coalesce_slots/coalesce_slots.ml @@ -0,0 +1,54 @@ +open Core +open Monads.Std +open Regular.Std +open Virtual +open Scalars +open Coalesce_slots_impl +open Scalars_common + +module E = Monad.Result.Error +module V = Make(VL) +module A = Make(AL) + +open E.Let + +let check_ssa msg n d = + if Dict.mem d Tags.ssa then Ok () + else E.failf "In Coalesce_slots%s: function $%s is not in SSA form" msg n () + +let apply t fn map_blks map_blk insns with_insns with_ctrl label = + let not_dead = not @. Lset.mem t.deads @. label in + if is_empty t then fn else + let f = if Map.is_empty t.subst then fun b -> + insns b |> Seq.filter ~f:not_dead |> + Seq.to_list |> with_insns b + else fun b -> + let is, c = map_blk t.subst b in + let is = if Lset.is_empty t.deads then is + else List.filter is ~f:not_dead in + with_ctrl (with_insns b is) c in + map_blks fn ~f +[@@specialise] + +let run fn = + let+ () = check_ssa "" (Func.name fn) (Func.dict fn) in + let t = V.run fn in + apply t fn + Func.map_blks + Subst_mapper.map_blk + Blk.insns + Blk.with_insns + Blk.with_ctrl + Insn.label + +let run_abi fn = + let open Abi in + let+ () = check_ssa " (ABI)" (Func.name fn) (Func.dict fn) in + let t = A.run fn in + apply t fn + Func.map_blks + Subst_mapper_abi.map_blk + Blk.insns + Blk.with_insns + Blk.with_ctrl + Insn.label diff --git a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml new file mode 100644 index 00000000..533e77b3 --- /dev/null +++ b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml @@ -0,0 +1,338 @@ +open Core +open Regular.Std +open Graphlib.Std +open Scalars + +module Ltree = Label.Tree +module Lset = Label.Tree_set +module Slot = Virtual.Slot +module Allen = Allen_interval_algebra + +type tag = Def | Use | Both [@@deriving compare, equal, sexp] + +let pp_tag ppf = function + | Def -> Format.fprintf ppf "def" + | Use -> Format.fprintf ppf "use" + | Both -> Format.fprintf ppf "both" + +let join_tag a b = if equal_tag a b then a else Both + +type range = { + lo : int; + hi : int; + tg : tag; +} [@@deriving compare, equal, sexp] + +module Range = struct + type t = range [@@deriving compare, equal, sexp] + + let pp ppf r = Format.fprintf ppf "%a[%d, %d]" pp_tag r.tg r.lo r.hi + + let bad = {lo = Int.min_value; hi = Int.max_value; tg = Both} + let is_bad = equal bad + + let singleton n = {lo = n; hi = n; tg = Def} + let size r = r.hi - r.lo + + let distance x y = + if x.hi < y.lo then y.lo - x.hi + else if x.lo > y.hi then x.lo - y.hi + else 0 + [@@ocaml.warning "-32"] + + (* Extend the upper-bound on the live range. *) + let use r n = { + r with + hi = Int.max r.hi n; + tg = join_tag r.tg Use; + } + + (* Shrink the lower-bound on the live range. + + Also, a defintion counts as a use, because we need to + reference the slot, so extend the upper-bound. + *) + let def r n = { + lo = Int.min r.lo n; + hi = Int.max r.hi n; + tg = join_tag r.tg Def; + } + + module Algebra = Allen.Make(struct + type point = int + type nonrec t = t + let lo t = t.lo [@@inline] + let hi t = t.hi [@@inline] + include Int.Replace_polymorphic_compare + end) +end + +type candidate = { + var : Var.t; + size : int; + align : int; + range : range; + mutable adj : Var.Set.t; + mutable assigned : bool; +} + +let create_candidate slots rs v = + let slot = Map.find_exn slots v in { + var = v; + size = Slot.size slot; + align = Slot.align slot; + range = Map.find_exn rs v; + adj = Var.Set.empty; + assigned = false; + } + +let compat_size_align x y = + (* The smaller slot must not have a higher alignment. *) + not ((x.size < y.size && x.align > y.align) || + (y.size < x.size && y.align > x.align)) + +(* Find compatible slots. Most importantly, their live ranges must + not interfere. *) +let compat_range x y = + compat_size_align x y && + let a : Allen.t = Range.Algebra.relate x.range y.range in + Logs.debug (fun m -> + m "%s: %a, %a: %a%!" __FUNCTION__ + Var.pp x.var Var.pp y.var Allen.pp a); + match a with + | Before | After -> true + | _ -> false + +let range_priority x y = + (* Prefer shorter live ranges. *) + let c = Int.compare (Range.size x.range) (Range.size y.range) in + if c = 0 then + (* Break ties by comparing on the var. This + is to give a bit more determinism to the + algorithm. *) + Var.compare x.var y.var + else c + +let size_priority x y = + (* Assuming that the sizes and alignments are compatible, + just pick the biggest one. *) + match Int.compare x.size y.size with + | 0 -> Int.compare x.align y.align + | c -> c + +let candidates slots rs = + let vs = Vec.create ~capacity:(Map.length rs) () in + Map.to_sequence rs |> + (* Do not consider escapees. This would mess up + our heuristics for building the groups. *) + Seq.filter ~f:(not @. Range.is_bad @. snd) |> + Seq.map ~f:(create_candidate slots rs @. fst) |> + Seq.iter ~f:(Vec.push vs); + vs + +let is_subset g y = List.for_all g ~f:(fun x -> Set.mem y.adj x.var) + +(* Greedy partitioning algorithm. *) +let partition slots rs = + let vs = candidates slots rs in + match Vec.length vs with + | 0 -> [] + | 1 -> [[Vec.front_exn vs]] + | len -> + assert (len > 1); + let gs = ref [] in + (* Compute the adjacency sets. *) + for i = 0 to len - 1 do + let x = Vec.unsafe_get vs i in + for j = 0 to len - 1 do + if i <> j then + let y = Vec.unsafe_get vs j in + if compat_range x y then + x.adj <- Set.add x.adj y.var + done + done; + (* Use an ascending order. *) + Vec.sort vs ~compare:(fun x y -> range_priority y x); + while not @@ Vec.is_empty vs do + let x = Vec.pop_exn vs in + if not x.assigned then + let () = x.assigned <- true in + Logs.debug (fun m -> + m "%s: processing %a%!" + __FUNCTION__ Var.pp x.var); + let g = ref [x] in + for i = Vec.length vs - 1 downto 0 do + let y = Vec.unsafe_get vs i in + if not y.assigned && is_subset !g y then + let () = y.assigned <- true in + g := y :: !g + done; + gs := !g :: !gs + done; + !gs + +(* invariant: a group is never empty *) +let canon_elt g = List.max_elt g ~compare:size_priority |> Option.value_exn + +let make_subst _slots p = + List.fold p ~init:Var.Map.empty + ~f:(fun init -> function + | [] | [_] -> init + | g -> + let canon = canon_elt g in + let data = `var canon.var in + List.fold g ~init ~f:(fun acc x -> + if Var.(x.var = canon.var) then acc + else Map.set acc ~key:x.var ~data)) + +type t = { + subst : Subst_mapper.t; (* Map from coalesced to canonical slots *) + deads : Lset.t; (* Stores to dead slots. *) +} + +let empty = { + subst = Var.Map.empty; + deads = Lset.empty; +} + +let is_empty t = + Map.is_empty t.subst && Lset.is_empty t.deads + +module Make(M : Scalars.L) = struct + open M + + module Sinit = Slot_initialization.Make(M) + + let mkdef s x n = Map.update s x ~f:(function + | None -> Range.singleton n + | Some r -> Range.def r n) + + let mkuse s x n = Map.change s x ~f:(function + | Some r -> Some (Range.use r n) + | None -> None) + + let update (si : Slot_initialization.t) acc s x n l ldst = + match Map.find s x with + | None -> acc + | Some Top -> Map.set acc ~key:x ~data:Range.bad + | Some Offset (base, _) -> match ldst with + | Some Store -> mkdef acc base n + | Some Load when Hash_set.mem si.bad l -> + (* Uninitialized load is UB: forbid this slot as + a candidate for coalescing. *) + Logs.debug (fun m -> + m "%s: uninitialized load at %a from %a: marking bad%!" + __FUNCTION__ Label.pp l Var.pp base); + Map.set acc ~key:base ~data:Range.bad + | Some Load | None -> mkuse acc base n + + let liveness_insn si acc s ip i = + let l = Insn.label i and op = Insn.op i in + let r = Insn.free_vars op in + let r, w, ldst = match Insn.load_or_store_to op with + | None -> r, None, None + | Some (ptr, _, ldst) -> + Set.remove r ptr, Some ptr, Some ldst in + Option.fold w ~init:acc ~f:(fun acc x -> + update si acc s x ip l ldst) |> fun init -> + Set.fold r ~init ~f:(fun acc x -> + update si acc s x ip l None) + + let liveness_ctrl si acc s ip l c = + Ctrl.free_vars c |> Set.fold ~init:acc + ~f:(fun acc x -> update si acc s x ip l None) + + let liveness cfg blks slots t si = + let ip = ref 0 in + let nums = Vec.create () in + let init = + Hash_set.fold t.esc ~init:Var.Map.empty + ~f:(fun acc x -> Map.set acc ~key:x ~data:Range.bad) in + let acc = + Graphlib.reverse_postorder_traverse + (module Cfg) ~start:Label.pseudoentry cfg |> + Seq.filter_map ~f:(Ltree.find blks) |> + Seq.fold ~init ~f:(fun acc b -> + let l = Blk.label b in + let s = ref @@ get t l in + let acc = Blk.insns b |> Seq.fold ~init:acc ~f:(fun acc i -> + let op = Insn.op i in + let acc = liveness_insn si acc !s !ip i in + Vec.push nums (Insn.label i); + s := Sinit.S.transfer_op slots !s op; + incr ip; + acc) in + let acc = liveness_ctrl si acc !s !ip l @@ Blk.ctrl b in + Vec.push nums l; + incr ip; + acc) in + acc, nums + + let collect_deads blks slots rs t = + Ltree.fold blks ~init:Lset.empty + ~f:(fun ~key ~data:b init -> + let s = ref @@ get t key in + Blk.insns b |> Seq.fold ~init ~f:(fun acc i -> + let op = Insn.op i in + let acc = match Insn.load_or_store_to op with + | Some (ptr, _, Store) -> + begin match Map.find !s ptr with + | Some Offset (base, _) -> + begin match Map.find rs base with + | Some {tg = Def; _} -> + (* This slot is only ever stored to, so we can + safely remove it. *) + Lset.add acc @@ Insn.label i + | _ -> acc + end + | _ -> acc + end + | _ -> acc in + s := Sinit.S.transfer_op slots !s op; + acc)) + + let debug_show slots rs nums deads p subst = + Logs.debug (fun m -> + Map.iter_keys slots ~f:(fun x -> + let ppr ppf x = match Map.find rs x with + | None -> Format.fprintf ppf "none" + | Some r when Range.is_bad r -> + Format.fprintf ppf "bad" + | Some r -> + Format.fprintf ppf "%a (%a to %a)" + Range.pp r + Label.pp (Vec.get_exn nums r.lo) + Label.pp (Vec.get_exn nums r.hi) in + m "%s: %a: %a%!" __FUNCTION__ Var.pp x ppr x)); + Logs.debug (fun m -> + List.iter p ~f:(fun g -> + m "%s: group: %s%!" __FUNCTION__ + (List.to_string g ~f:(fun x -> + Var.to_string x.var)))); + Logs.debug (fun m -> + if not @@ Lset.is_empty deads then + m "%s: dead stores: %a%!" + __FUNCTION__ + (Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") + Label.pp) + (Lset.to_list deads)); + Logs.debug (fun m -> + Map.iteri subst ~f:(fun ~key ~data -> + m "%s: coalesce slot: %a => %a%!" + __FUNCTION__ Var.pp key Virtual.pp_operand data)) + + let run fn = + let slots = Sinit.S.collect_slots fn in + if Map.is_empty slots then empty else + let cfg = Cfg.create fn in + let blks = Func.map_of_blks fn in + let t = Sinit.S.analyze cfg blks slots in + let si = Sinit.analyze' t cfg blks slots in + let rs, nums = liveness cfg blks slots t si in + let p = partition slots rs in + let deads = collect_deads blks slots rs t in + let subst = make_subst slots p in + debug_show slots rs nums deads p subst; + {subst; deads} +end diff --git a/src/lib/passes/egraph_opt/egraph_opt.ml b/src/lib/passes/egraph_opt/egraph_opt.ml index 36361e23..39b2f842 100644 --- a/src/lib/passes/egraph_opt/egraph_opt.ml +++ b/src/lib/passes/egraph_opt/egraph_opt.ml @@ -3,5 +3,5 @@ module Rules = Egraph_opt_rules -let run tenv fn = Egraph.run fn tenv Rules.all -let run_no_rules tenv fn = Egraph.run fn tenv Rules.none +let run tenv fn = Egraph.run fn tenv @@ Rules.all () +let run_no_rules tenv fn = Egraph.run fn tenv @@ Rules.none () diff --git a/src/lib/passes/egraph_opt/egraph_opt_rules.ml b/src/lib/passes/egraph_opt/egraph_opt_rules.ml index 24d8d8da..d737a530 100644 --- a/src/lib/passes/egraph_opt/egraph_opt_rules.ml +++ b/src/lib/passes/egraph_opt/egraph_opt_rules.ml @@ -2041,5 +2041,14 @@ module Groups = struct ] end -let all = Egraph.compile Groups.all -let none = Egraph.compile [] +let thunked name g = + let r = ref None in + fun () -> match !r with + | Some p -> p + | None -> + let p = Egraph.compile ~name g in + r := Some p; + p + +let all = thunked "egraph-all" Groups.all +let none = thunked "egraph-none" [] diff --git a/src/lib/passes/passes.ml b/src/lib/passes/passes.ml index 3ff18ea9..c9d0103f 100644 --- a/src/lib/passes/passes.ml +++ b/src/lib/passes/passes.ml @@ -3,6 +3,7 @@ open Virtual open Context.Syntax module Abi_loadopt = Abi_loadopt +module Coalesce_slots = Coalesce_slots module Egraph_opt = Egraph_opt module Lower_abi = Lower_abi module Promote_slots = Promote_slots @@ -11,6 +12,7 @@ module Remove_disjoint_blks = Remove_disjoint_blks module Resolve_constant_blk_args = Resolve_constant_blk_args module Sccp = Sccp module Simplify_cfg = Simplify_cfg +module Sroa = Sroa module Ssa = Ssa let initialize m = @@ -21,11 +23,14 @@ let initialize m = !!(tenv, m) let retype tenv m = - Module.funs m |> - Seq.to_list |> Typecheck.update_fns tenv + Module.funs m |> Seq.to_list |> Typecheck.update_fns tenv let optimize tenv m = let module Cv = Context.Virtual in + let*? m = Module.map_funs_err m ~f:Coalesce_slots.run in + let*? m = Module.map_funs_err m ~f:Resolve_constant_blk_args.run in + let*? m = Module.map_funs_err m ~f:Remove_dead_vars.run in + let* m = Context.Virtual.Module.map_funs m ~f:Sroa.run in let*? m = Module.map_funs_err m ~f:Promote_slots.run in let*? tenv = retype tenv m in let*? m = Module.map_funs_err m ~f:(Sccp.run tenv) in @@ -57,10 +62,18 @@ let to_abi tenv m = ~data:(Seq.to_list @@ Module.data m) let optimize_abi m = + let*? m = Abi.Module.map_funs_err m ~f:Coalesce_slots.run_abi in + let*? m = Abi.Module.map_funs_err m ~f:Resolve_constant_blk_args.run_abi in + let*? m = Abi.Module.map_funs_err m ~f:Remove_dead_vars.run_abi in + let* m = Context.Virtual.Module.map_funs_abi m ~f:Sroa.run_abi in let*? m = Abi.Module.map_funs_err m ~f:Promote_slots.run_abi in let*? m = Abi.Module.map_funs_err m ~f:Abi_loadopt.run in let m = Abi.Module.map_funs m ~f:Remove_disjoint_blks.run_abi in let*? m = Abi.Module.map_funs_err m ~f:Remove_dead_vars.run_abi in + let*? m = Abi.Module.map_funs_err m ~f:Coalesce_slots.run_abi in + let*? m = Abi.Module.map_funs_err m ~f:Resolve_constant_blk_args.run_abi in + let*? m = Abi.Module.map_funs_err m ~f:Abi_loadopt.run in + let*? m = Abi.Module.map_funs_err m ~f:Remove_dead_vars.run_abi in let* () = Context.iter_seq_err (Abi.Module.funs m) ~f:Ssa.check_abi in !!m diff --git a/src/lib/passes/passes.mli b/src/lib/passes/passes.mli index be95b1f8..fdc57fba 100644 --- a/src/lib/passes/passes.mli +++ b/src/lib/passes/passes.mli @@ -64,6 +64,16 @@ module Abi_loadopt : sig val run : Abi.func -> Abi.func Or_error.t end +(** Attempts to coalesce slots of compatible size and alignment which + do not interfere (i.e. are never live at the same time). + + Assumes the function is in SSA form. +*) +module Coalesce_slots : sig + val run : func -> func Or_error.t + val run_abi : Abi.func -> Abi.func Or_error.t +end + (** Uses the [Egraph] module to perform a variety of optimizations to a function. @@ -121,6 +131,7 @@ end *) module Resolve_constant_blk_args : sig val run : func -> func Or_error.t + val run_abi : Abi.func -> Abi.func Or_error.t end (** Performs the classic Sparse Conditional Constant @@ -141,6 +152,25 @@ module Simplify_cfg : sig val run : Typecheck.env -> func -> func Context.t end +(** Performs Scalar Replacement of Aggregates (SROA). + + This aims to analyze access patterns of stack slots + and break them up into individual (scalar) components. + + An important detail to note is that in the non-ABI pass, + slots being referenced in terms of user-defined compound + (aggregate) types are treated as opaque, and therefore + not optimized, as these are often ABI-dependent. + + When ABI lowering is performed, these references should + be desugared into access patterns on primitive types, + at which point the algorithm can attempt to process them. +*) +module Sroa : sig + val run : func -> func Context.t + val run_abi : Abi.func -> Abi.func Context.t +end + (** Transforms a function into semi-pruned SSA form. *) module Ssa : sig val run : func -> func Or_error.t diff --git a/src/lib/passes/promote_slots/promote_slots.ml b/src/lib/passes/promote_slots/promote_slots.ml index 1b0a5d14..4f89e36f 100644 --- a/src/lib/passes/promote_slots/promote_slots.ml +++ b/src/lib/passes/promote_slots/promote_slots.ml @@ -1,3 +1,4 @@ +open Core open Virtual open Promote_slots_impl @@ -47,18 +48,31 @@ module A = Make(struct open E.Syntax +let apply + (type fn) + (module M : Scalars.L with type Func.t = fn) + run ssa (fn : fn) = + let module Sinit = Slot_initialization.Make(M) in + let slots = Sinit.S.collect_slots fn in + let cfg = M.Cfg.create fn in + let blks = M.Func.map_of_blks fn in + let s = Sinit.analyze cfg blks slots in + let undef l = Hash_set.mem s.bad l in + run fn ~undef >>= ssa + let run fn = if Dict.mem (Func.dict fn) Tags.ssa then - V.run fn >>= Ssa.run + apply (module Scalars_common.VL) V.run Ssa.run fn else E.failf "In Promote_slots: expected SSA form for function $%s" (Func.name fn) () let run_abi fn = - if Dict.mem (Abi.Func.dict fn) Tags.ssa then - A.run fn >>= Ssa.run_abi + let open Abi in + if Dict.mem (Func.dict fn) Tags.ssa then + apply (module Scalars_common.AL) A.run Ssa.run_abi fn else E.failf "In Promote_slots (ABI): expected SSA form for function $%s" - (Abi.Func.name fn) () + (Func.name fn) () diff --git a/src/lib/passes/promote_slots/promote_slots_impl.ml b/src/lib/passes/promote_slots/promote_slots_impl.ml index 880c9dcd..28e7f488 100644 --- a/src/lib/passes/promote_slots/promote_slots_impl.ml +++ b/src/lib/passes/promote_slots/promote_slots_impl.ml @@ -101,24 +101,37 @@ module Make(M : L) = struct | Write (_, t') when not @@ Type.equal_basic t t' -> Bad | Read _ | Write _ -> Write (s, t) - let go env s = + let go env s ~undef = let x = Slot.var s in Resolver.uses env.reso x |> List.fold_until ~init:Bad ~finish:Fn.id ~f:(fun acc -> function |`blk _ -> Stop Bad - | `insn (i, _, _, _) -> match infer acc x i with - | (Read _ | Write _) as acc -> Continue acc - | Bad -> Stop Bad) + | `insn (i, _, _, _) -> + if undef @@ Insn.label i then Stop Bad + else match infer acc x i with + | (Read _ | Write _) as acc -> Continue acc + | Bad -> Stop Bad) end - let collect env = + let collect env ~undef = Func.slots env.fn |> Seq.fold ~init:Var.Map.empty ~f:(fun acc s -> - match Qualify.go env s with - | Bad -> acc - | Write (_, t) -> Map.set acc ~key:(Slot.var s) ~data:t + match Qualify.go env s ~undef with + | Bad -> + Logs.debug (fun m -> + m "%s: cannot promote %a%!" + __FUNCTION__ Var.pp (Slot.var s)); + acc + | Write (_, t) -> + Logs.debug (fun m -> + m "%s: promoting %a%!" + __FUNCTION__ Var.pp (Slot.var s)); + Map.set acc ~key:(Slot.var s) ~data:t | Read _ -> + Logs.debug (fun m -> + m "%s: slot %a is read, but never written to%!" + __FUNCTION__ Var.pp (Slot.var s)); (* In this case, we read from the slot but never stored anything to it. It's undefined behavior, but it's also what the programmer intended, so we should cancel this promotion. *) @@ -151,9 +164,9 @@ module Make(M : L) = struct | Some (y, t') -> replace_load env x l y t t' | None -> assert false)) - let run fn = + let run fn ~undef = let+ env = init fn in - let xs = collect env in + let xs = collect env ~undef in if not @@ Map.is_empty xs then let fn = remove_slots fn xs in replace env xs; diff --git a/src/lib/passes/remove_dead_vars/remove_dead_vars_impl.ml b/src/lib/passes/remove_dead_vars/remove_dead_vars_impl.ml index 9b30ccf6..9522becf 100644 --- a/src/lib/passes/remove_dead_vars/remove_dead_vars_impl.ml +++ b/src/lib/passes/remove_dead_vars/remove_dead_vars_impl.ml @@ -16,6 +16,7 @@ module type S = sig module Insn : sig type t + val label : t -> Label.t val check_div_rem : t -> bool val is_effectful : t -> bool val lhs : t -> Var.t option @@ -96,15 +97,23 @@ module Make(M : S) = struct function is in SSA form then keeping in them in the alive set shouldn't affect the results. *) let insn (acc, changed, alive) i = match Insn.lhs i with - | Some x when not @@ keep i x alive -> acc, true, alive + | Some x when not @@ keep i x alive -> + Logs.debug (fun m -> + m "%s: %a: %a is dead%!" __FUNCTION__ + Label.pp (Insn.label i) Var.pp x); + acc, true, alive | Some x -> i :: acc, changed, alive -- x ++ Insn.free_vars i | None -> i :: acc, changed, alive ++ Insn.free_vars i + let remove_slot fn x = + Logs.debug (fun m -> m "%s: slot %a is dead%!" __FUNCTION__ Var.pp x); + Func.remove_slot fn x + let finalize fn blks live = let ins = Live.ins live @@ Func.entry fn in Func.slots fn |> Seq.map ~f:Slot.var |> Seq.filter ~f:(Fn.non @@ Set.mem ins) |> - Seq.fold ~init:fn ~f:Func.remove_slot |> + Seq.fold ~init:fn ~f:remove_slot |> Fn.flip Func.update_blks' blks let rec run fn blks cfg = diff --git a/src/lib/passes/resolve_constant_blk_args.ml b/src/lib/passes/resolve_constant_blk_args.ml index 2c72dcac..69b062e6 100644 --- a/src/lib/passes/resolve_constant_blk_args.ml +++ b/src/lib/passes/resolve_constant_blk_args.ml @@ -1,34 +1,78 @@ open Core open Virtual +open Phi_values -module Phis = Phi_values.Make(struct - (* `None` indicates that there may be many values for the phi, - while `Some v` indicates that `v` is its singular value. +module D = struct + (* `None` indicates that there may be many values for the phi, + while `Some v` indicates that `v` is its singular value. - We choose `option` because filtering the substitution produced - by the analysis can use the identity function via `Map.filter_map`, - thus avoiding extra allocations. - *) - type t = operand option [@@deriving equal] + We choose `option` because filtering the substitution produced + by the analysis can use the identity function via `Map.filter_map`, + thus avoiding extra allocations. + *) + type t = operand option [@@deriving equal] - let one v = Some v - let join x y = if equal x y then x else None - end) + let one v = Some v + let join x y = if equal x y then x else None +end -let run fn = - if Dict.mem (Func.dict fn) Tags.ssa then - let cfg = Cfg.create fn in - let blks = Func.map_of_blks fn in - let s = - Map.filter_map ~f:Fn.id @@ - Phis.analyze ~blk:(Label.Tree.find blks) cfg in - let fn = - if not @@ Map.is_empty s then Func.map_blks fn ~f:(fun b -> - let is, k = Subst_mapper.map_blk s b in - Blk.(with_ctrl (with_insns b is) k)) - else fn in - Ok fn - else +module V = Make(struct + module Ctrl = struct + type t = ctrl + let locals = (locals Ctrl.Table.enum :> t -> _) + end + module Blk = Blk + module Func = Func + module Cfg = Cfg + end)(D) + +module A = Make(struct + open Abi + module Ctrl = struct + type t = ctrl + let locals = (locals Ctrl.Table.enum :> t -> _) + end + module Blk = Blk + module Func = Func + module Cfg = Cfg + end)(D) + +let check_ssa msg n d fn f = + if Dict.mem (d fn) Tags.ssa then Ok (f ()) else Or_error.errorf - "In Resolve_constant_blk_args: function $%s is \ - not in SSA form" (Func.name fn) + "In Resolve_constant_blk_args%s: function $%s is \ + not in SSA form" msg (n fn) + +let apply fn cfg mb analyze map_blks map_blk with_ctrl with_insns = + let cfg = cfg fn in + let blks = mb fn in + let s = Map.filter_map ~f:Fn.id @@ + analyze ~blk:(Label.Tree.find blks) cfg in + if not @@ Map.is_empty s then map_blks fn ~f:(fun b -> + let is, k = map_blk s b in + with_ctrl (with_insns b is) k) + else fn +[@@specialise] + +let run fn = + check_ssa "" Func.name Func.dict fn @@ fun () -> + apply fn + Cfg.create + Func.map_of_blks + V.analyze + Func.map_blks + Subst_mapper.map_blk + Blk.with_ctrl + Blk.with_insns + +let run_abi fn = + let open Abi in + check_ssa " (ABI)" Func.name Func.dict fn @@ fun () -> + apply fn + Cfg.create + Func.map_of_blks + A.analyze + Func.map_blks + Subst_mapper_abi.map_blk + Blk.with_ctrl + Blk.with_insns diff --git a/src/lib/passes/scalars_common.ml b/src/lib/passes/scalars_common.ml new file mode 100644 index 00000000..10c273d9 --- /dev/null +++ b/src/lib/passes/scalars_common.ml @@ -0,0 +1,132 @@ +open Core +open Virtual +open Scalars + +let var_set_of_option = function + | Some x -> Var.Set.singleton x + | None -> Var.Set.empty + +(* Instructions that can produce a scalar `Offset` value. *) +let offset = function + | `bop (_, `add _, `var x, `int (i, _)) + | `bop (_, `add _, `int (i, _), `var x) -> + Some (x, Bv.to_int64 i) + | `bop (_, `sub _, `var x, `int (i, _)) -> + Some (x, Int64.neg @@ Bv.to_int64 i) + | _ -> None + +(* Instructions that behave like `copy` instead of causing + a variable to "escape". + + TODO: more cases? +*) +let copy_of = function + | `uop (_, `copy _, `var x) -> Some x + | `bop (_, `add _, `var x, `int (i, _)) + | `bop (_, `add _, `int (i, _), `var x) + | `bop (_, `sub _, `var x, `int (i, _)) + when Bv.(i = zero) -> Some x + | _ -> None + +let escapes mty fv = function + | `store (ty, `var x, `var y) when mty ty -> Var.Set.of_list [x; y] + | `store (ty, _, `var y) when mty ty -> Var.Set.of_list [y] + | `store (_, `var x, _) -> Var.Set.singleton x + | `store _ -> Var.Set.empty + | `load (_, ty, `var x) when mty ty -> Var.Set.singleton x + | `load _ -> Var.Set.empty + | o when Option.is_some (offset o) -> Var.Set.empty + | o when Option.is_some (copy_of o) -> Var.Set.empty + | o -> fv o +[@@specialise] + +let load_or_store_to = function + | `load (_, (#Type.basic as b), `var x) -> Some (x, b, Load) + | `store ((#Type.basic as b), _, `var x) -> Some (x, b, Store) + | _ -> None + +let replace_load_or_store_addr a = function + | `load (x, (#Type.basic as b), _) -> `load (x, b, `var a) + | `store ((#Type.basic as b), v, _) -> `store (b, v, `var a) + | op -> op + +let add x ty b o = + assert Int64.(o > 0L); + let i = Bv.(int64 o mod modulus (Type.sizeof_imm_base ty)) in + `bop (x, `add (ty :> Type.basic), `var b, `int (i, (ty :> Type.imm))) + +let is_named = function + | `name _ -> true + | _ -> false + +let escapes_ctrl fv = function + | `hlt -> Var.Set.empty + | `jmp `var x -> Var.Set.singleton x + | `jmp _ -> Var.Set.empty + | `br (c, `var y, `var n) -> Var.Set.of_list [c; y; n] + | `br (c, _, `var n) -> Var.Set.of_list [c; n] + | `br (c, `var y, _) -> Var.Set.of_list [c; y] + | `br (c, _, _) -> Var.Set.singleton c + | `ret _ as c -> fv c + | `sw (_, `var i, _, _) -> Var.Set.singleton i + | `sw _ -> Var.Set.empty +[@@specialise] + +(* Virtual language *) +module VL = struct + module Insn = struct + type t = Insn.t + type op = Insn.op + let create ~label op = Insn.create op ~label + let with_op = Insn.with_op + let op = Insn.op + let label = Insn.label + let lhs = var_set_of_option @. Insn.lhs_of_op + let offset = (offset :> op -> _) + let copy_of = (copy_of :> op -> _) + let free_vars = Insn.free_vars_of_op + let escapes = (escapes is_named free_vars :> op -> _) + let load_or_store_to = (load_or_store_to :> op -> _) + let replace_load_or_store_addr a = (replace_load_or_store_addr a :> op -> _) + let add x ty b o = (add x ty b o :> op) + end + module Ctrl = struct + type t = ctrl + let free_vars = Ctrl.free_vars + let escapes = (escapes_ctrl free_vars :> t -> _) + let locals = (Phi_values.locals Ctrl.Table.enum :> t -> _) + end + module Blk = Blk + module Func = Func + module Cfg = Cfg +end + +(* Virtual.Abi language *) +module AL = struct + open Abi + module Insn = struct + type t = Insn.t + type op = Insn.op + let create ~label op = Insn.create op ~label + let with_op = Insn.with_op + let op = Insn.op + let label = Insn.label + let lhs = Insn.def_of_op + let offset = (offset :> op -> _) + let copy_of = (copy_of :> op -> _) + let free_vars = Insn.free_vars_of_op + let escapes = (escapes (const false) free_vars :> op -> _) + let load_or_store_to = (load_or_store_to :> op -> _) + let replace_load_or_store_addr a = (replace_load_or_store_addr a :> op -> _) + let add x ty b o = (add x ty b o :> op) + end + module Ctrl = struct + type t = ctrl + let free_vars = Ctrl.free_vars + let escapes = (escapes_ctrl free_vars :> t -> _) + let locals = (Phi_values.locals Ctrl.Table.enum :> t -> _) + end + module Blk = Blk + module Func = Func + module Cfg = Cfg +end diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_brsel.ml b/src/lib/passes/simplify_cfg/simplify_cfg_brsel.ml index 2f3d9cb1..362f799d 100644 --- a/src/lib/passes/simplify_cfg/simplify_cfg_brsel.ml +++ b/src/lib/passes/simplify_cfg/simplify_cfg_brsel.ml @@ -58,8 +58,15 @@ let collect tenv env fn = let run tenv env fn = let xs = collect tenv env fn in let+ () = Context.List.iter xs ~f:(fun (b, c, l, args) -> + Logs.debug (fun m -> + m "%s: block %a, simplifying br on %a to %a%!" + __FUNCTION__ Label.pp (Blk.label b) + Var.pp c Label.pp l); let+ sels = Context.List.map args ~f:(fun (ty, y, n) -> let+ x, sel = Context.Virtual.sel ty c y n in + Logs.debug (fun m -> + m "%s: inserting %a%!" __FUNCTION__ + Insn.pp_op (Insn.op sel)); Hashtbl.set env.typs ~key:x ~data:(ty :> Type.t); `var x, sel) in let args, sels = List.unzip sels in diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_contract.ml b/src/lib/passes/simplify_cfg/simplify_cfg_contract.ml index 098bca2d..2ac42523 100644 --- a/src/lib/passes/simplify_cfg/simplify_cfg_contract.ml +++ b/src/lib/passes/simplify_cfg/simplify_cfg_contract.ml @@ -84,20 +84,29 @@ let find_loc env s : local -> local option = function | #global -> assert false | #local as l -> l -let map_dst changed env s d = match find_dst env s d with +let map_dst key changed env s d = match find_dst env s d with | Some x when equal_dst d x -> x - | Some x -> changed := true; x + | Some x -> + Logs.debug (fun m -> + m "%s: block %a: contracted %a to %a%!" + __FUNCTION__ Label.pp key pp_dst d pp_dst x); + changed := true; x | None -> d -let map_loc changed env s l = match find_loc env s l with +let map_loc key changed env s l = match find_loc env s l with | Some x when equal_local l x -> x - | Some x -> changed := true; x + | Some x -> + Logs.debug (fun m -> + m "%s: block %a: contracted %a to %a%!" + __FUNCTION__ Label.pp key pp_local l pp_local x); + changed := true; x | None -> l let contract_blks changed env (s : singles) = Hashtbl.map_inplace env.blks ~f:(fun b -> - let dst = map_dst changed env s in - let loc = map_loc changed env s in + let key = Blk.label b in + let dst = map_dst key changed env s in + let loc = map_loc key changed env s in Blk.map_ctrl b ~f:(function | (`hlt | `ret _) as x -> x | `jmp d -> `jmp (dst d) diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_duplicate_br.ml b/src/lib/passes/simplify_cfg/simplify_cfg_duplicate_br.ml index 5b549cc0..cc952caf 100644 --- a/src/lib/passes/simplify_cfg/simplify_cfg_duplicate_br.ml +++ b/src/lib/passes/simplify_cfg/simplify_cfg_duplicate_br.ml @@ -75,6 +75,11 @@ let collect env = | `br (c, y, n) -> Option.merge (true_br env c y n) (false_br env c y n) ~f:merge_br |> Option.value_map ~default:acc ~f:(fun br -> + Logs.debug (fun m -> + m "%s: block %a, simplifying c=%a, y=%a, n=%a to %a%!" + __FUNCTION__ Label.pp (Blk.label b) + Var.pp c pp_dst y pp_dst n + Ctrl.pp (br :> ctrl)); Label.Tree.set acc ~key ~data:br) | _ -> acc) diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_merge_blks.ml b/src/lib/passes/simplify_cfg/simplify_cfg_merge_blks.ml index ce668b8a..45c0b47e 100644 --- a/src/lib/passes/simplify_cfg/simplify_cfg_merge_blks.ml +++ b/src/lib/passes/simplify_cfg/simplify_cfg_merge_blks.ml @@ -49,6 +49,9 @@ and merge subst env l l' b b' = env.cfg <- Cfg.Node.remove l' env.cfg; List.iter es ~f:(fun e -> env.cfg <- Cfg.Edge.insert e env.cfg); + Logs.debug (fun m -> + m "%s: merged block %a into %a%!" + __FUNCTION__ Label.pp l' Label.pp l); try_merge ~child:l' subst env l let run env = diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_merge_rets.ml b/src/lib/passes/simplify_cfg/simplify_cfg_merge_rets.ml index 117bd9bc..90b7828e 100644 --- a/src/lib/passes/simplify_cfg/simplify_cfg_merge_rets.ml +++ b/src/lib/passes/simplify_cfg/simplify_cfg_merge_rets.ml @@ -15,6 +15,10 @@ let map_blk tbl rb b = match Hashtbl.find tbl @@ Blk.label b with let commit env tbl rb fn = let key = Blk.label rb in + Logs.debug (fun m -> + m "%s: merged returns to new block %a: %s%!" + __FUNCTION__ Label.pp key + (Hashtbl.keys tbl |> List.to_string ~f:Label.to_string)); Hashtbl.map_inplace env.blks ~f:(map_blk tbl rb); Hashtbl.set env.blks ~key ~data:rb; env.ret <- Some key; diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_short_circ.ml b/src/lib/passes/simplify_cfg/simplify_cfg_short_circ.ml index eead9500..34bcfd74 100644 --- a/src/lib/passes/simplify_cfg/simplify_cfg_short_circ.ml +++ b/src/lib/passes/simplify_cfg/simplify_cfg_short_circ.ml @@ -98,14 +98,23 @@ let redir changed env cphi = | `br (c, y, n) -> let y' = brcond env cphi c y ~f:(fun y _ -> changed := true; y) in let n' = brcond env cphi c n ~f:(fun _ n -> changed := true; n) in - if phys_equal y y' && phys_equal n n' then b - else Blk.with_ctrl b @@ `br (c, y', n') - | `jmp `label (l, args) -> + if phys_equal y y' && phys_equal n n' then b else + let () = Logs.debug (fun m -> + m "%s: block %a: simplified br c=%a, y=%a, n=%a: y'=%a, n'=%a%!" + __FUNCTION__ Label.pp (Blk.label b) + Var.pp c pp_dst y pp_dst n + pp_dst y' pp_dst n') in + Blk.with_ctrl b @@ `br (c, y', n') + | `jmp (`label (l, args) as loc) -> Option.value ~default:b begin let* y, n, i, ne = Label.Tree.find cphi l in let* x = List.nth args i >>= var_of_operand in let+ c = if ne then Hashtbl.find env.flag x else !!x in changed := true; + Logs.debug (fun m -> + m "%s: block %a: simplified jmp %a: c=%a, y=%a, n=%a%!" + __FUNCTION__ Label.pp (Blk.label b) + pp_local loc Var.pp c pp_dst y pp_dst n); Blk.with_ctrl b @@ `br (c, y, n) end | _ -> b) diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_tailrec.ml b/src/lib/passes/simplify_cfg/simplify_cfg_tailrec.ml index 796c529f..6ff05548 100644 --- a/src/lib/passes/simplify_cfg/simplify_cfg_tailrec.ml +++ b/src/lib/passes/simplify_cfg/simplify_cfg_tailrec.ml @@ -75,4 +75,17 @@ let go env fn = let* h = new_entry fn e in let*? fn = Func.with_blks fn (h :: blks) in env.start <- Blk.label h; + Logs.debug (fun m -> + let pp_call ppf (l, (args, l')) = + Format.fprintf ppf " %a:\n args: %s\n l': %a%!" + Label.pp l + (List.to_string args ~f:(fun a -> + Format.asprintf "%a" pp_operand a)) + Label.pp l' in + m "%s: transformed $%s to tailrec, h=%a:\n%a%!" + __FUNCTION__ (Func.name fn) Label.pp (Blk.label h) + (Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n") + pp_call) + (Label.Tree.to_list calls)); !!fn diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_two_case_switch.ml b/src/lib/passes/simplify_cfg/simplify_cfg_two_case_switch.ml index 2d8ef7f7..77625df6 100644 --- a/src/lib/passes/simplify_cfg/simplify_cfg_two_case_switch.ml +++ b/src/lib/passes/simplify_cfg/simplify_cfg_two_case_switch.ml @@ -17,6 +17,11 @@ let go fn = (i :> operand) (`int (v, t)) in let b = Blk.append_insn b cmp in + Logs.debug (fun m -> + m "%s: switch at %a collapsed: i=%a, c=%a, k=%a, d=%a%!" + __FUNCTION__ Label.pp (Blk.label b) + pp_operand (i :> operand) + Var.pp c pp_dst (k :> dst) pp_dst (d :> dst)); Some (Blk.with_ctrl b @@ `br (c, (k :> dst), (d :> dst))) | _ -> !!None) >>| Seq.to_list in Func.update_blks_exn fn bs diff --git a/src/lib/passes/sroa/sroa.ml b/src/lib/passes/sroa/sroa.ml new file mode 100644 index 00000000..44f9705b --- /dev/null +++ b/src/lib/passes/sroa/sroa.ml @@ -0,0 +1,21 @@ +open Core +open Virtual +open Sroa_impl +open Scalars_common + +module V = Make(VL) +module A = Make(AL) + +open Context.Syntax + +let run fn = + let* () = Context.unless (Dict.mem (Func.dict fn) Tags.ssa) @@ fun () -> + Context.failf "In SROA: function $%s is not in SSA form" + (Func.name fn) () in + V.run fn + +let run_abi fn = + let* () = Context.unless (Dict.mem (Abi.Func.dict fn) Tags.ssa) @@ fun () -> + Context.failf "In SROA (ABI): function $%s is not in SSA form" + (Abi.Func.name fn) () in + A.run fn diff --git a/src/lib/passes/sroa/sroa.mli b/src/lib/passes/sroa/sroa.mli new file mode 100644 index 00000000..0f02dd46 --- /dev/null +++ b/src/lib/passes/sroa/sroa.mli @@ -0,0 +1,19 @@ +(** Performs SROA (Scalar Replacement of Aggregates). + + This pass aims to analyze the usage of slots in a function, where + individual fields of the slot can be separated into their own + unique slots themselves. + + This is typical when a slot is allocated to hold a structure and + pointer arithmetic is used to store to and load from individual + fields of this structure. If these fields can become their own slots, + then they can possibly promoted to SSA variables, enabling more + optimization opportunities. + + The function must be in SSA form for this pass. +*) + +open Virtual + +val run : func -> func Context.t +val run_abi : Abi.func -> Abi.func Context.t diff --git a/src/lib/passes/sroa/sroa_impl.ml b/src/lib/passes/sroa/sroa_impl.ml new file mode 100644 index 00000000..45820419 --- /dev/null +++ b/src/lib/passes/sroa/sroa_impl.ml @@ -0,0 +1,341 @@ +(* A conservative implementation of SROA (Scalar Replacement of Aggregates). + + Most notably, we don't consider compound/aggregate types at all. Why? + Because they are second-class citizens in our IR, and they are highly + dependent on the implementation details of the target ABI. + + After ABI lowering, they are desugared, after which this pass becomes + more useful. +*) + +open Core +open Regular.Std +open Scalars + +module Slot = Virtual.Slot +module Allen = Allen_interval_algebra + +type range = { + lo : int64; + hi : int64; +} + +module Range = struct + type t = range + let lo r = r.lo [@@inline] + let hi r = r.hi [@@inline] + let size r = Int64.(r.hi - r.lo) [@@inline] + let coverage a b = Int64.(b.hi - a.lo) [@@inline] + let pp ppf r = Format.fprintf ppf "[0x%Lx, 0x%Lx)" r.lo r.hi +end + +module Algebra = Allen.Make(struct + include Range + type point = int64 + include Int64.Replace_polymorphic_compare + end) + +let basic_size ty = Type.sizeof_basic ty / 8 [@@inline] + +(* A partition of elements at a particular offset+size range. *) +type 'a partition = { + off : int64; + size : int64; + mems : 'a list; +} + +type 'a partitions = 'a partition list Var.Map.t + +module Partition = struct + type 'a t = 'a partition + + let cmp a b = Int64.compare a.off b.off + + (* Check if a partition covers the entire slot `s`. *) + let is_entire_slot s p = match p.off with + | 0L -> Int64.(of_int (Slot.size s) = p.size) + | _ -> false + [@@inline] + + let range p = { + lo = p.off; + hi = Int64.(p.off + p.size); + } [@@inline] + + let pp ppa ppf p = + Format.fprintf ppf "0x%Lx:%Ld: %a" + p.off p.size + (Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.fprintf ppf " ") + ppa) p.mems + + let add_member x p = {p with mems = x :: p.mems} [@@inline] + let change off size x p = {off; size; mems = x :: p.mems} [@@inline] + let singleton off size x = {off; size; mems = [x]} [@@inline] +end + +module Make(M : Scalars.L) : sig + val run : M.Func.t -> M.Func.t Context.t +end = struct + open M + + module Analysis = Scalars.Make(M) + + (* A memory access for a slot. *) + type access = { + insn : Insn.t; + off : int64; + ty : Type.basic; + ldst : load_or_store; + } + + type accesses = access list Var.Map.t + + module Access = struct + type t = access [@@ocaml.warning "-34"] + + let sizeof a = basic_size a.ty [@@inline] + + let cmp a b = + match Int64.compare a.off b.off with + | 0 -> Int.compare (sizeof a) (sizeof b) + | c -> c + + let range a = { + lo = a.off; + hi = Int64.(a.off + of_int (sizeof a)); + } [@@inline] + + let pp ppf a = + let neg = Int64.is_negative a.off in + let pre, off = if neg then '-', Int64.neg a.off else '+', a.off in + Format.fprintf ppf "%a[%a.%a,%c0x%Lx]" + Label.pp (Insn.label a.insn) + pp_load_or_store a.ldst + Type.pp_basic a.ty + pre off + end + + let collect_accesses slots fn t : accesses = + (* Group all memory accesses by their corresponding slot. *) + Func.blks fn |> Seq.fold ~init:Var.Map.empty ~f:(fun init b -> + let s = ref @@ get t @@ Blk.label b in + Blk.insns b |> Seq.fold ~init ~f:(fun acc insn -> + let op = Insn.op insn in + let acc = match Insn.load_or_store_to op with + | None -> acc + | Some (ptr, ty, ldst) -> match Map.find !s ptr with + | Some Offset (base, _) when escaped t base -> + (* Any slot that escaped at any time should not + be considered for partitioning. *) + Logs.debug (fun m -> + m "%s: ignoring escaped pointer %a%!" + __FUNCTION__ Var.pp base); + acc + | Some Offset (base, off) -> + Map.add_multi acc ~key:base ~data:{insn; off; ty; ldst} + | _ -> acc in + s := Analysis.transfer_op slots !s op; + acc)) |> + Map.map ~f:(List.sort ~compare:Access.cmp) + + (* Sort the memory accesses into self-contained, non-overlapping + partitions, which are the fully-or-partially scalarized sub-objects + of the aggregate. *) + let partition_acesses : accesses -> access partitions = fun m -> + let rec merge acc c = function + | [] -> List.sort (c :: acc) ~compare:Partition.cmp + | x :: xs -> + let rc = Partition.range c in + let rx = Access.range x in + let a = Algebra.relate rc rx in + let acc, p = match a with + | Equal | Started_by | Finished_by | Contains -> + (* Partition subsumes the access. *) + acc, Partition.add_member x c + | During | Finishes -> + (* Access subsumes the partition *) + acc, Partition.change rx.lo (Range.size rx) x c + | Overlaps | Starts -> + (* Extend the upper bound. *) + acc, Partition.change rc.lo (Range.coverage rc rx) x c + | Overlapped_by -> + (* Extend the lower bound. *) + acc, Partition.change rx.lo (Range.coverage rx rc) x c + | Before | After | Meets | Met_by -> + (* No overlap, so we start a new partition. *) + let sx = Int64.of_int @@ Access.sizeof x in + c :: acc, Partition.singleton x.off sx x in + Logs.debug (fun m -> + m "%s: partition %a, access %a: %a%!" + __FUNCTION__ Range.pp rc Range.pp rx Allen.pp a); + merge acc p xs in + (* pre: each access list is sorted *) + Map.filter_map m ~f:(function + | [] -> None + | x :: xs -> + let sx = Int64.of_int @@ Access.sizeof x in + let p = Partition.singleton x.off sx x in + Some (merge [] p xs)) + + (* Turn each partition into a concrete slot. *) + let materialize_partitions slots parts : scalars Context.t = + Map.to_sequence parts |> Seq.filter_map ~f:(fun (base, ps) -> + Map.find slots base |> Option.map ~f:(fun s -> base, ps, s)) |> + Context.Seq.fold ~init:Scalar.Map.empty ~f:(fun init (base, ps, s) -> + Seq.of_list ps |> Seq.filter ~f:(not @. Partition.is_entire_slot s) |> + Context.Seq.fold ~init ~f:(fun acc p -> + let open Context.Syntax in + let size = Int64.to_int_exn p.size in + (* TODO: look through `p.mems` and see if there is a store + that is larger than other acesses (i.e. `st.l` followed + by one or more `ld.w`). If so, this partition could be + broken down further if we modify the store instruction(s). *) + let* x = Context.Var.fresh in + let*? s = Slot.create x ~size ~align:size in + Logs.debug (fun m -> + m "%s: new slot %a, base=%a, off=0x%Lx, size=%Ld%!" + __FUNCTION__ Var.pp x Var.pp base p.off p.size); + !!(Map.set acc ~key:(base, p.off) ~data:s))) + + (* Find the corresponding partition for [base+off, base+off+size). *) + let find_partition (parts : 'a partitions) base off size = + Map.find parts base |> Option.bind ~f:(fun ps -> + let r = {lo = off; hi = Int64.(off + of_int size)} in + List.find ps ~f:(fun (p : 'a partition) -> + let rp = Partition.range p in + let a = Algebra.relate r rp in + Logs.debug (fun m -> + m "%s: relating %a to %a: %a%!" + __FUNCTION__ Range.pp r Range.pp rp Allen.pp a); + match a with + | Starts | During | Finishes | Equal -> true + | _ -> false)) + + (* Exact cover for a scalar at `base + off`. *) + let rewrite_insn_exact (m : scalars) i ~exact ~base ~off = + let open Context.Syntax in + Logs.debug (fun m -> + m "%s: exact=0x%Lx, off=0x%Lx, base=%a%!" + __FUNCTION__ exact off Var.pp base); + let op = Insn.op i in + let delta = Int64.(off - exact) in + match Map.find m (base, exact) with + | None -> + Logs.debug (fun m -> m "%s: no slot found%!" __FUNCTION__); + !![i] + | Some s when Int64.(delta = 0L) -> + Logs.debug (fun m -> + m "%s: found slot %a (base)%!" + __FUNCTION__ Var.pp (Slot.var s)); + (* Store to base of new slot. *) + let addr = Slot.var s in + let op' = Insn.replace_load_or_store_addr addr op in + !![Insn.with_op i op'] + | Some s -> + Logs.debug (fun m -> + m "%s: found slot %a (delta 0x%Lx)%!" + __FUNCTION__ Var.pp (Slot.var s) delta); + (* Compute offset of new slot and store to it. *) + let* l = Context.Label.fresh in + let* y = Context.Var.fresh in + let+ word = Context.target >>| Target.word in + let a = Insn.add y word (Slot.var s) delta in + let op' = Insn.replace_load_or_store_addr y op in + [Insn.create ~label:l a; Insn.with_op i op'] + + let debug_show_insn i ptr ty ldst = + Logs.debug (fun m -> + m "%s: %a: looking at %a.%a to %a%!" + __FUNCTION__ + Label.pp (Insn.label i) + pp_load_or_store ldst + Type.pp_basic ty + Var.pp ptr) + + let debug_show_bad_val ptr v = + Logs.debug (fun m -> + m "%s: cannot rewrite: %a is %a" + __FUNCTION__ Var.pp ptr + (Format.pp_print_option + ~none:pp_bot + pp_value) + v) + + (* Rewrite an instruction. *) + let rewrite_insn parts (m : scalars) (s : state) i = + let open Context.Syntax in + let op = Insn.op i in + match Insn.load_or_store_to op with + | None -> !![i] + | Some (ptr, ty, ldst) -> + debug_show_insn i ptr ty ldst; + match Map.find s ptr with + | (Some Top | None) as v -> + debug_show_bad_val ptr v; + !![i] + | Some Offset (base, off) -> + match find_partition parts base off @@ basic_size ty with + | Some p -> rewrite_insn_exact m i ~exact:p.off ~base ~off + | None -> + Logs.debug (fun m -> m "%s: no parts found%!" __FUNCTION__); + !![i] + + let rewrite_with_partitions slots fn t parts m = + let open Context.Syntax in + let* blks = Func.blks fn |> Context.Seq.map ~f:(fun b -> + let s = ref @@ get t @@ Blk.label b in + let+ insns = Blk.insns b |> Context.Seq.map ~f:(fun i -> + let+ is = rewrite_insn parts m !s i in + s := Analysis.transfer_op slots !s @@ Insn.op i; is) + >>| List.concat @. Seq.to_list in + Blk.with_insns b insns) >>| Seq.to_list in + Context.lift_err @@ Func.with_blks fn blks + + let insert_new_slots fn m = Map.fold m ~init:fn + ~f:(fun ~key:_ ~data fn -> Func.insert_slot fn data) + + let debug_show_parts parts = + Logs.debug (fun m -> + m "%s: partitions:\n%a%!" + __FUNCTION__ + (Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n") + (fun ppf (key, data) -> + Format.fprintf ppf "%a:\n%a" + Var.pp key + (Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n") + (fun ppf -> Format.fprintf ppf " %a" + (Partition.pp Access.pp))) + data)) + (Map.to_alist parts)) + + (* XXX: allowing the analysis to propagate through block parameters + could possibly be done, but the current transformation isn't + set up to properly handle it. + + The main issue arises from pointers into the slots that get + aliased via a block parameter. That dangling reference will + currently be left unchanged, leading to garbage values being + read from the old slot. + *) + let analyze slots fn = + Analysis.analyze ~blkparam:false slots fn + + let run fn = + let open Context.Syntax in + let slots = Analysis.collect_slots fn in + if Map.is_empty slots then !!fn else + let cfg = Cfg.create fn in + let blks = Func.map_of_blks fn in + let t = analyze cfg blks slots in + let accs = collect_accesses slots fn t in + let parts = partition_acesses accs in + if Map.is_empty parts then !!fn else + let () = debug_show_parts parts in + let* m = materialize_partitions slots parts in + if Map.is_empty m then !!fn else + let fn = insert_new_slots fn m in + rewrite_with_partitions slots fn t parts m +end diff --git a/src/lib/passes/ssa/ssa_impl_rename.ml b/src/lib/passes/ssa/ssa_impl_rename.ml index 2b58c832..35e22da7 100644 --- a/src/lib/passes/ssa/ssa_impl_rename.ml +++ b/src/lib/passes/ssa/ssa_impl_rename.ml @@ -15,20 +15,27 @@ end = struct | None -> raise_notrace @@ Missing_blk l | Some _ as b -> b + let fresh_version nums base = + let i = ref 1 in + Hashtbl.update nums base ~f:(function + | None -> !i + | Some n -> + let n = n + 1 in + i := n; + n); + !i + let new_name stk nums x = - let key = Var.base x in - let default = 1 in - let n = ref default in - let upd x = n := x + 1; !n in - Hashtbl.update nums key ~f:(Option.value_map ~default ~f:upd); - let y = Var.with_index x !n in - Hashtbl.add_multi stk ~key ~data:y; + let base = Var.base x in + let idx = fresh_version nums base in + let y = Var.with_index x idx in + Hashtbl.add_multi stk ~key:base ~data:y; y let rename_args stk nums b = Blk.args b |> Seq.map ~f:(new_name stk nums) |> Seq.to_list - let map_var stk x = match Hashtbl.find stk x with + let map_var stk x = match Hashtbl.find stk @@ Var.base x with | Some [] | None -> x | Some (y :: _) -> y diff --git a/src/lib/phi_values.ml b/src/lib/phi_values.ml index 282bc772..7ccea306 100644 --- a/src/lib/phi_values.ml +++ b/src/lib/phi_values.ml @@ -3,43 +3,68 @@ open Core open Regular.Std open Graphlib.Std -open Virtual + +let table enum d ds tbl = + enum tbl |> Seq.map ~f:snd |> + Seq.map ~f:(fun (`label (l, args)) -> l, args) |> + Seq.to_list |> List.cons (d, ds) + +let locals enum = + let open Virtual in function + | `hlt -> [] + | `jmp #global -> [] + | `jmp `label (l, args) -> [l, args] + | `br (_, #global, #global) -> [] + | `br (_, `label (y, ys), #global) -> [y, ys] + | `br (_, #global, `label (n, ns)) -> [n, ns] + | `br (_, `label (y, ys), `label (n, ns)) -> [y, ys; n, ns] + | `ret _ -> [] + | `sw (_, _, `label (d, ds), tbl) -> table enum d ds tbl module type Domain = sig type t [@@deriving equal] - val one : operand -> t + val one : Virtual.operand -> t val join : t -> t -> t end -module Make(D : Domain) = struct +module type L = sig + module Ctrl : sig + type t + val locals : t -> (Label.t * Virtual.operand list) list + end + module Blk : sig + type t + val args : ?rev:bool -> t -> Var.t seq + val ctrl : t -> Ctrl.t + end + module Func : sig + type t + end + module Cfg : sig + include Label.Graph_s + val create : Func.t -> t + end +end + +module Make(M : L)(D : Domain) = struct + open M + type state = D.t Var.Map.t [@@deriving equal] - let local ~blk s : local -> state = function - | `label (l, vs) -> - blk l |> Option.value_map ~default:s ~f:(fun b -> - let args = Seq.to_list @@ Blk.args b in - match List.zip args vs with - | Unequal_lengths -> assert false - | Ok xs -> List.fold xs ~init:s ~f:(fun s (x, v) -> - Map.update s x ~f:(function - | Some vs -> D.(join vs @@ one v) - | None -> D.one v))) - - let dst ~blk s : dst -> state = function - | #local as l -> local ~blk s l - | #global -> s + let local ~blk s (l, vs) : state = + blk l |> Option.value_map ~default:s ~f:(fun b -> + let args = Seq.to_list @@ Blk.args b in + match List.zip args vs with + | Unequal_lengths -> assert false + | Ok xs -> List.fold xs ~init:s ~f:(fun s (x, v) -> + Map.update s x ~f:(function + | Some vs -> D.(join vs @@ one v) + | None -> D.one v))) let transfer ~blk l s = blk l |> Option.value_map ~default:s ~f:(fun b -> - match Blk.ctrl b with - | `hlt | `ret _ -> s - | `jmp d -> dst ~blk s d - | `br (_, y, n) -> - dst ~blk (dst ~blk s y) n - | `sw (_, _, d, tbl) -> - let init = local ~blk s d in - Ctrl.Table.enum tbl |> Seq.fold ~init - ~f:(fun s (_, l') -> local ~blk s l')) + Blk.ctrl b |> Ctrl.locals |> + List.fold ~init:s ~f:(local ~blk)) let merge = Map.merge_skewed ~combine:(fun ~key:_ -> D.join) diff --git a/src/lib/regalloc/regalloc_irc.ml b/src/lib/regalloc/regalloc_irc.ml index 3de447de..641fddcd 100644 --- a/src/lib/regalloc/regalloc_irc.ml +++ b/src/lib/regalloc/regalloc_irc.ml @@ -50,7 +50,9 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let label = Insn.label i in let insn = Insn.insn i in let use = M.Insn.reads insn in - Set.iter use ~f:(update_cost ~loop_depth t); + Set.iter use ~f:(fun u -> + update_cost ~loop_depth t u; + inc_use t u); let def = M.Insn.writes insn in (* if isMoveInstruction(I) then *) let+ out = match M.Regalloc.is_copy insn with @@ -82,14 +84,16 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct Set.iter def ~f:(fun d -> (* forall l \in live AddEdge(l,d) *) + add_def t d label; Set.iter out ~f:(fun o -> add_edge t o d)); (* live := use(I) U (live\def(I)) *) Set.union use (Set.diff out def) (* Build the interference graph and other initial state for the algorithm. *) - let build t live = + let build t = (* forall b \in blocks in program *) + let live = Option.value_exn t.live in Func.blks t.fn |> C.Seq.iter ~f:(fun b -> let l = Blk.label b in let loop_depth = match Loop.blk t.loop l with @@ -98,12 +102,15 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct (* NB: levels start at 0 *) (Loop.(level (get t.loop lp)) :> int) + 1 in (* live := liveOut(b) *) - let out = Live.outs live l in + let out = ref @@ Live.outs live l in (* forall I \in instructions(b) in reverse order *) - let+ _out = - Blk.insns b ~rev:true |> - C.Seq.fold ~init:out ~f:(build_insn ~loop_depth t) in - ()) + let insns = Blk.insns b ~rev:true |> Seq.to_list in + let ord = ref (List.length insns - 1) in + C.List.iter insns ~f:(fun i -> + Hashtbl.set t.insn_blks ~key:(Insn.label i) ~data:(l, !ord); + let+ out' = build_insn ~loop_depth t !out i in + out := out'; + decr ord)) (* Initialize the worklists. *) let make_worklist t = @@ -127,6 +134,9 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct node_moves t n |> Lset.iter ~f:(fun m -> (* if m \in activeMoves then *) if Lset.mem t.amoves m then begin + Logs.debug (fun m_ -> + m_ "%s: enabling move %a for node %a%!" + __FUNCTION__ Label.pp m Rv.pp n); (* activeMoves := activeMoves \ {m} *) t.amoves <- Lset.remove t.amoves m; (* worklistMoves := worklistMoves U {m} *) @@ -165,7 +175,10 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct simplifyWorklist := simplifyWorklist \ {n} *) let n = take_one t.wsimplify in (* push(n, selectStack) *) - if can_be_colored t n then Stack.push t.select n; + if can_be_colored t n then begin + Logs.debug (fun m -> m "%s: selecting %a%!" __FUNCTION__ Rv.pp n); + Stack.push t.select n; + end; (* forall m \in Adjacent(n) *) adjacent t n |> Set.iter ~f:(decrement_degree t) @@ -185,12 +198,17 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct end let ok t a r = - (* t \in precolored *) - exclude_from_coloring t a || - (* degree[t] < K *) - degree t a < Regs.node_k a || - (* (a,r) \in adjSet *) - has_edge t a r + let res = + (* t \in precolored *) + exclude_from_coloring t a || + (* degree[t] < K *) + degree t a < Regs.node_k a || + (* (a,r) \in adjSet *) + has_edge t a r in + Logs.debug (fun m -> + m "%s: %a, %a: %b%!" + __FUNCTION__ Rv.pp a Rv.pp r res); + res (* forall t \in Adjacent(v), OK(t,u) *) let all_adjacent_ok t u v = @@ -203,16 +221,20 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct if degree[n] >= k then k := k + 1 return (k < K) *) - let conservative t nk nodes = - let k = Set.fold nodes ~init:0 ~f:(fun k n -> - if degree t n >= Regs.node_k n then k + 1 else k) in - k < nk + let conservative t nodes = + Set.fold nodes ~init:0 ~f:(fun k n -> + if degree t n >= Regs.node_k n then k + 1 else k) (* Conservative(Adjacent(u) U Adjacent(v)) *) let conservative_adj t u v = assert (Regs.same_class_node u v); let nodes = Set.union (adjacent t u) (adjacent t v) in - conservative t (Regs.node_k u) nodes + let nk = Regs.node_k u in + let k = conservative t nodes in + Logs.debug (fun m -> + m "%s: u=%a, v=%a, k=%d, nk=%d%!" + __FUNCTION__ Rv.pp u Rv.pp v k nk); + k < nk (* XXX: the algorithm in the paper does: @@ -227,7 +249,11 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct of `u`, leaving the degree of `t` unchanged. *) let combine_edge t u v = - if has_edge t u v then + let e = has_edge t u v in + Logs.debug (fun m -> + m "%s: combining edge u=%a, v=%a, has_edge=%b%!" + __FUNCTION__ Rv.pp u Rv.pp v e); + if e then decrement_degree t v else begin add_adjlist t u v; @@ -238,6 +264,9 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct (* Combine `v` with `u` in the interference graph, where `u` is the destination. *) let combine t u v = + Logs.debug (fun m -> + m "%s: combining u=%a with v=%a%!" + __FUNCTION__ Rv.pp u Rv.pp v); (* if v \in freezeWorklist *) if Hash_set.mem t.wfreeze v then (* freezeWorklist := freezeWorklist \ {v} *) @@ -259,12 +288,43 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct if (* degree[u] >= K *) degree t u >= Regs.node_k u && (* u \in freezeWorklist *) - Hash_set.mem t.wfreeze u then begin + Hash_set.mem t.wfreeze u + then (* freezeWorklist := freezeWorklist \ {u} *) - Hash_set.remove t.wfreeze u; + let () = Hash_set.remove t.wfreeze u in (* spillWorklist := spillWorklist U {u} *) add_spill t u - end + + (* We can bypass the Briggs conservative heuristic if this + move is trivially coalescable. + + Criteria: + + 1. The source node is not precolored. + 2. This move is its only use. + 3. All definitions dominate the use. + 4. The source node is not live-out. + *) + let is_trivial_du t m v = + can_be_colored t v && num_uses t v = 1 && + match Hashtbl.find t.insn_blks m with + | None -> false + | Some (bm, om) -> + let live = Option.value_exn t.live in + let out = Live.outs live bm in + not (Set.mem out v) && + Hashtbl.find t.defs v |> + Option.value ~default:Lset.empty |> + Lset.to_sequence |> Seq.for_all ~f:(fun d -> + match Hashtbl.find t.insn_blks d with + | None -> false + | Some (bd, od) when Label.(bm = bd) -> + Logs.debug (fun m -> + m "%s: bm=%a, om=%d, bd=%a, od=%d%!" + __FUNCTION__ Label.pp bm om Label.pp bd od); + od < om + | Some (bd, _) -> + Semi_nca.Tree.is_descendant_of t.dom ~parent:bd bm) (* P(u,v) = S(u,v) * (W(u,v) / (1 + D'(u) + D'(v))) @@ -297,6 +357,9 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let dv = effective_degree c.src pv in let w = Float.of_int @@ Int.pow 10 c.loop in let p = w /. Float.of_int (1 + du + dv) in + (* If if this move is trivially coalescable, then bump the weight a + bit so that it can coalesce earlier. *) + let p = if is_trivial_du t m c.src then p *. 2.0 else p in (* If one of the nodes is pre-colored, then this coalesce will be much riskier. If both are pre-colored, then we should avoid it at all costs (see topmost condition). *) @@ -308,12 +371,12 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct Lset.to_sequence t.wmoves |> Seq.map ~f:(fun m -> m, move_priority t m) |> Seq.max_elt ~compare:(fun (_, a) (_, b) -> Float.compare a b) |> - Option.value_exn |> fst + Option.value_exn (* pre: wmoves is not empty *) let coalesce t = (* let m_(=copy(x,y)) \in worklistMoves *) - let m = pick_move t in + let m, score = pick_move t in let c = Hashtbl.find_exn t.copies m in (* x := GetAlias(x) *) let x = alias t c.dst in @@ -324,39 +387,48 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct else let (u,v) = (x,y) *) let u, v = if exclude_from_coloring t y then y, x else x, y in + Logs.debug (fun m_ -> + m_ "%s: looking at move %a, score=%g, u=%a, v=%a%!" + __FUNCTION__ Label.pp m score Rv.pp u Rv.pp v); (* worklistMoves := worklistMoves \ {m} *) t.wmoves <- Lset.remove t.wmoves m; (* if u = v then *) - if Rv.(u = v) then begin + if Rv.(u = v) then (* coalescedMoves := coalescedMoves U {m} *) - t.cmoves <- Lset.add t.cmoves m; + let () = t.cmoves <- Lset.add t.cmoves m in + Logs.debug (fun m -> m "%s: already coalesced%!" __FUNCTION__); (* AddWorkList(u) *) add_worklist t u - end else if + else if (* v \in precolored *) exclude_from_coloring t v || (* (u,v) \in adjSet *) - has_edge t u v then begin + has_edge t u v + then (* constrainedMoves := constrainedMoves U {m} *) - t.kmoves <- Lset.add t.kmoves m; + let () = t.kmoves <- Lset.add t.kmoves m in + Logs.debug (fun m_ -> + m_ "%s: constraining %a%!" __FUNCTION__ Label.pp m); (* addWorkList(u) *) add_worklist t u; (* addWorkList(v) *) add_worklist t v - end else if + else if (* u \in precolored ^ (\forall t \in Adjacent(v), OK(t,u)) *) (exclude_from_coloring t u && all_adjacent_ok t u v) || (* u \notin precolored ^ Conservative(Adjacent(u), Adjacent(v)) *) - (can_be_colored t u && conservative_adj t u v) then begin + (can_be_colored t u && (is_trivial_du t m v || conservative_adj t u v)) + then (* coalescedMoves := coalescedMoves U {m} *) - t.cmoves <- Lset.add t.cmoves m; + let () = t.cmoves <- Lset.add t.cmoves m in (* Combine(u,v) *) combine t u v; (* AddWorkList(u) *) add_worklist t u - end else + else (* activeMoves := activeMoves U {m} *) - t.amoves <- Lset.add t.amoves m + let () = t.amoves <- Lset.add t.amoves m in + Logs.debug (fun m -> m "%s: adding to active moves%!" __FUNCTION__) (* pre: m \in copies @@ -381,6 +453,9 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct (* worklistMoves := worklistMoves \ {m} *) t.wmoves <- Lset.remove t.wmoves m; (* frozenMoves := frozenMoves U {m} *) + Logs.debug (fun m_ -> + m_ "%s: freezing move %a, v=%a" + __FUNCTION__ Label.pp m Rv.pp v); t.fmoves <- Lset.add t.fmoves m; (* if NodeMoves(v) = {} ^ degree[v] < K *) if Lset.is_empty (node_moves t v) @@ -396,6 +471,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct (* let u \in freezeWorklist freezeWorklist := freezeWorklist \ {u} *) let u = take_one t.wfreeze in + Logs.debug (fun m -> m "%s: frozen node u=%a%!" __FUNCTION__ Rv.pp u); (* simplifyWorklist := simplifyWorklist U {u} *) Hash_set.add t.wsimplify u; (* FreezeMoves(u) *) @@ -403,6 +479,9 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let select_spill t = Pairing_heap.pop t.wspill |> Option.iter ~f:(fun m -> + Logs.debug (fun m_ -> + m_ "%s: selecting spill node %a%!" + __FUNCTION__ Rv.pp m); (* spillWorklist := spillWorklist \ {m} *) Hashtbl.remove t.wspill_elts m; (* simplifyWorklist := simplifyworklist U {m} *) @@ -419,7 +498,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct Seq.filter ~f:(fun w -> Rv.is_reg w || Hash_set.mem t.colored w) |> (* okColors := okColors \ {color[GetAlias(w)]} *) Seq.filter_map ~f:(color t) |> - Seq.iter ~f:(fun c -> cs := Z.(!cs land ~!(one lsl c))) + Seq.iter ~f:(fun c -> cs := Bitset.Id.clear !cs c) let assign_colors t = (* while SelectStack is not empty @@ -428,19 +507,19 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct assert (can_be_colored t n); (* okColors := {0,...,K-1} *) let k = Regs.node_k n in - let cs = ref Z.(pred (one lsl k)) in + let cs = ref @@ Bitset.Id.init k in eliminate_colors t n cs; (* if okColors = {} then *) - if Z.(equal !cs zero) then + match Bitset.Id.min_elt !cs with + | None -> (* spilledNodes := spilledNodes U {n} *) t.spilled <- Set.add t.spilled n - else begin + | Some c -> (* coloredNodes := coloredNodes U {n} *) Hash_set.add t.colored n; (* let c \in okColors color[n] := c *) - set_color t n @@ Z.trailing_zeros !cs - end); + set_color t n c); (* forall n \in coalescedNodes *) Hash_set.iter t.coalesced ~f:(fun n -> (* color[n] := color[GetAlias(n)] *) @@ -470,9 +549,15 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let r = Rv.var GPR v in match find_reusable_slot t m n size with | Some r' -> + Logs.debug (fun m -> + m "%s: re-using slot %a for spilled node %a%!" + __FUNCTION__ Rv.pp r' Rv.pp r); Hashtbl.set t.slots ~key:r ~data:r'; acc, m | None -> + Logs.debug (fun m -> + m "%s: spilling %a to new slot%!" + __FUNCTION__ Rv.pp r); let s = Virtual.Slot.create_exn v ~size ~align:size in Hashtbl.set t.slots ~key:r ~data:r; s :: acc, Map.add_multi m ~key:size ~data:r) in @@ -599,10 +684,14 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct (* Clear the relevant state for the next round. *) let new_round t = + t.live <- None; Hashtbl.clear t.adjlist; Hashtbl.clear t.degree; Hashtbl.clear t.copies; + Hashtbl.clear t.nuse; + Hashtbl.clear t.defs; Hashtbl.clear t.moves; + Hashtbl.clear t.insn_blks; Hashtbl.clear t.spill_cost; (* This doesn't seem to happen in the paper, but we should discard the previous coloring since we introduced new spill/reload code. @@ -617,9 +706,12 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct let* () = C.when_ (round > max_rounds) @@ fun () -> C.failf "In Regalloc.main: maximum rounds reached (%d) with no \ convergence on spilling" max_rounds () in + Logs.debug (fun m -> + m "%s: $%s: round %d of %d" + __FUNCTION__ (Func.name t.fn) round max_rounds); (* Build the interference graph. *) - let live = Live.compute ~keep:t.keep t.fn in - let* () = build t live in + t.live <- Some (Live.compute ~keep:t.keep t.fn); + let* () = build t in make_worklist t; (* Process the worklists. *) let continue = ref true in diff --git a/src/lib/regalloc/regalloc_irc_state.ml b/src/lib/regalloc/regalloc_irc_state.ml index 2c9c00cb..394bbfc4 100644 --- a/src/lib/regalloc/regalloc_irc_state.ml +++ b/src/lib/regalloc/regalloc_irc_state.ml @@ -48,7 +48,10 @@ module Make(M : Machine_intf.S) = struct adjlist : Rv.Set.t Rv.Table.t; degree : int Rv.Table.t; moves : Lset.t Rv.Table.t; + insn_blks : (Label.t * int) Label.Table.t; copies : copy Label.Table.t; + nuse : int Rv.Table.t; + defs : Lset.t Rv.Table.t; mutable wmoves : Lset.t; (* worklist moves *) mutable amoves : Lset.t; (* active moves *) mutable cmoves : Lset.t; (* coalesced moves *) @@ -72,6 +75,7 @@ module Make(M : Machine_intf.S) = struct loop : Loop.t; spill_cost : int Rv.Table.t; dom : Label.t Semi_nca.tree; + mutable live : Live.t option; } (* Explicit registers and variables that correspond to stack slots @@ -123,7 +127,10 @@ module Make(M : Machine_intf.S) = struct adjlist = Rv.Table.create (); degree; moves = Rv.Table.create (); + insn_blks = Label.Table.create (); copies = Label.Table.create (); + nuse = Rv.Table.create (); + defs = Rv.Table.create (); wmoves = Lset.empty; amoves = Lset.empty; cmoves = Lset.empty; @@ -147,11 +154,15 @@ module Make(M : Machine_intf.S) = struct loop; spill_cost; dom; + live = None; } let add_spill t n = if can_be_colored t n && not (Hashtbl.mem t.wspill_elts n) then let elt = Pairing_heap.add_removable t.wspill n in + Logs.debug (fun m -> + m "%s: adding %a to spill worklist%!" + __FUNCTION__ Rv.pp n); Hashtbl.set t.wspill_elts ~key:n ~data:elt let remove_spill t n = Hashtbl.change t.wspill_elts n ~f:(function @@ -216,6 +227,19 @@ module Make(M : Machine_intf.S) = struct let move_related t n = not @@ Lset.is_empty @@ node_moves t n + let inc_use t n = + Hashtbl.update t.nuse n ~f:(function + | Some n -> n + 1 + | None -> 1) + + let num_uses t n = + Hashtbl.find t.nuse n |> Option.value ~default:0 + + let add_def t n l = + Hashtbl.update t.defs n ~f:(function + | None -> Lset.singleton l + | Some s -> Lset.add s l) + (* if n \in coalescedNodes then GetAlias(alias[n]) else n *) @@ -228,7 +252,11 @@ module Make(M : Machine_intf.S) = struct | Second _ -> Hashtbl.find t.colors n | First r -> Regs.reg_color r - let set_color t n c = Hashtbl.set t.colors ~key:n ~data:c + let set_color t n c = + Logs.debug (fun m -> + m "%s: assigning color %a=%d%!" + __FUNCTION__ Rv.pp n c); + Hashtbl.set t.colors ~key:n ~data:c let add_move t label n = Hashtbl.update t.moves n ~f:(function diff --git a/src/lib/scalars.ml b/src/lib/scalars.ml new file mode 100644 index 00000000..5d5f720e --- /dev/null +++ b/src/lib/scalars.ml @@ -0,0 +1,267 @@ +open Core +open Regular.Std +open Graphlib.Std + +let (@.) = Fn.compose +let (@<) = Fn.flip + +module Slot = Virtual.Slot + +(* A scalar access. *) +module Scalar = struct + module T = struct + type t = Var.t * int64 [@@deriving compare, equal, hash, sexp] + end + include T + module Map = Map.Make(T) + module Table = Hashtbl.Make(T) +end + +type scalar = Scalar.t [@@deriving compare, equal, hash, sexp] +type scalars = Slot.t Scalar.Map.t + +(* Lattice of scalar accesses. + + [Top]: the access is inconsistent or escapes + + [Offset (s, o)]: access to slot [s] at offset [o] +*) +type value = + | Top + | Offset of scalar +[@@deriving equal, sexp] + +let pp_value ppf = function + | Top -> Format.fprintf ppf "\u{22a4}" + | Offset (slot, offset) -> + let neg = Int64.is_negative offset in + let pre, off = if neg then '-', Int64.neg offset else '+', offset in + Format.fprintf ppf "%a%c0x%Lx" Var.pp slot pre off + +let pp_bot ppf () = Format.fprintf ppf "\u{22a5}" [@@ocaml.warning "-32"] + +module Value = struct + type t = value [@@deriving equal, sexp] + let merge a b = match a, b with + | Offset s1, Offset s2 when equal_scalar s1 s2 -> a + | _ -> Top +end + +type slots = Slot.t Var.Map.t + +module State : sig + type t = value Var.Map.t [@@deriving equal, sexp] + val empty : t + val merge : t -> t -> t + val derive : slots -> t -> Var.t -> int64 -> value option +end = struct + (* NB: the keys are the LHS of a given instruction *) + type t = value Var.Map.t [@@deriving equal, sexp] + + let empty = Var.Map.empty + + let merge a b = Map.merge_skewed a b + ~combine:(fun ~key:_ a b -> Value.merge a b) + + let is_bad slots ptr offset = + Int64.(offset < 0L) || match Map.find slots ptr with + | Some s -> Int64.(offset >= of_int (Slot.size s)) + | None -> false + + (* Normalize the scalar referred to by `ptr` and `offset`. *) + let derive slots s ptr offset = match Map.find s ptr with + | (Some Top | None) as v -> v + | Some Offset (ptr', offset') -> + let offset'' = Int64.(offset + offset') in + (* Out of bounds offset to a slot should be undefined. *) + let value = + if is_bad slots ptr' offset'' then Top + else Offset (ptr', offset'') in + Some value +end + +type state = State.t [@@deriving equal, sexp] + +let pp_state ppf s = + let pp_sep ppf () = Format.fprintf ppf "@ " in + let pp_elt ppf (x, v) = Format.fprintf ppf "(%a@ %a)" Var.pp x pp_value v in + let pp_elts = Format.pp_print_list ~pp_sep pp_elt in + Format.fprintf ppf "@[%a@]" pp_elts @@ Map.to_alist s +[@@ocaml.warning "-32"] + +type solution = (Label.t, state) Solution.t + +let empty_solution = Solution.create Label.Map.empty State.empty + +type load_or_store = Load | Store + +let pp_load_or_store ppf = function + | Load -> Format.fprintf ppf "load" + | Store -> Format.fprintf ppf "store" + +let is_store = function + | Load -> false + | Store -> true + +module type L = sig + module Insn : sig + type t + type op + + val create : label:Label.t -> op -> t + + (* General accessors. *) + val op : t -> op + val label : t -> Label.t + + (* Used during analysis. *) + val lhs : op -> Var.Set.t + val offset : op -> scalar option + val copy_of : op -> Var.t option + val free_vars : op -> Var.Set.t + val escapes : op -> Var.Set.t + + (* Used during replacement. *) + val load_or_store_to : op -> (Var.t * Type.basic * load_or_store) option + val replace_load_or_store_addr : Var.t -> op -> op + val with_op : t -> op -> t + val add : Var.t -> Type.imm_base -> Var.t -> int64 -> op + end + + module Ctrl : sig + type t + val free_vars : t -> Var.Set.t + val escapes : t -> Var.Set.t + val locals : t -> (Label.t * Virtual.operand list) list + end + + module Blk : sig + type t + val label : t -> Label.t + val args : ?rev:bool -> t -> Var.t seq + val insns : ?rev:bool -> t -> Insn.t seq + val ctrl : t -> Ctrl.t + val with_insns : t -> Insn.t list -> t + end + + module Func : sig + type t + val slots : ?rev:bool -> t -> Slot.t seq + val blks : ?rev:bool -> t -> Blk.t seq + val map_of_blks : t -> Blk.t Label.Tree.t + val with_blks : t -> Blk.t list -> t Or_error.t + val insert_slot : t -> Slot.t -> t + end + + module Cfg : sig + include Label.Graph_s + val create : Func.t -> t + end +end + +type t = { + soln : solution; (* Dataflow solution *) + esc : Var.Hash_set.t; (* All slots that escaped, globally *) +} + +let get t l = Solution.get t.soln l +let escaped t x = Hash_set.mem t.esc x + +module Make(M : L) = struct + open M + + (* Set all known scalars to `Top` according to `f`, which is the + set of variables that escape. *) + let escaping ?esc f x s = + Set.fold (f x) ~init:s ~f:(fun s v -> + match Map.find s v with + | Some Offset (ptr, _) -> + Option.iter esc ~f:(fun t -> + Hash_set.strict_add t ptr |> + Or_error.iter ~f:(fun () -> + Logs.debug (fun m -> + m "%s: %a escapes via %a%!" + __FUNCTION__ Var.pp ptr Var.pp v))); + Map.set s ~key:ptr ~data:Top + | Some _ | None -> s) + + (* Transfer function for a single instruction. *) + let transfer_op ?esc slots s op = + let value = match Insn.offset op with + | Some (ptr, offset) -> State.derive slots s ptr offset + | None -> Insn.copy_of op |> Option.bind ~f:(Map.find s) in + let s = match value with + | None -> s + | Some v -> + Insn.lhs op |> Set.fold ~init:s + ~f:(fun s key -> Map.set s ~key ~data:v) in + escaping ?esc Insn.escapes op s + + let merge_blkarg acc src dst = match src with + | `var src when Var.(src = dst) -> acc + | `var src -> + begin match Map.find acc src with + | None -> acc + | Some v -> Map.update acc dst ~f:(function + | Some v' -> Value.merge v v' + | None -> v) + end + | _ -> acc + + let blkargs blks s (l, xs) = + Label.Tree.find blks l |> + Option.value_map ~default:s ~f:(fun b -> + let args = Seq.to_list @@ Blk.args b in + List.fold2 xs args ~init:s ~f:merge_blkarg |> function + | Ok s -> s + | Unequal_lengths -> + Logs.warn (fun m -> + m "%s: unequal lengths (%d vs %d) for %a%!" + __FUNCTION__ (List.length xs) (List.length args) Label.pp l); + s) + + let ctrl_esc blkparam = + if blkparam then Ctrl.escapes else Ctrl.free_vars + [@@inline] + + (* Transfer for control-flow instruction. *) + let transfer_ctrl ?(blkparam = true) ?esc blks s c = + let init = escaping ?esc (ctrl_esc blkparam) c s in + (* Propagate the block parameters we are passing. *) + Ctrl.locals c |> List.fold ~init ~f:(blkargs blks) + [@@specialise] + + (* Transfer function for a block. *) + let transfer ?(blkparam = true) ?esc slots blks l s = + Label.Tree.find blks l |> + Option.value_map ~default:s ~f:(fun b -> + Blk.insns b |> Seq.map ~f:Insn.op |> + Seq.fold ~init:s ~f:(transfer_op ?esc slots) |> + transfer_ctrl ~blkparam ?esc blks @< Blk.ctrl b) + [@@specialise] + + (* Initial constraints. *) + let initialize slots _blks = + (* Set all slots to point to their own base address. *) + let init = Map.mapi slots ~f:(fun ~key ~data:_ -> Offset (key, 0L)) in + Label.Map.singleton Label.pseudoentry init |> + Solution.create @< State.empty + [@@specialise] + + (* All slots mapped to their names. *) + let collect_slots fn = Func.slots fn |> Seq.fold ~init:Var.Map.empty + ~f:(fun acc s -> Map.set acc ~key:(Slot.var s) ~data:s) + + (* Run the dataflow analysis. *) + let analyze ?(blkparam = true) cfg blks slots = + let esc = Var.Hash_set.create () in + let s = + Graphlib.fixpoint (module Cfg) cfg + ~init:(initialize slots blks) + ~start:Label.pseudoentry + ~equal:State.equal + ~merge:State.merge + ~f:(transfer ~blkparam ~esc slots blks) in + {soln = s; esc} + [@@specialise] +end diff --git a/src/lib/slot_initialization.ml b/src/lib/slot_initialization.ml new file mode 100644 index 00000000..a8656095 --- /dev/null +++ b/src/lib/slot_initialization.ml @@ -0,0 +1,177 @@ +open Core +open Regular.Std +open Graphlib.Std + +module Slot = Virtual.Slot + +type interval = { + lo : int; + hi : int; +} [@@deriving compare, sexp] + +module Interval = struct + type point = int [@@deriving compare, sexp] + type t = interval [@@deriving compare, sexp] + + let lower t = t.lo + let upper t = t.hi + + let pp ppf t = Format.fprintf ppf "[%d,%d]" t.lo t.hi + + let from_access off ty = + let lo = Int64.to_int_exn off in + let sz = Type.sizeof_basic ty / 8 in + {lo; hi = lo + sz - 1} + + let extended t = { + lo = t.lo - 1; + hi = t.hi + 1; + } +end + +module Tree = Interval_tree.Make(Interval) + +(* For each slot, we have a set of intervals corresponding to + relative byte offsets that were initialized by a store. *) +type state = unit Tree.t Var.Map.t + +let equal_state s1 s2 = + Map.equal (fun t1 t2 -> + Seq.equal (fun (i1, _) (i2, _) -> + Interval.compare i1 i2 = 0) + (Tree.to_sequence t1) + (Tree.to_sequence t2)) + s1 s2 + +let empty_state : state = Var.Map.empty + +(* Starting constraint has the entry block with no incoming + initializations. *) +let init_constraints : state Label.Map.t = + Label.Map.singleton Label.pseudoentry empty_state + +(* Our top element, which is every slot having been initialized. *) +let top_state slots : state = + Map.map slots ~f:(fun s -> + let i = {lo = 0; hi = Slot.size s - 1} in + Tree.singleton i ()) + +(* Coalesce `i` with any overlapping or adjacent intervals in `t`. *) +let normalize_add t i = + let lo, hi, t = + Interval.extended i |> Tree.intersections t |> Seq.map ~f:fst |> + Seq.fold ~init:(i.lo, i.hi, t) ~f:(fun (lo, hi, t) i -> + min lo i.lo, max hi i.hi, Tree.remove t i) in + Tree.add t {lo; hi} () + +(* Intersect the intervals (and also normalize them). *) +let merge_tree t1 t2 = + Tree.to_sequence t1 |> Seq.map ~f:fst |> + Seq.fold ~init:Tree.empty ~f:(fun init i1 -> + Tree.intersections t2 i1 |> Seq.map ~f:fst |> + Seq.fold ~init ~f:(fun acc i2 -> + let lo = max i1.lo i2.lo in + let hi = min i1.hi i2.hi in + if lo <= hi then normalize_add acc {lo; hi} else acc)) + +(* Since this is a "must" forward-flow analysis, incoming + predecessor states must intersect. *) +let merge_state s1 s2 = + Map.merge s1 s2 ~f:(fun ~key:_ -> function + | `Both (t1, t2) -> Some (merge_tree t1 t2) + | `Left _ | `Right _ -> None) + +type solution = (Label.t, state) Solution.t + +type t = { + soln : solution; + bad : Label.Hash_set.t; +} + +(* If the slot is not always initialized by the + time we reach the load, then we have UB. *) +let is_uninitialized acc base off ty = + match Map.find acc base with + | None -> true + | Some t -> + let i = Interval.from_access off ty in + not (Tree.dominates t i) + +let transfer_store esc acc ptr ty (s : Scalars.state) = + match Map.find s ptr with + | Some Offset (base, off) -> + (* If `base` ever escaped, then don't ever consider + it initialized. *) + if Hash_set.mem esc base then + let () = Logs.debug (fun m -> + m "%s: ignoring escaped slot %a%!" + __FUNCTION__ Var.pp base) in + acc + else + let i = Interval.from_access off ty in + Map.update acc base ~f:(function + | None -> Tree.singleton i () + | Some t when Tree.dominates t i -> t + | Some t -> normalize_add t i) + | _ -> acc + +let transfer_load bad acc l ptr ty (s : Scalars.state) = + match Map.find s ptr with + | Some Offset (base, off) -> + if is_uninitialized acc base off ty then + Hash_set.add bad l; + acc + | _ -> acc + +let debug_dump blks bad s = + Logs.debug (fun m -> + Label.Tree.iter blks ~f:(fun ~key ~data:_ -> + let s = Solution.get s key in + let pp_tree ppf (x, t) = + Tree.to_sequence t |> Seq.to_list |> + List.to_string ~f:(fun (i, ()) -> + Format.asprintf "%a" Interval.pp i) |> + Format.fprintf ppf "%a:%s" Var.pp x in + m "%s: %a: incoming must-initialize: %s%!" + __FUNCTION__ Label.pp key + (Map.to_alist s |> + List.to_string ~f:(Format.asprintf "%a" pp_tree)))); + Logs.debug (fun m -> + Hash_set.iter bad ~f:(fun l -> + m "%s: load at %a is potentially uninitialized%!" + __FUNCTION__ Label.pp l)) + +module Make(M : Scalars.L) = struct + open M + + module S = Scalars.Make(M) + + let transfer bad t blks slots l st = + match Label.Tree.find blks l with + | None -> st + | Some b -> + let s = ref @@ Scalars.get t l in + Blk.insns b |> Seq.fold ~init:st ~f:(fun acc i -> + let op = Insn.op i and l = Insn.label i in + let acc = match Insn.load_or_store_to op with + | Some (ptr, ty, Store) -> transfer_store t.esc acc ptr ty !s + | Some (ptr, ty, Load) -> transfer_load bad acc l ptr ty !s + | _ -> acc in + s := S.transfer_op slots !s op; + acc) + + let analyze' t cfg blks slots = + let bad = Label.Hash_set.create () in + let s = Graphlib.fixpoint (module Cfg) cfg + ~init:(Solution.create init_constraints @@ top_state slots) + ~start:Label.pseudoentry + ~equal:equal_state + ~merge:merge_state + ~f:(transfer bad t blks slots) in + debug_dump blks bad s; + {soln = s; bad} + + let analyze cfg blks slots = + let t = S.analyze cfg blks slots in + analyze' t cfg blks slots +end diff --git a/src/lib/subst_mapper_abi.ml b/src/lib/subst_mapper_abi.ml new file mode 100644 index 00000000..f43f81ec --- /dev/null +++ b/src/lib/subst_mapper_abi.ml @@ -0,0 +1,74 @@ +(* Same as `Subst_mapper` with as much re-use as possible. *) + +open Core +open Regular.Std +open Virtual +open Abi +open Subst_mapper + +type t = operand Var.Map.t + +let map_callarg subst : Insn.callarg -> Insn.callarg = function + | `reg (a, r) -> `reg (map_arg subst a, r) + | `stk (a, o) -> `stk (map_arg subst a, o) + +let map_op subst (op : Insn.op) = + let arg = map_arg subst in + match op with + | `bop (x, b, l, r) -> `bop (x, b, arg l, arg r) + | `uop (x, u, a) -> `uop (x, u, arg a) + | `sel (x, t, c, l, r) -> map_sel subst x t c l r + | `call (x, f, args) -> + let f = map_global subst f in + let args = List.map args ~f:(map_callarg subst) in + `call (x, f, args) + | `load (x, t, a) -> `load (x, t, arg a) + | `store (t, v, a) -> `store (t, arg v, arg a) + | `regcopy _ -> op + | `regstore (r, a) -> `regstore (r, arg a) + | `regassign (r, a) -> `regassign (r, arg a) + | `stkargs _ -> op + +let map_insn subst i = + Insn.with_op i @@ map_op subst @@ Insn.op i + +let map_tbl_entry subst i l = i, map_local subst l + +let map_sw subst t i d tbl = + let d = map_local subst d in + let tbl = Ctrl.Table.map_exn tbl ~f:(map_tbl_entry subst) in + match i with + | `sym _ -> `sw (t, i, d, tbl) + | `var x -> match Map.find subst x with + | Some (#Ctrl.swindex as i) -> `sw (t, i, d, tbl) + | Some `int (i, _) -> + let d = Ctrl.Table.find tbl i |> Option.value ~default:d in + `jmp (d :> dst) + | Some o -> invalid "sw" o + | None -> `sw (t, i, d, tbl) + +let map_ctrl subst : ctrl -> ctrl = function + | `hlt -> `hlt + | `jmp d -> `jmp (map_dst subst d) + | `br (c, y, n) -> map_br subst c y n + | `ret xs -> `ret (List.map xs ~f:(fun (r, a) -> r, map_arg subst a)) + | `sw (t, i, d, tbl) -> map_sw subst t i d tbl + +let map_blk subst b = + let insns = Blk.insns b |> Seq.map ~f:(map_insn subst) in + Seq.to_list insns, map_ctrl subst (Blk.ctrl b) + +(* TODO: should we enable more than just the `jmp` case? With + `br` and `switch` we can have different applications of the + substitution for the same destination. *) +let map_blk_args subst b l = match Blk.ctrl b with + | `jmp `label (l', args) when Label.(l = l') -> + Some (List.map args ~f:(map_arg subst)) + | _ -> None + +let blk_extend subst b b' = + Blk.label b' |> map_blk_args subst b |> Option.bind ~f:(fun args -> + Blk.args b' |> Seq.to_list |> List.zip args |> function + | Ok l -> Option.some @@ List.fold l ~init:subst + ~f:(fun subst (o, x) -> Map.set subst ~key:x ~data:o) + | Unequal_lengths -> None) diff --git a/src/lib/target.ml b/src/lib/target.ml index 7cb56d66..80864f72 100644 --- a/src/lib/target.ml +++ b/src/lib/target.ml @@ -24,7 +24,7 @@ let declare ~name ~word ~little () = targets := m; t -let find = Map.find !targets +let find name = Map.find !targets name let name t = t.name let word t = t.word diff --git a/src/lib/virtual/abi/abi.ml b/src/lib/virtual/abi/abi.ml index 5a2a384a..d9114623 100644 --- a/src/lib/virtual/abi/abi.ml +++ b/src/lib/virtual/abi/abi.ml @@ -15,3 +15,4 @@ module Module = Abi_module module Cfg = Abi_cfg module Live = Abi_live module Resolver = Abi_resolver +module Eval = Abi_eval diff --git a/src/lib/virtual/abi/abi_eval.ml b/src/lib/virtual/abi/abi_eval.ml new file mode 100644 index 00000000..5550ec36 --- /dev/null +++ b/src/lib/virtual/abi/abi_eval.ml @@ -0,0 +1 @@ +include Virtual_eval diff --git a/src/lib/virtual/abi/abi_insn.ml b/src/lib/virtual/abi/abi_insn.ml index b4bdf4b2..c2f3d18e 100644 --- a/src/lib/virtual/abi/abi_insn.ml +++ b/src/lib/virtual/abi/abi_insn.ml @@ -171,7 +171,7 @@ module Tag = struct "non-tail" (module Unit) end -let def i = match i.op with +let def_of_op = function | `bop (x, _, _, _) | `uop (x, _, _) | `sel (x, _, _, _, _) @@ -180,3 +180,5 @@ let def i = match i.op with | `stkargs x -> Var.Set.singleton x | `store _ | `regstore _ | `regassign _ -> Var.Set.empty | `call (xs, _, _) -> Var.Set.of_list @@ List.map xs ~f:fst3 + +let def i = def_of_op i.op diff --git a/src/lib/virtual/virtual.mli b/src/lib/virtual/virtual.mli index 23a3aee7..d2588ee7 100644 --- a/src/lib/virtual/virtual.mli +++ b/src/lib/virtual/virtual.mli @@ -231,53 +231,7 @@ end type insn = Insn.t [@@deriving bin_io, compare, equal, sexp] (** Evaluation of instructions. *) -module Eval : sig - (** [binop_int o x y] evaluates a binary operator [o] over integers - [x] and [y], and returns the result if it is defined. *) - val binop_int : - Insn.binop -> - Bv.t -> - Bv.t -> - [`bool of bool | `int of Bv.t * Type.imm] option - - (** [binop_single o x y] evaluates a binary operator [o] over 32-bit - floats [x] and [y], and returns the result if it is defined. *) - val binop_single : - Insn.binop -> - Float32.t -> - Float32.t -> - [`bool of bool | `float of Float32.t] option - - (** [binop_double o x y] evaluates a binary operator [o] over 64-bit - floats [x] and [y], and returns the result if it is defined. *) - val binop_double : - Insn.binop -> - float -> - float -> - [`bool of bool | `double of float] option - - (** [unop_int o x ty] evaluates a unary operator [o] over the integer - [x] with type [ty], and returns the result if it is defined. *) - val unop_int : - Insn.unop -> - Bv.t -> - Type.imm -> - [`double of float | `float of Float32.t | `int of Bv.t * Type.imm] option - - (** [unop_single o x] evaluates a unary operator [o] over the 32-bit float - [x], and returns the result if it is defined. *) - val unop_single : - Insn.unop -> - Float32.t -> - [`double of float | `float of Float32.t | `int of Bv.t * Type.imm] option - - (** [unop_double o x] evaluates a unary operator [o] over the 64-bit float - [x], and returns the result if it is defined. *) - val unop_double : - Insn.unop -> - float -> - [`double of float | `int of Bv.t * Type.imm] option -end +module Eval : Virtual_eval_intf.S (** Control-flow-effectful instructions. *) module Ctrl : Virtual_ctrl_intf.S @@ -693,7 +647,10 @@ module Abi : sig (** Transforms the underlying operation. *) val map : t -> f:(op -> op) -> t - (** Returns the set of defined variables of the instruction. *) + (** Returns the set of defined variables of the underlying op. *) + val def_of_op : op -> Var.Set.t + + (** Equivalent to [def_of_op (op i)] *) val def : t -> Var.Set.t (** Same as [pp_op]. *) @@ -708,6 +665,8 @@ module Abi : sig type insn = Insn.t [@@deriving bin_io, compare, equal, sexp] + module Eval : Virtual_eval_intf.S + module Ctrl : Virtual_ctrl_intf.S with type operand := operand and type local := local diff --git a/src/lib/virtual/virtual_eval_intf.ml b/src/lib/virtual/virtual_eval_intf.ml new file mode 100644 index 00000000..9f63915e --- /dev/null +++ b/src/lib/virtual/virtual_eval_intf.ml @@ -0,0 +1,49 @@ +module Insn = Virtual_insn + +module type S = sig + (** [binop_int o x y] evaluates a binary operator [o] over integers + [x] and [y], and returns the result if it is defined. *) + val binop_int : + Insn.binop -> + Bv.t -> + Bv.t -> + [`bool of bool | `int of Bv.t * Type.imm] option + + (** [binop_single o x y] evaluates a binary operator [o] over 32-bit + floats [x] and [y], and returns the result if it is defined. *) + val binop_single : + Insn.binop -> + Float32.t -> + Float32.t -> + [`bool of bool | `float of Float32.t] option + + (** [binop_double o x y] evaluates a binary operator [o] over 64-bit + floats [x] and [y], and returns the result if it is defined. *) + val binop_double : + Insn.binop -> + float -> + float -> + [`bool of bool | `double of float] option + + (** [unop_int o x ty] evaluates a unary operator [o] over the integer + [x] with type [ty], and returns the result if it is defined. *) + val unop_int : + Insn.unop -> + Bv.t -> + Type.imm -> + [`double of float | `float of Float32.t | `int of Bv.t * Type.imm] option + + (** [unop_single o x] evaluates a unary operator [o] over the 32-bit float + [x], and returns the result if it is defined. *) + val unop_single : + Insn.unop -> + Float32.t -> + [`double of float | `float of Float32.t | `int of Bv.t * Type.imm] option + + (** [unop_double o x] evaluates a unary operator [o] over the 64-bit float + [x], and returns the result if it is defined. *) + val unop_double : + Insn.unop -> + float -> + [`double of float | `int of Bv.t * Type.imm] option +end diff --git a/src/lib/virtual_lexer.mll b/src/lib/virtual_lexer.mll index 6cd8ab2c..8333a289 100644 --- a/src/lib/virtual_lexer.mll +++ b/src/lib/virtual_lexer.mll @@ -2,8 +2,6 @@ open Lexing open Virtual_parser - let string_buff = Buffer.create 256 - let char_for_backslash = function | 'n' -> '\010' | 'r' -> '\013' @@ -56,7 +54,7 @@ let exp = ('E' | 'e') (integer | ninteger) let inf = ("INF" | "inf" | "INFINITY" | "infinity") let nan = ("NAN" | "NaN" | "nan") let flt = ('-'? digit+ '.' digit+ exp?) | ('-'? inf) | ('-'? nan) -let backslash_escapes = ['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] +let backslash_escapes = ['\\' '\'' '"' 'n' 't' 'b' 'r'] let imm = ['w' 'l' 'b' 'h'] let imm_base = ['w' 'l'] let fp = ['s' 'd'] @@ -65,6 +63,7 @@ let special = ['m' 'f'] let typ = (basic | special) rule token = parse + | "(;" { block_comment 0 lexbuf; token lexbuf } | ';' { line_comment lexbuf; token lexbuf } | '\n' { newline lexbuf; token lexbuf } | space { token lexbuf } @@ -72,13 +71,13 @@ rule token = parse | '-' { MINUS } | ':' (ident as id) { TYPENAME id } | ':' { COLON } + | '@' (integer as id) { LABEL id } | '$' (ident as id) { SYM id } | '@' (ident as id) { LABEL id } - | '@' (integer as id) { LABEL id } - | '%' (ident as id) '.' (integer as i) { VAR (id, int_of_string i) } - | '%' (ident as id) { IDENT id } | '%' (integer as id) { TEMP id } | '%' (integer as id) '.' (integer as i) { TEMPI (id, int_of_string i) } + | '%' (ident as id) '.' (integer as i) { VAR (id, int_of_string i) } + | '%' (ident as id) { IDENT id } | "module" space+ (ident as id) { MODULE id } | "align" { ALIGN } | "const" { CONST () } @@ -92,7 +91,7 @@ rule token = parse | ',' { COMMA } | '=' { EQUALS } | "->" { ARROW } - | "..." { ELIPSIS } + | "..." { ELLIPSIS } | "sb" { SB } | "sh" { SH } | "sw" { SW } @@ -184,9 +183,9 @@ rule token = parse | "section" { SECTION } | "noreturn" { NORETURN } | '"' { - Buffer.clear string_buff; - string lexbuf; - STRING (Buffer.contents string_buff) + let buf = Buffer.create 64 in + string buf lexbuf; + STRING (Buffer.contents buf) } | eof { EOF } | (posints as i) '_' (imm as t) { @@ -204,17 +203,26 @@ rule token = parse | "false" { BOOL false } | _ { raise Error } -and string = parse +and string buf = parse | '"' { () } | '\\' (backslash_escapes as c) { - Buffer.add_char string_buff (char_for_backslash c); - string lexbuf + Buffer.add_char buf (char_for_backslash c); + string buf lexbuf } + | eof { raise Error } | _ as c { - Buffer.add_char string_buff c; - string lexbuf + Buffer.add_char buf c; + string buf lexbuf } +and block_comment depth = parse + | "(;" { block_comment (depth + 1) lexbuf } + | ";)" { if depth > 0 then block_comment (depth - 1) lexbuf } + | '\n' { newline lexbuf; block_comment depth lexbuf } + | eof { raise Error } + | _ { block_comment depth lexbuf } + and line_comment = parse - | '\n' { () } + | '\n' { newline lexbuf } + | eof { () } | _ { line_comment lexbuf } diff --git a/src/lib/virtual_parser.mly b/src/lib/virtual_parser.mly index 2e94e8ea..2a91d792 100644 --- a/src/lib/virtual_parser.mly +++ b/src/lib/virtual_parser.mly @@ -120,7 +120,7 @@ %token COMMA %token EQUALS %token ARROW -%token ELIPSIS +%token ELLIPSIS %token SB SH SW W L B H S D Z %token ADD DIV MUL SUB NEG %token REM MULH UMULH UDIV UREM AND OR ASR LSL LSR ROL ROR XOR NOT @@ -268,7 +268,7 @@ func: { make_fn slots blks args l name return true } func_args: - | ELIPSIS { !!([], true) } + | ELLIPSIS { !!([], true) } | t = type_arg x = var { let+ x = x in [x, t], false } | t = type_arg x = var COMMA rest = func_args { @@ -456,7 +456,7 @@ call_args: let+ a = a and+ rest = rest in Arg a :: rest } - | a = operand COMMA ELIPSIS COMMA vargs = separated_nonempty_list(COMMA, operand) + | a = operand COMMA ELLIPSIS COMMA vargs = separated_nonempty_list(COMMA, operand) { let+ a = a and+ vargs = Context.List.all vargs in Arg a :: Core.List.map vargs ~f:(fun a -> Varg a) diff --git a/src/test/data/opt/ackermann.driver.sysv.amd64.c b/src/test/data/opt/ackermann.driver.sysv.amd64.c index a984836e..2edb4408 100644 --- a/src/test/data/opt/ackermann.driver.sysv.amd64.c +++ b/src/test/data/opt/ackermann.driver.sysv.amd64.c @@ -2,7 +2,8 @@ extern int foo(int m, int n); -int main() { +int +main() { assert(foo(0, 0) == 1); assert(foo(1, 3) == 5); assert(foo(2, 2) == 7); diff --git a/src/test/data/opt/analyze_array.driver.sysv.amd64.c b/src/test/data/opt/analyze_array.driver.sysv.amd64.c index 2548db80..7bbb43a4 100644 --- a/src/test/data/opt/analyze_array.driver.sysv.amd64.c +++ b/src/test/data/opt/analyze_array.driver.sysv.amd64.c @@ -3,7 +3,8 @@ extern int analyze_array(int *arr, int n); extern int analyze_array_inl(int *arr, int n); -int main() { +int +main() { int arr1[] = {3, 1, 4, 1, 5, 9, 2}; assert(analyze_array(arr1, 7) == 229); assert(analyze_array_inl(arr1, 7) == 229); diff --git a/src/test/data/opt/analyze_array.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/analyze_array.vir.opt.sysv.amd64.regalloc index 7b584c3e..9e6e62a7 100644 --- a/src/test/data/opt/analyze_array.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/analyze_array.vir.opt.sysv.amd64.regalloc @@ -68,11 +68,10 @@ export function $analyze_array { ; returns: rax mov ecx, r13d ; @75 sub ecx, ebx ; @122 add eax, dword ptr [rbp - 0x10] ; @77 - add ecx, eax ; @78 - lea eax, qword ptr [r14 + 0x1] ; @79 - mov dword ptr [rbp - 0x10], ecx ; @186 + add eax, ecx ; @78 + inc r14d ; @79 + mov dword ptr [rbp - 0x10], eax ; @186 mov edi, r13d ; @119 - mov r14d, eax ; @120 jmp @13 ; @121 } diff --git a/src/test/data/opt/and_test.driver.sysv.amd64.c b/src/test/data/opt/and_test.driver.sysv.amd64.c index 306ee73a..2bb8ea11 100644 --- a/src/test/data/opt/and_test.driver.sysv.amd64.c +++ b/src/test/data/opt/and_test.driver.sysv.amd64.c @@ -4,7 +4,8 @@ extern int foo(int x, int y); extern int bar(int x, int y); extern long baz(long x); -int main() { +int +main() { assert(foo(3, 4) == 3); assert(foo(4, 4) == 4); assert(foo(1, 4) == 1); diff --git a/src/test/data/opt/badload1.vir b/src/test/data/opt/badload1.vir new file mode 100644 index 00000000..9fcc5a48 --- /dev/null +++ b/src/test/data/opt/badload1.vir @@ -0,0 +1,21 @@ +module badload1 + +export function w $foo(w %x) { + %a = slot 4, align 4 + %b = slot 4, align 4 +@start: + %c = slt.w %x, 0_w + br %c, @neg, @nonneg +@neg: + %v = ld.w %a + jmp @stuff +@nonneg: + st.w 5_w, %a + %v = ld.w %a + jmp @stuff +@stuff: + st.w 4_w, %b + %w = ld.w %b + %r = add.w %v, %w + ret %r +} diff --git a/src/test/data/opt/badload1.vir.opt b/src/test/data/opt/badload1.vir.opt new file mode 100644 index 00000000..c0937b1a --- /dev/null +++ b/src/test/data/opt/badload1.vir.opt @@ -0,0 +1,17 @@ +module badload1 + +export function w $foo(w %x) { + %a = slot 4, align 4 +@2: + %0 = slt.w %x, 0x0_w ; @13 + br %0, @3, @4 +@3: + %v.3 = ld.w %a ; @7 + jmp @6(%v.3) +@4: + st.w 0x5_w, %a ; @8 + jmp @6(0x5_w) +@6(%v.1): + %1 = add.w %v.1, 0x4_w ; @14 + ret %1 +} diff --git a/src/test/data/opt/badload2.vir b/src/test/data/opt/badload2.vir new file mode 100644 index 00000000..1308e2d0 --- /dev/null +++ b/src/test/data/opt/badload2.vir @@ -0,0 +1,20 @@ +module badload2 + +export function w $foo(w %x) { + %a = slot 4, align 4 + %b = slot 4, align 4 +@start: + %c = slt.w %x, 0_w + br %c, @neg, @nonneg +@neg: + jmp @stuff +@nonneg: + st.w 5_w, %a + jmp @stuff +@stuff: + %v = ld.w %a + st.w 4_w, %b + %w = ld.w %b + %r = add.w %v, %w + ret %r +} diff --git a/src/test/data/opt/badload2.vir.opt b/src/test/data/opt/badload2.vir.opt new file mode 100644 index 00000000..1cd43f63 --- /dev/null +++ b/src/test/data/opt/badload2.vir.opt @@ -0,0 +1,15 @@ +module badload2 + +export function w $foo(w %x) { + %a = slot 4, align 4 +@2: + %0 = slt.w %x, 0x0_w ; @12 + br %0, @6, @4 +@4: + st.w 0x5_w, %a ; @7 + jmp @6 +@6: + %v.1 = ld.w %a ; @8 + %1 = add.w %v.1, 0x4_w ; @13 + ret %1 +} diff --git a/src/test/data/opt/bsearch.driver.sysv.amd64.c b/src/test/data/opt/bsearch.driver.sysv.amd64.c new file mode 100644 index 00000000..a4d5efa6 --- /dev/null +++ b/src/test/data/opt/bsearch.driver.sysv.amd64.c @@ -0,0 +1,40 @@ +#include + +extern void *bsearch(const void *key, const void *base, size_t nel, + size_t width, int (*cmp)(const void *, const void *)); + +extern int intcmp(const void *a, const void *b); + +static void +test_bsearch(const int *arr, size_t n, int key) { + const int *res = bsearch(&key, arr, n, sizeof(int), intcmp); + if (res) { + printf("found %d at index %ld, value %d\n", key, (long)(res - arr), *res); + } else { + printf("%d not found\n", key); + } +} + +int +main() { + int arr[] = {-10, -3, -1, 0, 1, 2, 4, 7, 10, 25, 100}; + + size_t n = sizeof(arr) / sizeof(arr[0]); + + /* found */ + test_bsearch(arr, n, -10); + test_bsearch(arr, n, -1); + test_bsearch(arr, n, 0); + test_bsearch(arr, n, 4); + test_bsearch(arr, n, 25); + test_bsearch(arr, n, 100); + + /* not found */ + test_bsearch(arr, n, -11); + test_bsearch(arr, n, -2); + test_bsearch(arr, n, 3); + test_bsearch(arr, n, 5); + test_bsearch(arr, n, 8); + test_bsearch(arr, n, 99); + test_bsearch(arr, n, 101); +} diff --git a/src/test/data/opt/bsearch.driver.sysv.amd64.output b/src/test/data/opt/bsearch.driver.sysv.amd64.output new file mode 100644 index 00000000..a80e8202 --- /dev/null +++ b/src/test/data/opt/bsearch.driver.sysv.amd64.output @@ -0,0 +1,13 @@ +found -10 at index 0, value -10 +found -1 at index 2, value -1 +found 0 at index 3, value 0 +found 4 at index 6, value 4 +found 25 at index 9, value 25 +found 100 at index 10, value 100 +-11 not found +-2 not found +3 not found +5 not found +8 not found +99 not found +101 not found diff --git a/src/test/data/opt/bsearch.vir b/src/test/data/opt/bsearch.vir new file mode 100644 index 00000000..2ef348a8 --- /dev/null +++ b/src/test/data/opt/bsearch.vir @@ -0,0 +1,40 @@ +module bsearch + +export function w $intcmp(l %a, l %b) { +@start: + %ia = ld.w %a + %ib = ld.w %b + %k = sub.w %ia, %ib + ret %k +} + +export function l $bsearch(l %key, l %base, l %nel, l %width, l %cmp) { +@start: + jmp @hdr +@hdr: + %c = gt.l %nel, 0_l + br %c, @cmp, @null +@cmp: + %nel2 = udiv.l %nel, 2_l + %off = mul.l %width, %nel2 + %try = add.l %base, %off + %sign = call.w %cmp(%key, %try) + %c = slt.w %sign, 0_w + br %c, @less, @notless +@less: + %nel = udiv.l %nel, 2_l + jmp @hdr +@notless: + %c = sgt.w %sign, 0_w + br %c, @greater, @equal +@greater: + %base = add.l %try, %width + %ne2 = udiv.l %nel, 2_l + %ne2 = add.l %ne2, 1_l + %nel = sub.l %nel, %ne2 + jmp @hdr +@equal: + ret %try +@null: + ret 0_l +} diff --git a/src/test/data/opt/bsearch.vir.opt b/src/test/data/opt/bsearch.vir.opt new file mode 100644 index 00000000..acb1c8f1 --- /dev/null +++ b/src/test/data/opt/bsearch.vir.opt @@ -0,0 +1,34 @@ +module bsearch + +export function w $intcmp(l %a, l %b) { +@2: + %ia.1 = ld.w %a ; @3 + %ib.1 = ld.w %b ; @4 + %1 = sub.w %ia.1, %ib.1 ; @27 + ret %1 +} + +export function l $bsearch(l %key, l %base, l %nel, l %width, l %cmp) { +@6: + jmp @7(%nel, %base) +@7(%nel.1, %base.1): + %2 = ne.l %nel.1, 0x0_l ; @28 + br %2, @8, @26(0x0_l) +@8: + %3 = lsr.l %nel.1, 0x1_l ; @29 + %4 = mul.l %width, %3 ; @30 + %5 = add.l %base.1, %4 ; @31 + %sign.1 = call.w %cmp(%key, %5) ; @16 + %6 = slt.w %sign.1, 0x0_w ; @32 + br %6, @7(%3, %base.1), @12 +@12: + %7 = sgt.w %sign.1, 0x0_w ; @33 + br %7, @19, @26(%5) +@19: + %8 = add.l %5, %width ; @34 + %9 = add.l %3, 0x1_l ; @35 + %10 = sub.l %nel.1, %9 ; @36 + jmp @7(%10, %8) +@26(%0): + ret %0 +} diff --git a/src/test/data/opt/bsearch.vir.opt.sysv b/src/test/data/opt/bsearch.vir.opt.sysv new file mode 100644 index 00000000..1595ba27 --- /dev/null +++ b/src/test/data/opt/bsearch.vir.opt.sysv @@ -0,0 +1,34 @@ +module bsearch + +export function $intcmp(l %a/rdi, l %b/rsi) { +@2: + %ia.1 = ld.w %a ; @3 + %ib.1 = ld.w %b ; @4 + %1 = sub.w %ia.1, %ib.1 ; @27 + ret rax/%1 +} + +export function $bsearch(l %key/rdi, l %base/rsi, l %nel/rdx, l %width/rcx, l %cmp/r8) { +@6: + jmp @7(%nel, %base) +@7(%nel.1, %base.1): + %2 = ne.l %nel.1, 0x0_l ; @28 + br %2, @8, @26(0x0_l) +@8: + %3 = lsr.l %nel.1, 0x1_l ; @29 + %4 = mul.l %width, %3 ; @30 + %5 = add.l %base.1, %4 ; @31 + %sign.1/w/rax = call %cmp(%key/rdi, %5/rsi) ; @16 + %6 = slt.w %sign.1, 0x0_w ; @32 + br %6, @7(%3, %base.1), @12 +@12: + %7 = sgt.w %sign.1, 0x0_w ; @33 + br %7, @19, @26(%5) +@19: + %8 = add.l %5, %width ; @34 + %9 = add.l %3, 0x1_l ; @35 + %10 = sub.l %nel.1, %9 ; @36 + jmp @7(%10, %8) +@26(%0): + ret rax/%0 +} diff --git a/src/test/data/opt/bsearch.vir.opt.sysv.amd64 b/src/test/data/opt/bsearch.vir.opt.sysv.amd64 new file mode 100644 index 00000000..9c5c5a7d --- /dev/null +++ b/src/test/data/opt/bsearch.vir.opt.sysv.amd64 @@ -0,0 +1,66 @@ +module bsearch + +export function $intcmp { ; returns: rax +@2: + mov %a:l, rdi ; @3 + mov %b:l, rsi ; @40 + mov %ia.1:w, dword ptr [%a] ; @41 + mov %ib.1:w, dword ptr [%b] ; @4 + mov %1:w, %ia.1:w ; @27 + sub %1:w, %ib.1:w ; @39 + mov eax, %1:w ; @37 + ret ; @38 +} + +export function $bsearch { ; returns: rax +@6: + mov %key:l, rdi ; @77 + mov %base:l, rsi ; @78 + mov %nel:l, rdx ; @79 + mov %width:l, rcx ; @80 + mov %cmp:l, r8 ; @81 + mov %nel.1:l, %nel:l ; @82 + mov %base.1:l, %base:l ; @83 + jmp @7 ; @84 +@7: + test %nel.1:l, %nel.1:l ; @72 + jne @8 ; @73 + jmp @42 ; @74 +@42: + xor %0:w, %0:w ; @70 + jmp @26 ; @71 +@8: + mov %3:l, %nel.1:l ; @29 + shr %3:l, 0x1_b ; @69 + mov %4:l, %width:l ; @30 + imul %4:l, %3:l ; @68 + lea %5:l, qword ptr [%base.1 + %4*1] ; @31 + mov rdi, %key:l ; @16 + mov rsi, %5:l ; @65 + call %cmp:l ; rdi rsi ; @66 + mov %sign.1:w, eax ; @67 + test %sign.1:w, %sign.1:w ; @60 + js @43 ; @61 + jmp @12 ; @62 +@43: + mov %nel.1:l, %3:l ; @58 + jmp @7 ; @59 +@12: + cmp %sign.1:w, 0x0_w ; @53 + jg @19 ; @54 + jmp @44 ; @55 +@44: + mov %0:l, %5:l ; @51 + jmp @26 ; @52 +@26: + mov rax, %0:l ; @49 + ret ; @50 +@19: + lea %8:l, qword ptr [%5 + %width*1] ; @34 + lea %9:l, qword ptr [%3 + 0x1] ; @35 + mov %10:l, %nel.1:l ; @36 + sub %10:l, %9:l ; @48 + mov %nel.1:l, %10:l ; @45 + mov %base.1:l, %8:l ; @46 + jmp @7 ; @47 +} diff --git a/src/test/data/opt/bsearch.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/bsearch.vir.opt.sysv.amd64.regalloc new file mode 100644 index 00000000..a717a761 --- /dev/null +++ b/src/test/data/opt/bsearch.vir.opt.sysv.amd64.regalloc @@ -0,0 +1,65 @@ +module bsearch + +export function $intcmp { ; returns: rax +@2: + mov eax, dword ptr [rdi] ; @41 + sub eax, dword ptr [rsi] ; @39 + ret ; @38 +} + +export function $bsearch { ; returns: rax +@6: + push rbp ; @89 + mov rbp, rsp ; @90 + sub rsp, 0x18_l ; @91 + push r12 ; @92 + push r13 ; @93 + push r14 ; @94 + push r15 ; @95 + push rbx ; @96 + mov qword ptr [rbp - 0x18], rdi ; @85 + mov r15, rsi ; @78 + mov r13, rdx ; @79 + mov r14, rcx ; @80 + mov qword ptr [rbp - 0x10], r8 ; @86 +@7: + test r13, r13 ; @72 + jne @8 ; @73 +@42: + xor eax, eax ; @70 + jmp @26 ; @71 +@8: + mov rbx, r13 ; @29 + shr rbx, 0x1_b ; @69 + mov rax, r14 ; @30 + imul rax, rbx ; @68 + lea r12, qword ptr [r15 + rax*1] ; @31 + mov rdi, qword ptr [rbp - 0x18] ; @87 + mov rsi, r12 ; @65 + call qword ptr [rbp - 0x10] ; rdi rsi ; @66 + test eax, eax ; @60 + jns @12 ; @61 +@43: + mov r13, rbx ; @58 + jmp @7 ; @59 +@12: + cmp eax, 0x0_w ; @53 + jg @19 ; @54 +@44: + mov rax, r12 ; @51 +@26: + pop rbx ; @97 + pop r15 ; @98 + pop r14 ; @99 + pop r13 ; @100 + pop r12 ; @101 + leave ; @102 + ret ; @50 +@19: + lea r15, qword ptr [r12 + r14*1] ; @34 + lea rcx, qword ptr [rbx + 0x1] ; @35 + mov rax, r13 ; @36 + sub rax, rcx ; @48 + mov r13, rax ; @45 + jmp @7 ; @47 +} diff --git a/src/test/data/opt/clz_ctz_8.driver.sysv.amd64.c b/src/test/data/opt/clz_ctz_8.driver.sysv.amd64.c index 1532619b..8bdd5ca2 100644 --- a/src/test/data/opt/clz_ctz_8.driver.sysv.amd64.c +++ b/src/test/data/opt/clz_ctz_8.driver.sysv.amd64.c @@ -1,10 +1,10 @@ -#include #include extern unsigned char clz8(unsigned char n); extern unsigned char ctz8(unsigned char n); -int main() { +int +main() { assert(clz8(0b00001111) == 4); assert(ctz8(0b00001111) == 0); diff --git a/src/test/data/opt/coalesce1.vir b/src/test/data/opt/coalesce1.vir new file mode 100644 index 00000000..35efae18 --- /dev/null +++ b/src/test/data/opt/coalesce1.vir @@ -0,0 +1,36 @@ +module coalesce1 + +;; %a can coalesce with either %b or %c, but not both +export function w $f(w %x, w %y, w %z) { + %a = slot 8, align 8 + %b = slot 8, align 8 + %c = slot 8, align 8 +@start: + st.w %x, %a + jmp @b1 +@b1: + %u1 = ld.w %a + jmp @b2 +@b2: + jmp @b3 +@b3: + st.w %y, %b + jmp @b4 +@b4: + %u2 = ld.w %b + jmp @b5 +@b5: + st.w %z, %c + jmp @b6 +@b6: + %u3 = ld.w %c + jmp @b7 +@b7: + %u4 = ld.w %b + jmp @b8 +@b8: + %r = add.w %u1, %u2 + %r = add.w %r, %u3 + %r = add.w %r, %u4 + ret %r +} diff --git a/src/test/data/opt/coalesce1.vir.opt b/src/test/data/opt/coalesce1.vir.opt new file mode 100644 index 00000000..7bc292e4 --- /dev/null +++ b/src/test/data/opt/coalesce1.vir.opt @@ -0,0 +1,34 @@ +module coalesce1 + +export function w $f(w %x, w %y, w %z) { + %b = slot 8, align 8 + %c = slot 8, align 8 +@2: + st.w %x, %c ; @4 + jmp @3 +@3: + %u1.1 = ld.w %c ; @6 + jmp @5 +@5: + jmp @7 +@7: + st.w %y, %b ; @9 + jmp @8 +@8: + %u2.1 = ld.w %b ; @11 + jmp @10 +@10: + st.w %z, %c ; @13 + jmp @12 +@12: + %u3.1 = ld.w %c ; @15 + jmp @14 +@14: + %u4.1 = ld.w %b ; @17 + jmp @16 +@16: + %r.1 = add.w %u1.1, %u2.1 ; @18 + %r.2 = add.w %r.1, %u3.1 ; @19 + %r.3 = add.w %r.2, %u4.1 ; @20 + ret %r.3 +} diff --git a/src/test/data/opt/coalesce1a.vir b/src/test/data/opt/coalesce1a.vir new file mode 100644 index 00000000..f7b6ae9c --- /dev/null +++ b/src/test/data/opt/coalesce1a.vir @@ -0,0 +1,36 @@ +module coalesce1a + +;; same as coalesce1, but let all of the opts run +export function w $f(w %x, w %y, w %z) { + %a = slot 8, align 8 + %b = slot 8, align 8 + %c = slot 8, align 8 +@start: + st.w %x, %a + jmp @b1 +@b1: + %u1 = ld.w %a + jmp @b2 +@b2: + jmp @b3 +@b3: + st.w %y, %b + jmp @b4 +@b4: + %u2 = ld.w %b + jmp @b5 +@b5: + st.w %z, %c + jmp @b6 +@b6: + %u3 = ld.w %c + jmp @b7 +@b7: + %u4 = ld.w %b + jmp @b8 +@b8: + %r = add.w %u1, %u2 + %r = add.w %r, %u3 + %r = add.w %r, %u4 + ret %r +} diff --git a/src/test/data/opt/coalesce1a.vir.opt b/src/test/data/opt/coalesce1a.vir.opt new file mode 100644 index 00000000..9234fd83 --- /dev/null +++ b/src/test/data/opt/coalesce1a.vir.opt @@ -0,0 +1,9 @@ +module coalesce1a + +export function w $f(w %x, w %y, w %z) { +@2: + %2 = add.w %x, %y ; @21 + %4 = add.w %z, %y ; @23 + %5 = add.w %2, %4 ; @24 + ret %5 +} diff --git a/src/test/data/opt/collatz.driver.sysv.amd64.c b/src/test/data/opt/collatz.driver.sysv.amd64.c index 3dd398dd..3a1e3450 100644 --- a/src/test/data/opt/collatz.driver.sysv.amd64.c +++ b/src/test/data/opt/collatz.driver.sysv.amd64.c @@ -2,7 +2,8 @@ extern int foo(int n); -int main() { +int +main() { assert(foo(1) == 0); assert(foo(2) == 1); assert(foo(3) == 7); diff --git a/src/test/data/opt/collatz_rec.driver.sysv.amd64.c b/src/test/data/opt/collatz_rec.driver.sysv.amd64.c index 61ce7be7..ed1030e0 100644 --- a/src/test/data/opt/collatz_rec.driver.sysv.amd64.c +++ b/src/test/data/opt/collatz_rec.driver.sysv.amd64.c @@ -2,11 +2,13 @@ extern int foo(int n, int count); -int bar(int n) { +int +bar(int n) { return foo(n, 0); } -int main() { +int +main() { assert(bar(1) == 0); assert(bar(2) == 1); assert(bar(3) == 7); diff --git a/src/test/data/opt/cpyarray.driver.sysv.amd64.c b/src/test/data/opt/cpyarray.driver.sysv.amd64.c index b5e44d25..49735158 100644 --- a/src/test/data/opt/cpyarray.driver.sysv.amd64.c +++ b/src/test/data/opt/cpyarray.driver.sysv.amd64.c @@ -2,7 +2,8 @@ extern void foo(int *dst, int *src, unsigned int n); -int main() { +int +main() { int dst[3]; int src[3] = {1, 2, 3}; foo(dst, src, 3); diff --git a/src/test/data/opt/esc1.vir b/src/test/data/opt/esc1.vir new file mode 100644 index 00000000..225c0a35 --- /dev/null +++ b/src/test/data/opt/esc1.vir @@ -0,0 +1,27 @@ +module esc1 + +export function w $foo(w %x) { + %a = slot 8, align 8 + %b = slot 8, align 8 +@start: + %x1 = add.w %x, 1_w + %x2 = add.w %x, 2_w + %x3 = add.w %x, 3_w + st.w %x, %a + %a4 = add.l %a, 4_l + st.w %x1, %a4 + st.w %x2, %b + %b4 = add.l %b, 4_l + st.w %x3, %b4 + %c = slt.w %x, 0_w + br %c, @yes, @no +@yes: + %p = copy.l %b4 + jmp @done +@no: + %p = copy.l %a4 + jmp @done +@done: + %r = ld.w %p + ret %r +} diff --git a/src/test/data/opt/esc1.vir.opt b/src/test/data/opt/esc1.vir.opt new file mode 100644 index 00000000..6d33a0e6 --- /dev/null +++ b/src/test/data/opt/esc1.vir.opt @@ -0,0 +1,20 @@ +module esc1 + +export function w $foo(w %x) { + %a = slot 8, align 8 + %b = slot 8, align 8 +@2: + %0 = add.w %x, 0x1_w ; @19 + %1 = add.w %x, 0x2_w ; @20 + %2 = add.w %x, 0x3_w ; @21 + st.w %x, %a ; @8 + %3 = add.l %a, 0x4_l ; @22 + st.w %0, %3 ; @10 + st.w %1, %b ; @11 + %4 = add.l %b, 0x4_l ; @23 + st.w %2, %4 ; @13 + %5 = slt.w %x, 0x0_w ; @24 + %6 = sel.l %5, %4, %3 ; @25 + %r.1 = ld.w %6 ; @18 + ret %r.1 +} diff --git a/src/test/data/opt/gcdext.driver.sysv.amd64.c b/src/test/data/opt/gcdext.driver.sysv.amd64.c index ed6172a4..1cd1b678 100644 --- a/src/test/data/opt/gcdext.driver.sysv.amd64.c +++ b/src/test/data/opt/gcdext.driver.sysv.amd64.c @@ -8,13 +8,15 @@ struct S { extern struct S gcd(int a, int b); -void test(int a, int b, int x) { +void +test(int a, int b, int x) { struct S s = gcd(a, b); assert(s.gcd == x); assert(a * s.x + b * s.y == s.gcd); } -int main() { +int +main() { test(12, 18, 6); test(30, 20, 10); test(101, 103, 1); diff --git a/src/test/data/opt/gcdext.vir.opt.sysv b/src/test/data/opt/gcdext.vir.opt.sysv index d13e608d..8a62f120 100644 --- a/src/test/data/opt/gcdext.vir.opt.sysv +++ b/src/test/data/opt/gcdext.vir.opt.sysv @@ -1,56 +1,38 @@ module gcdext export function $gcd(w %a/rdi, w %b/rsi) { - %res = slot 16, align 8 - %r = slot 16, align 8 - %8 = slot 16, align 8 - %13 = slot 16, align 8 - %18 = slot 16, align 8 + %32 = slot 8, align 8 + %33 = slot 8, align 8 + %34 = slot 8, align 8 @2: - %1 = eq.w %a, 0x0_w ; @30 - %2 = add.l %res, 0x8_l ; @31 - %3 = add.l %res, 0x4_l ; @32 - br %1, @3, @4 + %1.1 = eq.w %a, 0x0_w ; @30 + br %1.1, @3, @4 @3: - st.w %b, %res ; @6 - st.w 0x0_w, %3 ; @8 - st.w 0x1_w, %2 ; @10 - %19 = ld.l %res ; @49 - st.l %19, %18 ; @50 - %21 = ld.l %2 ; @52 - %22 = add.l %18, 0x8_l ; @53 - st.l %21, %22 ; @54 - jmp @29(%18) + st.w %b, %33 ; @6 + %35.1 = add.l %33, 0x4_l ; @61 + st.w 0x0_w, %35.1 ; @8 + st.w 0x1_w, %34 ; @10 + %19.1 = ld.l %33 ; @49 + %21.1 = ld.l %34 ; @52 + jmp @29(%21.1, %19.1) @4: %m.1 = rem.w %b, %a ; @12 - %27/l/rax, %28/l/rdx = call $gcd(%m.1/rdi, %a/rsi) ; @13 - %26 = add.l %8, 0x8_l ; @58 - st.l %27, %8 ; @59 - st.l %28, %26 ; @60 - %9 = ld.l %8 ; @37 - st.l %9, %r ; @38 - %11 = ld.l %26 ; @40 - %12 = add.l %r, 0x8_l ; @41 - st.l %11, %12 ; @42 - %rg.1 = ld.w %r ; @15 - %4 = add.l %r, 0x4_l ; @33 - %rx.1 = ld.w %4 ; @17 - %ry.1 = ld.w %12 ; @19 - st.w %rg.1, %res ; @20 + %27.1/l/rax, %28.1/l/rdx = call $gcd(%m.1/rdi, %a/rsi) ; @13 + st.l %27.1, %33 ; @38 + st.l %28.1, %32 ; @42 + %rg.1 = ld.w %33 ; @15 + %36.1 = add.l %33, 0x4_l ; @62 + %rx.1 = ld.w %36.1 ; @17 + %ry.1 = ld.w %32 ; @19 + st.w %rg.1, %33 ; @20 %nx.1 = div.w %b, %a ; @21 - %6 = mul.w %nx.1, %rx.1 ; @35 - %7 = sub.w %ry.1, %6 ; @36 - st.w %7, %3 ; @25 - st.w %rx.1, %2 ; @27 - %14 = ld.l %res ; @43 - st.l %14, %13 ; @44 - %16 = ld.l %2 ; @46 - %17 = add.l %13, 0x8_l ; @47 - st.l %16, %17 ; @48 - jmp @29(%13) -@29(%0): - %23 = ld.l %0 ; @55 - %24 = add.l %0, 0x8_l ; @56 - %25 = ld.l %24 ; @57 - ret rax/%23, rdx/%25 + %6.1 = mul.w %nx.1, %rx.1 ; @35 + %7.1 = sub.w %ry.1, %6.1 ; @36 + st.w %7.1, %36.1 ; @25 + st.w %rx.1, %34 ; @27 + %14.1 = ld.l %33 ; @43 + %16.1 = ld.l %34 ; @46 + jmp @29(%16.1, %14.1) +@29(%30.1, %29.1): + ret rax/%29.1, rdx/%30.1 } diff --git a/src/test/data/opt/int_pow.driver.sysv.amd64.c b/src/test/data/opt/int_pow.driver.sysv.amd64.c index 764a9398..2a37a2cb 100644 --- a/src/test/data/opt/int_pow.driver.sysv.amd64.c +++ b/src/test/data/opt/int_pow.driver.sysv.amd64.c @@ -4,7 +4,8 @@ extern int64_t int_pow(int64_t base, int64_t exponent); extern int64_t int_pow_alt(int64_t base, int64_t exponent); -int main() { +int +main() { assert(int_pow(2, 10) == 1024); assert(int_pow(5, 3) == 125); assert(int_pow(7, 0) == 1); diff --git a/src/test/data/opt/int_pow.vir.opt b/src/test/data/opt/int_pow.vir.opt index b279f5cc..f57d0cfe 100644 --- a/src/test/data/opt/int_pow.vir.opt +++ b/src/test/data/opt/int_pow.vir.opt @@ -11,19 +11,23 @@ export function l $int_pow(l %base, l %exponent) { %2 = mul.l %1, %1 ; @57 %3 = mul.l %2, %1 ; @58 %4 = and.l %exponent.1, 0x3_l ; @59 - %5 = mul.l %ret.2, %1 ; @60 - switch.l %4, @21(%ret.2) [0x1_l -> @21(%5), + switch.l %4, @21(%ret.2) [0x1_l -> @14, 0x2_l -> @15, 0x3_l -> @16] +@14: + %11 = mul.l %ret.2, %1 ; @66 + jmp @21(%11) @15: - %9 = mul.l %5, %1 ; @64 - jmp @21(%9) + %9 = mul.l %ret.2, %1 ; @64 + %10 = mul.l %9, %1 ; @65 + jmp @21(%10) @16: - %7 = mul.l %5, %2 ; @62 + %6 = mul.l %ret.2, %1 ; @61 + %7 = mul.l %6, %2 ; @62 jmp @21(%7) @21(%ret.3): - %6 = asr.l %exponent.1, 0x2_l ; @61 - jmp @3(%ret.3, %3, %1, %6) + %5 = asr.l %exponent.1, 0x2_l ; @60 + jmp @3(%ret.3, %3, %1, %5) @10: ret %ret.2 } @@ -32,31 +36,31 @@ export function l $int_pow_alt(l %base, l %exponent) { %mul = slot 32, align 8 @27: st.l 0x1_l, %mul ; @30 - %10 = add.l %mul, 0x8_l ; @65 - st.l %base, %10 ; @32 - %11 = add.l %mul, 0x18_l ; @66 - st.l 0x1_l, %11 ; @34 - %12 = add.l %mul, 0x10_l ; @67 + %12 = add.l %mul, 0x8_l ; @67 + st.l %base, %12 ; @32 + %13 = add.l %mul, 0x18_l ; @68 + st.l 0x1_l, %13 ; @34 + %14 = add.l %mul, 0x10_l ; @69 jmp @28(0x1_l, %exponent) @28(%ret.2, %exponent.1): - %13 = ne.l %exponent.1, 0x0_l ; @68 - br %13, @35, @36 + %15 = ne.l %exponent.1, 0x0_l ; @70 + br %15, @35, @36 @35: - %mul1.1 = ld.l %10 ; @41 - %mul3.1 = ld.l %11 ; @42 - %14 = mul.l %mul1.1, %mul3.1 ; @69 - st.l %14, %10 ; @44 - %15 = mul.l %14, %14 ; @70 - st.l %15, %12 ; @46 - %16 = mul.l %15, %14 ; @71 - st.l %16, %11 ; @48 - %17 = and.l %exponent.1, 0x3_l ; @72 - %18 = lsl.l %17, 0x3_l ; @73 - %19 = add.l %mul, %18 ; @74 - %mule.1 = ld.l %19 ; @52 - %20 = mul.l %ret.2, %mule.1 ; @75 - %21 = asr.l %exponent.1, 0x2_l ; @76 - jmp @28(%20, %21) + %mul1.1 = ld.l %12 ; @41 + %mul3.1 = ld.l %13 ; @42 + %16 = mul.l %mul1.1, %mul3.1 ; @71 + st.l %16, %12 ; @44 + %17 = mul.l %16, %16 ; @72 + st.l %17, %14 ; @46 + %18 = mul.l %17, %16 ; @73 + st.l %18, %13 ; @48 + %19 = and.l %exponent.1, 0x3_l ; @74 + %20 = lsl.l %19, 0x3_l ; @75 + %21 = add.l %mul, %20 ; @76 + %mule.1 = ld.l %21 ; @52 + %22 = mul.l %ret.2, %mule.1 ; @77 + %23 = asr.l %exponent.1, 0x2_l ; @78 + jmp @28(%22, %23) @36: ret %ret.2 } diff --git a/src/test/data/opt/int_pow.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/int_pow.vir.opt.sysv.amd64.regalloc index d20e0a65..364833bc 100644 --- a/src/test/data/opt/int_pow.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/int_pow.vir.opt.sysv.amd64.regalloc @@ -2,75 +2,73 @@ module int_pow export function $int_pow { ; returns: rax @2: - mov rcx, rdi ; @119 - mov r8d, 0x1_w ; @121 - mov r9d, 0x1_w ; @122 + mov rdx, rsi ; @123 + mov r8d, 0x1_w ; @124 + mov esi, 0x1_w ; @125 @3: - test rsi, rsi ; @114 - jne @9 ; @115 + test rdx, rdx ; @117 + jne @9 ; @118 @10: - mov rax, r8 ; @112 - ret ; @113 + mov rax, r8 ; @115 + ret ; @116 @9: - imul rcx, r9 ; @111 - mov rdx, rcx ; @57 - imul rdx, rcx ; @110 - mov r9, rdx ; @58 - imul r9, rcx ; @109 - mov rax, rsi ; @59 - and rax, 0x3_l ; @108 - mov rdi, r8 ; @60 - imul rdi, rcx ; @107 - test rax, rax ; @96 - je @77 ; @97 - dec rax ; @99 - cmp rax, 0x2_l ; @100 - ja @77 ; @101 - lea r10, qword ptr [rip + @95] ; @102 - movsxd rax, dword ptr [r10 + rax*4] ; @103 - add rax, r10 ; @104 - jmp rax ; @105 - .tbl @95 [@21, @15, @16] ; @106 -@77: - mov rdi, r8 ; @93 - jmp @21 ; @94 + imul rdi, rsi ; @114 + mov rcx, rdi ; @57 + imul rcx, rdi ; @113 + mov rsi, rcx ; @58 + imul rsi, rdi ; @112 + mov rax, rdx ; @59 + and rax, 0x3_l ; @111 + je @21 ; @101 + dec rax ; @103 + cmp rax, 0x2_l ; @104 + ja @21 ; @105 + lea r9, qword ptr [rip + @99] ; @106 + movsxd rax, dword ptr [r9 + rax*4] ; @107 + add rax, r9 ; @108 + jmp rax ; @109 + .tbl @99 [@14, @15, @16] ; @110 @16: - imul rdi, rdx ; @90 - jmp @21 ; @89 + imul r8, rdi ; @96 + imul r8, rcx ; @95 + jmp @21 ; @94 @15: - imul rdi, rcx ; @87 + imul r8, rdi ; @92 + imul r8, rdi ; @91 + jmp @21 ; @90 +@14: + imul r8, rdi ; @88 @21: - sar rsi, 0x2_b ; @84 - mov r8, rdi ; @79 - jmp @3 ; @83 + sar rdx, 0x2_b ; @85 + jmp @3 ; @84 } export function $int_pow_alt { ; returns: rax @27: - sub rsp, 0x28_l ; @148 - mov qword ptr [rsp], 0x1_l ; @147 + sub rsp, 0x28_l ; @151 + mov qword ptr [rsp], 0x1_l ; @150 mov qword ptr [rsp + 0x8], rdi ; @32 mov qword ptr [rsp + 0x18], 0x1_l ; @34 - mov eax, 0x1_w ; @143 + mov eax, 0x1_w ; @146 @28: - test rsi, rsi ; @138 - jne @35 ; @139 + test rsi, rsi ; @141 + jne @35 ; @142 @36: - add rsp, 0x28_l ; @149 - ret ; @137 + add rsp, 0x28_l ; @152 + ret ; @140 @35: mov rdx, qword ptr [rsp + 0x8] ; @41 - imul rdx, qword ptr [rsp + 0x18] ; @135 + imul rdx, qword ptr [rsp + 0x18] ; @138 mov qword ptr [rsp + 0x8], rdx ; @44 - mov rcx, rdx ; @70 - imul rcx, rdx ; @134 + mov rcx, rdx ; @72 + imul rcx, rdx ; @137 mov qword ptr [rsp + 0x10], rcx ; @46 - imul rcx, rdx ; @133 + imul rcx, rdx ; @136 mov qword ptr [rsp + 0x18], rcx ; @48 - mov rcx, rsi ; @72 - and rcx, 0x3_l ; @132 + mov rcx, rsi ; @74 + and rcx, 0x3_l ; @135 mov rcx, qword ptr [rsp + rcx*8] ; @52 - imul rax, rcx ; @130 - sar rsi, 0x2_b ; @129 - jmp @28 ; @128 + imul rax, rcx ; @133 + sar rsi, 0x2_b ; @132 + jmp @28 ; @131 } diff --git a/src/test/data/opt/nosink.driver.sysv.amd64.c b/src/test/data/opt/nosink.driver.sysv.amd64.c index c272d3f2..bed0c02d 100644 --- a/src/test/data/opt/nosink.driver.sysv.amd64.c +++ b/src/test/data/opt/nosink.driver.sysv.amd64.c @@ -2,7 +2,8 @@ extern int foo(int x); -int main() { +int +main() { assert(foo(0) == 0); assert(foo(1) == 9); assert(foo(2) == 15); diff --git a/src/test/data/opt/palindrome.driver.sysv.amd64.c b/src/test/data/opt/palindrome.driver.sysv.amd64.c index 3e1ec13e..80cef3f2 100644 --- a/src/test/data/opt/palindrome.driver.sysv.amd64.c +++ b/src/test/data/opt/palindrome.driver.sysv.amd64.c @@ -3,7 +3,8 @@ bool palindrome(int n); -int main() { +int +main() { assert(palindrome(0)); assert(palindrome(1)); assert(palindrome(12321)); diff --git a/src/test/data/opt/popcnt.driver.sysv.amd64.c b/src/test/data/opt/popcnt.driver.sysv.amd64.c index f2cc0f4f..65b3711b 100644 --- a/src/test/data/opt/popcnt.driver.sysv.amd64.c +++ b/src/test/data/opt/popcnt.driver.sysv.amd64.c @@ -5,7 +5,8 @@ extern unsigned short popcnt16(unsigned short n); extern unsigned int popcnt32(unsigned int n); extern unsigned long popcnt64(unsigned long n); -int main() { +int +main() { assert(popcnt8(0xFF) == 8); assert(popcnt8(0x0F) == 4); diff --git a/src/test/data/opt/prime.driver.sysv.amd64.c b/src/test/data/opt/prime.driver.sysv.amd64.c index 4ca232e7..35abd58a 100644 --- a/src/test/data/opt/prime.driver.sysv.amd64.c +++ b/src/test/data/opt/prime.driver.sysv.amd64.c @@ -2,7 +2,8 @@ extern int foo(unsigned int x); -unsigned int nth(int n) { +unsigned int +nth(int n) { unsigned int x = 1; unsigned int i = 0; while (1) { @@ -15,7 +16,8 @@ unsigned int nth(int n) { } } -int main() { +int +main() { for (unsigned int i = 1; i <= 20; ++i) { printf("%d\n", nth(i)); } diff --git a/src/test/data/opt/prime_main_licm.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/prime_main_licm.vir.opt.sysv.amd64.regalloc index d5905974..f2cbb6ea 100644 --- a/src/test/data/opt/prime_main_licm.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/prime_main_licm.vir.opt.sysv.amd64.regalloc @@ -15,7 +15,7 @@ export function $main { ; returns: rax push rbx ; @69 mov rdi, qword ptr [rsi + 0x8] ; @5 call $atoi ; rdi ; @60 - mov r14d, eax ; @56 + mov r14d, eax ; @61 mov r13d, 0x1_w ; @57 mov eax, 0x1_w ; @58 @3: @@ -38,10 +38,8 @@ export function $main { ; returns: rax mov edx, ebx ; @36 xor al, al ; @37 call $printf ; rax rdi rdx rsi ; @38 - lea ecx, qword ptr [r14 - 0x1] ; @25 - lea eax, qword ptr [r13 + 0x1] ; @26 - mov r14d, ecx ; @31 - mov r13d, eax ; @32 + dec r14d ; @25 + inc r13d ; @26 mov eax, r12d ; @33 jmp @3 ; @34 @9: diff --git a/src/test/data/opt/promote2-partial.driver.sysv.amd64.c b/src/test/data/opt/promote2-partial.driver.sysv.amd64.c index b0c7ebcd..c15d4734 100644 --- a/src/test/data/opt/promote2-partial.driver.sysv.amd64.c +++ b/src/test/data/opt/promote2-partial.driver.sysv.amd64.c @@ -2,12 +2,14 @@ extern int foo(int a, int b); -void test(int a, int b, int x) { +void +test(int a, int b, int x) { int gcd = foo(a, b); assert(gcd == x); } -int main() { +int +main() { test(12, 18, 6); test(30, 20, 10); test(101, 103, 1); diff --git a/src/test/data/opt/qsort.driver.sysv.amd64.c b/src/test/data/opt/qsort.driver.sysv.amd64.c index b02d498e..13e3c46d 100644 --- a/src/test/data/opt/qsort.driver.sysv.amd64.c +++ b/src/test/data/opt/qsort.driver.sysv.amd64.c @@ -3,7 +3,8 @@ extern void qsort(int *arr, long low, long high); -int main() { +int +main() { int arr[] = {10, 7, 8, 9, 1, 5, 22, 59, 6, 17, 54}; int n = sizeof(arr) / sizeof(arr[0]); qsort(arr, 0, n - 1); diff --git a/src/test/data/opt/qsort.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/qsort.vir.opt.sysv.amd64.regalloc index aa502040..2c542c94 100644 --- a/src/test/data/opt/qsort.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/qsort.vir.opt.sysv.amd64.regalloc @@ -20,13 +20,13 @@ function $partition { ; returns: rax push r15 ; @132 push rbx ; @133 mov r12, rdi ; @49 - mov rcx, rsi ; @100 - lea rax, qword ptr [r12 + rdx*4] ; @50 + mov rcx, rdx ; @101 + lea rax, qword ptr [r12 + rcx*4] ; @50 mov qword ptr [rbp - 0x10], rax ; @124 - mov r14d, dword ptr [r12 + rdx*4] ; @11 - lea rax, qword ptr [rcx - 0x1] ; @51 - lea r15, qword ptr [rdx - 0x1] ; @52 - mov rbx, rcx ; @97 + mov r14d, dword ptr [r12 + rcx*4] ; @11 + lea rax, qword ptr [rsi - 0x1] ; @51 + lea r15, qword ptr [rcx - 0x1] ; @52 + mov rbx, rsi ; @97 @8: cmp rbx, r15 ; @92 jle @14 ; @93 @@ -44,18 +44,17 @@ function $partition { ; returns: rax leave ; @139 ret ; @88 @14: - lea rcx, qword ptr [r12 + rbx*4] ; @58 - mov edx, dword ptr [rcx] ; @22 - cmp edx, r14d ; @81 + lea rsi, qword ptr [r12 + rbx*4] ; @58 + mov ecx, dword ptr [rsi] ; @22 + cmp ecx, r14d ; @81 jg @19 ; @82 @18: lea r13, qword ptr [rax + 0x1] ; @61 lea rdi, qword ptr [r12 + r13*4] ; @63 - mov rsi, rcx ; @76 call $swap ; rdi rsi ; @77 mov rax, r13 ; @74 @19: - inc rbx ; @71 + inc rbx ; @60 jmp @8 ; @73 } @@ -68,8 +67,8 @@ export function $qsort { push r14 ; @144 push rbx ; @145 mov r14, rdi ; @119 + mov rbx, rsi ; @120 mov r13, rdx ; @121 - mov rbx, rsi ; @122 @38: cmp rbx, r13 ; @114 jl @39 ; @115 @@ -90,7 +89,6 @@ export function $qsort { mov rdi, r14 ; @44 mov rsi, rbx ; @106 call $qsort ; rdi rdx rsi ; @108 - lea rax, qword ptr [r12 + 0x1] ; @66 - mov rbx, rax ; @104 + lea rbx, qword ptr [r12 + 0x1] ; @66 jmp @38 ; @105 } diff --git a/src/test/data/opt/qsort_inline_swap.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/qsort_inline_swap.vir.opt.sysv.amd64.regalloc index b345194d..ab3f3f24 100644 --- a/src/test/data/opt/qsort_inline_swap.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/qsort_inline_swap.vir.opt.sysv.amd64.regalloc @@ -38,8 +38,8 @@ export function $qsort { push r14 ; @122 push rbx ; @123 mov r14, rdi ; @113 + mov rbx, rsi ; @114 mov r13, rdx ; @115 - mov rbx, rsi ; @116 @39: cmp rbx, r13 ; @108 jl @40 ; @109 @@ -60,7 +60,6 @@ export function $qsort { mov rdi, r14 ; @45 mov rsi, rbx ; @100 call $qsort ; rdi rdx rsi ; @102 - lea rax, qword ptr [r12 + 0x1] ; @67 - mov rbx, rax ; @98 + lea rbx, qword ptr [r12 + 0x1] ; @67 jmp @39 ; @99 } diff --git a/src/test/data/opt/retmem.vir.opt.sysv b/src/test/data/opt/retmem.vir.opt.sysv index 5514c598..096623cd 100644 --- a/src/test/data/opt/retmem.vir.opt.sysv +++ b/src/test/data/opt/retmem.vir.opt.sysv @@ -1,25 +1,13 @@ module retmem export function $foo(l %3/rdi, l %a/rsi, l %b/rdx, l %c/rcx, l %d/r8) { - %x = slot 32, align 8 @2: - st.l %a, %x ; @3 - %0 = add.l %x, 0x8_l ; @11 - st.l %b, %0 ; @5 - %1 = add.l %x, 0x10_l ; @12 - st.l %c, %1 ; @7 - %2 = add.l %x, 0x18_l ; @13 - st.l %d, %2 ; @9 - %4 = ld.l %x ; @14 - st.l %4, %3 ; @15 - %6 = ld.l %0 ; @17 - %7 = add.l %3, 0x8_l ; @18 - st.l %6, %7 ; @19 - %9 = ld.l %1 ; @21 - %10 = add.l %3, 0x10_l ; @22 - st.l %9, %10 ; @23 - %12 = ld.l %2 ; @25 - %13 = add.l %3, 0x18_l ; @26 - st.l %12, %13 ; @27 + st.l %a, %3 ; @15 + %7.1 = add.l %3, 0x8_l ; @18 + st.l %b, %7.1 ; @19 + %10.1 = add.l %3, 0x10_l ; @22 + st.l %c, %10.1 ; @23 + %13.1 = add.l %3, 0x18_l ; @26 + st.l %d, %13.1 ; @27 ret rax/%3 } diff --git a/src/test/data/opt/sink1.driver.sysv.amd64.c b/src/test/data/opt/sink1.driver.sysv.amd64.c new file mode 100644 index 00000000..a8ba4927 --- /dev/null +++ b/src/test/data/opt/sink1.driver.sysv.amd64.c @@ -0,0 +1,13 @@ +#include + +extern int foo(int x); + +int +main() { + assert(foo(0) == 1); + assert(foo(1) == 0); + assert(foo(-1) == 0); + assert(foo(2) == 0); + assert(foo(1234) == 0); + assert(foo(-999) == 0); +} diff --git a/src/test/data/opt/sink1.vir b/src/test/data/opt/sink1.vir new file mode 100644 index 00000000..4d261a19 --- /dev/null +++ b/src/test/data/opt/sink1.vir @@ -0,0 +1,15 @@ +module sink1 + +export function w $foo(w %x) { +@start: + %y = add.w %x, 1_w + %c = eq.w %x, 0_w + br %c, @zero, @notzero +@zero: + %c = eq.w %y, 1_w + br %c, @one, @notzero +@one: + ret 1_w +@notzero: + ret 0_w +} diff --git a/src/test/data/opt/sink1.vir.opt b/src/test/data/opt/sink1.vir.opt new file mode 100644 index 00000000..8d310f66 --- /dev/null +++ b/src/test/data/opt/sink1.vir.opt @@ -0,0 +1,14 @@ +module sink1 + +export function w $foo(w %x) { +@2: + %2 = eq.w %x, 0x0_w ; @11 + br %2, @3, @9(0x0_w) +@3: + %3 = add.w %x, 0x1_w ; @12 + %4 = eq.w %3, 0x1_w ; @13 + %5 = flag.w %4 ; @14 + jmp @9(%5) +@9(%0): + ret %0 +} diff --git a/src/test/data/opt/spill1.driver.sysv.amd64.c b/src/test/data/opt/spill1.driver.sysv.amd64.c index cb7ec92d..3bf27f70 100644 --- a/src/test/data/opt/spill1.driver.sysv.amd64.c +++ b/src/test/data/opt/spill1.driver.sysv.amd64.c @@ -3,7 +3,8 @@ // NB: do not call this with 0 extern int foo(int n); -int main() { +int +main() { assert(foo(1) == 136); assert(foo(2) == 272); assert(foo(3) == 408); diff --git a/src/test/data/opt/spill1.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/spill1.vir.opt.sysv.amd64.regalloc index 4e696f0a..e5a3c04c 100644 --- a/src/test/data/opt/spill1.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/spill1.vir.opt.sysv.amd64.regalloc @@ -35,48 +35,48 @@ export function $foo { ; returns: rax add eax, 0x3_w ; @59 mov dword ptr [rsp + 0x40], eax ; @233 mov eax, dword ptr [rsp + 0x30] ; @235 - lea r8d, qword ptr [rax + 0x4] ; @60 + lea r10d, qword ptr [rax + 0x4] ; @60 mov eax, dword ptr [rsp + 0x2c] ; @236 - lea esi, qword ptr [rax + 0x5] ; @61 + lea r12d, qword ptr [rax + 0x5] ; @61 mov eax, dword ptr [rsp + 0x28] ; @237 - lea r13d, qword ptr [rax + 0x6] ; @62 + lea esi, qword ptr [rax + 0x6] ; @62 mov eax, dword ptr [rsp + 0x24] ; @238 - lea r15d, qword ptr [rax + 0x7] ; @63 + lea r11d, qword ptr [rax + 0x7] ; @63 mov eax, dword ptr [rsp + 0x20] ; @239 - lea r11d, qword ptr [rax + 0x8] ; @64 + lea edx, qword ptr [rax + 0x8] ; @64 mov eax, dword ptr [rsp + 0x1c] ; @240 - lea ecx, qword ptr [rax + 0x9] ; @65 + lea r9d, qword ptr [rax + 0x9] ; @65 mov eax, dword ptr [rsp + 0x18] ; @241 - lea r12d, qword ptr [rax + 0xa] ; @66 - mov eax, dword ptr [rsp + 0x14] ; @242 - lea r9d, qword ptr [rax + 0xb] ; @67 - mov eax, dword ptr [rsp + 0x10] ; @243 - lea r10d, qword ptr [rax + 0xc] ; @68 - mov eax, dword ptr [rsp + 0xc] ; @244 - lea ebx, qword ptr [rax + 0xd] ; @69 - mov eax, dword ptr [rsp + 0x8] ; @245 - lea edx, qword ptr [rax + 0xe] ; @70 - mov eax, dword ptr [rsp + 0x4] ; @246 - lea r14d, qword ptr [rax + 0xf] ; @71 - mov eax, dword ptr [rsp] ; @247 - add eax, 0x10_w ; @72 + add eax, 0xa_w ; @66 + mov ecx, dword ptr [rsp + 0x14] ; @242 + lea r13d, qword ptr [rcx + 0xb] ; @67 + mov ecx, dword ptr [rsp + 0x10] ; @243 + add ecx, 0xc_w ; @68 + mov r8d, dword ptr [rsp + 0xc] ; @244 + lea ebx, qword ptr [r8 + 0xd] ; @69 + mov r8d, dword ptr [rsp + 0x8] ; @245 + lea r14d, qword ptr [r8 + 0xe] ; @70 + mov r8d, dword ptr [rsp + 0x4] ; @246 + lea r15d, qword ptr [r8 + 0xf] ; @71 + mov r8d, dword ptr [rsp] ; @247 + add r8d, 0x10_w ; @72 dec edi ; @73 test edi, edi ; @189 je @21 ; @190 @168: - mov dword ptr [rsp], eax ; @248 - mov dword ptr [rsp + 0x4], r14d ; @249 - mov dword ptr [rsp + 0x8], edx ; @250 + mov dword ptr [rsp], r8d ; @248 + mov dword ptr [rsp + 0x4], r15d ; @249 + mov dword ptr [rsp + 0x8], r14d ; @250 mov dword ptr [rsp + 0xc], ebx ; @251 - mov dword ptr [rsp + 0x10], r10d ; @252 - mov dword ptr [rsp + 0x14], r9d ; @253 - mov dword ptr [rsp + 0x18], r12d ; @254 - mov dword ptr [rsp + 0x1c], ecx ; @255 - mov dword ptr [rsp + 0x20], r11d ; @256 - mov dword ptr [rsp + 0x24], r15d ; @257 - mov dword ptr [rsp + 0x28], r13d ; @258 - mov dword ptr [rsp + 0x2c], esi ; @259 - mov dword ptr [rsp + 0x30], r8d ; @260 + mov dword ptr [rsp + 0x10], ecx ; @252 + mov dword ptr [rsp + 0x14], r13d ; @253 + mov dword ptr [rsp + 0x18], eax ; @254 + mov dword ptr [rsp + 0x1c], r9d ; @255 + mov dword ptr [rsp + 0x20], edx ; @256 + mov dword ptr [rsp + 0x24], r11d ; @257 + mov dword ptr [rsp + 0x28], esi ; @258 + mov dword ptr [rsp + 0x2c], r12d ; @259 + mov dword ptr [rsp + 0x30], r10d ; @260 mov eax, dword ptr [rsp + 0x40] ; @261 mov dword ptr [rsp + 0x34], eax ; @262 mov eax, dword ptr [rsp + 0x44] ; @263 diff --git a/src/test/data/opt/spill2.driver.sysv.amd64.c b/src/test/data/opt/spill2.driver.sysv.amd64.c index 2f8984d0..02451abf 100644 --- a/src/test/data/opt/spill2.driver.sysv.amd64.c +++ b/src/test/data/opt/spill2.driver.sysv.amd64.c @@ -2,7 +2,8 @@ extern int foo(int *a, int *b, int *c, int *d, int n); -int main() { +int +main() { int a0[] = {}; int b0[] = {}; int c0[] = {}; diff --git a/src/test/data/opt/spill2.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/spill2.vir.opt.sysv.amd64.regalloc index 66ef99ef..2eccda13 100644 --- a/src/test/data/opt/spill2.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/spill2.vir.opt.sysv.amd64.regalloc @@ -12,11 +12,11 @@ export function $foo { ; returns: rax mov qword ptr [rsp], rsi ; @99 mov r15, rdx ; @92 mov r14, rcx ; @93 - mov r10d, r8d ; @94 + mov r11d, r8d ; @94 xor eax, eax ; @95 xor r13d, r13d ; @96 @3: - cmp r13d, r10d ; @85 + cmp r13d, r11d ; @85 jl @6 ; @86 @7: add rsp, 0x10_l ; @108 @@ -37,29 +37,27 @@ export function $foo { ; returns: rax mov r12d, eax ; @79 xor eax, eax ; @80 @9: - cmp eax, r10d ; @74 + cmp eax, r11d ; @74 jl @21 ; @75 @22: lea eax, qword ptr [rsi + rdi*1] ; @48 lea ecx, qword ptr [r8 + r9*1] ; @49 imul eax, ecx ; @73 add eax, r12d ; @51 - inc r13d ; @71 + inc r13d ; @52 jmp @3 ; @72 @21: - lea edx, qword ptr [rsi + rax*1] ; @53 - lea ecx, qword ptr [rdi + rax*1] ; @54 + lea ecx, qword ptr [rsi + rax*1] ; @53 + lea edx, qword ptr [rdi + rax*1] ; @54 lea ebx, qword ptr [r8 + rax*1] ; @55 - lea r11d, qword ptr [r9 + rax*1] ; @56 - imul edx, ecx ; @69 - mov ecx, ebx ; @58 - imul ecx, r11d ; @68 - add edx, ebx ; @59 - add r11d, ecx ; @60 - mov ecx, edx ; @61 - xor ecx, r11d ; @67 - add ecx, r12d ; @62 + lea r10d, qword ptr [r9 + rax*1] ; @56 + imul ecx, edx ; @69 + mov edx, ebx ; @58 + imul edx, r10d ; @68 + add ecx, ebx ; @59 + add edx, r10d ; @60 + xor ecx, edx ; @67 + add r12d, ecx ; @62 inc eax ; @63 - mov r12d, ecx ; @64 jmp @9 ; @66 } diff --git a/src/test/data/opt/sroa.vir b/src/test/data/opt/sroa.vir new file mode 100644 index 00000000..8227d9eb --- /dev/null +++ b/src/test/data/opt/sroa.vir @@ -0,0 +1,22 @@ +module sroa + +export function w $foo(w %a, w %b, w %c, w %d) { + %s = slot 16, align 4 +@start: + %p1 = add.l %s, 0_l + st.w %a, %p1 + %p2 = add.l %s, 4_l + st.w %b, %p2 + %p3 = add.l %s, 8_l + st.w %c, %p3 + %p4 = add.l %s, 12_l + st.w %d, %p4 + %a = ld.w %p1 + %b = ld.w %p2 + %c = ld.w %p3 + %d = ld.w %p4 + %x = add.w %a, %b + %x = add.w %x, %c + %x = add.w %x, %d + ret %x +} diff --git a/src/test/data/opt/sroa.vir.opt b/src/test/data/opt/sroa.vir.opt new file mode 100644 index 00000000..ed03491b --- /dev/null +++ b/src/test/data/opt/sroa.vir.opt @@ -0,0 +1,9 @@ +module sroa + +export function w $foo(w %a, w %b, w %c, w %d) { +@2: + %4 = add.w %a, %b ; @18 + %6 = add.w %c, %d ; @20 + %7 = add.w %4, %6 ; @21 + ret %7 +} diff --git a/src/test/data/opt/sumphi.driver.sysv.amd64.c b/src/test/data/opt/sumphi.driver.sysv.amd64.c new file mode 100644 index 00000000..4fb55013 --- /dev/null +++ b/src/test/data/opt/sumphi.driver.sysv.amd64.c @@ -0,0 +1,26 @@ +#include + +struct t { + int a; + int b; +}; + +int sumphi(struct t a, struct t b, int x); + +int +main() { + struct t s1 = {3, 4}; + struct t s2 = {10, -2}; + int res1 = sumphi(s1, s2, -1); + assert(res1 == 7); + int res2 = sumphi(s1, s2, 0); + assert(res2 == 8); + + struct t s3 = {5, 6}; + struct t s4 = {100, 200}; + int res3 = sumphi(s3, s4, -5); + int res4 = sumphi(s3, s4, 42); + assert(res3 == 11); + assert(res4 == 300); + return 0; +} diff --git a/src/test/data/opt/sumphi.vir.opt.sysv b/src/test/data/opt/sumphi.vir.opt.sysv index 30bdfc89..2e38fb39 100644 --- a/src/test/data/opt/sumphi.vir.opt.sysv +++ b/src/test/data/opt/sumphi.vir.opt.sysv @@ -3,31 +3,23 @@ module sumphi function $sum(l %s/rdi) { %p = slot 8, align 8 @2: - st.l %s, %p ; @24 + st.l %s, %p ; @22 %a.1 = ld.w %p ; @4 - %0.1 = add.l %p, 0x4_l ; @19 - %b.1 = ld.w %0.1 ; @6 - %1.1 = add.w %a.1, %b.1 ; @20 - ret rax/%1.1 + %0 = add.l %p, 0x4_l ; @19 + %b.1 = ld.w %0 ; @6 + %1 = add.w %a.1, %b.1 ; @20 + ret rax/%1 } export function $sumphi(l %a/rdi, l %b/rsi, w %x/rdx) { - %ra = slot 8, align 8 - %9 = slot 8, align 8 - %11 = slot 8, align 8 @8: - st.l %a, %ra ; @28 %2.1 = slt.w %x, 0x0_w ; @21 br %2.1, @9, @10 @9: - st.l %a, %11 ; @34 - jmp @14(%11) + jmp @14(%a) @10: - st.l %b, %9 ; @32 - jmp @14(%9) -@14(%u.1): - st.w 0x5_w, %ra ; @17 - %13.1 = ld.l %u.1 ; @35 - %s.1/w/rax = call $sum(%13.1/rdi) ; @18 + jmp @14(%b) +@14(%rb.3): + %s.1/w/rax = call $sum(%rb.3/rdi) ; @18 ret rax/%s.1 } diff --git a/src/test/data/opt/sumphi.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/sumphi.vir.opt.sysv.amd64.regalloc new file mode 100644 index 00000000..058c6ada --- /dev/null +++ b/src/test/data/opt/sumphi.vir.opt.sysv.amd64.regalloc @@ -0,0 +1,27 @@ +module sumphi + +function $sum { ; returns: rax +@2: + sub rsp, 0x8_l ; @55 + mov qword ptr [rsp], rdi ; @38 + mov eax, dword ptr [rsp + 0x4] ; @6 + add eax, dword ptr [rsp] ; @20 + add rsp, 0x8_l ; @56 + ret ; @37 +} + +export function $sumphi { ; returns: rax +@8: + push rbp ; @57 + mov rbp, rsp ; @58 + mov rax, rdi ; @21 + mov rdi, rsi ; @50 + test edx, edx ; @47 + jns @14 ; @48 +@9: + mov rdi, rax ; @43 +@14: + call $sum ; rdi ; @41 + leave ; @59 + ret ; @40 +} diff --git a/src/test/data/opt/switchcaseprop.vir b/src/test/data/opt/switchcaseprop.vir index 6ef49f61..1c80f737 100644 --- a/src/test/data/opt/switchcaseprop.vir +++ b/src/test/data/opt/switchcaseprop.vir @@ -17,3 +17,22 @@ export function w $foo(w %x) { %x = add.w %x, 1_w ret %x } + +export function w $bar(w %x) { +@start: + switch.w %x, @default [0x2_w -> @one, + 0x3_w -> @two, + 0x4_w -> @three, + 0x5_w -> @three] +@default: + ret %x +@one: + %x = add.w %x, 1_w + ret %x +@two: + %x = add.w %x, 1_w + ret %x +@three: + %x = add.w %x, 1_w + ret %x +} diff --git a/src/test/data/opt/switchcaseprop.vir.opt b/src/test/data/opt/switchcaseprop.vir.opt index e75f970d..249a0cfb 100644 --- a/src/test/data/opt/switchcaseprop.vir.opt +++ b/src/test/data/opt/switchcaseprop.vir.opt @@ -2,9 +2,22 @@ module foo export function w $foo(w %x) { @2: - switch.w %x, @10(%x) [0x1_w -> @10(0x2_w), - 0x2_w -> @10(0x3_w), - 0x3_w -> @10(0x4_w)] -@10(%0): + switch.w %x, @18(%x) [0x1_w -> @18(0x2_w), + 0x2_w -> @18(0x3_w), + 0x3_w -> @18(0x4_w)] +@18(%0): ret %0 } + +export function w $bar(w %x) { +@10: + switch.w %x, @19(%x) [0x2_w -> @19(0x3_w), + 0x3_w -> @19(0x4_w), + 0x4_w -> @14, + 0x5_w -> @14] +@14: + %2 = add.w %x, 0x1_w ; @20 + jmp @19(%2) +@19(%1): + ret %1 +} diff --git a/src/test/data/opt/switchcaseprop.vir.opt.sysv.amd64 b/src/test/data/opt/switchcaseprop.vir.opt.sysv.amd64 index c71df64e..4311036e 100644 --- a/src/test/data/opt/switchcaseprop.vir.opt.sysv.amd64 +++ b/src/test/data/opt/switchcaseprop.vir.opt.sysv.amd64 @@ -2,31 +2,63 @@ module foo export function $foo { ; returns: rax @2: - mov %x:w, edi ; @26 - test %x:w, %x:w ; @27 - je @11 ; @28 - mov %2:w, %x:w ; zx ; @29 - dec %2:l ; @30 - cmp %2:l, 0x2_l ; @31 - ja @11 ; @32 - lea %1:l, qword ptr [rip + @25] ; @33 - movsxd %3:l, dword ptr [%1 + %2*4] ; @34 - add %3:l, %1:l ; @35 - jmp %3:l ; @36 - .tbl @25 [@12, @13, @14] ; @37 -@11: - mov %0:w, %x:w ; @23 - jmp @10 ; @24 -@12: - mov %0:w, 0x2_w ; @21 - jmp @10 ; @22 -@13: - mov %0:w, 0x3_w ; @19 - jmp @10 ; @20 -@14: - mov %0:w, 0x4_w ; @17 - jmp @10 ; @18 + mov %x:w, edi ; @36 + test %x:w, %x:w ; @37 + je @21 ; @38 + mov %4:w, %x:w ; zx ; @39 + dec %4:l ; @40 + cmp %4:l, 0x2_l ; @41 + ja @21 ; @42 + lea %3:l, qword ptr [rip + @35] ; @43 + movsxd %5:l, dword ptr [%3 + %4*4] ; @44 + add %5:l, %3:l ; @45 + jmp %5:l ; @46 + .tbl @35 [@22, @23, @24] ; @47 +@21: + mov %0:w, %x:w ; @33 + jmp @18 ; @34 +@22: + mov %0:w, 0x2_w ; @31 + jmp @18 ; @32 +@23: + mov %0:w, 0x3_w ; @29 + jmp @18 ; @30 +@24: + mov %0:w, 0x4_w ; @27 + jmp @18 ; @28 +@18: + mov eax, %0:w ; @25 + ret ; @26 +} + +export function $bar { ; returns: rax @10: - mov eax, %0:w ; @15 - ret ; @16 + mov %x:w, edi ; @62 + cmp %x:w, 0x2_w ; @63 + jb @48 ; @64 + mov %7:w, %x:w ; zx ; @65 + sub %7:l, 0x2_l ; @66 + cmp %7:l, 0x3_l ; @67 + ja @48 ; @68 + lea %6:l, qword ptr [rip + @61] ; @69 + movsxd %8:l, dword ptr [%6 + %7*4] ; @70 + add %8:l, %6:l ; @71 + jmp %8:l ; @72 + .tbl @61 [@49, @50, @14, @14] ; @73 +@48: + mov %1:w, %x:w ; @59 + jmp @19 ; @60 +@49: + mov %1:w, 0x3_w ; @57 + jmp @19 ; @58 +@50: + mov %1:w, 0x4_w ; @55 + jmp @19 ; @56 +@14: + lea %2:w, qword ptr [%x + 0x1] ; @20 + mov %1:w, %2:w ; @53 + jmp @19 ; @54 +@19: + mov eax, %1:w ; @51 + ret ; @52 } diff --git a/src/test/data/opt/unref.driver.sysv.amd64.c b/src/test/data/opt/unref.driver.sysv.amd64.c new file mode 100644 index 00000000..75604f07 --- /dev/null +++ b/src/test/data/opt/unref.driver.sysv.amd64.c @@ -0,0 +1,42 @@ +#include + +struct t { + int a; + int b; +}; + +extern int sump(struct t *); +extern struct t mkt(int, int); +extern int sumt(int, int); + +int +main() { + struct t t1 = mkt(1, 2); + struct t t2 = mkt(3, 4); + struct t t3 = mkt(5, 6); + struct t t4 = mkt(7, 8); + struct t t5 = mkt(9, 10); + + assert(t1.a == 1); + assert(t1.b == 2); + assert(t2.a == 3); + assert(t2.b == 4); + assert(t3.a == 5); + assert(t3.b == 6); + assert(t4.a == 7); + assert(t4.b == 8); + assert(t5.a == 9); + assert(t5.b == 10); + + assert(sump(&t1) == 3); + assert(sump(&t2) == 7); + assert(sump(&t3) == 11); + assert(sump(&t4) == 15); + assert(sump(&t5) == 19); + + assert(sumt(1, 2) == 3); + assert(sumt(3, 4) == 7); + assert(sumt(5, 6) == 11); + assert(sumt(7, 8) == 15); + assert(sumt(9, 10) == 19); +} diff --git a/src/test/data/opt/unref.vir.opt.sysv b/src/test/data/opt/unref.vir.opt.sysv index f92df03c..3e6cf750 100644 --- a/src/test/data/opt/unref.vir.opt.sysv +++ b/src/test/data/opt/unref.vir.opt.sysv @@ -3,12 +3,12 @@ module unref function $sum(l %s/rdi) { %p = slot 8, align 8 @2: - st.l %s, %p ; @24 + st.l %s, %p ; @22 %a.1 = ld.w %p ; @4 - %0.1 = add.l %p, 0x4_l ; @19 - %b.1 = ld.w %0.1 ; @6 - %1.1 = add.w %a.1, %b.1 ; @20 - ret rax/%1.1 + %0 = add.l %p, 0x4_l ; @19 + %b.1 = ld.w %0 ; @6 + %1 = add.w %a.1, %b.1 ; @20 + ret rax/%1 } export function $sump(l %p/rdi) { @@ -22,10 +22,11 @@ export function $mkt(w %a/rdi, w %b/rsi) { %r = slot 8, align 8 @11: st.w %a, %r ; @12 - %2.1 = add.l %r, 0x4_l ; @21 - st.w %b, %2.1 ; @14 - %9.1 = ld.l %r ; @28 - ret rax/%9.1 + %2 = add.l %r, 0x4_l ; @21 + st.w %b, %2 ; @14 + %9 = ld.l %r ; @28 + st.l %9, %r ; @29 + ret rax/%9 } export function $sumt(w %a/rdi, w %b/rsi) { diff --git a/src/test/data/opt/uremby7.driver.sysv.amd64.c b/src/test/data/opt/uremby7.driver.sysv.amd64.c index 9c1e081a..a63d8329 100644 --- a/src/test/data/opt/uremby7.driver.sysv.amd64.c +++ b/src/test/data/opt/uremby7.driver.sysv.amd64.c @@ -2,7 +2,8 @@ extern unsigned int foo(unsigned int x); -int main() { +int +main() { assert(foo(0) == 0); assert(foo(1) == 1); assert(foo(2) == 2); diff --git a/src/test/data/opt/vaarg1.driver.sysv.amd64.c b/src/test/data/opt/vaarg1.driver.sysv.amd64.c index 41584fa9..305ffc92 100644 --- a/src/test/data/opt/vaarg1.driver.sysv.amd64.c +++ b/src/test/data/opt/vaarg1.driver.sysv.amd64.c @@ -7,7 +7,35 @@ struct S { extern long foo(long i, ...); -int main() { - struct S s = {0.0, 1}; - assert(foo(1, s) == 3); +int +main() { + struct S s1 = {0.0, 1}; + assert(foo(1, s1) == 3); + + struct S s2 = {1.0, 1}; + assert(foo(1, s2) == 4); + + struct S s3 = {2.0, 5}; + assert(foo(10, s3) == 18); + + struct S s4 = {-2.5, 100}; + assert(foo(-4, s4) == 95); + + struct S s5 = {12.75, -20}; + assert(foo(0, s5) == -7); + + struct S s6 = {0.999, 0}; + assert(foo(1, s6) == 3); + + struct S s7 = {-0.1, 7}; + assert(foo(3, s7) == 11); + + struct S s8 = {-1.234, 42}; + assert(foo(0, s8) == 42); + + struct S s9 = {1000.5, 5000}; + assert(foo(1000, s9) == 7001); + + struct S s10 = {-100.9, -50}; + assert(foo(10, s10) == -139); } diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv b/src/test/data/opt/vaarg1.vir.opt.sysv index 8c6f33d9..46807d32 100644 --- a/src/test/data/opt/vaarg1.vir.opt.sysv +++ b/src/test/data/opt/vaarg1.vir.opt.sysv @@ -1,92 +1,72 @@ module vaarg1 export function $foo(b %11/rax, l %i/rdi, ...) { - %ap = slot 24, align 8 - %r = slot 16, align 8 %5 = slot 176, align 16 - %21 = slot 16, align 8 + %52 = slot 8, align 8 @19: - %6 = add.l %5, 0x8_l ; @20 - regstore rsi, %6 ; @21 - %7 = add.l %5, 0x10_l ; @22 - regstore rdx, %7 ; @23 - %8 = add.l %5, 0x18_l ; @24 - regstore rcx, %8 ; @25 - %9 = add.l %5, 0x20_l ; @26 - regstore r8, %9 ; @27 - %10 = add.l %5, 0x28_l ; @28 - regstore r9, %10 ; @29 - %12 = eq.b %11, 0x0_b ; @30 - br %12, @2, @18 + %6.1 = add.l %5, 0x8_l ; @20 + regstore rsi, %6.1 ; @21 + %7.1 = add.l %5, 0x10_l ; @22 + regstore rdx, %7.1 ; @23 + %8.1 = add.l %5, 0x18_l ; @24 + regstore rcx, %8.1 ; @25 + %9.1 = add.l %5, 0x20_l ; @26 + regstore r8, %9.1 ; @27 + %10.1 = add.l %5, 0x28_l ; @28 + regstore r9, %10.1 ; @29 + %12.1 = eq.b %11, 0x0_b ; @30 + br %12.1, @2, @18 @18: - %13 = add.l %5, 0x30_l ; @31 - regstore xmm0, %13 ; @32 - %14 = add.l %5, 0x40_l ; @33 - regstore xmm1, %14 ; @34 - %15 = add.l %5, 0x50_l ; @35 - regstore xmm2, %15 ; @36 - %16 = add.l %5, 0x60_l ; @37 - regstore xmm3, %16 ; @38 - %17 = add.l %5, 0x70_l ; @39 - regstore xmm4, %17 ; @40 - %18 = add.l %5, 0x80_l ; @41 - regstore xmm5, %18 ; @42 - %19 = add.l %5, 0x90_l ; @43 - regstore xmm6, %19 ; @44 - %20 = add.l %5, 0xa0_l ; @45 - regstore xmm7, %20 ; @46 + %13.1 = add.l %5, 0x30_l ; @31 + regstore xmm0, %13.1 ; @32 + %14.1 = add.l %5, 0x40_l ; @33 + regstore xmm1, %14.1 ; @34 + %15.1 = add.l %5, 0x50_l ; @35 + regstore xmm2, %15.1 ; @36 + %16.1 = add.l %5, 0x60_l ; @37 + regstore xmm3, %16.1 ; @38 + %17.1 = add.l %5, 0x70_l ; @39 + regstore xmm4, %17.1 ; @40 + %18.1 = add.l %5, 0x80_l ; @41 + regstore xmm5, %18.1 ; @42 + %19.1 = add.l %5, 0x90_l ; @43 + regstore xmm6, %19.1 ; @44 + %20.1 = add.l %5, 0xa0_l ; @45 + regstore xmm7, %20.1 ; @46 jmp @2 @2: - st.w 0x8_w, %ap ; @53 - %26 = add.l %ap, 0x4_l ; @54 - st.w 0x30_w, %26 ; @55 - %27 = stkargs ; @56 - %28 = add.l %ap, 0x8_l ; @57 - st.l %27, %28 ; @58 - %29 = add.l %ap, 0x10_l ; @59 - st.l %5, %29 ; @60 + st.w 0x8_w, %52 ; @53 + %57.1 = add.l %52, 0x4_l ; @92 + st.w 0x30_w, %57.1 ; @55 + %27.1 = stkargs ; @56 jmp @62 @62: - %31 = ld.w %26 ; @68 - %32 = le.w %31, 0xa0_w ; @69 - br %32, @63, @65 + jmp @63 @63: - %33 = ld.w %ap ; @70 - %34 = le.w %33, 0x28_w ; @71 - br %34, @64, @65 + %33.1 = ld.w %52 ; @70 + %34.1 = le.w %33.1, 0x28_w ; @71 + br %34.1, @64, @65 @64: - %37 = zext.l %31 ; @74 - %38 = add.l %5, %37 ; @75 - %39 = zext.l %33 ; @76 - %40 = add.l %5, %39 ; @77 - %41 = add.w %31, 0x10_l ; @78 - %42 = add.w %33, 0x8_l ; @79 - st.w %41, %26 ; @80 - st.w %42, %ap ; @81 - jmp @66(%38, %40) + %38.1 = add.l %5, 0x30_l ; @75 + %39.1 = zext.l %33.1 ; @76 + %40.1 = add.l %5, %39.1 ; @77 + %42.1 = add.w %33.1, 0x8_l ; @79 + st.w 0x40_w, %57.1 ; @80 + st.w %42.1, %52 ; @81 + jmp @66(%38.1, %40.1) @65: - %44 = ld.l %28 ; @83 - %45 = add.l %44, 0x8_l ; @84 - %46 = add.l %44, 0x10_l ; @85 - st.l %46, %28 ; @86 - jmp @66(%44, %45) -@66(%47, %48): - %49 = ld.l %47 ; @87 - st.l %49, %21 ; @88 - %50 = ld.l %48 ; @89 - %51 = add.l %21, 0x8_l ; @90 - st.l %50, %51 ; @91 + %45.1 = add.l %27.1, 0x8_l ; @84 + jmp @66(%27.1, %45.1) +@66(%47.1, %48.1): + %49.1 = ld.l %47.1 ; @87 + st.l %49.1, %52 ; @88 + %50.1 = ld.l %48.1 ; @89 jmp @61 @61: - %22 = ld.l %21 ; @47 - st.l %22, %r ; @48 - %24 = ld.l %51 ; @50 - %25 = add.l %r, 0x8_l ; @51 - st.l %24, %25 ; @52 - %f1.1 = ld.d %r ; @6 - %0 = add.d %f1.1, 1.234_d ; @13 - %2 = ftosi.d.l %0 ; @15 - %3 = add.l %24, %2 ; @16 - %4 = add.l %3, %i ; @17 - ret rax/%4 + %f1.1 = ld.d %52 ; @6 + %0.1 = add.d %f1.1, 1.234_d ; @13 + %2.1 = ftosi.d.l %0.1 ; @15 + %3.1 = add.l %50.1, %2.1 ; @16 + %4.1 = add.l %3.1, %i ; @17 + ret rax/%4.1 } diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 index 6320d075..6c098966 100644 --- a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 +++ b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 @@ -1,21 +1,19 @@ module vaarg1 export function $foo { ; returns: rax - %ap = slot 24, align 8 - %r = slot 16, align 8 %5 = slot 176, align 16 - %21 = slot 16, align 8 + %52 = slot 8, align 8 @19: mov %11:b, al ; @20 - mov %i:l, rdi ; @121 + mov %i:l, rdi ; @120 mov qword ptr [%5 + 0x8], rsi ; @21 mov qword ptr [%5 + 0x10], rdx ; @23 mov qword ptr [%5 + 0x18], rcx ; @25 mov qword ptr [%5 + 0x20], r8 ; @27 mov qword ptr [%5 + 0x28], r9 ; @29 - test %11:b, %11:b ; @116 - je @2 ; @117 - jmp @18 ; @118 + test %11:b, %11:b ; @115 + je @2 ; @116 + jmp @18 ; @117 @18: movdqa xmmword ptr [%5 + 0x30], xmm0 ; @32 movdqa xmmword ptr [%5 + 0x40], xmm1 ; @34 @@ -25,62 +23,47 @@ export function $foo { ; returns: rax movdqa xmmword ptr [%5 + 0x80], xmm5 ; @42 movdqa xmmword ptr [%5 + 0x90], xmm6 ; @44 movdqa xmmword ptr [%5 + 0xa0], xmm7 ; @46 - jmp @2 ; @115 + jmp @2 ; @114 @2: - mov dword ptr [%ap], 0x8_w ; @53 - mov dword ptr [%ap + 0x4], 0x30_w ; @55 - lea %27:l, qword ptr [rbp + 0x10] ; @56 - mov qword ptr [%ap + 0x8], %27:l ; @58 - mov qword ptr [%ap + 0x10], %5:l ; @60 - jmp @62 ; @114 + mov dword ptr [%52], 0x8_w ; @53 + mov dword ptr [%52 + 0x4], 0x30_w ; @55 + lea %27.1:l, qword ptr [rbp + 0x10] ; @56 + jmp @62 ; @113 @62: - mov %31:w, dword ptr [%ap + 0x4] ; @68 - cmp %31:w, 0xa0_w ; @109 - jbe @63 ; @110 - jmp @65 ; @111 + jmp @63 ; @112 @63: - mov %33:w, dword ptr [%ap] ; @70 - cmp %33:w, 0x28_w ; @104 - jbe @64 ; @105 - jmp @65 ; @106 + mov %33.1:w, dword ptr [%52] ; @70 + cmp %33.1:w, 0x28_w ; @107 + jbe @64 ; @108 + jmp @65 ; @109 @65: - mov %44:l, qword ptr [%ap + 0x8] ; @83 - lea %45:l, qword ptr [%44 + 0x8] ; @84 - lea %46:l, qword ptr [%44 + 0x10] ; @85 - mov qword ptr [%ap + 0x8], %46:l ; @86 - mov %47:l, %44:l ; @101 - mov %48:l, %45:l ; @102 - jmp @66 ; @103 + lea %45.1:l, qword ptr [%27.1 + 0x8] ; @84 + mov %47.1:l, %27.1:l ; @104 + mov %48.1:l, %45.1:l ; @105 + jmp @66 ; @106 @64: - mov %37:w, %31:w ; @74 - lea %38:l, qword ptr [%5 + %37*1] ; @75 - mov %39:w, %33:w ; @76 - lea %40:l, qword ptr [%5 + %39*1] ; @77 - lea %41:w, qword ptr [%31 + 0x10] ; @78 - lea %42:w, qword ptr [%33 + 0x8] ; @79 - mov dword ptr [%ap + 0x4], %41:w ; @80 - mov dword ptr [%ap], %42:w ; @81 - mov %47:l, %38:l ; @98 - mov %48:l, %40:l ; @99 - jmp @66 ; @100 + lea %38.1:l, qword ptr [%5 + 0x30] ; @75 + mov %39.1:w, %33.1:w ; @76 + lea %40.1:l, qword ptr [%5 + %39.1*1] ; @77 + lea %42.1:w, qword ptr [%33.1 + 0x8] ; @79 + mov dword ptr [%52 + 0x4], 0x40_w ; @80 + mov dword ptr [%52], %42.1:w ; @81 + mov %47.1:l, %38.1:l ; @101 + mov %48.1:l, %40.1:l ; @102 + jmp @66 ; @103 @66: - mov %49:l, qword ptr [%47] ; @87 - mov qword ptr [%21], %49:l ; @88 - mov %50:l, qword ptr [%48] ; @89 - mov qword ptr [%21 + 0x8], %50:l ; @91 - jmp @61 ; @97 + mov %49.1:l, qword ptr [%47.1] ; @87 + mov qword ptr [%52], %49.1:l ; @88 + mov %50.1:l, qword ptr [%48.1] ; @89 + jmp @61 ; @100 @61: - mov %22:l, qword ptr [%21] ; @47 - mov qword ptr [%r], %22:l ; @48 - mov %24:l, qword ptr [%21 + 0x8] ; @50 - mov qword ptr [%r + 0x8], %24:l ; @52 - movsd %f1.1:d, qword ptr [%r] ; @6 - movsd %0:d, %f1.1:d ; @13 - addsd %0:d, qword ptr [rip + @94] ; @95 - .fp64 @94, 1.234 ; @96 - cvtsd2si %2:l, %0:d ; @15 - lea %3:l, qword ptr [%24 + %2*1] ; @16 - lea %4:l, qword ptr [%3 + %i*1] ; @17 - mov rax, %4:l ; @92 - ret ; @93 + movsd %f1.1:d, qword ptr [%52] ; @6 + movsd %0.1:d, %f1.1:d ; @13 + addsd %0.1:d, qword ptr [rip + @97] ; @98 + .fp64 @97, 1.234 ; @99 + cvttsd2si %2.1:l, %0.1:d ; @15 + lea %3.1:l, qword ptr [%50.1 + %2.1*1] ; @16 + lea %4.1:l, qword ptr [%3.1 + %i*1] ; @17 + mov rax, %4.1:l ; @95 + ret ; @96 } diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc index 0815c0cb..eeafde73 100644 --- a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc @@ -2,69 +2,51 @@ module vaarg1 export function $foo { ; returns: rax @19: - push rbp ; @124 - mov rbp, rsp ; @125 - sub rsp, 0xf0_l ; @126 - mov qword ptr [rbp - 0xe8], rsi ; @21 - mov qword ptr [rbp - 0xe0], rdx ; @23 - mov qword ptr [rbp - 0xd8], rcx ; @25 - mov qword ptr [rbp - 0xd0], r8 ; @27 - mov qword ptr [rbp - 0xc8], r9 ; @29 - test al, al ; @116 - je @2 ; @117 + push rbp ; @122 + mov rbp, rsp ; @123 + sub rsp, 0xc0_l ; @124 + mov qword ptr [rbp - 0xb8], rsi ; @21 + mov qword ptr [rbp - 0xb0], rdx ; @23 + mov qword ptr [rbp - 0xa8], rcx ; @25 + mov qword ptr [rbp - 0xa0], r8 ; @27 + mov qword ptr [rbp - 0x98], r9 ; @29 + test al, al ; @115 + je @2 ; @116 @18: - movdqa xmmword ptr [rbp - 0xc0], xmm0 ; @32 - movdqa xmmword ptr [rbp - 0xb0], xmm1 ; @34 - movdqa xmmword ptr [rbp - 0xa0], xmm2 ; @36 - movdqa xmmword ptr [rbp - 0x90], xmm3 ; @38 - movdqa xmmword ptr [rbp - 0x80], xmm4 ; @40 - movdqa xmmword ptr [rbp - 0x70], xmm5 ; @42 - movdqa xmmword ptr [rbp - 0x60], xmm6 ; @44 - movdqa xmmword ptr [rbp - 0x50], xmm7 ; @46 + movdqa xmmword ptr [rbp - 0x90], xmm0 ; @32 + movdqa xmmword ptr [rbp - 0x80], xmm1 ; @34 + movdqa xmmword ptr [rbp - 0x70], xmm2 ; @36 + movdqa xmmword ptr [rbp - 0x60], xmm3 ; @38 + movdqa xmmword ptr [rbp - 0x50], xmm4 ; @40 + movdqa xmmword ptr [rbp - 0x40], xmm5 ; @42 + movdqa xmmword ptr [rbp - 0x30], xmm6 ; @44 + movdqa xmmword ptr [rbp - 0x20], xmm7 ; @46 @2: - mov dword ptr [rbp - 0x40], 0x8_w ; @53 - mov dword ptr [rbp - 0x3c], 0x30_w ; @55 + mov dword ptr [rbp - 0x10], 0x8_w ; @53 + mov dword ptr [rbp - 0xc], 0x30_w ; @55 lea rax, qword ptr [rbp + 0x10] ; @56 - mov qword ptr [rbp - 0x38], rax ; @58 - lea rax, qword ptr [rbp - 0xf0] ; @60 - mov qword ptr [rbp - 0x30], rax ; @123 -@62: - mov esi, dword ptr [rbp - 0x3c] ; @68 - cmp esi, 0xa0_w ; @109 - ja @65 ; @110 -@63: - mov edx, dword ptr [rbp - 0x40] ; @70 - cmp edx, 0x28_w ; @104 - jbe @64 ; @105 + mov ecx, dword ptr [rbp - 0x10] ; @70 + cmp ecx, 0x28_w ; @107 + jbe @64 ; @108 @65: - mov rcx, qword ptr [rbp - 0x38] ; @83 - lea rax, qword ptr [rcx + 0x8] ; @84 - lea rdx, qword ptr [rcx + 0x10] ; @85 - mov qword ptr [rbp - 0x38], rdx ; @86 - jmp @66 ; @103 + lea rdx, qword ptr [rax + 0x8] ; @84 + jmp @66 ; @106 @64: - lea rcx, qword ptr [rbp + rsi*1 - 0xf0] ; @75 - lea rax, qword ptr [rbp + rdx*1 - 0xf0] ; @77 - add esi, 0x10_w ; @78 - add edx, 0x8_w ; @79 - mov dword ptr [rbp - 0x3c], esi ; @80 - mov dword ptr [rbp - 0x40], edx ; @81 + lea rax, qword ptr [rbp - 0x90] ; @75 + lea rdx, qword ptr [rbp + rcx*1 - 0xc0] ; @77 + add ecx, 0x8_w ; @79 + mov dword ptr [rbp - 0xc], 0x40_w ; @80 + mov dword ptr [rbp - 0x10], ecx ; @81 @66: - mov rcx, qword ptr [rcx] ; @87 - mov qword ptr [rbp - 0x18], rcx ; @88 - mov rax, qword ptr [rax] ; @89 - mov qword ptr [rbp - 0x10], rax ; @91 -@61: - mov rax, qword ptr [rbp - 0x18] ; @47 - mov qword ptr [rbp - 0x28], rax ; @48 - mov rcx, qword ptr [rbp - 0x10] ; @50 - mov qword ptr [rbp - 0x20], rcx ; @52 - movsd xmm0, qword ptr [rbp - 0x28] ; @6 - addsd xmm0, qword ptr [rip + @94] ; @95 - .fp64 @94, 1.234 ; @96 - cvtsd2si rax, xmm0 ; @15 + mov rax, qword ptr [rax] ; @87 + mov qword ptr [rbp - 0x10], rax ; @88 + mov rcx, qword ptr [rdx] ; @89 + movsd xmm0, qword ptr [rbp - 0x10] ; @6 + addsd xmm0, qword ptr [rip + @97] ; @98 + .fp64 @97, 1.234 ; @99 + cvttsd2si rax, xmm0 ; @15 add rax, rcx ; @16 add rax, rdi ; @17 - leave ; @127 - ret ; @93 + leave ; @125 + ret ; @96 } diff --git a/src/test/data/opt/vaarg2.driver.sysv.amd64.c b/src/test/data/opt/vaarg2.driver.sysv.amd64.c new file mode 100644 index 00000000..54bb762d --- /dev/null +++ b/src/test/data/opt/vaarg2.driver.sysv.amd64.c @@ -0,0 +1,17 @@ +#include + +extern long foo(long i, ...); +extern char bar(char b, ...); /* NB: bar actually has no named arguments */ + +struct t { + long l1; + long l2; +}; + +int +main() { + struct t s = {10, 32}; + assert(foo(5, s) == 47); + assert(!bar(0)); + assert(bar(1)); +} diff --git a/src/test/data/opt/vaarg2.vir.opt.sysv b/src/test/data/opt/vaarg2.vir.opt.sysv index c6f36e3d..5a781bc6 100644 --- a/src/test/data/opt/vaarg2.vir.opt.sysv +++ b/src/test/data/opt/vaarg2.vir.opt.sysv @@ -1,148 +1,112 @@ module vaarg2 export function $foo(b %9/rax, l %i/rdi, ...) { - %ap = slot 24, align 8 - %r = slot 16, align 8 %3 = slot 176, align 16 - %19 = slot 16, align 8 + %76 = slot 8, align 8 @18: - %4 = add.l %3, 0x8_l ; @19 - regstore rsi, %4 ; @20 - %5 = add.l %3, 0x10_l ; @21 - regstore rdx, %5 ; @22 - %6 = add.l %3, 0x18_l ; @23 - regstore rcx, %6 ; @24 - %7 = add.l %3, 0x20_l ; @25 - regstore r8, %7 ; @26 - %8 = add.l %3, 0x28_l ; @27 - regstore r9, %8 ; @28 - %10 = eq.b %9, 0x0_b ; @29 - br %10, @2, @17 + %4.1 = add.l %3, 0x8_l ; @19 + regstore rsi, %4.1 ; @20 + %5.1 = add.l %3, 0x10_l ; @21 + regstore rdx, %5.1 ; @22 + %6.1 = add.l %3, 0x18_l ; @23 + regstore rcx, %6.1 ; @24 + %7.1 = add.l %3, 0x20_l ; @25 + regstore r8, %7.1 ; @26 + %8.1 = add.l %3, 0x28_l ; @27 + regstore r9, %8.1 ; @28 + %10.1 = eq.b %9, 0x0_b ; @29 + br %10.1, @2, @17 @17: - %11 = add.l %3, 0x30_l ; @30 - regstore xmm0, %11 ; @31 - %12 = add.l %3, 0x40_l ; @32 - regstore xmm1, %12 ; @33 - %13 = add.l %3, 0x50_l ; @34 - regstore xmm2, %13 ; @35 - %14 = add.l %3, 0x60_l ; @36 - regstore xmm3, %14 ; @37 - %15 = add.l %3, 0x70_l ; @38 - regstore xmm4, %15 ; @39 - %16 = add.l %3, 0x80_l ; @40 - regstore xmm5, %16 ; @41 - %17 = add.l %3, 0x90_l ; @42 - regstore xmm6, %17 ; @43 - %18 = add.l %3, 0xa0_l ; @44 - regstore xmm7, %18 ; @45 + %11.1 = add.l %3, 0x30_l ; @30 + regstore xmm0, %11.1 ; @31 + %12.1 = add.l %3, 0x40_l ; @32 + regstore xmm1, %12.1 ; @33 + %13.1 = add.l %3, 0x50_l ; @34 + regstore xmm2, %13.1 ; @35 + %14.1 = add.l %3, 0x60_l ; @36 + regstore xmm3, %14.1 ; @37 + %15.1 = add.l %3, 0x70_l ; @38 + regstore xmm4, %15.1 ; @39 + %16.1 = add.l %3, 0x80_l ; @40 + regstore xmm5, %16.1 ; @41 + %17.1 = add.l %3, 0x90_l ; @42 + regstore xmm6, %17.1 ; @43 + %18.1 = add.l %3, 0xa0_l ; @44 + regstore xmm7, %18.1 ; @45 jmp @2 @2: - st.w 0x8_w, %ap ; @52 - %24 = add.l %ap, 0x4_l ; @53 - st.w 0x30_w, %24 ; @54 - %25 = stkargs ; @55 - %26 = add.l %ap, 0x8_l ; @56 - st.l %25, %26 ; @57 - %27 = add.l %ap, 0x10_l ; @58 - st.l %3, %27 ; @59 + st.w 0x8_w, %76 ; @52 + %79.1 = add.l %76, 0x4_l ; @139 + st.w 0x30_w, %79.1 ; @54 + %25.1 = stkargs ; @55 jmp @61 @61: - %28 = ld.w %ap ; @65 - %29 = le.w %28, 0x20_w ; @66 - br %29, @62, @63 + %28.1 = ld.w %76 ; @65 + %29.1 = le.w %28.1, 0x20_w ; @66 + br %29.1, @62, @63 @62: - %32 = zext.l %28 ; @69 - %33 = add.l %3, %32 ; @70 - %34 = add.w %28, 0x10_w ; @71 - st.w %34, %ap ; @72 - jmp @64(%33) + %32.1 = zext.l %28.1 ; @69 + %33.1 = add.l %3, %32.1 ; @70 + %34.1 = add.w %28.1, 0x10_w ; @71 + st.w %34.1, %76 ; @72 + jmp @64(%33.1) @63: - %36 = ld.l %26 ; @74 - %37 = add.l %36, 0x10_l ; @75 - st.l %37, %26 ; @76 - jmp @64(%36) -@64(%38): - %39 = ld.l %38 ; @77 - st.l %39, %19 ; @78 - %40 = add.l %38, 0x8_l ; @79 - %41 = ld.l %40 ; @80 - %42 = add.l %19, 0x8_l ; @81 - st.l %41, %42 ; @82 + jmp @64(%25.1) +@64(%38.1): + %39.1 = ld.l %38.1 ; @77 + %40.1 = add.l %38.1, 0x8_l ; @79 + %41.1 = ld.l %40.1 ; @80 jmp @60 @60: - %20 = ld.l %19 ; @46 - st.l %20, %r ; @47 - %22 = ld.l %42 ; @49 - %23 = add.l %r, 0x8_l ; @50 - st.l %22, %23 ; @51 - %f1.1 = ld.l %r ; @6 - %1 = add.l %f1.1, %22 ; @15 - %2 = add.l %1, %i ; @16 - ret rax/%2 + st.l %39.1, %76 ; @47 + %1.1 = add.l %39.1, %41.1 ; @15 + %2.1 = add.l %1.1, %i ; @16 + ret rax/%2.1 } export function $bar(b %49/rax, ...) { - %ap = slot 24, align 8 %43 = slot 176, align 16 @84: regstore rdi, %43 ; @85 - %44 = add.l %43, 0x8_l ; @86 - regstore rsi, %44 ; @87 - %45 = add.l %43, 0x10_l ; @88 - regstore rdx, %45 ; @89 - %46 = add.l %43, 0x18_l ; @90 - regstore rcx, %46 ; @91 - %47 = add.l %43, 0x20_l ; @92 - regstore r8, %47 ; @93 - %48 = add.l %43, 0x28_l ; @94 - regstore r9, %48 ; @95 - %50 = eq.b %49, 0x0_b ; @96 - br %50, @11, @83 + %44.1 = add.l %43, 0x8_l ; @86 + regstore rsi, %44.1 ; @87 + %45.1 = add.l %43, 0x10_l ; @88 + regstore rdx, %45.1 ; @89 + %46.1 = add.l %43, 0x18_l ; @90 + regstore rcx, %46.1 ; @91 + %47.1 = add.l %43, 0x20_l ; @92 + regstore r8, %47.1 ; @93 + %48.1 = add.l %43, 0x28_l ; @94 + regstore r9, %48.1 ; @95 + %50.1 = eq.b %49, 0x0_b ; @96 + br %50.1, @11, @83 @83: - %51 = add.l %43, 0x30_l ; @97 - regstore xmm0, %51 ; @98 - %52 = add.l %43, 0x40_l ; @99 - regstore xmm1, %52 ; @100 - %53 = add.l %43, 0x50_l ; @101 - regstore xmm2, %53 ; @102 - %54 = add.l %43, 0x60_l ; @103 - regstore xmm3, %54 ; @104 - %55 = add.l %43, 0x70_l ; @105 - regstore xmm4, %55 ; @106 - %56 = add.l %43, 0x80_l ; @107 - regstore xmm5, %56 ; @108 - %57 = add.l %43, 0x90_l ; @109 - regstore xmm6, %57 ; @110 - %58 = add.l %43, 0xa0_l ; @111 - regstore xmm7, %58 ; @112 + %51.1 = add.l %43, 0x30_l ; @97 + regstore xmm0, %51.1 ; @98 + %52.1 = add.l %43, 0x40_l ; @99 + regstore xmm1, %52.1 ; @100 + %53.1 = add.l %43, 0x50_l ; @101 + regstore xmm2, %53.1 ; @102 + %54.1 = add.l %43, 0x60_l ; @103 + regstore xmm3, %54.1 ; @104 + %55.1 = add.l %43, 0x70_l ; @105 + regstore xmm4, %55.1 ; @106 + %56.1 = add.l %43, 0x80_l ; @107 + regstore xmm5, %56.1 ; @108 + %57.1 = add.l %43, 0x90_l ; @109 + regstore xmm6, %57.1 ; @110 + %58.1 = add.l %43, 0xa0_l ; @111 + regstore xmm7, %58.1 ; @112 jmp @11 @11: - st.w 0x0_w, %ap ; @113 - %59 = add.l %ap, 0x4_l ; @114 - st.w 0x30_w, %59 ; @115 - %60 = stkargs ; @116 - %61 = add.l %ap, 0x8_l ; @117 - st.l %60, %61 ; @118 - %62 = add.l %ap, 0x10_l ; @119 - st.l %43, %62 ; @120 jmp @122 @122: - %63 = ld.w %ap ; @126 - %64 = le.w %63, 0x28_w ; @127 - br %64, @123, @124 + jmp @123 @123: - %67 = zext.l %63 ; @130 - %68 = add.l %43, %67 ; @131 - %69 = add.w %63, 0x8_w ; @132 - st.w %69, %ap ; @133 - jmp @125(%68) -@124: - %71 = ld.l %61 ; @135 - %72 = add.l %71, 0x8_l ; @136 - st.l %72, %61 ; @137 - jmp @125(%71) -@125(%73): - %t.1 = ld.b %73 ; @138 + %68.1 = add.l %43, 0x0_l ; @131 + jmp @125 +@125: + %t.1 = ld.b %68.1 ; @138 jmp @121 @121: ret rax/%t.1 diff --git a/src/test/data/opt/vasum.driver.sysv.amd64.c b/src/test/data/opt/vasum.driver.sysv.amd64.c index 74579f54..e2ee8244 100644 --- a/src/test/data/opt/vasum.driver.sysv.amd64.c +++ b/src/test/data/opt/vasum.driver.sysv.amd64.c @@ -2,6 +2,7 @@ extern int twenty_eight(void); -int main() { +int +main() { assert(twenty_eight() == 28); } diff --git a/src/test/data/opt/vasum.vir.opt.sysv b/src/test/data/opt/vasum.vir.opt.sysv index 1183d6fd..3406b829 100644 --- a/src/test/data/opt/vasum.vir.opt.sysv +++ b/src/test/data/opt/vasum.vir.opt.sysv @@ -1,79 +1,66 @@ module vasum export function $sum(b %9/rax, w %n/rdi, ...) { - %ap = slot 24, align 8 %3 = slot 176, align 16 @19: - %4 = add.l %3, 0x8_l ; @20 - regstore rsi, %4 ; @21 - %5 = add.l %3, 0x10_l ; @22 - regstore rdx, %5 ; @23 - %6 = add.l %3, 0x18_l ; @24 - regstore rcx, %6 ; @25 - %7 = add.l %3, 0x20_l ; @26 - regstore r8, %7 ; @27 - %8 = add.l %3, 0x28_l ; @28 - regstore r9, %8 ; @29 - %10 = eq.b %9, 0x0_b ; @30 - br %10, @2, @18 + %4.1 = add.l %3, 0x8_l ; @20 + regstore rsi, %4.1 ; @21 + %5.1 = add.l %3, 0x10_l ; @22 + regstore rdx, %5.1 ; @23 + %6.1 = add.l %3, 0x18_l ; @24 + regstore rcx, %6.1 ; @25 + %7.1 = add.l %3, 0x20_l ; @26 + regstore r8, %7.1 ; @27 + %8.1 = add.l %3, 0x28_l ; @28 + regstore r9, %8.1 ; @29 + %10.1 = eq.b %9, 0x0_b ; @30 + br %10.1, @2, @18 @18: - %11 = add.l %3, 0x30_l ; @31 - regstore xmm0, %11 ; @32 - %12 = add.l %3, 0x40_l ; @33 - regstore xmm1, %12 ; @34 - %13 = add.l %3, 0x50_l ; @35 - regstore xmm2, %13 ; @36 - %14 = add.l %3, 0x60_l ; @37 - regstore xmm3, %14 ; @38 - %15 = add.l %3, 0x70_l ; @39 - regstore xmm4, %15 ; @40 - %16 = add.l %3, 0x80_l ; @41 - regstore xmm5, %16 ; @42 - %17 = add.l %3, 0x90_l ; @43 - regstore xmm6, %17 ; @44 - %18 = add.l %3, 0xa0_l ; @45 - regstore xmm7, %18 ; @46 + %11.1 = add.l %3, 0x30_l ; @31 + regstore xmm0, %11.1 ; @32 + %12.1 = add.l %3, 0x40_l ; @33 + regstore xmm1, %12.1 ; @34 + %13.1 = add.l %3, 0x50_l ; @35 + regstore xmm2, %13.1 ; @36 + %14.1 = add.l %3, 0x60_l ; @37 + regstore xmm3, %14.1 ; @38 + %15.1 = add.l %3, 0x70_l ; @39 + regstore xmm4, %15.1 ; @40 + %16.1 = add.l %3, 0x80_l ; @41 + regstore xmm5, %16.1 ; @42 + %17.1 = add.l %3, 0x90_l ; @43 + regstore xmm6, %17.1 ; @44 + %18.1 = add.l %3, 0xa0_l ; @45 + regstore xmm7, %18.1 ; @46 jmp @2 @2: - st.w 0x8_w, %ap ; @47 - %19 = add.l %ap, 0x4_l ; @48 - st.w 0x30_w, %19 ; @49 - %20 = stkargs ; @50 - %21 = add.l %ap, 0x8_l ; @51 - st.l %20, %21 ; @52 - %22 = add.l %ap, 0x10_l ; @53 - st.l %3, %22 ; @54 - jmp @3(0x0_w, 0x0_w) -@3(%x.2, %i.2): - %0 = lt.w %i.2, %n ; @15 - br %0, @7, @8 + %20.1 = stkargs ; @50 + jmp @3(%20.1, 0x8_w, 0x0_w, 0x0_w) +@3(%36.2, %34.2, %x.1, %i.1): + %0.1 = lt.w %i.1, %n ; @15 + br %0.1, @7, @8 @7: jmp @56 @56: - %23 = ld.w %ap ; @60 - %24 = le.w %23, 0x28_w ; @61 - br %24, @57, @58 + %24.1 = le.w %34.2, 0x28_w ; @61 + br %24.1, @57, @58 @57: - %26 = ld.l %22 ; @63 - %27 = zext.l %23 ; @64 - %28 = add.l %26, %27 ; @65 - %29 = add.w %23, 0x8_w ; @66 - st.w %29, %ap ; @67 - jmp @59(%28) + %27.1 = zext.l %34.2 ; @64 + %28.1 = add.l %3, %27.1 ; @65 + %29.1 = add.w %34.2, 0x8_w ; @66 + jmp @59(%36.2, %29.1, %28.1) @58: - %31 = ld.l %21 ; @69 - %32 = add.l %31, 0x8_l ; @70 - st.l %32, %21 ; @71 - jmp @59(%31) -@59(%33): - %y.1 = ld.w %33 ; @72 + %32.1 = add.l %36.2, 0x8_l ; @70 + jmp @59(%32.1, %34.2, %36.2) +@59(%36.3, %34.3, %33.1): + %y.1 = ld.w %33.1 ; @72 jmp @55 @55: - %1 = add.w %x.2, %y.1 ; @16 - %2 = add.w %i.2, 0x1_w ; @17 - jmp @3(%1, %2) + %1.1 = add.w %x.1, %y.1 ; @16 + %2.1 = add.w %i.1, 0x1_w ; @17 + jmp @3(%36.3, %34.3, %1.1, %2.1) @8: - ret rax/%x.2 + ret rax/%x.1 } export function $twenty_eight() { diff --git a/src/test/data/opt/vasum.vir.opt.sysv.amd64 b/src/test/data/opt/vasum.vir.opt.sysv.amd64 index 887fe9c4..cd1095a8 100644 --- a/src/test/data/opt/vasum.vir.opt.sysv.amd64 +++ b/src/test/data/opt/vasum.vir.opt.sysv.amd64 @@ -1,19 +1,18 @@ module vasum export function $sum { ; returns: rax - %ap = slot 24, align 8 %3 = slot 176, align 16 @19: mov %9:b, al ; @20 - mov %n:w, edi ; @103 + mov %n:w, edi ; @111 mov qword ptr [%3 + 0x8], rsi ; @21 mov qword ptr [%3 + 0x10], rdx ; @23 mov qword ptr [%3 + 0x18], rcx ; @25 mov qword ptr [%3 + 0x20], r8 ; @27 mov qword ptr [%3 + 0x28], r9 ; @29 - test %9:b, %9:b ; @98 - je @2 ; @99 - jmp @18 ; @100 + test %9:b, %9:b ; @106 + je @2 ; @107 + jmp @18 ; @108 @18: movdqa xmmword ptr [%3 + 0x30], xmm0 ; @32 movdqa xmmword ptr [%3 + 0x40], xmm1 ; @34 @@ -23,70 +22,69 @@ export function $sum { ; returns: rax movdqa xmmword ptr [%3 + 0x80], xmm5 ; @42 movdqa xmmword ptr [%3 + 0x90], xmm6 ; @44 movdqa xmmword ptr [%3 + 0xa0], xmm7 ; @46 - jmp @2 ; @97 + jmp @2 ; @105 @2: - mov dword ptr [%ap], 0x8_w ; @47 - mov dword ptr [%ap + 0x4], 0x30_w ; @49 - lea %20:l, qword ptr [rbp + 0x10] ; @50 - mov qword ptr [%ap + 0x8], %20:l ; @52 - mov qword ptr [%ap + 0x10], %3:l ; @54 - xor %x.2:w, %x.2:w ; @94 - xor %i.2:w, %i.2:w ; @95 - jmp @3 ; @96 + lea %20.1:l, qword ptr [rbp + 0x10] ; @50 + mov %36.2:l, %20.1:l ; @100 + mov %34.2:w, 0x8_w ; @101 + xor %x.1:w, %x.1:w ; @102 + xor %i.1:w, %i.1:w ; @103 + jmp @3 ; @104 @3: - cmp %i.2:w, %n:w ; @89 - jb @7 ; @90 - jmp @8 ; @91 + cmp %i.1:w, %n:w ; @95 + jb @7 ; @96 + jmp @8 ; @97 @8: - mov eax, %x.2:w ; @87 - ret ; @88 + mov eax, %x.1:w ; @93 + ret ; @94 @7: - jmp @56 ; @86 + jmp @56 ; @92 @56: - mov %23:w, dword ptr [%ap] ; @60 - cmp %23:w, 0x28_w ; @81 - jbe @57 ; @82 - jmp @58 ; @83 + cmp %34.2:w, 0x28_w ; @87 + jbe @57 ; @88 + jmp @58 ; @89 @58: - mov %31:l, qword ptr [%ap + 0x8] ; @69 - lea %32:l, qword ptr [%31 + 0x8] ; @70 - mov qword ptr [%ap + 0x8], %32:l ; @71 - mov %33:l, %31:l ; @79 - jmp @59 ; @80 + lea %32.1:l, qword ptr [%36.2 + 0x8] ; @70 + mov %36.3:l, %32.1:l ; @83 + mov %34.3:w, %34.2:w ; @84 + mov %33.1:l, %36.2:l ; @85 + jmp @59 ; @86 @57: - mov %26:l, qword ptr [%ap + 0x10] ; @63 - mov %27:w, %23:w ; @64 - lea %28:l, qword ptr [%26 + %27*1] ; @65 - lea %29:w, qword ptr [%23 + 0x8] ; @66 - mov dword ptr [%ap], %29:w ; @67 - mov %33:l, %28:l ; @77 - jmp @59 ; @78 + mov %27.1:w, %34.2:w ; @64 + lea %28.1:l, qword ptr [%3 + %27.1*1] ; @65 + lea %29.1:w, qword ptr [%34.2 + 0x8] ; @66 + mov %36.3:l, %36.2:l ; @79 + mov %34.3:w, %29.1:w ; @80 + mov %33.1:l, %28.1:l ; @81 + jmp @59 ; @82 @59: - mov %y.1:w, dword ptr [%33] ; @72 - jmp @55 ; @76 + mov %y.1:w, dword ptr [%33.1] ; @72 + jmp @55 ; @78 @55: - lea %1:w, qword ptr [%x.2 + %y.1*1] ; @16 - lea %2:w, qword ptr [%i.2 + 0x1] ; @17 - mov %x.2:w, %1:w ; @73 - mov %i.2:w, %2:w ; @74 - jmp @3 ; @75 + lea %1.1:w, qword ptr [%x.1 + %y.1*1] ; @16 + lea %2.1:w, qword ptr [%i.1 + 0x1] ; @17 + mov %36.2:l, %36.3:l ; @73 + mov %34.2:w, %34.3:w ; @74 + mov %x.1:w, %1.1:w ; @75 + mov %i.1:w, %2.1:w ; @76 + jmp @3 ; @77 } export function $twenty_eight { ; returns: rax @13: mov edi, 0x7_w ; @14 - mov esi, 0x1_w ; @107 - mov edx, 0x2_w ; @108 - mov ecx, 0x3_w ; @109 - mov r8d, 0x4_w ; @110 - mov r9d, 0x5_w ; @111 - sub rsp, 0x10_l ; @112 - mov dword ptr [rsp], 0x6_w ; @113 - mov dword ptr [rsp + 0x8], 0x7_w ; @114 - xor al, al ; @115 - call $sum ; r8 r9 rax rcx rdi rdx rsi ; @116 - add rsp, 0x10_l ; @117 - mov %x.1:w, eax ; @118 - mov eax, %x.1:w ; @105 - ret ; @106 + mov esi, 0x1_w ; @115 + mov edx, 0x2_w ; @116 + mov ecx, 0x3_w ; @117 + mov r8d, 0x4_w ; @118 + mov r9d, 0x5_w ; @119 + sub rsp, 0x10_l ; @120 + mov dword ptr [rsp], 0x6_w ; @121 + mov dword ptr [rsp + 0x8], 0x7_w ; @122 + xor al, al ; @123 + call $sum ; r8 r9 rax rcx rdi rdx rsi ; @124 + add rsp, 0x10_l ; @125 + mov %x.1:w, eax ; @126 + mov eax, %x.1:w ; @113 + ret ; @114 } diff --git a/src/test/data/opt/vasum.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/vasum.vir.opt.sysv.amd64.regalloc new file mode 100644 index 00000000..7e857eef --- /dev/null +++ b/src/test/data/opt/vasum.vir.opt.sysv.amd64.regalloc @@ -0,0 +1,72 @@ +module vasum + +export function $sum { ; returns: rax +@19: + push rbp ; @127 + mov rbp, rsp ; @128 + sub rsp, 0xc0_l ; @129 + mov r10d, edi ; @111 + mov qword ptr [rbp - 0xa8], rsi ; @21 + mov qword ptr [rbp - 0xa0], rdx ; @23 + mov qword ptr [rbp - 0x98], rcx ; @25 + mov qword ptr [rbp - 0x90], r8 ; @27 + mov qword ptr [rbp - 0x88], r9 ; @29 + test al, al ; @106 + je @2 ; @107 +@18: + movdqa xmmword ptr [rbp - 0x80], xmm0 ; @32 + movdqa xmmword ptr [rbp - 0x70], xmm1 ; @34 + movdqa xmmword ptr [rbp - 0x60], xmm2 ; @36 + movdqa xmmword ptr [rbp - 0x50], xmm3 ; @38 + movdqa xmmword ptr [rbp - 0x40], xmm4 ; @40 + movdqa xmmword ptr [rbp - 0x30], xmm5 ; @42 + movdqa xmmword ptr [rbp - 0x20], xmm6 ; @44 + movdqa xmmword ptr [rbp - 0x10], xmm7 ; @46 +@2: + lea rax, qword ptr [rbp + 0x10] ; @50 + mov r8d, 0x8_w ; @101 + xor edx, edx ; @102 + xor ecx, ecx ; @103 +@3: + cmp ecx, r10d ; @95 + jb @56 ; @96 +@8: + mov eax, edx ; @93 + leave ; @130 + ret ; @94 +@56: + cmp r8d, 0x28_w ; @87 + jbe @57 ; @88 +@58: + lea rsi, qword ptr [rax + 0x8] ; @70 + mov rdi, rax ; @85 + jmp @59 ; @86 +@57: + lea rdi, qword ptr [rbp + r8*1 - 0xb0] ; @65 + add r8d, 0x8_w ; @66 + mov rsi, rax ; @79 +@59: + add edx, dword ptr [rdi] ; @16 + inc ecx ; @17 + mov rax, rsi ; @73 + jmp @3 ; @77 +} + +export function $twenty_eight { ; returns: rax +@13: + push rbp ; @131 + mov rbp, rsp ; @132 + mov edi, 0x7_w ; @14 + mov esi, 0x1_w ; @115 + mov edx, 0x2_w ; @116 + mov ecx, 0x3_w ; @117 + mov r8d, 0x4_w ; @118 + mov r9d, 0x5_w ; @119 + sub rsp, 0x10_l ; @120 + mov dword ptr [rsp], 0x6_w ; @121 + mov dword ptr [rsp + 0x8], 0x7_w ; @122 + xor al, al ; @123 + call $sum ; r8 r9 rax rcx rdi rdx rsi ; @124 + leave ; @133 + ret ; @114 +} diff --git a/src/test/dune b/src/test/dune index c4688699..a2d5bace 100644 --- a/src/test/dune +++ b/src/test/dune @@ -1,6 +1,34 @@ (tests - (names test_bv_interval test_opt test_type) + (names test_allen test_opt test_type) (libraries ocamldiff ounit2 cgen shexp.process) + (modules test_allen test_opt test_type) (ocamlopt_flags -O2) (deps (glob_files_rec data/*))) + +(library + (name test_float32) + (modules test_float32) + (libraries cgen) + (ocamlopt_flags -O2) + (inline_tests) + (preprocess + (pps ppx_jane))) + +(library + (name test_bv_interval) + (modules test_bv_interval) + (libraries cgen) + (ocamlopt_flags -O2) + (inline_tests) + (preprocess + (pps ppx_jane))) + +(library + (name test_bitset) + (modules test_bitset) + (libraries cgen) + (ocamlopt_flags -O2) + (inline_tests) + (preprocess + (pps ppx_jane))) diff --git a/src/test/test_allen.ml b/src/test/test_allen.ml new file mode 100644 index 00000000..99727129 --- /dev/null +++ b/src/test/test_allen.ml @@ -0,0 +1,47 @@ +open Core +open OUnit2 +open Cgen +open Allen_interval_algebra + +type range = { + lo : int; + hi : int; +} + +let (--) lo hi = {lo; hi} + +module A = Make(struct + type point = int + type t = range + let lo t = t.lo + let hi t = t.hi + include Int.Replace_polymorphic_compare + end) + +let printer = Format.asprintf "%a" pp + +let test expect a b _ = + let got = A.relate a b in + assert_equal ~printer expect got; + let expect' = converse expect in + let got' = A.relate b a in + assert_equal ~msg:"Converse" ~printer expect' got' + +let suite = + "Allen interval algebra" >::: [ + "before" >:: test Before (1--3) (5--7); + "meets" >:: test Meets (1--3) (3--8); + "overlaps" >:: test Overlaps (1--5) (3--8); + "finished_by" >:: test Finished_by (1--10) (4--10); + "contains" >:: test Contains (1--10) (3--7); + "starts" >:: test Starts (1--3) (1--10); + "equal" >:: test Equal (2--7) (2--7); + "started_by" >:: test Started_by (1--10) (1--3); + "during" >:: test During (4--6) (1--10); + "finishes" >:: test Finishes (4--10) (1--10); + "overlapped_by" >:: test Overlapped_by (5--10) (1--7); + "met_by" >:: test Met_by (10--15) (5--10); + "after" >:: test After (10--15) (1--7); + ] + +let () = run_test_tt_main suite diff --git a/src/test/test_bitset.ml b/src/test/test_bitset.ml new file mode 100644 index 00000000..a7121fc3 --- /dev/null +++ b/src/test/test_bitset.ml @@ -0,0 +1,112 @@ +open Core +open Cgen + +module Seq = Sequence +module B = Bitset.Id +module Q = Quickcheck +module G = Q.Generator +module S = Q.Shrinker +module O = Q.Observer +module T = Base_quickcheck.Test + +(* Keys must be positive, but if we include really large ones + in our tests then we might OOM. *) +let key_ok i = i >= 0 && i <= 2000 +let gen_key = Int.quickcheck_generator |> G.filter ~f:key_ok + +let gen_set = + G.(list (tuple2 gen_key bool)) |> G.map ~f:(fun bits -> + List.fold bits ~init:B.empty ~f:(fun acc (k, on) -> + if on then B.set acc k else acc)) + +let shr_set = S.create @@ fun s -> + B.enum s |> Seq.map ~f:(B.clear s) + +let obs_set = O.unmap [%observer : int list] + ~f:(Fn.compose Seq.to_list B.enum) + +module Single = struct + type t = B.t [@@deriving sexp] + let quickcheck_generator = gen_set + let quickcheck_observer = obs_set + let quickcheck_shrinker = shr_set +end + +module Pair = struct + type t = B.t * B.t [@@deriving sexp] + let quickcheck_generator = G.tuple2 gen_set gen_set + let quickcheck_observer = O.tuple2 obs_set obs_set + let quickcheck_shrinker = S.tuple2 shr_set shr_set +end + +module Triple = struct + type t = B.t * B.t * B.t [@@deriving sexp] + let quickcheck_generator = G.tuple3 gen_set gen_set gen_set + let quickcheck_observer = O.tuple3 obs_set obs_set obs_set + let quickcheck_shrinker = S.tuple3 shr_set shr_set shr_set +end + +module Key = struct + type t = int [@@deriving sexp] + let quickcheck_generator = gen_key + let quickcheck_observer = [%observer : int] + let quickcheck_shrinker = S.filter Int.quickcheck_shrinker ~f:key_ok +end + +module With_key = struct + type t = B.t * int [@@deriving sexp] + let quickcheck_generator = G.tuple2 gen_set gen_key + let quickcheck_observer = O.tuple2 obs_set [%observer : int] + let quickcheck_shrinker = S.tuple2 shr_set [%shrinker : int] +end + +let%test_unit "union is commutative" = + T.run_exn (module Pair) ~f:(fun (a, b) -> + [%test_eq : B.t] (B.union a b) (B.union b a)) + +let%test_unit "union is associative" = + T.run_exn (module Triple) ~f:(fun (a, b, c) -> + [%test_eq : B.t] + (B.union a (B.union b c)) + (B.union (B.union a b) c)) + +let%test_unit "inter is commutative" = + T.run_exn (module Pair) ~f:(fun (a, b) -> + [%test_eq : B.t] (B.inter a b) (B.inter b a)) + +let%test_unit "inter is associative" = + T.run_exn (module Triple) ~f:(fun (a, b, c) -> + [%test_eq : B.t] + (B.inter a (B.inter b c)) + (B.inter (B.inter a b) c)) + +let%test_unit "set / mem consistency" = + T.run_exn (module With_key) ~f:(fun (s, k) -> + let s' = B.set s k in + assert (B.mem s' k)) + +let%test_unit "clear / mem consistency" = + T.run_exn (module With_key) ~f:(fun (s, k) -> + let s' = B.clear s k in + assert (not (B.mem s' k))) + +let%test_unit "singleton has exactly one element" = + T.run_exn (module Key) ~f:(fun k -> + [%test_result : int list] + (Seq.to_list (B.enum (B.singleton k))) + ~expect:[k]) + +let%test_unit "init n = [0..n-1]" = + T.run_exn (module Key) ~f:(fun n -> + [%test_result : int list] + (Seq.to_list (B.enum (B.init n))) + ~expect:(List.init n ~f:Fn.id)) + +let%test_unit "min_elt matches smallest element in enum" = + T.run_exn (module Single) ~f:(fun s -> + match B.min_elt s with + | None -> + [%test_eq : int list] (Seq.to_list (B.enum s)) [] + | Some k -> + let hd = List.hd_exn (Seq.to_list (B.enum s)) in + [%test_eq : int] k hd) diff --git a/src/test/test_bv_interval.ml b/src/test/test_bv_interval.ml index 1dcb6363..313817bc 100644 --- a/src/test/test_bv_interval.ml +++ b/src/test/test_bv_interval.ml @@ -1,28 +1,447 @@ open Core -open OUnit2 open Cgen module I = Bv_interval +module Q = Quickcheck -let expect x i = - let size = I.size i in - let i' = match x with - | `empty -> I.create_empty ~size - | `full -> I.create_full ~size - | `def (lo, hi) -> I.create ~lo ~hi ~size in - let msg = Format.asprintf "Expected %a, got %a" I.pp i' I.pp i in - assert_bool msg (I.equal i i') - -let test_1 _ = - let size = 32 in - let module B = (val Bv.modular size) in - let i1 = I.create ~lo:Bv.one ~hi:B.(int 5) ~size in - let i2 = I.create ~lo:B.(int 3) ~hi:B.(int 7) ~size in - expect (`def (B.int 1, B.int 7)) @@ I.union i1 i2; - expect (`def (B.int 3, B.int 5)) @@ I.intersect i1 i2 - -let suite = "Test BV intervals" >::: [ - "Test 1" >:: test_1; - ] - -let () = run_test_tt_main suite +let gen_size = + Q.Generator.small_positive_int |> + Q.Generator.filter ~f:(fun n -> n > 0 && n <= 64) + +let gen_small_size = + Q.Generator.small_positive_int |> + Q.Generator.filter ~f:(fun n -> n > 0 && n <= 8) + +let gen_bv size = + Q.Generator.map Int64.quickcheck_generator ~f:(fun x -> + Bv.(int64 x mod modulus size)) + +let gen_interval size = + Q.Generator.map (gen_bv size) ~f:(fun bv -> + I.create_single ~value:bv ~size) + +let gen_sized_interval = + Q.Generator.bind gen_size ~f:(fun size -> + gen_interval size |> Q.Generator.map + ~f:(fun t -> size, t)) + +let gen_interval_pair = + Q.Generator.bind gen_size ~f:(fun size -> + Q.Generator.both (gen_interval size) (gen_interval size) |> + Q.Generator.map ~f:(fun p -> size, p)) + +let gen_small_sized_interval = + Q.Generator.bind gen_small_size ~f:(fun size -> + gen_interval size |> Q.Generator.map + ~f:(fun t -> size, t)) + +let gen_small_interval_pair = + Q.Generator.bind gen_small_size ~f:(fun size -> + Q.Generator.both (gen_interval size) (gen_interval size) |> + Q.Generator.map ~f:(fun p -> size, p)) + +let all_bvs size = + List.init (1 lsl size) ~f:(fun n -> Bv.(int n mod modulus size)) + +let concrete_values size t = + all_bvs size |> List.filter ~f:(I.contains_value t) + +let oracle_sound_binop ~size ~interval_op ~concrete_op t1 t2 = + let vs1 = concrete_values size t1 in + let vs2 = concrete_values size t2 in + let results = List.bind vs1 ~f:(fun a -> + let ia = Bv.to_int a in + List.filter_map vs2 ~f:(fun b -> + let ib = Bv.to_int b in + concrete_op size ia ib |> + Option.map ~f:(fun n -> + Bv.(int n mod modulus size)))) in + List.is_empty results || + let r_int = interval_op t1 t2 in + List.for_all results ~f:(I.contains_value r_int) + +let oracle_sound_unop ~size ~interval_op ~concrete_op t = + let vs = concrete_values size t in + let results = List.filter_map vs ~f:(fun v -> + let i = Bv.to_int v in + concrete_op size i |> Option.map ~f:(fun r -> + Bv.(int r mod modulus size))) in + List.is_empty results || + let t_int = interval_op t in + List.for_all results ~f:(I.contains_value t_int) + +let prop_size_preserved (size, t) = Int.equal size (I.size t) + +let prop_bounds_valid (_size, t) = + let lo = I.lower t in + let hi = I.upper t in + I.is_full t || I.is_empty t || not (Bv.equal lo hi) + +let prop_empty_singleton_consistency (_size, t) = + not (I.is_empty t) || begin + Option.is_none (I.single_of t) && + not (I.is_single t) + end + +let prop_full_contains_all size = + let t = I.create_full ~size in + Q.test (gen_bv size) + ~trials:100 + ~f:(fun bv -> assert (I.contains_value t bv)); + true + +let prop_single_roundtrip size = + Q.test (gen_bv size) + ~trials:50 + ~f:(fun bv -> + let t = I.create_single ~value:bv ~size in + match I.single_of t with + | Some bv' -> assert (Bv.equal bv bv') + | None -> assert false); + true + +let prop_negative_nonnegative_disjoint (_size, t) = + not (I.is_all_negative t && I.is_all_non_negative t) + +let prop_binop_size_stable op (size, (t1, t2)) = + try + let r = op t1 t2 in + Int.equal (I.size r) size + with _ -> true + +let prop_singleton_contains (_size, t) = + match I.single_of t with + | Some v -> I.contains_value t v + | None -> true + +let prop_contains_implies_single_contains (_size, (t1, t2)) = + not (I.contains t1 t2) || match I.single_of t2 with + | None -> true + | Some v -> I.contains_value t1 v + +let prop_intersect_comm (_size, (t1, t2)) = + let r1 = I.intersect t1 t2 in + let r2 = I.intersect t2 t1 in + I.equal r1 r2 + +let prop_union_sound (_size, (t1, t2)) = + let u1 = I.union t1 t2 in + let u2 = I.union t2 t1 in + I.contains u1 t1 && + I.contains u1 t2 && + I.contains u2 t1 && + I.contains u2 t2 + +let prop_inverse_involutive (_size, t) = + I.equal (I.inverse (I.inverse t)) t + +let prop_extract_shrinks (size,t) = + if size > 1 then + let hi = size - 1 in + let lo = size / 2 in + let t' = I.extract t ~hi ~lo in + I.size t' = hi - lo + 1 + else true + +let prop_concat_size ((size1, t1), (size2, t2)) = + let t = I.concat t1 t2 in + Int.equal (I.size t) (size1 + size2) + +let mask_of_size size = (1 lsl size) - 1 +let add_mod size a b = (a + b) land mask_of_size size +let sub_mod size a b = (a - b) land mask_of_size size +let mul_mod size a b = (a * b) land mask_of_size size +let udiv_opt _size a b = if b = 0 then None else Some (a / b) +let urem_opt _size a b = if b = 0 then None else Some (a mod b) + +let to_signed size n = + let sign_bit = 1 lsl (size - 1) in + if (n land sign_bit) = 0 then n + else n - (1 lsl size) + +let sdiv_opt size a b = + if b = 0 then None else + let sa = to_signed size a in + let sb = to_signed size b in + if sb = 0 then None else + let q = sa / sb in + (* re-wrap as unsigned *) + let res = q land mask_of_size size in + Some res + +let srem_opt size a b = + if b = 0 then None else + let sa = to_signed size a in + let sb = to_signed size b in + if sb = 0 then None else + let m = Bv.modulus size in + let ba = Bv.(int sa mod m) in + let bb = Bv.(int sb mod m) in + let r = Bv.(srem ba bb mod m) in + Some (Bv.to_int r) + +let logand_mod _size a b = a land b +let logor_mod _size a b = a lor b +let logxor_mod _size a b = a lxor b + +let shl_opt size a b = + if b >= size then None else Some ((a lsl b) land mask_of_size size) + +let lshr_opt size a b = + if b >= size then None else Some (a lsr b) + +let ashr_opt size a b = + if b >= size then None else + let sa = to_signed size a in + let shifted = sa asr b in + Some (shifted land mask_of_size size) + +let popcount size a = + let a = a land mask_of_size size in + let rec loop x acc = if x = 0 then acc + else loop (x land (x - 1)) (acc + 1) in + loop a 0 + +let bv_clz x n = + let x = Bv.to_bigint x in + let rec aux i = + if i < 0 then n + else if Z.testbit x i then n - i - 1 + else aux (i - 1) in + aux (n - 1) + +let bv_ctz x n = + let x = Bv.to_bigint x in + let rec aux i = + if i >= n then n + else if Z.testbit x i then i + 1 + else aux (i + 1) in + aux 0 + +let clz size a = bv_clz Bv.(int a mod modulus size) size +let ctz size a = bv_ctz Bv.(int a mod modulus size) size + +let prop_add_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.add + ~concrete_op:(fun size a b -> Some (add_mod size a b)) + t1 t2 + +let prop_sub_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.sub + ~concrete_op:(fun size a b -> Some (sub_mod size a b)) + t1 t2 + +let prop_mul_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.mul + ~concrete_op:(fun size a b -> Some (mul_mod size a b)) + t1 t2 + +let prop_udiv_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.udiv + ~concrete_op:udiv_opt + t1 t2 + +let prop_urem_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.urem + ~concrete_op:urem_opt + t1 t2 + +let prop_sdiv_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.sdiv + ~concrete_op:sdiv_opt + t1 t2 + +let prop_srem_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.srem + ~concrete_op:srem_opt + t1 t2 + +let prop_logand_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.logand + ~concrete_op:(fun size a b -> Some (logand_mod size a b)) + t1 t2 + +let prop_logor_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.logor + ~concrete_op:(fun size a b -> Some (logor_mod size a b)) + t1 t2 + +let prop_logxor_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.logxor + ~concrete_op:(fun size a b -> Some (logxor_mod size a b)) + t1 t2 + +let prop_lsl_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.logical_shift_left + ~concrete_op:shl_opt + t1 t2 + +let prop_lsr_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.logical_shift_right + ~concrete_op:lshr_opt + t1 t2 + +let prop_asr_sound (size, (t1, t2)) = + oracle_sound_binop + ~size + ~interval_op:I.arithmetic_shift_right + ~concrete_op:ashr_opt + t1 t2 + +let prop_lnot_sound (size, t) = + oracle_sound_unop + ~size + ~interval_op:I.lnot + ~concrete_op:(fun size a -> Some ((lnot a) land mask_of_size size)) + t + +let prop_neg_sound (size, t) = + oracle_sound_unop + ~size + ~interval_op:I.neg + ~concrete_op:(fun size a -> Some ((0 - a) land ((1 lsl size) - 1))) + t + +let prop_clz_sound (size, t) = + oracle_sound_unop + ~size + ~interval_op:(I.clz ~zero_is_poison:false) + ~concrete_op:(fun size a -> Some (clz size a)) + t + +let prop_ctz_sound (size, t) = + oracle_sound_unop + ~size + ~interval_op:(I.ctz ~zero_is_poison:false) + ~concrete_op:(fun size a -> Some (ctz size a)) + t + +let prop_popcnt_sound (size, t) = + oracle_sound_unop + ~size + ~interval_op:I.popcnt + ~concrete_op:(fun size a -> Some (popcount size a)) + t + +let qc1 sexp name gen prop = + match + Q.test_or_error gen + ~seed:(`Deterministic name) + ~trials:3000 + ~f:(fun x -> if prop x then Ok () else + Or_error.error_s Sexp.(List [Atom "failed"; sexp x])) + with Ok () -> () | Error e -> Error.raise e + +let sexp_binop x = [%sexp (x : int * (I.t * I.t))] +let sexp_unop x = [%sexp (x : int * I.t)] + +let%test_unit "add preserves size" = + qc1 sexp_binop "add-size" gen_interval_pair (prop_binop_size_stable I.add) + +let%test_unit "sub preserves size" = + qc1 sexp_binop "sub-size" gen_interval_pair (prop_binop_size_stable I.sub) + +let%test_unit "mul preserves size" = + qc1 sexp_binop "mul-size" gen_interval_pair (prop_binop_size_stable I.mul) + +let%test_unit "size preserved" = + qc1 sexp_unop "size" gen_sized_interval prop_size_preserved + +let%test_unit "bounds valid" = + qc1 sexp_unop "bounds" gen_sized_interval prop_bounds_valid + +let%test_unit "singleton roundtrip" = + qc1 sexp_of_int "single" gen_size prop_single_roundtrip + +let%test_unit "empty singleton consistency" = + qc1 sexp_unop "empty-single" gen_sized_interval prop_empty_singleton_consistency + +let%test_unit "intersect commutative" = + qc1 sexp_binop "intersect-comm" gen_interval_pair prop_intersect_comm + +let%test_unit "union commutative" = + qc1 sexp_binop "union-comm" gen_interval_pair prop_union_sound + +let%test_unit "inverse involutive" = + qc1 sexp_unop "inverse" gen_sized_interval prop_inverse_involutive + +let%test_unit "contains → contains single" = + qc1 sexp_binop "contains-single" gen_interval_pair prop_contains_implies_single_contains + +let%test_unit "add sound" = + qc1 sexp_binop "bv-add" gen_small_interval_pair prop_add_sound + +let%test_unit "sub sound" = + qc1 sexp_binop "bv-sub" gen_small_interval_pair prop_sub_sound + +let%test_unit "mul sound" = + qc1 sexp_binop "bv-mul" gen_small_interval_pair prop_mul_sound + +let%test_unit "udiv sound" = + qc1 sexp_binop "bv-udiv" gen_small_interval_pair prop_udiv_sound + +let%test_unit "urem sound" = + qc1 sexp_binop "bv-urem" gen_small_interval_pair prop_urem_sound + +let%test_unit "sdiv sound" = + qc1 sexp_binop "bv-sdiv" gen_small_interval_pair prop_sdiv_sound + +let%test_unit "srem sound" = + qc1 sexp_binop "bv-srem" gen_small_interval_pair prop_srem_sound + +let%test_unit "logand sound" = + qc1 sexp_binop "bv-logand" gen_small_interval_pair prop_logand_sound + +let%test_unit "logor sound" = + qc1 sexp_binop "bv-logor" gen_small_interval_pair prop_logor_sound + +let%test_unit "logxor sound" = + qc1 sexp_binop "bv-logxor" gen_small_interval_pair prop_logxor_sound + +let%test_unit "lsl sound" = + qc1 sexp_binop "bv-lsl" gen_small_interval_pair prop_lsl_sound + +let%test_unit "lsr sound" = + qc1 sexp_binop "bv-lsr" gen_small_interval_pair prop_lsr_sound + +let%test_unit "asr sound" = + qc1 sexp_binop "bv-asr" gen_small_interval_pair prop_asr_sound + +let%test_unit "neg sound" = + qc1 sexp_unop "bv-neg" gen_small_sized_interval prop_neg_sound + +let%test_unit "lnot sound" = + qc1 sexp_unop "bv-lnot" gen_small_sized_interval prop_lnot_sound + +let%test_unit "clz sound" = + qc1 sexp_unop "bv-clz" gen_small_sized_interval prop_clz_sound + +let%test_unit "ctz sound" = + qc1 sexp_unop "bv-ctz" gen_small_sized_interval prop_ctz_sound + +let%test_unit "popcnt sound" = + qc1 sexp_unop "bv-popcnt" gen_small_sized_interval prop_popcnt_sound diff --git a/src/test/test_float32.ml b/src/test/test_float32.ml new file mode 100644 index 00000000..00051b95 --- /dev/null +++ b/src/test/test_float32.ml @@ -0,0 +1,136 @@ +open Core +open Cgen.Float32 + +module Q = Quickcheck + +let gen_float32 = Q.Generator.map Int32.quickcheck_generator ~f:of_bits +let gen_pair = Q.Generator.both gen_float32 gen_float32 + +let f64 x = to_float x +let near ?(eps = 1e-5) a b = Float.(abs (f64 a - f64 b) <= eps) + +let prop_add_approx a b = + is_nan a || is_nan b || is_inf a || is_inf b || + let r = add a b in + let e = of_float Float.(f64 a + f64 b) in + is_nan r || is_inf r || is_nan e || is_inf e || near r e + +let prop_sub_approx a b = + is_nan a || is_nan b || is_inf a || is_inf b || + let r = sub a b in + let e = of_float Float.(f64 a - f64 b) in + is_nan r || is_inf r || is_nan e || is_inf e || near r e + +let prop_mul_approx a b = + is_nan a || is_nan b || is_inf a || is_inf b || + let r = mul a b in + let e = of_float Float.(f64 a * f64 b) in + is_nan r || is_inf r || is_nan e || is_inf e || near r e + +let prop_div_approx a b = + is_nan a || is_nan b || is_inf a || is_inf b || Float.(f64 b = 0.0) || + let r = div a b in + let e = of_float Float.(f64 a / f64 b) in + is_nan r || is_inf r || is_nan e || is_inf e || near r e + +let prop_neg_approx x = + let r = neg x in + let fx = to_float x in + let e = of_float Float.(-fx) in + if is_nan x then is_nan r + else if is_inf x + then is_inf r && Bool.equal (is_negative r) (not @@ is_negative x) + else near r e + +let prop_compare_consistent a b = + is_unordered a b || + let c = compare a b in + let eq = Int.(c = 0) in + let lt = Int.(c < 0) in + let gt = Int.(c > 0) in + Bool.equal eq (a = b) && + Bool.equal (not eq) (a <> b) && + Bool.equal lt (a < b) && + Bool.equal (lt || eq) (a <= b) && + Bool.equal gt (a > b) && + Bool.equal (gt || eq) (a >= b) + +let prop_is_zero x = Bool.equal (is_zero x) Float.(f64 x = 0.0) + +let prop_is_negative x = + let sign_bit = Int32.(bits x land 0x8000_0000l <> 0l) in + Bool.equal (is_negative x) sign_bit + +let prop_is_inf x = Bool.equal (is_inf x) (Float.is_inf @@ f64 x) +let prop_is_nan x = Bool.equal (is_nan x) (Float.is_nan @@ f64 x) + +let prop_bits_roundtrip x = + let b = bits x in + let x' = of_bits b in + Int32.equal b (bits x') + +let prop_string_roundtrip x = + let s = to_string x in + let x' = of_string s in + if is_nan x then is_nan x' + else if is_inf x + then is_inf x' && Bool.(is_negative x = is_negative x') + else not @@ is_nan x' + +let prop_int_conversions x = + ignore (to_int8 x : int); + ignore (to_int16 x : int); + ignore (to_int32 x : int32); + ignore (to_int64 x : int64); + ignore (to_uint8 x : int); + ignore (to_uint16 x : int); + ignore (to_uint32 x : int32); + ignore (to_uint64 x : int64); + ignore (of_int8 0 : t); + ignore (of_int16 0 : t); + ignore (of_int32 0l : t); + ignore (of_int64 0L : t); + ignore (of_uint8 0 : t); + ignore (of_uint16 0 : t); + ignore (of_uint32 0l : t); + ignore (of_uint64 0L : t); + true + +(* prints failing inputs! *) +let qc1 name gen prop = + match + Q.test_or_error gen + ~seed:(`Deterministic name) + ~trials:5000 + ~f:(fun x -> + if prop x then Ok () + else Or_error.error_s [%sexp "failed", (x : t)]) + with + | Ok () -> () + | Error e -> Error.raise e + +let qc2 name gen prop = + match + Q.test_or_error gen + ~seed:(`Deterministic name) + ~trials:5000 + ~f:(fun (a, b) -> + if prop a b then Ok () + else Or_error.error_s [%sexp "failed", (a : t), (b : t)]) + with + | Ok () -> () + | Error e -> Error.raise e + +let%test_unit "bits roundtrip" = qc1 "bits" gen_float32 prop_bits_roundtrip +let%test_unit "add approx" = qc2 "add" gen_pair prop_add_approx +let%test_unit "sub approx" = qc2 "sub" gen_pair prop_sub_approx +let%test_unit "mul approx" = qc2 "mul" gen_pair prop_mul_approx +let%test_unit "div approx" = qc2 "div" gen_pair prop_div_approx +let%test_unit "neg approx" = qc1 "neg" gen_float32 prop_neg_approx +let%test_unit "compare consistency" = qc2 "cmp" gen_pair prop_compare_consistent +let%test_unit "is_zero" = qc1 "zero" gen_float32 prop_is_zero +let%test_unit "is_negative" = qc1 "neg" gen_float32 prop_is_negative +let%test_unit "is_inf" = qc1 "inf" gen_float32 prop_is_inf +let%test_unit "is_nan" = qc1 "nan" gen_float32 prop_is_nan +let%test_unit "int conversions" = qc1 "int" gen_float32 prop_int_conversions +let%test_unit "string roundtrip" = qc1 "str" gen_float32 prop_string_roundtrip diff --git a/src/test/test_opt.ml b/src/test/test_opt.ml index 08b24787..c6a57f0e 100644 --- a/src/test/test_opt.ml +++ b/src/test/test_opt.ml @@ -2,17 +2,6 @@ open Core open OUnit2 open Cgen -let fmt s = - (* Ignore lines starting with a double comment *) - let s = - String.split_lines s |> List.filter ~f:(fun ln -> - not @@ String.is_prefix ln ~prefix:";;") |> - String.concat in - (* Ignore returns/newlines/tabs/spaces *) - String.filter s ~f:(function - | '\r' | '\n' | '\t' | ' ' -> false - | _ -> true) - let from_file filename = let open Context.Syntax in let* m = Parse.Virtual.from_file filename in @@ -22,16 +11,19 @@ let from_file filename = (* Toggle this to overwrite cases that differ. *) let overwrite = false -let compare_outputs filename' expected p' = - if String.(fmt p' <> fmt expected) then +let compare_outputs ?(chop_end = true) filename' expected p' = + let expected' = if chop_end + then String.chop_suffix_if_exists expected ~suffix:"\n" + else expected in + if String.(p' <> expected') then if overwrite then (* Assume we're being tested via `dune test`, which runs with "_build/default/test/" as the CWD. *) Out_channel.write_all ("../../../test/" ^ filename') ~data:(p' ^ "\n") else - let expected = String.chop_suffix_if_exists expected ~suffix:"\n" in - let diff = Odiff.strings_diffs expected p' in - let msg = Format.sprintf "Diff:\n\n%s" (Odiff.string_of_diffs diff) in + let diff = Odiff.strings_diffs expected' p' in + let msg = Format.sprintf "Diff (%s):\n\n%s" + filename' (Odiff.string_of_diffs diff) in assert_failure msg let from_file_abi filename = @@ -41,20 +33,29 @@ let from_file_abi filename = let* () = Context.iter_seq_err (Virtual.Abi.Module.funs m) ~f:Passes.Ssa.check_abi in Passes.optimize_abi m -let test name _ = +let test ?(f = from_file) name _ = let filename = Format.sprintf "data/opt/%s.vir" name in let filename' = filename ^ ".opt" in let expected = In_channel.read_all filename' in Context.init Machine.X86.Amd64_sysv.target |> Context.eval begin let open Context.Syntax in - let* _, m = from_file filename in + let* _, m = f filename in let* () = Virtual.Module.funs m |> Context.iter_seq_err ~f:Passes.Ssa.check in !!(Format.asprintf "%a" Virtual.Module.pp m) end |> function | Ok p' -> compare_outputs filename' expected p' | Error err -> assert_failure @@ Format.asprintf "%a" Error.pp err +let coalesce_only filename = + let open Context.Syntax in + let* m = Parse.Virtual.from_file filename in + let* tenv, m = Passes.initialize m in + let*? m = Virtual.Module.map_funs_err m ~f:Passes.Coalesce_slots.run in + let*? m = Virtual.Module.map_funs_err m ~f:Passes.Resolve_constant_blk_args.run in + let*? m = Virtual.Module.map_funs_err m ~f:Passes.Remove_dead_vars.run in + !!(tenv, m) + let test_abi target ext name _ = let filename = Format.sprintf "data/opt/%s.vir" name in let filename' = Format.sprintf "%s.opt.%s" filename ext in @@ -175,14 +176,7 @@ let test_native target abi ext name _ = end; if Shexp_process.(eval @@ file_exists driver_output) then let contents = In_channel.read_all driver_output in - let msg = Format.asprintf - "Unequal output\n\ - ---------------------------\n\ - Got:\n%s\n\ - ---------------------------\n\ - Expected:\n%s\n" - p.stdout contents in - assert_bool msg @@ String.(contents = p.stdout) + compare_outputs ~chop_end:false driver_output contents p.stdout (* Specific ABI lowering tests. *) let test_sysv = test_abi Machine.X86.Amd64_sysv.target "sysv" @@ -307,6 +301,14 @@ let opt_suite = "Test optimizations" >::: [ "No sinking" >:: test "nosink"; "Spill test 2" >:: test "spill2"; "Parallel moves" >:: test "parallel"; + "SROA" >:: test "sroa"; + "Sink 1" >:: test "sink1"; + "Escape 1" >:: test "esc1"; + "Slot coalesce 1 (no other opts)" >:: test ~f:coalesce_only "coalesce1"; + "Slot coalesce 1 (full opts)" >:: test "coalesce1a"; + "Bad load 1" >:: test "badload1"; + "Bad load 2" >:: test "badload2"; + "Binary search" >:: test "bsearch"; ] let abi_suite = "Test ABI lowering" >::: [ @@ -339,6 +341,7 @@ let abi_suite = "Test ABI lowering" >::: [ "Collatz recursive (SysV)" >:: test_sysv "collatz_rec"; "Ackermann (SysV)" >:: test_sysv "ackermann"; "Quicksort (SysV)" >:: test_sysv "qsort"; + "Binary search (SysV)" >:: test "bsearch"; ] let isel_suite = "Test instruction selection" >::: [ @@ -371,6 +374,7 @@ let isel_suite = "Test instruction selection" >::: [ "Ackermann (SysV AMD64)" >:: test_sysv_amd64 "ackermann"; "Quicksort (SysV AMD64)" >:: test_sysv_amd64 "qsort"; "Parallel moves (SysV AMD64)" >:: test_sysv_amd64 "parallel"; + "Binary search (SysV AMD64)" >:: test_sysv_amd64 "bsearch"; ] let regalloc_suite = "Test register allocation" >::: [ @@ -413,6 +417,9 @@ let regalloc_suite = "Test register allocation" >::: [ "Analyze array (SysV AMD64)" >:: test_sysv_amd64_regalloc "analyze_array"; "Slot promotion 2 (GCD, partial) (SysV AMD64)" >:: test_sysv_amd64_regalloc "promote2-partial"; "Parallel moves (SysV AMD64)" >:: test_sysv_amd64_regalloc "parallel"; + "Struct in a block argument (SysV AMD64)" >:: test_sysv_amd64_regalloc "sumphi"; + "Variadic sum (SysV AMD64)" >:: test_sysv_amd64_regalloc "vasum"; + "Binary search (SysV AMD64)" >:: test_sysv_amd64_regalloc "bsearch"; ] let native_suite = "Test native code" >::: [ @@ -430,6 +437,7 @@ let native_suite = "Test native code" >::: [ "Quicksort, swap inlined (SysV AMD64)" >:: test_sysv_amd64_native "qsort_inline_swap"; "Spill test 1 (SysV AMD64)" >:: test_sysv_amd64_native "spill1"; "Variadic function arguments 1 (SysV AMD64)" >:: test_sysv_amd64_native "vaarg1"; + "Variadic function arguments 2 (SysV AMD64)" >:: test_sysv_amd64_native "vaarg2"; "Palindrome (SysV AMD64)" >:: test_sysv_amd64_native "palindrome"; "Integer pow (SysV AMD64)" >:: test_sysv_amd64_native "int_pow"; "AND test (SysV AMD64)" >:: test_sysv_amd64_native "and_test"; @@ -438,6 +446,10 @@ let native_suite = "Test native code" >::: [ "Analyze array (SysV AMD64)" >:: test_sysv_amd64_native "analyze_array"; "Unsigned remainder by 7 (SysV AMD64)" >:: test_sysv_amd64_native "uremby7"; "Slot promotion 2 (GCD, partial) (SysV AMD64)" >:: test_sysv_amd64_native "promote2-partial"; + "Struct in a block argument (SysV AMD64)" >:: test_sysv_amd64_native "sumphi"; + "Returning, passing, and dereferencing a struct (SysV AMD64)" >:: test_sysv_amd64_native "unref"; + "Sink 1 (SysV AMD64)" >:: test_sysv_amd64_native "sink1"; + "Binary search (SysV AMD64)" >:: test_sysv_amd64_native "bsearch"; ] let () = run_test_tt_main @@ test_list [ diff --git a/src/test/test_type.ml b/src/test/test_type.ml index a2e230ba..93c1c75d 100644 --- a/src/test/test_type.ml +++ b/src/test/test_type.ml @@ -47,6 +47,45 @@ let c9 : Type.compound = `compound ("c9", None, [ `elt (`i32, 1); ]) +let c10 : Type.compound = `compound ("c10", None, [ + `name ("c1", 2); + ]) + +let c11 : Type.compound = `compound ("c11", None, [ + `elt (`i16, 3); + `elt (`i32, 1); + ]) + +let c12 : Type.compound = `compound ("c12", Some 8, [ + `elt (`i32, 1); + ]) + +let c13 : Type.compound = `compound ("c13", None, [ + `elt (`i8, 3); + ]) + +let c14 : Type.compound = `compound ("c14", Some 8, [ + `elt (`i16, 1); + ]) + +let c15 : Type.compound = `compound ("c15", Some 8, [ + `elt (`i8, 1); + `elt (`i16, 1); + `elt (`i32, 1); + `elt (`i8, 1); + ]) + +let opaque4 : Type.compound = `opaque ("opaque4", 4, 4) + +let c16 : Type.compound = `compound ("c16", Some 4, [ + `name ("opaque4", 1); + `name ("opaque4", 1); + ]) + +let c1arr : Type.compound = `compound ("c1arr", None, [ + `name ("c1", 3); + ]) + let sexp = Fn.compose Type.Layout.t_of_sexp Sexp.of_string let l1 = sexp "((align 4) (data (i32 i32 i32 i32 i8 i8 (pad 2))))" @@ -58,6 +97,15 @@ let l6 = sexp "((align 8) (data ()))" let l7 = sexp "((align 8) (data (i32 (pad 4) i32 (pad 4))))" let l8 = sexp "((align 1) (data ()))" let l9 = sexp "((align 4) (data (f32 i8 i8 (pad 2) i32)))" +let l10 = sexp "((align 4) (data (i32 i32 i32 i32 i8 i8 (pad 2) i32 i32 i32 i32 i8 i8 (pad 2))))" +let l11 = sexp "((align 4) (data (i16 i16 i16 (pad 2) i32)))" +let l12 = sexp "((align 8) (data (i32 (pad 4))))" +let l13 = sexp "((align 1) (data (i8 i8 i8)))" +let l14 = sexp "((align 8) (data (i16 (pad 6))))" +let l15 = sexp "((align 8) (data (i8 (pad 1) i16 i32 i8 (pad 7))))" +let lopaque4 = sexp "((align 4) (data ((opaque 4))))" +let l16 = sexp "((align 4) (data ((opaque 8))))" +let l1arr = sexp "((align 4) (data (i32 i32 i32 i32 i8 i8 (pad 2) i32 i32 i32 i32 i8 i8 (pad 2) i32 i32 i32 i32 i8 i8 (pad 2))))" let gamma = function | "c1" -> l1 @@ -69,6 +117,15 @@ let gamma = function | "c7" -> l7 | "c8" -> l8 | "c9" -> l9 + | "c10" -> l10 + | "c11" -> l11 + | "c12" -> l12 + | "c13" -> l13 + | "c14" -> l14 + | "c15" -> l15 + | "opaque4" -> lopaque4 + | "c16" -> l16 + | "c1arr" -> l1arr | s -> failwithf "gamma: %s is undefined" s () let _sexp_of_layout l = Sexp.List (List.map l ~f:Type.sexp_of_datum) @@ -97,6 +154,15 @@ let test_c6 _ = test_sizeof_compound c6 ~expected:0 let test_c7 _ = test_sizeof_compound c7 ~expected:16 let test_c8 _ = test_sizeof_compound c8 ~expected:0 let test_c9 _ = test_sizeof_compound c9 ~expected:12 +let test_c10 _ = test_sizeof_compound c10 ~expected:40 +let test_c11 _ = test_sizeof_compound c11 ~expected:12 +let test_c12 _ = test_sizeof_compound c12 ~expected:8 +let test_c13 _ = test_sizeof_compound c13 ~expected:3 +let test_c14 _ = test_sizeof_compound c14 ~expected:8 +let test_c15 _ = test_sizeof_compound c15 ~expected:16 +let test_opaque4 _ = test_sizeof_compound opaque4 ~expected:4 +let test_c16 _ = test_sizeof_compound c16 ~expected:8 +let test_c1arr _ = test_sizeof_compound c1arr ~expected:60 let suite = "Test types" >::: [ "Test sizeof compound 1" >:: test_c1; @@ -108,6 +174,15 @@ let suite = "Test types" >::: [ "Test sizeof compound 7" >:: test_c7; "Test sizeof compound 8" >:: test_c8; "Test sizeof compound 9" >:: test_c9; + "Test sizeof compound 10" >:: test_c10; + "Test sizeof compound 11" >:: test_c11; + "Test sizeof compound 12" >:: test_c12; + "Test sizeof compound 13" >:: test_c13; + "Test sizeof compound 14" >:: test_c14; + "Test sizeof compound 15" >:: test_c15; + "Test sizeof compound opaque4" >:: test_opaque4; + "Test sizeof compound 16" >:: test_c16; + "Test sizeof compound 1 (array)" >:: test_c1arr; ] let () = run_test_tt_main suite diff --git a/tools/clang-format-all.sh b/tools/clang-format-all.sh new file mode 100755 index 00000000..516180d1 --- /dev/null +++ b/tools/clang-format-all.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +# C/C++ sources +files_to_indent='.*\.\(c\|h\|cc\|cpp\|cxx\|hpp\)$' + +VERSION=$(clang-format --version 2>/dev/null) + +if [ -z "$VERSION" ]; then + echo "Please install clang-format" + exit 1 +else + echo "Reindenting with $VERSION" +fi + +git ls-files | grep -e "$files_to_indent" | while read file +do + clang-format -i "$file" +done diff --git a/tools/ocp-indent-all.sh b/tools/ocp-indent-all.sh index 1020d842..a3564f73 100755 --- a/tools/ocp-indent-all.sh +++ b/tools/ocp-indent-all.sh @@ -1,6 +1,6 @@ #!/bin/sh -# a regex for files to indent +# OCaml sources files_to_indent='.*\.\(ml\|mli\)$' VERSION=$(ocp-indent --version)