diff --git a/.clang-format b/.clang-format
index 03ce0b43..e3263f0e 100644
--- a/.clang-format
+++ b/.clang-format
@@ -1,149 +1,40 @@
----
-Language:        Cpp
-# BasedOnStyle:  LLVM
-AccessModifierOffset: -2
-AlignAfterOpenBracket: Align
-AlignConsecutiveMacros: false
-AlignConsecutiveAssignments: false
-AlignConsecutiveBitFields: false
-AlignConsecutiveDeclarations: false
-AlignEscapedNewlines: Right
-AlignOperands:   Align
-AlignTrailingComments: true
-AllowAllArgumentsOnNextLine: true
-AllowAllConstructorInitializersOnNextLine: true
-AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortEnumsOnASingleLine: true
-AllowShortBlocksOnASingleLine: Never
-AllowShortCaseLabelsOnASingleLine: false
-AllowShortFunctionsOnASingleLine: All
-AllowShortLambdasOnASingleLine: All
-AllowShortIfStatementsOnASingleLine: Never
-AllowShortLoopsOnASingleLine: false
-AlwaysBreakAfterDefinitionReturnType: None
-AlwaysBreakAfterReturnType: None
-AlwaysBreakBeforeMultilineStrings: false
-AlwaysBreakTemplateDeclarations: MultiLine
-BinPackArguments: true
-BinPackParameters: true
+BasedOnStyle: LLVM
+IndentWidth: 2
+ContinuationIndentWidth: 2
+UseTab: Never
+TabWidth: 2
+
+# Function definitions like:
+# static int
+# name(...)
+# {
 BraceWrapping:
-  AfterCaseLabel:  false
-  AfterClass:      false
-  AfterControlStatement: Never
-  AfterEnum:       false
-  AfterFunction:   false
-  AfterNamespace:  false
-  AfterObjCDeclaration: false
-  AfterStruct:     false
-  AfterUnion:      false
-  AfterExternBlock: false
-  BeforeCatch:     false
-  BeforeElse:      false
-  BeforeLambdaBody: false
-  BeforeWhile:     false
-  IndentBraces:    false
-  SplitEmptyFunction: true
-  SplitEmptyRecord: true
-  SplitEmptyNamespace: true
-BreakBeforeBinaryOperators: None
+  AfterFunction: true
+  AfterStruct: true
+  AfterClass: true
+  AfterNamespace: true
+  AfterExternBlock: true
+  BeforeCatch: false
+  BeforeElse: false
+
+# K&R control-flow braces (same-line)
 BreakBeforeBraces: Attach
-BreakBeforeInheritanceComma: false
-BreakInheritanceList: BeforeColon
-BreakBeforeTernaryOperators: true
-BreakConstructorInitializersBeforeComma: false
-BreakConstructorInitializers: BeforeColon
-BreakAfterJavaFieldAnnotations: false
-BreakStringLiterals: true
-ColumnLimit:     80
-CommentPragmas:  '^ IWYU pragma:'
-CompactNamespaces: false
-ConstructorInitializerAllOnOneLineOrOnePerLine: false
-ConstructorInitializerIndentWidth: 4
-ContinuationIndentWidth: 4
-Cpp11BracedListStyle: true
-DeriveLineEnding: true
-DerivePointerAlignment: false
-DisableFormat:   false
-ExperimentalAutoDetectBinPacking: false
-FixNamespaceComments: true
-ForEachMacros:
-  - foreach
-  - Q_FOREACH
-  - BOOST_FOREACH
-IncludeBlocks:   Preserve
-IncludeCategories:
-  - Regex:           '^"(llvm|llvm-c|clang|clang-c)/'
-    Priority:        2
-    SortPriority:    0
-  - Regex:           '^(<|"(gtest|gmock|isl|json)/)'
-    Priority:        3
-    SortPriority:    0
-  - Regex:           '.*'
-    Priority:        1
-    SortPriority:    0
-IncludeIsMainRegex: '(Test)?$'
-IncludeIsMainSourceRegex: ''
-IndentCaseLabels: false
-IndentCaseBlocks: false
-IndentGotoLabels: true
-IndentPPDirectives: None
-IndentExternBlock: AfterExternBlock
-IndentWidth:     2
-IndentWrappedFunctionNames: false
-InsertTrailingCommas: None
-JavaScriptQuotes: Leave
-JavaScriptWrapImports: true
-KeepEmptyLinesAtTheStartOfBlocks: true
-MacroBlockBegin: ''
-MacroBlockEnd:   ''
-MaxEmptyLinesToKeep: 1
-NamespaceIndentation: None
-ObjCBinPackProtocolList: Auto
-ObjCBlockIndentWidth: 2
-ObjCBreakBeforeNestedBlockParam: true
-ObjCSpaceAfterProperty: false
-ObjCSpaceBeforeProtocolList: true
-PenaltyBreakAssignment: 2
-PenaltyBreakBeforeFirstCallParameter: 19
-PenaltyBreakComment: 300
-PenaltyBreakFirstLessLess: 120
-PenaltyBreakString: 1000
-PenaltyBreakTemplateDeclaration: 10
-PenaltyExcessCharacter: 1000000
-PenaltyReturnTypeOnItsOwnLine: 60
-PointerAlignment: Right
-ReflowComments:  true
-SortIncludes:    true
-SortUsingDeclarations: true
+
+# Spacing rules
+SpaceBeforeParens: ControlStatements
 SpaceAfterCStyleCast: false
-SpaceAfterLogicalNot: false
-SpaceAfterTemplateKeyword: true
-SpaceBeforeAssignmentOperators: true
 SpaceBeforeCpp11BracedList: false
-SpaceBeforeCtorInitializerColon: true
-SpaceBeforeInheritanceColon: true
-SpaceBeforeParens: ControlStatements
-SpaceBeforeRangeBasedForLoopColon: true
-SpaceInEmptyBlock: false
-SpaceInEmptyParentheses: false
-SpacesBeforeTrailingComments: 1
-SpacesInAngles:  false
-SpacesInConditionalStatement: false
-SpacesInContainerLiterals: true
-SpacesInCStyleCastParentheses: false
-SpacesInParentheses: false
-SpacesInSquareBrackets: false
-SpaceBeforeSquareBrackets: false
-Standard:        Latest
-StatementMacros:
-  - Q_UNUSED
-  - QT_REQUIRE_VERSION
-TabWidth:        8
-UseCRLF:         false
-UseTab:          Never
-WhitespaceSensitiveMacros:
-  - STRINGIZE
-  - PP_STRINGIZE
-  - BOOST_PP_STRINGIZE
-...
+SpaceBeforeAssignmentOperators: true
+
+# Pointer alignment
+PointerAlignment: Right
 
+# Do not wrap long lines unless needed
+ColumnLimit: 80
+
+# Do not cram short functions onto one line
+AllowShortFunctionsOnASingleLine: None
+
+# Keep return type on its own line if desired
+AlwaysBreakAfterDefinitionReturnType: true
+AlwaysBreakAfterReturnType: None
diff --git a/Makefile b/Makefile
index 34afe147..180bbd9d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,7 @@
 SRC := ./src/
 
-.PHONY: build clean install uninstall test doc deps indent status-clean check-style
+.PHONY: build	clean	install uninstall test doc deps
+.PHONY: ocaml-indent clang-indent indent status-clean check-style
 
 all: install
 
@@ -25,9 +26,14 @@ doc:
 deps:
 	$(MAKE) deps -C $(SRC)
 
-indent:
+ocaml-indent:
 	sh tools/ocp-indent-all.sh
 
+clang-indent:
+	sh tools/clang-format-all.sh
+
+indent: ocaml-indent clang-indent
+
 status-clean:
 	git diff --quiet --exit-code
 
diff --git a/src/Makefile b/src/Makefile
index 0708372c..daf688b1 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,5 +1,10 @@
 .PHONY: clean install uninstall test doc deps bap-deps
 
+PROFILE = release
+ifeq ($(DEBUG),1)
+	PROFILE = dev
+endif
+
 BAP_DEPS = \
 	bitvec \
 	bitvec-binprot \
@@ -12,7 +17,7 @@ BAP_DEPS = \
 BAP_REPO := git+https://github.com/BinaryAnalysisPlatform/opam-repository\#testing
 
 all:
-	dune build --profile=release
+	dune build --profile=$(PROFILE)
 
 clean:
 	dune clean
@@ -24,7 +29,7 @@ uninstall:
 	dune uninstall
 
 test:
-	dune test --profile=release
+	dune test --profile=$(PROFILE)
 
 doc:
 	dune build @doc
diff --git a/src/bin/cli.ml b/src/bin/cli.ml
index 0c8510b7..cc3f90e1 100644
--- a/src/bin/cli.ml
+++ b/src/bin/cli.ml
@@ -1,23 +1,6 @@
 open Cmdliner
 
-(* Why is this here, when we have `Cgen.Target.find`?
-
-   The targets should be getting declared at the toplevel in their
-   respective modules, but the evaluation of these modules is not
-   guaranteed to happen by the time we start parsing command-line
-   arguments.
-
-   It just so happens that, for the front-end executable, we should
-   know all of the out-of-the-box targets provided by the library,
-   so this feels like a tolerable compromise in the design of extending
-   the supported targets.
-*)
-let targets =
-  Core.Map.of_alist_exn (module Core.String) @@
-  Core.List.map ~f:(fun t -> Cgen.Target.name t, t) @@
-  Cgen.Machine.[
-    X86.Amd64_sysv.target;
-  ]
+let () = Cgen.Machine.force_initialization ()
 
 let bail () = exit Cmd.Exit.ok
 
@@ -100,7 +83,8 @@ let dump_no_comment =
 let man_targets =
   `S "TARGET" ::
   `Pre "Supported target platforms" :: begin
-    Core.Map.data targets |>
+    Cgen.Target.enum_targets () |>
+    Core.Sequence.to_list |>
     Core.List.map ~f:(fun t ->
         `P (Format.asprintf "%a" Cgen.Target.pp t))
   end
@@ -134,7 +118,14 @@ type t = {
   target : Cgen.Target.t;
 }
 
-let go f file output dump nc target =
+let log_env = Cmd.Env.info "CGEN_LOG"
+
+let setup_log level =
+  Logs.set_level level;
+  Logs.set_reporter @@ Logs_fmt.reporter ()
+
+let go f file output dump nc target log_level =
+  setup_log log_level;
   let file = match file with
     | "" -> Istdin
     | _ -> Ifile file in
@@ -144,12 +135,20 @@ let go f file output dump nc target =
   let dump = match dump_of_string_opt dump with
     | None -> fatal "invalid dump option: %s\n%!" dump ()
     | Some d -> d in
-  let target = match Core.Map.find targets target with
+  let target = match Cgen.Target.find target with
     | None -> fatal "invalid target: %s\n%!" target ()
     | Some t -> t in
   f {file; output; dump; nc; target}
 
-let t f = Term.(const (go f) $ file $ output $ dump $ dump_no_comment $ target)
+let t f =
+  let open Term in
+  const (go f) $
+  file $
+  output $
+  dump $
+  dump_no_comment $
+  target $
+  Logs_cli.level ~env:log_env ()
 
 let man = List.concat [
     man_dump;
diff --git a/src/bin/dune b/src/bin/dune
index 879350de..625e4056 100644
--- a/src/bin/dune
+++ b/src/bin/dune
@@ -2,7 +2,7 @@
  (name cgen_main)
  (public_name cgen)
  (package cgen)
- (libraries cgen cmdliner)
+ (libraries cgen cmdliner logs.cli logs.fmt)
  (modules cgen_main cli)
  (flags -w -58)
  (ocamlopt_flags -O2)
diff --git a/src/cgen.opam b/src/cgen.opam
index 06808bff..58624a0e 100644
--- a/src/cgen.opam
+++ b/src/cgen.opam
@@ -17,7 +17,9 @@ depends: [
   "core" {>= "v0.15"}
   "core_kernel" {>= "v0.15"}
   "dune" {>= "3.15"}
+  "fmt"
   "graphlib"
+  "logs"
   "menhir"
   "monads"
   "ocamldiff"
diff --git a/src/dune-project b/src/dune-project
index d54bc4bb..6fa0f2f1 100644
--- a/src/dune-project
+++ b/src/dune-project
@@ -23,7 +23,9 @@
     (core (>= v0.15))
     (core_kernel (>= v0.15))
     dune
+    fmt
     graphlib
+    logs
     menhir
     monads
     ocamldiff
diff --git a/src/lib/allen_interval_algebra.ml b/src/lib/allen_interval_algebra.ml
new file mode 100644
index 00000000..a51e53a0
--- /dev/null
+++ b/src/lib/allen_interval_algebra.ml
@@ -0,0 +1,95 @@
+(** Allen's Interval Algebra.
+
+    {:https://en.wikipedia.org/wiki/Allen%27s_interval_algebra}
+*)
+
+open Core
+
+type t =
+  | Before
+  | Meets
+  | Overlaps
+  | Finished_by
+  | Contains
+  | Starts
+  | Equal
+  | Started_by
+  | During
+  | Finishes
+  | Overlapped_by
+  | Met_by
+  | After
+[@@deriving compare, equal, sexp]
+
+let pp ppf t = Format.fprintf ppf "%a" Sexp.pp @@ sexp_of_t t
+
+(** Returns the converse of the relation. *)
+let converse = function
+  | Before -> After
+  | After -> Before
+  | Meets -> Met_by
+  | Met_by -> Meets
+  | Overlaps -> Overlapped_by
+  | Overlapped_by -> Overlaps
+  | Starts -> Started_by
+  | Started_by -> Starts
+  | Finishes -> Finished_by
+  | Finished_by -> Finishes
+  | During -> Contains
+  | Contains -> During
+  | Equal -> Equal
+
+(** The input interval type. *)
+module type S = sig
+  (** A point in the interval. *)
+  type point
+
+  (** An inclusive interval.
+
+      Invariant: [lo t <= hi t]
+  *)
+  type t
+
+  (** The lower-bound of the interval. *)
+  val lo : t -> point
+
+  (** The upper-bound of the interval. *)
+  val hi : t -> point
+
+  include Base.Comparisons.Infix with type t := point
+end
+
+(** Create the relations for an interval type. *)
+module Make(M : S) = struct
+  open M
+
+  let before a b        = hi a < lo b [@@inline]
+  let meets a b         = hi a = lo b [@@inline]
+  let overlaps a b      = lo a < lo b && hi a > lo b && hi a < hi b [@@inline]
+  let finished_by a b   = lo a < lo b && hi a = hi b [@@inline]
+  let contains a b      = lo a < lo b && hi a > hi b [@@inline]
+  let starts a b        = lo a = lo b && hi a < hi b [@@inline]
+  let equal a b         = lo a = lo b && hi a = hi b [@@inline]
+  let started_by a b    = lo a = lo b && hi a > hi b [@@inline]
+  let during a b        = lo a > lo b && hi a < hi b [@@inline]
+  let finishes a b      = lo a > lo b && hi a = hi b [@@inline]
+  let overlapped_by a b = lo a > lo b && lo a < hi b && hi a > hi b [@@inline]
+  let met_by a b        = lo a = hi b [@@inline]
+  let after a b         = lo a > hi b [@@inline]
+
+  (** Relates two intervals. *)
+  let relate a b =
+    if before a b then Before
+    else if meets a b then Meets
+    else if overlaps a b then Overlaps
+    else if finished_by a b then Finished_by
+    else if contains a b then Contains
+    else if starts a b then Starts
+    else if equal a b then Equal
+    else if started_by a b then Started_by
+    else if during a b then During
+    else if finishes a b then Finishes
+    else if overlapped_by a b then Overlapped_by
+    else if met_by a b then Met_by
+    else After
+end
diff --git a/src/lib/bitset.ml b/src/lib/bitset.ml
new file mode 100644
index 00000000..6fb43afc
--- /dev/null
+++ b/src/lib/bitset.ml
@@ -0,0 +1,45 @@
+open Core
+open Regular.Std
+open Bitset_intf
+
+module Make(K : Key) = struct
+  type t = Z.t [@@deriving compare, equal]
+
+  let sexp_of_t t = Sexp.Atom (Z.to_string t)
+
+  let t_of_sexp = function
+    | Sexp.Atom s -> Z.of_string s
+    | x ->
+      let exn = Failure "Bitset.t_of_sexp: atom needed" in
+      raise @@ Sexplib.Conv.Of_sexp_error (exn, x)
+
+  let empty = Z.zero
+  let is_empty t = Z.equal t empty [@@inline]
+  let singleton k = Z.(one lsl K.to_int k) [@@inline]
+  let union = Z.logor
+  let inter = Z.logand
+  let set t k = union t @@ singleton k [@@inline]
+  let clear t k = inter t @@ Z.lognot @@ singleton k [@@inline]
+  let mem t k = Z.testbit t @@ K.to_int k [@@inline]
+  let init n = Z.(pred (one lsl n)) [@@inline]
+
+  let min_elt_exn t =
+    if is_empty t then
+      invalid_arg "Bitset.min_elt_exn: empty set";
+    K.of_int @@ Z.trailing_zeros t
+
+  let min_elt t = if is_empty t then None
+    else Some (K.of_int @@ Z.trailing_zeros t)
+
+  let enum =
+    let open Seq.Generator in
+    let pop_min t = Z.(t land pred t) in
+    let rec go t =
+      if is_empty t then return () else
+        let i = min_elt_exn t in
+        yield i >>= fun () ->
+        go (pop_min t) in
+    fun t -> run @@ go t
+end
+
+module Id = Make(Int)
diff --git a/src/lib/bitset.mli b/src/lib/bitset.mli
new file mode 100644
index 00000000..4b8cec7c
--- /dev/null
+++ b/src/lib/bitset.mli
@@ -0,0 +1,4 @@
+open Bitset_intf
+
+module Make(K : Key) : S with type key := K.t
+module Id : S with type key := int
diff --git a/src/lib/bitset_intf.ml b/src/lib/bitset_intf.ml
new file mode 100644
index 00000000..08d16fab
--- /dev/null
+++ b/src/lib/bitset_intf.ml
@@ -0,0 +1,59 @@
+open Regular.Std
+
+module type Key = sig
+  (** The key. *)
+  type t
+
+  val to_int : t -> int
+  val of_int : int -> t
+end
+
+module type S = sig
+  (** The key for indexing into the set. *)
+  type key
+
+  (** The bitset. *)
+  type t = private Z.t [@@deriving compare, equal, sexp]
+
+  (** The empty set. *)
+  val empty : t
+
+  (** Returns [true] if the set is empty. *)
+  val is_empty : t -> bool
+
+  (** Returns the singleton set of a key. *)
+  val singleton : key -> t
+
+  (** Set union. *)
+  val union : t -> t -> t
+
+  (** Set intersection. *)
+  val inter : t -> t -> t
+
+  (** Add an element to the set. *)
+  val set : t -> key -> t
+
+  (** Remove an element from the set. *)
+  val clear : t -> key -> t
+
+  (** Returns [true] if the element is in the set. *)
+  val mem : t -> key -> bool
+
+  (** Constructs the set where the first [n] elements
+      are set. *)
+  val init : int -> t
+
+  (** Returns the least element of the set.
+
+      @raise Invalid_argument if the set is empty
+  *)
+  val min_elt_exn : t -> key
+
+  (** Same as [min_elt_exn], but returns [None] if
+      the set is empty. *)
+  val min_elt : t -> key option
+
+  (** Produces the sequence of elements in the set,
+      from lowest to highest. *)
+  val enum : t -> key seq
+end
diff --git a/src/lib/bv_interval.ml b/src/lib/bv_interval.ml
index 542cf621..325bee54 100644
--- a/src/lib/bv_interval.ml
+++ b/src/lib/bv_interval.ml
@@ -347,43 +347,36 @@ let sext t ~size =
     else create ~lo:(sext t.lo) ~hi:(sext t.hi) ~size
 
 let trunc t ~size =
-  (* This semantics is relaxed. A truncate to the same size should
-     just be a no-op. *)
+  (* Same width: no-op. *)
   if t.size = size then t
   else if t.size < size then
-    invalid_arg "trunc: `size` is not strictly less than `t.size`"
+    invalid_argf
+      "trunc: `size` %d is not strictly less than `size t` %d"
+      size t.size ()
+  else if is_empty t then
+    create_empty ~size
+  else if is_full t then
+    create_full ~size
+  else if is_wrapped_hi t then
+    (* XXX: this could be more precise *)
+    create_full ~size
   else
-    let module B = (val Bitvec.modular t.size) in
-    let default un lower_div upper_div =
-      let lower_div, upper_div =
-        let lz = bv_clz lower_div t.size in
-        if t.size - lz > size then
-          let adj = B.(lower_div land pred (one lsl int Int.(size - 1))) in
-          B.(lower_div - adj), B.(upper_div - adj)
-        else lower_div, upper_div in
-      let w = t.size - bv_clz upper_div t.size in
-      if w <= size then union (create ~lo:lower_div ~hi:upper_div ~size) un
-      else if w = size + 1 then
-        let upper_div = B.(upper_div land (lnot (one lsl int size))) in
-        if Bv.(upper_div < lower_div)
-        then union (create ~lo:lower_div ~hi:upper_div ~size) un
-        else create_full ~size
-      else create_full ~size in
-    if is_empty t then create_empty ~size
-    else if is_full t then create_full ~size
-    else if is_wrapped_hi t then
-      let lz = bv_clz t.hi t.size in
-      if t.size - lz > size
-      || bv_cto t.hi t.size = size
-      then create_full ~size
-      else
-        let un = create
-            ~lo:Bv.(max_unsigned_value size)
-            ~hi:(Bv.extract ~hi:(size - 1) ~lo:0 t.hi)
-            ~size in
-        let upper_div = Bv.max_unsigned_value t.size in
-        if Bv.(t.lo = upper_div) then un else default un t.lo upper_div
-    else default (create_empty ~size) t.lo t.hi
+    let lo_big = Bv.to_bigint t.lo in
+    let hi_big = Bv.to_bigint t.hi in
+    let modulus_dst = Z.shift_left Z.one size in
+    let span = Z.sub hi_big lo_big in
+    if Z.geq span modulus_dst then
+      create_full ~size
+    else
+      let block_lo = Z.div lo_big modulus_dst in
+      let block_hi_minus1 = Z.div (Z.pred hi_big) modulus_dst in
+      if Z.equal block_lo block_hi_minus1 then
+        let lo_mod = Bv.extract ~hi:(size - 1) ~lo:0 t.lo in
+        let hi_mod = Bv.extract ~hi:(size - 1) ~lo:0 t.hi in
+        if Bv.(lo_mod = hi_mod)
+        then create_full ~size
+        else create ~lo:lo_mod ~hi:hi_mod ~size
+      else create_full ~size
 
 let add t1 t2 =
   if t1.size <> t2.size then
@@ -417,9 +410,9 @@ let sub t1 t2 =
     else if is_full t1 || is_full t2 then
       create_full ~size
     else
-      let m = Bv.modulus size in
-      let lo = Bv.(succ ((t1.lo - t2.hi) mod m) mod m) in
-      let hi = Bv.((t1.hi - t2.lo) mod m) in
+      let module B = (val Bv.modular size) in
+      let lo = B.(t1.lo - t2.hi + one) in
+      let hi = B.(t1.hi - t2.lo) in
       if Bv.(lo = hi)
       then create_full ~size
       else
@@ -429,6 +422,14 @@ let sub t1 t2 =
         then create_full ~size
         else x
 
+let mul_single t1 t2 =
+  let size = t1.size in
+  match single_of t1 with
+  | Some s when Bv.(s = one) -> Some t2
+  | Some s when Bv.(s = (ones mod modulus size)) ->
+    Some (sub (create_single ~value:Bv.zero ~size) t2)
+  | _ -> None
+
 let mul t1 t2 =
   if t1.size <> t2.size then
     invalid_arg "mul: sizes must be equal"
@@ -436,42 +437,46 @@ let mul t1 t2 =
     let size = t1.size in
     if is_empty t1 || is_empty t2 then
       create_empty ~size
-    else
-      let min1 = unsigned_min t1 in
-      let max1 = unsigned_max t1 in
-      let min2 = unsigned_min t2 in
-      let max2 = unsigned_max t2 in
-      let m2 = Bv.modulus (size * 2) in
-      let result = create
-          ~lo:Bv.(min1 * min2 mod m2)
-          ~hi:Bv.(succ (max1 * max2 mod m2) mod m2)
-          ~size:(size * 2) in
-      let ur = trunc result ~size in
-      let m = Bv.modulus size in
-      if not (is_wrapped_hi ur)
-      && (Bv.(signed_compare ur.hi zero m) >= 0 ||
-          Bv.(ur.hi = min_signed_value size))
-      then ur
-      else
-        let min1 = signed_min t1 in
-        let max1 = signed_max t1 in
-        let min2 = signed_min t2 in
-        let max2 = signed_max t2 in
-        let p = [|
-          Bv.(min1 * min2 mod m2);
-          Bv.(min1 * max2 mod m2);
-          Bv.(max1 * min2 mod m2);
-          Bv.(max1 * max2 mod m2);
-        |] in
-        let compare x y = Bv.signed_compare x y m2 in
-        let lo = Array.min_elt p ~compare in
-        let hi = Array.max_elt p ~compare in
-        let lo, hi = match lo, hi with
-          | None, _ | _, None -> assert false
-          | Some lo, Some hi -> (lo, Bv.(succ hi mod m2)) in
-        let result = create ~lo ~hi ~size:(size * 2) in
-        let sr = trunc result ~size in
-        if is_strictly_smaller_than ur sr then ur else sr
+    else match mul_single t1 t2 with
+      | Some t -> t
+      | None -> match mul_single t2 t1 with
+        | Some t -> t
+        | None ->
+          let min1 = unsigned_min t1 in
+          let max1 = unsigned_max t1 in
+          let min2 = unsigned_min t2 in
+          let max2 = unsigned_max t2 in
+          let m2 = Bv.modulus (size * 2) in
+          let result = create
+              ~lo:Bv.(min1 * min2 mod m2)
+              ~hi:Bv.(succ (max1 * max2 mod m2) mod m2)
+              ~size:(size * 2) in
+          let ur = trunc result ~size in
+          let m = Bv.modulus size in
+          if not (is_wrapped_hi ur)
+          && (Bv.(signed_compare ur.hi zero m) >= 0 ||
+              Bv.(ur.hi = min_signed_value size))
+          then ur
+          else
+            let min1 = signed_min t1 in
+            let max1 = signed_max t1 in
+            let min2 = signed_min t2 in
+            let max2 = signed_max t2 in
+            let p = [|
+              Bv.(min1 * min2 mod m2);
+              Bv.(min1 * max2 mod m2);
+              Bv.(max1 * min2 mod m2);
+              Bv.(max1 * max2 mod m2);
+            |] in
+            let compare x y = Bv.signed_compare x y m2 in
+            let lo = Array.min_elt p ~compare in
+            let hi = Array.max_elt p ~compare in
+            let lo, hi = match lo, hi with
+              | None, _ | _, None -> assert false
+              | Some lo, Some hi -> (lo, Bv.(succ hi mod m2)) in
+            let result = create ~lo ~hi ~size:(size * 2) in
+            let sr = trunc result ~size in
+            if is_strictly_smaller_than ur sr then ur else sr
 
 let smax t1 t2 =
   if t1.size <> t2.size then
@@ -848,8 +853,8 @@ let logical_shift_right t1 t2 =
       let max2 = unsigned_max t2 in
       let min1 = unsigned_min t1 in
       let min2 = unsigned_min t2 in
-      let lo = Bv.(succ ((max1 lsr min2) mod m) mod m) in
-      let hi = Bv.((min1 lsr max2) mod m) in
+      let hi = Bv.(succ ((max1 lsr min2) mod m) mod m) in
+      let lo = Bv.((min1 lsr max2) mod m) in
       create ~lo ~hi ~size
 
 let arithmetic_shift_right t1 t2 =
@@ -982,7 +987,7 @@ let ctz_range lo hi size =
     else if Bv.(lo = zero) then
       create ~lo:Bv.zero ~hi:(B.int (size + 1)) ~size
     else
-      let len = bv_clz B.(lo lxor succ hi) size in
+      let len = bv_clz B.(lo lxor pred hi) size in
       let hi = B.int (max (size - len - 1) (bv_ctz lo size) + 1) in
       create ~size ~lo:Bv.zero ~hi
 
diff --git a/src/lib/dune b/src/lib/dune
index 539cebce..4e58fd24 100644
--- a/src/lib/dune
+++ b/src/lib/dune
@@ -20,6 +20,7 @@
   core_kernel.pairing_heap
   core_kernel.uopt
   graphlib
+  logs
   monads
   regular
   zarith)
@@ -30,6 +31,8 @@
   hashmap
   hashmap_intf
   isel_internal
+  interval_tree
+  interval_tree_intf
   last_stores
   live_intf
   loops
@@ -39,8 +42,11 @@
   patricia_tree_intf
   phi_values
   resolver_intf
+  scalars
+  slot_initialization
   sm
   subst_mapper
+  subst_mapper_abi
   tags
   var_internal
   virtual_lexer
diff --git a/src/lib/egraph/egraph.ml b/src/lib/egraph/egraph.ml
index 04d27c6c..10dc6589 100644
--- a/src/lib/egraph/egraph.ml
+++ b/src/lib/egraph/egraph.ml
@@ -1,4 +1,5 @@
 open Core
+open Regular.Std
 open Virtual
 
 include Egraph_common
@@ -17,7 +18,7 @@ let init input depth_limit match_limit rules = {
   typs = Vec.create ();
   lmoved = Label.Table.create ();
   imoved = Vec.create ();
-  pinned = Z.zero;
+  pinned = Bitset.Id.empty;
   ilbl = Vec.create ();
   lval = Label.Table.create ();
   depth_limit;
@@ -29,14 +30,52 @@ let check_ssa fn =
   if Dict.mem (Func.dict fn) Tags.ssa then
     Ok ()
   else
-    Input.E.failf "Expected SSA form for function %s"
+    Input.E.failf "Expected SSA form for function $%s"
       (Func.name fn) ()
 
+let debug_dump t =
+  Logs.debug (fun m ->
+      let pp_lmoved ppf (l, s) =
+        Format.fprintf ppf "  %a: %s%!" Label.pp l
+          (Iset.to_list s |>
+           List.to_string ~f:Id.to_string) in
+      let pp_sep ppf () = Format.fprintf ppf "\n" in
+      m "%s: $%s lmoved:\n%a%!"
+        __FUNCTION__ (Func.name t.input.fn)
+        (Format.pp_print_list ~pp_sep pp_lmoved)
+        (Hashtbl.to_alist t.lmoved));
+  Logs.debug (fun m ->
+      let pp_lmoved ppf (id, s) =
+        Format.fprintf ppf "  %d: %s%!" id
+          (Lset.to_list s |>
+           List.to_string ~f:Label.to_string) in
+      let pp_sep ppf () = Format.fprintf ppf "\n" in
+      m "%s: $%s imoved:\n%a%!"
+        __FUNCTION__ (Func.name t.input.fn)
+        (Format.pp_print_list ~pp_sep pp_lmoved)
+        (Vec.to_sequence_mutable t.imoved |>
+         Seq.mapi ~f:Tuple2.create |>
+         Seq.to_list));
+  Logs.debug (fun m ->
+      let pp_ilbl ppf (id, l) =
+        Format.fprintf ppf "  %d: %a%!" id
+          (Format.pp_print_option
+             ~none:(fun ppf () -> Format.fprintf ppf "<none>")
+             Label.pp) (Uopt.to_option l) in
+      let pp_sep ppf () = Format.fprintf ppf "\n" in
+      m "%s: $%s ilbl:\n%a%!"
+        __FUNCTION__ (Func.name t.input.fn)
+        (Format.pp_print_list ~pp_sep pp_ilbl)
+        (Vec.to_sequence_mutable t.ilbl |>
+         Seq.mapi ~f:Tuple2.create |>
+         Seq.to_list))
+
 let run ?(depth_limit = 6) ?(match_limit = 20) fn tenv rules =
   let open Context.Syntax in
   let*? () = check_ssa fn in
   let*? input = Input.init fn tenv in
   let t = init input depth_limit match_limit rules in
   let*? () = Builder.run t in
+  debug_dump t;
   let ex = Extractor.init t in
   Extractor.cfg ex
diff --git a/src/lib/egraph/egraph.mli b/src/lib/egraph/egraph.mli
index 43775154..5d348352 100644
--- a/src/lib/egraph/egraph.mli
+++ b/src/lib/egraph/egraph.mli
@@ -55,7 +55,7 @@ type rule
 type rules
 
 (** Compiles the rules. *)
-val compile : rule list -> rules
+val compile : name:string -> rule list -> rules
 
 (** [run fn tenv rules ?depth_limit ?match_limit] constructs an e-graph
     from a function [fn] and applies the [rules] eagerly to produce a
diff --git a/src/lib/egraph/egraph_common.ml b/src/lib/egraph/egraph_common.ml
index 89b0a4d8..539c1270 100644
--- a/src/lib/egraph/egraph_common.ml
+++ b/src/lib/egraph/egraph_common.ml
@@ -107,7 +107,7 @@ type t = {
   memo           : (enode, id) Hashtbl.t; (* The hash-cons for optimized terms. *)
   lmoved         : Iset.t Label.Table.t;  (* Set of IDs that were moved to a given label. *)
   imoved         : Lset.t Vec.t;          (* Set of labels that were moved for a given ID. *)
-  mutable pinned : Z.t;                   (* IDs that should not be rescheduled. *)
+  mutable pinned : Bitset.Id.t;           (* IDs that should not be rescheduled. *)
   ilbl           : Label.t Uopt.t Vec.t;  (* Maps IDs to labels. *)
   lval           : id Label.Table.t;      (* Maps labels to IDs. *)
   depth_limit    : int;                   (* Maximum rewrite depth. *)
@@ -117,11 +117,11 @@ type t = {
 
 type egraph = t
 
-let compile rules =
+let compile ~name rules =
   (* XXX: the previous implementation also reversed the rule order,
      and now the testsuite relies on that behavior. Maybe the real
      fix is in changing how the extractor breaks ties. *)
-  Matcher.compile @@ List.rev_map rules
+  Matcher.compile ~name @@ List.rev_map rules
     ~f:(fun r -> r.pre, (r.post, r.subsume))
 
 let find t id = Uf.find t.classes id
@@ -140,8 +140,8 @@ let add_moved t id = function
     let init = Vec.get_exn t.imoved id in
     Vec.set_exn t.imoved id @@ List.fold ls ~init ~f:Lset.add
 
-let set_pinned t id = t.pinned <- Z.(t.pinned lor (one lsl id))
-let is_pinned t id = Z.testbit t.pinned id
+let set_pinned t id = t.pinned <- Bitset.Id.set t.pinned id
+let is_pinned t id = Bitset.Id.mem t.pinned id
 
 let typeof_var t x =
   Typecheck.Env.typeof_var t.input.fn x t.input.tenv |> Or_error.ok
diff --git a/src/lib/egraph/egraph_input.ml b/src/lib/egraph/egraph_input.ml
index 6c479930..20ac64d7 100644
--- a/src/lib/egraph/egraph_input.ml
+++ b/src/lib/egraph/egraph_input.ml
@@ -12,27 +12,38 @@ module Operands = Set.Make(struct
     type t = operand [@@deriving compare, sexp]
   end)
 
-module Phis = Phi_values.Make(struct
-    type t = Operands.t [@@deriving equal]
-    let one = Operands.singleton
-    let join = Set.union
-  end)
+module Phis_lang = struct
+  module Ctrl = struct
+    type t = ctrl
+    let locals = (Phi_values.locals Ctrl.Table.enum :> t -> _)
+  end
+  module Blk = Blk
+  module Func = Func
+  module Cfg = Cfg
+end
+
+module Phis_domain = struct
+  type t = Operands.t [@@deriving equal]
+  let one = Operands.singleton
+  let join = Set.union
+end
+
+module Phis = Phi_values.Make(Phis_lang)(Phis_domain)
 
 (* General information about the function we're translating. *)
 type t = {
-  fn   : func;                      (* The function itself. *)
-  loop : loops;                     (* Loops analysis. *)
-  reso : resolver;                  (* Labels to blocks/insns. *)
-  cfg  : Cfg.t;                     (* The CFG. *)
-  dom  : Label.t Semi_nca.tree;     (* Dominator tree. *)
-  domd : int Label.Table.t;         (* Dominator tree depths. *)
-  pdom : Label.t Semi_nca.tree;     (* Post-dominator tree. *)
-  rdom : Dominance.t;               (* Fine-grained dominance relation. *)
-  df   : Label.t Semi_nca.frontier; (* Dominance frontiers. *)
-  lst  : Last_stores.t;             (* Last stores analysis. *)
-  tenv : Typecheck.env;             (* Typing environment. *)
-  phis : Phis.state;                (* Block argument value sets. *)
-  rpo  : Label.t -> int;            (* Reverse post-order number. *)
+  fn   : func;                  (* The function itself. *)
+  loop : loops;                 (* Loops analysis. *)
+  reso : resolver;              (* Labels to blocks/insns. *)
+  cfg  : Cfg.t;                 (* The CFG. *)
+  dom  : Label.t Semi_nca.tree; (* Dominator tree. *)
+  domd : int Label.Table.t;     (* Dominator tree depths. *)
+  pdom : Label.t Semi_nca.tree; (* Post-dominator tree. *)
+  rdom : Dominance.t;           (* Fine-grained dominance relation. *)
+  lst  : Last_stores.t;         (* Last stores analysis. *)
+  tenv : Typecheck.env;         (* Typing environment. *)
+  phis : Phis.state;            (* Block argument value sets. *)
+  rpo  : Label.t -> int;        (* Reverse post-order number. *)
 }
 
 let init_dom_relation reso dom =
@@ -83,10 +94,9 @@ let init fn tenv =
   let loop = Loops.analyze ~name:(Func.name fn) cfg in
   let dom = Semi_nca.compute (module Cfg) cfg Label.pseudoentry in
   let pdom = Semi_nca.compute (module Cfg) cfg Label.pseudoexit ~rev:true in
-  let df = Semi_nca.frontier (module Cfg) cfg dom in
   let rdom = init_dom_relation reso dom in
   let lst = init_last_stores cfg reso in
   let domd = dom_depths dom in
   let phis = Phis.analyze ~blk:(resolve_blk reso) cfg in
   let rpo = init_rpo cfg in
-  {fn; loop; reso; cfg; dom; domd; pdom; rdom; df; lst; tenv; phis; rpo}
+  {fn; loop; reso; cfg; dom; domd; pdom; rdom; lst; tenv; phis; rpo}
diff --git a/src/lib/egraph/egraph_rewrite.ml b/src/lib/egraph/egraph_rewrite.ml
index bd4df29b..822d232a 100644
--- a/src/lib/egraph/egraph_rewrite.ml
+++ b/src/lib/egraph/egraph_rewrite.ml
@@ -50,6 +50,9 @@ let union ?ty t id oid =
 
 (* Called when a duplicate node is inserted. *)
 let duplicate ?l t id =
+  Logs.debug (fun m ->
+      m "%s: inserted already hash-consed term %d:\n  %a%!"
+        __FUNCTION__ id (Enode.pp ~node:(node t)) (node t id));
   Option.iter l ~f:(Sched.duplicate t id);
   id
 
@@ -109,7 +112,7 @@ and optimize ?ty ?l ~d t n id =
     | U _ -> assert false
     | N _ when d <= 0 -> id
     | N _ when Matcher.is_empty t.rules -> id
-    | N (o, cs) ->
+    | N _ ->
       let vm = VM.create () in
       let rws = {id; budget = t.match_limit} in
       if VM.init ~lookup:(node t) vm t.rules id then begin
@@ -154,6 +157,14 @@ and rewrite ?ty ?l ~d t rws y =
     Uf.union t.classes rws.id oid;
     rws.id <- oid in
   try
+    let prev = rws.id in
+    Logs.debug (fun m ->
+        let s = Map.to_alist @@ Y.subst y in
+        m "%s: matched on rule %d for term %d\n  pat: %a\n  subst: %s%!"
+          __FUNCTION__ (Y.rule y) prev
+          Matcher.pp_pat (Y.pat y)
+          (List.to_string s ~f:(fun (x, id) ->
+               Format.asprintf "%s=%d" x id)));
     let env = Map.map (Y.subst y) ~f:(subst_info t) in
     let action, subsume = Y.payload y in
     let go env p = check env p; assemble env p in
@@ -171,5 +182,10 @@ and rewrite ?ty ?l ~d t rws y =
     (* Rewrite is OK, integrate with the current e-class. *)
     let continue = not (subsume || Enode.is_const (node t oid)) in
     if continue then default oid else optimal oid;
+    Logs.debug (fun m ->
+        m "%s: rewrote term %d to %d, continue=%b:\n  prev: %a\n  curr: %a%!"
+          __FUNCTION__ prev oid continue
+          (Enode.pp ~node:(node t)) (node t prev)
+          (Enode.pp ~node:(node t)) (node t oid));
     continue
   with Mismatch -> true
diff --git a/src/lib/egraph/egraph_sched.ml b/src/lib/egraph/egraph_sched.ml
index a61984fa..e86bda6c 100644
--- a/src/lib/egraph/egraph_sched.ml
+++ b/src/lib/egraph/egraph_sched.ml
@@ -49,13 +49,23 @@ let lca t a b =
 
 (* Note that `id` must be the canonical e-class. *)
 let move t old l id =
+  Logs.debug (fun m ->
+      let pp_old ppf old =
+        if List.is_empty old
+        then Format.fprintf ppf ""
+        else Format.fprintf ppf "from %s "
+            (List.to_string ~f:Label.to_string old) in
+      m "%s: moving term %d %ato %a%!"
+        __FUNCTION__ id pp_old old Label.pp l);
   add_moved t id old;
   set_label t id l;
   Hashtbl.update t.lmoved l ~f:(function
       | None -> Iset.singleton id
       | Some s -> Iset.add s id)
 
-let mark_use t id a = add_moved t id [a]
+let mark_use t id a =
+  Logs.debug (fun m -> m "%s: id=%d, a=%a%!" __FUNCTION__ id Label.pp a);
+  add_moved t id [a]
 
 (* Update when we union two nodes together. Should not be
    called if both IDs are the same. *)
@@ -63,19 +73,30 @@ let merge t a b u =
   assert (a <> b);
   let cid = find t a in
   (* Link the ID to the label, along with the union ID. *)
-  let link ?p l =
+  let link ?p dir l =
+    Logs.debug (fun m ->
+        m "%s: merge dominated %s: a=%d, b=%d, cid=%d, u=%d, l=%a, p=%a%!"
+          __FUNCTION__
+          (match dir with `left -> "left" | `right -> "right")
+          a b cid u Label.pp l
+          (Format.pp_print_option
+             ~none:(fun ppf () -> Format.fprintf ppf "<none>")
+             Label.pp) p);
     Option.iter p ~f:(mark_use t cid);
     set_label t cid l;
     set_label t u l in
   match labelof t a, labelof t b with
   | None, None -> ()
-  | None, Some pb -> link pb
-  | Some pa, None -> link pa
+  | None, Some pb -> link `right pb
+  | Some pa, None -> link `left pa
   | Some pa, Some pb when Label.(pa = pb) -> ()
-  | Some pa, Some pb when dominates t ~parent:pb pa -> link pb ~p:pa
-  | Some pa, Some pb when dominates t ~parent:pa pb -> link pa ~p:pb
+  | Some pa, Some pb when dominates t ~parent:pb pa -> link `right pb ~p:pa
+  | Some pa, Some pb when dominates t ~parent:pa pb -> link `left pa ~p:pb
   | Some pa, Some pb ->
     let pc = lca t pa pb in
+    Logs.debug (fun m ->
+        m "%s: merge LCA: a=%d, b=%d, cid=%d, u=%d, pa=%a, pb=%a, pc=%a%!"
+          __FUNCTION__ a b cid u Label.pp pa Label.pp pb Label.pp pc);
     assert (cid = find t b);
     assert (cid = find t u);
     clear_label t a;
@@ -94,6 +115,10 @@ let rec useof t l : enode -> unit = function
         useof t l @@ node t c)
 
 let default_placement t id l n =
+  Logs.debug (fun m ->
+      m "%s: placing term %d at %a:\n  node: %a%!"
+        __FUNCTION__ id Label.pp l
+        (Enode.pp ~node:(node t)) n);
   move t [] l id;
   useof t l n
 
@@ -103,15 +128,28 @@ let duplicate t id a =
   let cid = find t id in
   match labelof t cid with
   | Some b when Label.(b = a) -> ()
-  | Some b when dominates t ~parent:b a -> mark_use t cid a
+  | Some b when dominates t ~parent:b a ->
+    Logs.debug (fun m ->
+        m "%s: %d at %a dominated by previous term %d at %a%!"
+          __FUNCTION__ id Label.pp a cid Label.pp b);
+    mark_use t cid a
   | Some b when dominates t ~parent:a b ->
+    Logs.debug (fun m ->
+        m "%s: %d at %a dominates previous term %d at %a%!"
+          __FUNCTION__ id Label.pp a cid Label.pp b);
     mark_use t cid b;
     set_label t id a
   | Some b ->
     let c = lca t a b in
+    Logs.debug (fun m ->
+        m "%s: %d at %a LCA with previous term %d at %a: %a%!"
+          __FUNCTION__ id Label.pp a cid Label.pp b Label.pp c);
     clear_label t cid;
     move t [a; b] c cid;
   | None ->
+    Logs.debug (fun m ->
+        m "%s: %d at %a has no previous term, cid=%d%!"
+          __FUNCTION__ id Label.pp a cid);
     (* This e-class wasn't moved, though it wasn't registered
        to begin with (even though it was hash-consed). *)
     default_placement t id a @@ node t id
@@ -220,6 +258,11 @@ module Licm = struct
   let exists_in_loop t parent = List.exists ~f:(is_child_loop ~parent t)
 
   let licm_move t l l' lp id lhs =
+    Logs.debug (fun m ->
+        m "%s: LICM for term %d: l=%a, l'=%a:\n  loop: %a\n  node: %a%!"
+          __FUNCTION__ id Label.pp l Label.pp l'
+          Loops.pp_data (Loops.get t.input.loop lp)
+          (Enode.pp ~node:(node t)) (node t id));
     let l' = match lhs with
       | None -> l'
       | Some x -> match partition_uses t x with
diff --git a/src/lib/egraph/extractor/extractor_cfg.ml b/src/lib/egraph/extractor/extractor_cfg.ml
index 57c4af4e..2159b57a 100644
--- a/src/lib/egraph/extractor/extractor_cfg.ml
+++ b/src/lib/egraph/extractor/extractor_cfg.ml
@@ -22,25 +22,24 @@ let empty_scope : scope = Id.Tree.empty
 
 type placed = {
   seq : Label.t Ftree.t;
-  ids : Z.t;
+  ids : Bitset.Id.t;
 }
 
 let create_placed id l = {
   seq = Ftree.singleton l;
-  ids = Z.(one lsl id);
+  ids = Bitset.Id.singleton id;
 }
 
 let add_placed p id l =
-  assert (not @@ Z.testbit p.ids id); {
+  assert (not @@ Bitset.Id.mem p.ids id); {
     seq = Ftree.snoc p.seq l;
-    ids = Z.(p.ids lor (one lsl id));
+    ids = Bitset.Id.set p.ids id;
   }
 
 type env = {
   insn        : Insn.op Label.Table.t;
   ctrl        : ctrl Label.Table.t;
   news        : placed Label.Table.t;
-  closure     : Lset.t Label.Table.t;
   mutable cur : Label.t;
   mutable scp : scope;
 }
@@ -49,7 +48,6 @@ let init () = {
   insn = Label.Table.create ();
   ctrl = Label.Table.create ();
   news = Label.Table.create();
-  closure = Label.Table.create ();
   cur = Label.pseudoentry;
   scp = empty_scope;
 }
@@ -93,6 +91,9 @@ let fresh env canon real =
     let* x = Context.Var.fresh in
     let+ l = Context.Label.fresh in
     env.scp <- Id.Tree.set env.scp ~key:canon ~data:(x, l);
+    Logs.debug (fun m ->
+        m "%s: placing fresh %a at %a: env.cur=%a canon=%d, real=%d%!"
+          __FUNCTION__ Var.pp x Label.pp l Label.pp env.cur canon real);
     Hashtbl.update env.news env.cur ~f:(function
         | Some p -> add_placed p real l
         | None -> create_placed real l);
@@ -107,14 +108,17 @@ let insn t env a f =
   let+ op = f x in
   upd env.insn l op;
   `var x
+[@@specialise]
 
 let insn' env l f =
   let+ op = f () in
   upd env.insn l op
+[@@specialise]
 
 let ctrl env l f =
   let+ c = f () in
   upd env.ctrl l c
+[@@specialise]
 
 let sel e x ty c y n = match c with
   | `var c -> !!(`sel (x, ty, c, y, n))
@@ -122,6 +126,8 @@ let sel e x ty c y n = match c with
   | `bool false -> !!(`uop (x, `copy ty, n))
   | _ -> invalid_pure e
 
+let (let@) x f = x f [@@specialise] [@@inline]
+
 let rec pure t env e : operand Context.t =
   let pure = pure t env in
   let insn = insn t env in
@@ -130,7 +136,7 @@ let rec pure t env e : operand Context.t =
   | E (a, Obinop b, [l; r]) ->
     let* l = pure l in
     let* r = pure r in
-    insn a @@ fun x ->
+    let@ x = insn a in
     !!(`bop (x, b, l, r))
   | E (_, Obool b, []) -> !!(`bool b)
   | E (_, Ocall (x, _), _) -> !!(`var x)
@@ -141,13 +147,13 @@ let rec pure t env e : operand Context.t =
     let* c = pure c in
     let* y = pure y in
     let* n = pure n in
-    insn a @@ fun x ->
+    let@ x = insn a in
     sel e x ty c y n
   | E (_, Osingle s, []) -> !!(`float s)
   | E (_, Osym (s, n), []) -> !!(`sym (s, n))
   | E (a, Ounop u, [y]) ->
     let* y = pure y in
-    insn a @@ fun x ->
+    let@ x = insn a in
     !!(`uop (x, u, y))
   | E (_, Ovaarg (x, _), [_]) -> !!(`var x)
   | E (_, Ovar x, []) -> !!(`var x)
@@ -257,51 +263,51 @@ let exp t env l e =
     let* c = pure c in
     let* y = dst y in
     let* n = dst n in
-    ctrl @@ fun () ->
+    let@ () = ctrl in
     br l e c y n
   | E (_, Ocall0 _, [f; args; vargs]) ->
     let* f = global t env f in
     let* args = callargs t env args in
     let* vargs = callargs t env vargs in
-    insn @@ fun () ->
+    let@ () = insn in
     !!(`call (None, f, args, vargs))
   | E (_, Ocall (x, ty), [f; args; vargs]) ->
     let* f = global t env f in
     let* args = callargs t env args in
     let* vargs = callargs t env vargs in
-    insn @@ fun () ->
+    let@ () = insn in
     !!(`call (Some (x, ty), f, args, vargs))
   | E (_, Oload (x, t), [y]) ->
     let* y = pure y in
-    insn @@ fun () ->
+    let@ () = insn in
     !!(`load (x, t, y))
   | E (_, Ojmp, [d]) ->
     let* d = dst d in
-    ctrl @@ fun () ->
+    let@ () = ctrl in
     !!(`jmp d)
   | E (_, Oret, [x]) ->
     let* x = pure x in
-    ctrl @@ fun () ->
+    let@ () = ctrl in
     !!(`ret (Some x))
   | E (_, Oset _, [y]) -> pure y >>| ignore
   | E (_, Ostore (t, _), [v; x]) ->
     let* v = pure v in
     let* x = pure x in
-    insn @@ fun () ->
+    let@ () = insn in
     !!(`store (t, v, x))
   | E (_, Osw ty, i :: d :: tbl) ->
     let* i = pure i in
     let* d = sw_default t env l d in
     let* tbl = table t env tbl ty in
-    ctrl @@ fun () ->
+    let@ () = ctrl in
     sw l e ty i d tbl
   | E (_, Ovaarg (x, t), [a]) ->
     let* a = pure a in
-    insn @@ fun () ->
+    let@ () = insn in
     vaarg l e x t a
   | E (_, Ovastart _, [a]) ->
     let* a = pure a in
-    insn @@ fun () ->
+    let@ () = insn in
     vastart l e a
   (* The rest are rejected. *)
   | E (_, Oaddr _, _)
@@ -330,38 +336,6 @@ let exp t env l e =
   | E (_, Ovastart _, _) -> invalid l e
 
 module Hoisting = struct
-  let (++) = Lset.union
-  let not_pseudo = Fn.non Label.is_pseudo
-  let descendants t = Semi_nca.Tree.descendants t.eg.input.dom
-  let dominates t = Semi_nca.Tree.is_descendant_of t.eg.input.dom
-  let frontier t = Semi_nca.Frontier.enum t.eg.input.df
-  let to_set = Fn.compose Lset.of_sequence @@ Seq.filter ~f:not_pseudo
-
-  (* Effectively, we are computing the dominator-subtree closure
-     over the iterated dominance frontier of `l`.
-
-     This helps answer the question of where the computation of a
-     given instruction is "available" in the CFG.
-  *)
-  let rec closure ?(self = true) t env l =
-    let c = match Hashtbl.find env.closure l with
-      | Some c -> c
-      | None ->
-        let c =
-          frontier t l |> Seq.filter ~f:not_pseudo |>
-          (* A block can be in its own dominance frontier, so
-             we need to avoid an infinite loop. *)
-          Seq.filter ~f:(Fn.non @@ Label.equal l) |>
-          (* Additionally, we don't want to follow back-edges. This
-             can happen when a node in our frontier is, for example,
-             a loop header. *)
-          Seq.filter ~f:(fun parent -> not @@ dominates t ~parent l) |>
-          Seq.map ~f:(closure t env) |>
-          Seq.fold ~init:(to_set @@ descendants t l) ~f:(++) in
-        Hashtbl.set env.closure ~key:l ~data:c;
-        c in
-    if self then Lset.add c l else Lset.remove c l
-
   (* Try the real ID first before moving on to the canonical ID. This could
      happen if we rescheduled a newer term before we unioned it with an older
      term. *)
@@ -371,12 +345,14 @@ module Hoisting = struct
       if id = cid then s else Common.movedof t.eg cid
     else s
 
+  let resolve_label t l =
+    match Resolver.resolve t.eg.input.reso l with
+    | Some `insn (_, b, _, _) -> Blk.label b
+    | Some `blk b -> Blk.label b
+    | None -> assert false
+
   let moved_blks t id cid =
-    find_moved t id cid |> Lset.map ~f:(fun l ->
-        match Resolver.resolve t.eg.input.reso l with
-        | Some `insn (_, b, _, _) -> Blk.label b
-        | Some `blk b -> Blk.label b
-        | None -> assert false)
+    find_moved t id cid |> Lset.map ~f:(resolve_label t)
 
   let rec post_dominated t l bs =
     match Semi_nca.Tree.parent t.eg.input.pdom l with
@@ -385,7 +361,7 @@ module Hoisting = struct
 
   (* See if there exists a cycle starting from `l` that does not intersect
      with `bs`. *)
-  let exists_bypass t l bs =
+  let exists_bypass t l id cid bs =
     let rec loop q = match Stack.pop q with
       | None -> false
       | Some (n, vis) when Lset.mem vis n ->
@@ -395,28 +371,77 @@ module Hoisting = struct
         Cfg.Node.succs n t.eg.input.cfg |>
         Seq.iter ~f:(fun s -> Stack.push q (s, vis));
         loop q in
-    Loops.mem t.eg.input.loop l &&
-    loop (Stack.singleton (l, Lset.empty))
+    let res =
+      Loops.mem t.eg.input.loop l &&
+      loop (Stack.singleton (l, Lset.empty)) in
+    Logs.debug (fun m ->
+        m "%s: %a is post-dominated: bs=%s, id=%d, cid=%d, res=%b%!"
+          __FUNCTION__ Label.pp l
+          (Lset.to_list bs |> List.to_string ~f:Label.to_string)
+          id cid res);
+    res
+
+  (* Given all of the uses we know of, compute the points
+     where the definition will be killed:
+
+     kills = {k | ∃ u ∈ uses s.t. k ∈ succs(u) ∧ k ∉ uses}
+  *)
+  let compute_kills t uses =
+    Lset.fold uses ~init:Lset.empty ~f:(fun init u ->
+        Cfg.Node.succs u t.eg.input.cfg |>
+        Seq.filter ~f:(Fn.non @@ Lset.mem uses) |>
+        Seq.fold ~init ~f:Lset.add)
+
+  (* See if there exists a path, starting from `l`, that avoids
+     touching any block in `uses` and either reaches the end of
+     the function, or one of the blocks in `kills`.
+
+     `kills` is the set of blocks where, if we were to place the
+     instruction at `l`, its live range would end.
+
+     pre: `kills` and `uses` are disjoint
+  *)
+  let is_partial_redundancy_pathwise t l id cid ~uses =
+    let kills = compute_kills t uses in
+    let rec loop q = match Stack.pop q with
+      | None -> false
+      | Some (n, vis) when Lset.mem vis n -> loop q
+      | Some (n, _) when Lset.mem uses n -> loop q
+      | Some (n, _) when Lset.mem kills n -> true
+      | Some (n, _) when Label.(n = pseudoexit) -> true
+      | Some (n, vis) ->
+        let vis = Lset.add vis n in
+        Cfg.Node.succs n t.eg.input.cfg |>
+        Seq.iter ~f:(fun s -> Stack.push q (s, vis));
+        loop q in
+    let res = loop @@ Stack.singleton (l, Lset.empty) in
+    Logs.debug (fun m ->
+        m "%s: l=%a, id=%d cid=%d, uses=%s, kills=%s, res=%b%!"
+          __FUNCTION__ Label.pp l id cid
+          (Lset.to_list uses |> List.to_string ~f:Label.to_string)
+          (Lset.to_list kills |> List.to_string ~f:Label.to_string)
+          res);
+    res
 
   (* When we "move" duplicate nodes up to the LCA (lowest common ancestor)
      in the dominator tree, we might be introducing a partial redundancy.
      This means that, at the LCA, the node is not going to be used on all
      paths that are dominated by it, so we need to do a simple analysis to
      see if this is the case. *)
-  let is_partial_redundancy t env l id cid =
+  let is_partial_redundancy t l id cid =
     (* If this node is deliberately placed here then allow it. *)
     not (Common.is_pinned t.eg id) && begin
+      Logs.debug (fun m ->
+          m "%s: checking %a, id=%d, cid=%d%!"
+            __FUNCTION__ Label.pp l id cid);
       (* Get the blocks associated with the labels that were
          "moved" for this node. *)
-      let l = match Resolver.resolve t.eg.input.reso l with
-        | Some `insn (_, b, _, _) -> Blk.label b
-        | Some `blk _ -> l
-        | None -> assert false in
       let bs = moved_blks t id cid in
       (* An empty set means that nobody uses this value. *)
       Lset.is_empty bs || begin
         (* If we're being used in the candidate block then this is trivially
            not a partial redundancy. *)
+        let l = resolve_label t l in
         not (Lset.mem bs l) && begin
           (* If one of these blocks post-dominates the block that we're
              moving to, then it is safe to allow the move to happen,
@@ -428,28 +453,18 @@ module Hoisting = struct
           if post_dominated t l bs then
             (* Check if we can reach the target block via a backedge
                without visiting any of the blocks we moved from. *)
-            exists_bypass t l bs
+            exists_bypass t l id cid bs
           else
-            (* For each of these blocks, get its reflexive transitive
-               closure in the dominator tree, and union them together. *)
-            let a = Lset.fold bs ~init:Lset.empty
-                ~f:(fun acc l -> acc ++ closure t env l) in
-            (* Get the non-reflexive transitive closure of the block
-               that we moved to. *)
-            let b = closure t env l ~self:false in
-            (* If these sets are not equal, then we have a partial
-               redundancy, and thus need to duplicate code. In the
-               case where the closure includes our target block `l`,
-               we want to exclude it, since the closure for `l` will
-               exclude itself. *)
-            not @@ Lset.equal (Lset.remove a l) b
+            (* Check if `bs` intersects on all paths where `id` can
+               be used. *)
+            is_partial_redundancy_pathwise t l id cid ~uses:bs
         end
       end
     end
 
-  let should_skip t env l id cid =
-    Z.testbit t.impure cid ||
-    is_partial_redundancy t env l id cid
+  let should_skip t l id cid =
+    Bitset.Id.mem t.impure cid ||
+    is_partial_redundancy t l id cid
 
   (* If any nodes got moved up to this label, then we should check
      to see if it is eligible for this code motion optimization.
@@ -471,7 +486,10 @@ module Hoisting = struct
       Context.Seq.iter ~f:(fun (id, cid) -> match extract t id with
           | None -> extract_fail l id
           | Some e ->
-            Context.unless (should_skip t env l id cid) @@ fun () ->
+            let@ () = Context.unless (should_skip t l id cid) in
+            Logs.debug (fun m ->
+                m "%s: id=%d, cid=%d was moved to l=%a, OK to hoist%!"
+                  __FUNCTION__ id cid Label.pp l);
             pure t env e >>| ignore)
 end
 
@@ -483,7 +501,7 @@ let reify t env l =
   | Some id -> match extract t id with
     | None -> extract_fail l @@ Common.find t.eg id
     | Some (E (Id {canon; _}, op, args) as e)
-      when not @@ Z.testbit t.impure canon ->
+      when not @@ Bitset.Id.mem t.impure canon ->
       (* There may be an opportunity to "sink" this instruction,
          which is the dual of the "hoisting" optimization below.
          Since this is a pure operation, we can wait until it is
@@ -492,10 +510,21 @@ let reify t env l =
       begin match op, args with
         | Oset _, [E (Id {canon; _}, _, _)]
           when Common.is_pinned t.eg canon ->
+          Logs.debug (fun m ->
+              m "%s: pinned: id=%d, l=%a%!"
+                __FUNCTION__ id Label.pp l);
           exp t env l e
-        | _ -> !!()
+        | _ ->
+          Logs.debug (fun m ->
+              m "%s: delaying CFG extraction of id=%d, l=%a%!"
+                __FUNCTION__ id Label.pp l);
+          !!()
       end
-    | Some e -> exp t env l e
+    | Some e ->
+      Logs.debug (fun m ->
+          m "%s: eagerly extracting id=%d to l=%a in CFG%!"
+            __FUNCTION__ id Label.pp l);
+      exp t env l e
 
 (* Rewrite a single instruction. *)
 let step_insn t env i =
@@ -533,8 +562,7 @@ let find_insn env l =
   Hashtbl.find env.insn l |> Option.map ~f:(Insn.create ~label:l)
 
 (* Find any new instructions to be prepended directly before label `l`.
-   Note that the ordering is, by default, from oldest to newest ID, but
-   this can be reversed for an efficient `rev_append` as seen below. *)
+   The order can be reversed for an efficient `rev_append` as seen below. *)
 let find_news ?(rev = false) env l =
   Hashtbl.find env.news l |>
   Option.value_map ~default:[] ~f:(fun p ->
diff --git a/src/lib/egraph/extractor/extractor_core.ml b/src/lib/egraph/extractor/extractor_core.ml
index 00e8bf55..596cbf76 100644
--- a/src/lib/egraph/extractor/extractor_core.ml
+++ b/src/lib/egraph/extractor/extractor_core.ml
@@ -3,14 +3,14 @@ open Egraph_common
 open Monads.Std
 open Virtual
 
-(* We store the canonical and real IDs to help us determine
-   the ordering when reifying back to the CFG representation.
-
-   Canonical IDs help us extract the best term, but the real
-   ID of the term determines the ordering; in particular, we
-   order from oldest to newest. This makes sense since the way
-   we build terms in the e-graph will be such that terms with
-   a newer ID will always depend on an ID that is older.
+(* Keep track of the provenance of extracted nodes.
+
+   We were previously using the real IDs for determining
+   the order by which we place new instructions, but that
+   is no longer guaranteed. In principle, lower IDs never
+   depend on higher IDs to compute their results, but when
+   we reschedule instructions in the CFG this is no longer
+   useful.
 *)
 type prov =
   | Label of Label.t
@@ -37,28 +37,41 @@ module Cost : sig
   val pure : int -> t
   val incr : t -> t
   val add : t -> t -> t
+  val opc : t -> Int63.t
+  val depth : t -> Int63.t
+  val pp : Format.formatter -> t -> unit
 end = struct
-  include Int63
+  open Int63
+
+  type nonrec t = t
+
+  (* We want an unsigned comparison *)
+  let compare_u a b = compare (a lxor min_value) (b lxor min_value) [@@inline]
+  let (<) a b = Int.(compare_u a b < 0) [@@inline]
 
   let depth_bits = 12
   let depth_mask = pred (one lsl depth_bits)
   let opc_mask = lnot depth_mask
 
-  let opc c = c lsr depth_bits
-  let depth c = c land depth_mask
-  let create o d = (o lsl depth_bits) lor (d land depth_mask)
-  let pure o = of_int o lsl depth_bits
+  let opc c = c lsr depth_bits [@@inline]
+  let depth c = c land depth_mask [@@inline]
+  let create o d = (o lsl depth_bits) lor (d land depth_mask) [@@inline]
+  let pure o = of_int o lsl depth_bits [@@inline]
 
   (* Make sure the increment doesn't wrap around. *)
   let incr c =
     let d = depth c in
     (c land opc_mask) lor
     (if d = depth_mask then d else succ d)
+  [@@inline]
 
   let add x y =
     let o = opc x + opc y in
     let d = max (depth x) (depth y) in
     create o d
+  [@@inline]
+
+  let pp = pp
 end
 
 type cost = Cost.t
@@ -67,7 +80,7 @@ type t = {
   eg             : egraph;
   table          : (cost * enode) Id.Table.t;
   memo           : ext Id.Table.t;
-  mutable impure : Z.t;
+  mutable impure : Bitset.Id.t;
 }
 
 let rec pp_ext ppf = function
@@ -141,14 +154,28 @@ end = struct
           | Some _ | None -> term))
 end
 
+let debug_dump t =
+  Logs.debug (fun m ->
+      let pp ppf (cid, (c, n)) =
+        Format.fprintf ppf
+          "  %d:\n    cost: %a\n      depth: %a\n      opc: %a\n    node: %a%!"
+          cid Cost.pp c Int63.pp (Cost.depth c) Int63.pp (Cost.opc c)
+          (Enode.pp ~node:(node t.eg)) n in
+      m "%s: $%s cost table:\n%a"
+        __FUNCTION__ (Func.name t.eg.input.fn)
+        (Format.pp_print_list pp
+           ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n"))
+        (Hashtbl.to_alist t.table))
+
 let init eg =
   let t = {
     eg;
     table = Id.Table.create ();
     memo = Id.Table.create ();
-    impure = Z.zero;
+    impure = Bitset.Id.empty;
   } in
   Saturation.go t;
+  debug_dump t;
   t
 
 let rec must_remain_fixed op args = match (op : Enode.op) with
@@ -184,15 +211,15 @@ let rec must_remain_fixed op args = match (op : Enode.op) with
   | _ -> false
 
 let prov t cid id op args =
-  if must_remain_fixed op args then begin
-    t.impure <- Z.(t.impure lor (one lsl cid));
+  if must_remain_fixed op args then
+    let () = t.impure <- Bitset.Id.set t.impure cid in
     match labelof t.eg cid with
     | Some l -> Label l
     | None when id = cid -> Id {canon = cid; real = id}
     | None -> match labelof t.eg id with
       | None -> Id {canon = cid; real = id}
       | Some l -> Label l
-  end else Id {canon = cid; real = id}
+  else Id {canon = cid; real = id}
 
 module O = Monad.Option
 
diff --git a/src/lib/float_stubs.c b/src/lib/float_stubs.c
index d271cb17..f42c7d7f 100644
--- a/src/lib/float_stubs.c
+++ b/src/lib/float_stubs.c
@@ -12,16 +12,19 @@
 
 #define Float_val(v) (*(float *)(Data_custom_val(v)))
 
-intnat cgen_float32_compare_unboxed(float f, float g) {
+intnat
+cgen_float32_compare_unboxed(float f, float g) {
   return (intnat)(f > g) - (intnat)(f < g) + (intnat)(f == f) -
          (intnat)(g == g);
 }
 
-int cgen_float32_compare_to_untagged(value f, value g) {
+int
+cgen_float32_compare_to_untagged(value f, value g) {
   return cgen_float32_compare_unboxed(Float_val(f), Float_val(g));
 }
 
-value cgen_float32_compare(value f, value g) {
+value
+cgen_float32_compare(value f, value g) {
   return Val_int(cgen_float32_compare_to_untagged(f, g));
 }
 
@@ -30,7 +33,8 @@ value cgen_float32_compare(value f, value g) {
    - ocaml/runtime/hash.c
    - ocaml/stdlib/float.ml
 */
-value cgen_float32_hash(value x) {
+value
+cgen_float32_hash(value x) {
   uint32_t h = caml_hash_mix_float(0, Float_val(x));
   h ^= h >> 16;
   h *= 0x85ebca6b;
@@ -41,22 +45,23 @@ value cgen_float32_hash(value x) {
 }
 
 static struct custom_operations cgen_float32_custom_ops = {
-    .identifier = (char *)"cgen_float32_custom_ops",
-    .finalize = custom_finalize_default,
-    .compare = cgen_float32_compare_to_untagged,
-    .hash = cgen_float32_hash,
-    .serialize = custom_serialize_default,
-    .deserialize = custom_deserialize_default,
-    .compare_ext = custom_compare_ext_default,
+  .identifier = (char *)"cgen_float32_custom_ops",
+  .finalize = custom_finalize_default,
+  .compare = cgen_float32_compare_to_untagged,
+  .hash = cgen_float32_hash,
+  .serialize = custom_serialize_default,
+  .deserialize = custom_deserialize_default,
+  .compare_ext = custom_compare_ext_default,
 #if OCAML_VERSION_MAJOR >= 4 && OCAML_VERSION_MINOR >= 8
-    .fixed_length = NULL,
+  .fixed_length = NULL,
 #endif
 };
 
 #define Alloc_float()                                                          \
   caml_alloc_custom(&cgen_float32_custom_ops, sizeof(float), 0, 1)
 
-value cgen_float32_of_float(value x) {
+value
+cgen_float32_of_float(value x) {
   CAMLparam1(x);
   CAMLlocal1(f);
   f = Alloc_float();
@@ -64,21 +69,38 @@ value cgen_float32_of_float(value x) {
   CAMLreturn(f);
 }
 
-value cgen_float_of_float32(value x) { return caml_copy_double(Float_val(x)); }
-value cgen_float32_is_zero(value x) { return Val_bool(Float_val(x) == 0.0f); }
-value cgen_float32_is_inf(value x) { return Val_bool(isinf(Float_val(x))); }
+value
+cgen_float_of_float32(value x) {
+  return caml_copy_double(Float_val(x));
+}
+
+value
+cgen_float32_is_zero(value x) {
+  return Val_bool(Float_val(x) == 0.0f);
+}
+
+value
+cgen_float32_is_inf(value x) {
+  return Val_bool(isinf(Float_val(x)));
+}
 
-value cgen_float32_is_negative(value x) {
+value
+cgen_float32_is_negative(value x) {
   return Val_bool(signbit(Float_val(x)));
 }
 
-value cgen_float32_is_nan(value x) { return Val_bool(isnan(Float_val(x))); }
+value
+cgen_float32_is_nan(value x) {
+  return Val_bool(isnan(Float_val(x)));
+}
 
-value cgen_float32_is_unordered(value x, value y) {
+value
+cgen_float32_is_unordered(value x, value y) {
   return Val_bool(isunordered(Float_val(x), Float_val(y)));
 }
 
-value cgen_float32_add(value x, value y) {
+value
+cgen_float32_add(value x, value y) {
   CAMLparam2(x, y);
   CAMLlocal1(f);
   f = Alloc_float();
@@ -86,7 +108,8 @@ value cgen_float32_add(value x, value y) {
   CAMLreturn(f);
 }
 
-value cgen_float32_div(value x, value y) {
+value
+cgen_float32_div(value x, value y) {
   CAMLparam2(x, y);
   CAMLlocal1(f);
   f = Alloc_float();
@@ -94,7 +117,8 @@ value cgen_float32_div(value x, value y) {
   CAMLreturn(f);
 }
 
-value cgen_float32_mul(value x, value y) {
+value
+cgen_float32_mul(value x, value y) {
   CAMLparam2(x, y);
   CAMLlocal1(f);
   f = Alloc_float();
@@ -102,7 +126,8 @@ value cgen_float32_mul(value x, value y) {
   CAMLreturn(f);
 }
 
-value cgen_float32_neg(value x) {
+value
+cgen_float32_neg(value x) {
   CAMLparam1(x);
   CAMLlocal1(f);
   f = Alloc_float();
@@ -110,7 +135,8 @@ value cgen_float32_neg(value x) {
   CAMLreturn(f);
 }
 
-value cgen_float32_sub(value x, value y) {
+value
+cgen_float32_sub(value x, value y) {
   CAMLparam2(x, y);
   CAMLlocal1(f);
   f = Alloc_float();
@@ -118,7 +144,8 @@ value cgen_float32_sub(value x, value y) {
   CAMLreturn(f);
 }
 
-value cgen_bits_of_float32(value x) {
+value
+cgen_bits_of_float32(value x) {
   CAMLparam1(x);
   CAMLlocal1(i);
   float f = Float_val(x);
@@ -126,7 +153,8 @@ value cgen_bits_of_float32(value x) {
   CAMLreturn(i);
 }
 
-value cgen_float32_of_bits(value x) {
+value
+cgen_float32_of_bits(value x) {
   CAMLparam1(x);
   CAMLlocal1(f);
   uint32_t i = Int32_val(x);
@@ -135,32 +163,48 @@ value cgen_float32_of_bits(value x) {
   CAMLreturn(f);
 }
 
-value cgen_int8_of_float32(value x) { return Val_int((int8_t)Float_val(x)); }
-value cgen_int16_of_float32(value x) { return Val_int((int16_t)Float_val(x)); }
+value
+cgen_int8_of_float32(value x) {
+  return Val_int((int8_t)Float_val(x));
+}
 
-value cgen_int32_of_float32(value x) {
+value
+cgen_int16_of_float32(value x) {
+  return Val_int((int16_t)Float_val(x));
+}
+
+value
+cgen_int32_of_float32(value x) {
   return caml_copy_int32((int32_t)Float_val(x));
 }
 
-value cgen_int64_of_float32(value x) {
+value
+cgen_int64_of_float32(value x) {
   return caml_copy_int64((int64_t)Float_val(x));
 }
 
-value cgen_uint8_of_float32(value x) { return Val_int((uint8_t)Float_val(x)); }
+value
+cgen_uint8_of_float32(value x) {
+  return Val_int((uint8_t)Float_val(x));
+}
 
-value cgen_uint16_of_float32(value x) {
+value
+cgen_uint16_of_float32(value x) {
   return Val_int((uint16_t)Float_val(x));
 }
 
-value cgen_uint32_of_float32(value x) {
+value
+cgen_uint32_of_float32(value x) {
   return caml_copy_int32((uint32_t)Float_val(x));
 }
 
-value cgen_uint64_of_float32(value x) {
+value
+cgen_uint64_of_float32(value x) {
   return caml_copy_int64((uint64_t)Float_val(x));
 }
 
-value cgen_float32_of_int8(value x) {
+value
+cgen_float32_of_int8(value x) {
   CAMLparam1(x);
   CAMLlocal1(f);
   f = Alloc_float();
@@ -168,7 +212,8 @@ value cgen_float32_of_int8(value x) {
   CAMLreturn(f);
 }
 
-value cgen_float32_of_int16(value x) {
+value
+cgen_float32_of_int16(value x) {
   CAMLparam1(x);
   CAMLlocal1(f);
   f = Alloc_float();
@@ -176,7 +221,8 @@ value cgen_float32_of_int16(value x) {
   CAMLreturn(f);
 }
 
-value cgen_float32_of_int32(value x) {
+value
+cgen_float32_of_int32(value x) {
   CAMLparam1(x);
   CAMLlocal1(f);
   f = Alloc_float();
@@ -184,7 +230,8 @@ value cgen_float32_of_int32(value x) {
   CAMLreturn(f);
 }
 
-value cgen_float32_of_int64(value x) {
+value
+cgen_float32_of_int64(value x) {
   CAMLparam1(x);
   CAMLlocal1(f);
   f = Alloc_float();
@@ -192,7 +239,8 @@ value cgen_float32_of_int64(value x) {
   CAMLreturn(f);
 }
 
-value cgen_float32_of_uint8(value x) {
+value
+cgen_float32_of_uint8(value x) {
   CAMLparam1(x);
   CAMLlocal1(f);
   f = Alloc_float();
@@ -200,7 +248,8 @@ value cgen_float32_of_uint8(value x) {
   CAMLreturn(f);
 }
 
-value cgen_float32_of_uint16(value x) {
+value
+cgen_float32_of_uint16(value x) {
   CAMLparam1(x);
   CAMLlocal1(f);
   f = Alloc_float();
@@ -208,7 +257,8 @@ value cgen_float32_of_uint16(value x) {
   CAMLreturn(f);
 }
 
-value cgen_float32_of_uint32(value x) {
+value
+cgen_float32_of_uint32(value x) {
   CAMLparam1(x);
   CAMLlocal1(f);
   f = Alloc_float();
@@ -216,7 +266,8 @@ value cgen_float32_of_uint32(value x) {
   CAMLreturn(f);
 }
 
-value cgen_float32_of_uint64(value x) {
+value
+cgen_float32_of_uint64(value x) {
   CAMLparam1(x);
   CAMLlocal1(f);
   f = Alloc_float();
@@ -224,7 +275,8 @@ value cgen_float32_of_uint64(value x) {
   CAMLreturn(f);
 }
 
-value cgen_string_of_float32(value x) {
+value
+cgen_string_of_float32(value x) {
   CAMLparam1(x);
   CAMLlocal1(s);
   char buf[MAX_FLOAT_DIGITS] = {0};
@@ -233,27 +285,33 @@ value cgen_string_of_float32(value x) {
   CAMLreturn(s);
 }
 
-value cgen_float32_equal(value x, value y) {
+value
+cgen_float32_equal(value x, value y) {
   return Val_bool(Float_val(x) == Float_val(y));
 }
 
-value cgen_float32_ge(value x, value y) {
+value
+cgen_float32_ge(value x, value y) {
   return Val_bool(Float_val(x) >= Float_val(y));
 }
 
-value cgen_float32_gt(value x, value y) {
+value
+cgen_float32_gt(value x, value y) {
   return Val_bool(Float_val(x) > Float_val(y));
 }
 
-value cgen_float32_le(value x, value y) {
+value
+cgen_float32_le(value x, value y) {
   return Val_bool(Float_val(x) <= Float_val(y));
 }
 
-value cgen_float32_lt(value x, value y) {
+value
+cgen_float32_lt(value x, value y) {
   return Val_bool(Float_val(x) < Float_val(y));
 }
 
-value cgen_bits_of_float(value x) {
+value
+cgen_bits_of_float(value x) {
   CAMLparam1(x);
   CAMLlocal1(d);
   double f = Double_val(x);
@@ -261,7 +319,8 @@ value cgen_bits_of_float(value x) {
   CAMLreturn(d);
 }
 
-value cgen_float_of_bits(value x) {
+value
+cgen_float_of_bits(value x) {
   CAMLparam1(x);
   CAMLlocal1(f);
   uint64_t i = Int64_val(x);
@@ -269,88 +328,115 @@ value cgen_float_of_bits(value x) {
   CAMLreturn(f);
 }
 
-value cgen_int8_of_float(value x) { return Val_int((int8_t)Double_val(x)); }
-value cgen_int16_of_float(value x) { return Val_int((int16_t)Double_val(x)); }
+value
+cgen_int8_of_float(value x) {
+  return Val_int((int8_t)Double_val(x));
+}
 
-value cgen_int32_of_float(value x) {
+value
+cgen_int16_of_float(value x) {
+  return Val_int((int16_t)Double_val(x));
+}
+
+value
+cgen_int32_of_float(value x) {
   return caml_copy_int32((int32_t)Double_val(x));
 }
 
-value cgen_int64_of_float(value x) {
+value
+cgen_int64_of_float(value x) {
   return caml_copy_int64((int64_t)Double_val(x));
 }
 
-value cgen_uint8_of_float(value x) { return Val_int((uint8_t)Double_val(x)); }
-value cgen_uint16_of_float(value x) { return Val_int((uint16_t)Double_val(x)); }
+value
+cgen_uint8_of_float(value x) {
+  return Val_int((uint8_t)Double_val(x));
+}
+value
+cgen_uint16_of_float(value x) {
+  return Val_int((uint16_t)Double_val(x));
+}
 
-value cgen_uint32_of_float(value x) {
+value
+cgen_uint32_of_float(value x) {
   return caml_copy_int32((uint32_t)Double_val(x));
 }
 
-value cgen_uint64_of_float(value x) {
+value
+cgen_uint64_of_float(value x) {
   return caml_copy_int64((uint64_t)Double_val(x));
 }
 
-value cgen_float_of_int8(value x) {
+value
+cgen_float_of_int8(value x) {
   CAMLparam1(x);
   CAMLlocal1(f);
   f = caml_copy_double((double)((int8_t)Int_val(x)));
   CAMLreturn(f);
 }
 
-value cgen_float_of_int16(value x) {
+value
+cgen_float_of_int16(value x) {
   CAMLparam1(x);
   CAMLlocal1(f);
   f = caml_copy_double((double)((int16_t)Int_val(x)));
   CAMLreturn(f);
 }
 
-value cgen_float_of_int32(value x) {
+value
+cgen_float_of_int32(value x) {
   CAMLparam1(x);
   CAMLlocal1(f);
   f = caml_copy_double((double)((int32_t)Int32_val(x)));
   CAMLreturn(f);
 }
 
-value cgen_float_of_int64(value x) {
+value
+cgen_float_of_int64(value x) {
   CAMLparam1(x);
   CAMLlocal1(f);
   f = caml_copy_double((double)((int64_t)Int64_val(x)));
   CAMLreturn(f);
 }
 
-value cgen_float_of_uint8(value x) {
+value
+cgen_float_of_uint8(value x) {
   CAMLparam1(x);
   CAMLlocal1(f);
   f = caml_copy_double((double)((uint8_t)Int_val(x)));
   CAMLreturn(f);
 }
 
-value cgen_float_of_uint16(value x) {
+value
+cgen_float_of_uint16(value x) {
   CAMLparam1(x);
   CAMLlocal1(f);
   f = caml_copy_double((double)((uint16_t)Int_val(x)));
   CAMLreturn(f);
 }
 
-value cgen_float_of_uint32(value x) {
+value
+cgen_float_of_uint32(value x) {
   CAMLparam1(x);
   CAMLlocal1(f);
   f = caml_copy_double((double)((uint32_t)Int32_val(x)));
   CAMLreturn(f);
 }
 
-value cgen_float_of_uint64(value x) {
+value
+cgen_float_of_uint64(value x) {
   CAMLparam1(x);
   CAMLlocal1(f);
   f = caml_copy_double((double)((uint64_t)Int64_val(x)));
   CAMLreturn(f);
 }
 
-value cgen_float_is_unordered(value x, value y) {
+value
+cgen_float_is_unordered(value x, value y) {
   return Val_bool(isunordered(Double_val(x), Double_val(y)));
 }
 
-value cgen_float_is_ordered(value x, value y) {
+value
+cgen_float_is_ordered(value x, value y) {
   return Val_bool(!isunordered(Double_val(x), Double_val(y)));
 }
diff --git a/src/lib/hashmap.ml b/src/lib/hashmap.ml
index 297f6267..b069c846 100644
--- a/src/lib/hashmap.ml
+++ b/src/lib/hashmap.ml
@@ -4,8 +4,8 @@ open Regular.Std
 module Make(K : Hashtbl.Key) = struct
   module M = Map.Make(K)
   module P = Patricia_tree.Make(struct
-      include Int63
-      let size = 63
+      include Int
+      let size = Sys.int_size_in_bits
     end)
 
   type +'a t = 'a M.t P.t
@@ -21,10 +21,8 @@ module Make(K : Hashtbl.Key) = struct
   let empty = P.empty
   let is_empty = P.is_empty
 
-  let hash' k = Int63.of_int @@ K.hash k
-
   let find_exn t k =
-    hash' k |> P.find_exn t |>
+    K.hash k |> P.find_exn t |>
     Fn.flip Map.find k |> function
     | None -> raise Not_found
     | Some v -> v
@@ -35,17 +33,17 @@ module Make(K : Hashtbl.Key) = struct
   let mem t k = try ignore (find_exn t k); true with
     | Not_found -> false
 
-  let singleton k v = P.singleton (hash' k) (M.singleton k v)
+  let singleton k v = P.singleton (K.hash k) (M.singleton k v)
 
-  let set t ~key ~data = P.update t (hash' key) ~f:(function
+  let set t ~key ~data = P.update t (K.hash key) ~f:(function
       | Some m -> Map.set m ~key ~data
       | None -> M.singleton key data)
 
-  let add_multi t ~key ~data = P.update t (hash' key) ~f:(function
+  let add_multi t ~key ~data = P.update t (K.hash key) ~f:(function
       | Some m -> Map.add_multi m ~key ~data
       | None -> M.singleton key [data])
 
-  let add_exn t ~key ~data = P.update t (hash' key) ~f:(function
+  let add_exn t ~key ~data = P.update t (K.hash key) ~f:(function
       | None -> M.singleton key data
       | Some m -> match Map.add m ~key ~data with
         | `Duplicate -> raise Duplicate
@@ -54,23 +52,23 @@ module Make(K : Hashtbl.Key) = struct
   let add t ~key ~data = try `Ok (add_exn t ~key ~data) with
     | Duplicate -> `Duplicate
 
-  let remove t k = P.change t (hash' k)
+  let remove t k = P.change t (K.hash k)
       ~f:(Option.bind ~f:(fun m ->
           let m' = Map.remove m k in
           Option.some_if (not @@ Map.is_empty m') m'))
 
-  let update t k ~f = P.update t (hash' k) ~f:(function
+  let update t k ~f = P.update t (K.hash k) ~f:(function
       | None -> M.singleton k @@ f None
       | Some m -> Map.update m k ~f)
 
   let update_with t k ~has ~nil =
-    P.update_with t (hash' k)
+    P.update_with t (K.hash k)
       ~nil:(fun () -> M.singleton k @@ nil ())
       ~has:(fun m -> Map.update m k ~f:(function
           | Some v -> has v
           | None -> nil ()))
 
-  let change t k ~f = P.change t (hash' k) ~f:(function
+  let change t k ~f = P.change t (K.hash k) ~f:(function
       | None -> f None |> Option.map ~f:(fun v -> M.singleton k v)
       | Some m ->
         let m' = Map.change m k ~f in
diff --git a/src/lib/interval_tree.ml b/src/lib/interval_tree.ml
new file mode 100644
index 00000000..ac48b0b9
--- /dev/null
+++ b/src/lib/interval_tree.ml
@@ -0,0 +1,270 @@
+(* Implementation borrowed from BAP *)
+
+open Core
+open Regular.Std
+open Interval_tree_intf
+
+module Make(I : Interval) = struct
+  type key = I.t [@@deriving compare, sexp]
+  type point = I.point [@@deriving compare, sexp]
+
+  module P = Comparable.Make_plain(struct
+      type t = point [@@deriving compare, sexp]
+    end)
+
+  type +'a node = {
+    rhs      : 'a t;
+    lhs      : 'a t;
+    key      : key;
+    data     : 'a;
+    height   : int;
+    greatest : point;
+    least    : point;
+  } [@@deriving fields, sexp]
+
+  and +'a t = 'a node option [@@deriving sexp]
+
+  let height = Option.value_map ~default:0 ~f:height
+  let least = Option.map ~f:least
+  let greatest = Option.map ~f:greatest
+  let empty = None
+
+  let bound f lhs top rhs =
+    Option.merge lhs rhs ~f |>
+    Option.value_map ~default:top ~f:(f top)
+
+  let create lhs key data rhs =
+    let hl = height lhs   and hr = height rhs in
+    let mn = I.lower key  and mx = I.upper key in
+    let ll = least lhs    and lr = least rhs in
+    let gl = greatest lhs and gr = greatest rhs in
+    Some {
+      lhs; rhs; key; data;
+      height = max hl hr + 1;
+      least = bound P.min ll mn lr;
+      greatest = bound P.max gl mx gr;
+    }
+
+  let singleton key data = create None key data None
+
+  let rec min_binding = function
+    | Some {lhs = None; key; data; _} -> Some (key, data)
+    | Some {lhs; _} -> min_binding lhs
+    | None -> None
+
+  let rec max_binding = function
+    | Some {rhs = None; key; data; _} -> Some (key, data)
+    | Some {rhs; _} -> max_binding rhs
+    | None -> None
+
+  let bal l x d r =
+    let hl, hr = height l, height r in
+    if hl > hr + 2 then match l with
+      | None -> assert false
+      | Some t when height t.lhs >= height t.rhs ->
+        create t.lhs t.key t.data (create t.rhs x d r)
+      | Some t -> match t.rhs with
+        | None -> assert false
+        | Some rhs ->
+          create (create t.lhs t.key t.data rhs.lhs)
+            rhs.key rhs.data (create rhs.rhs x d r)
+    else if hr > hl + 2 then match r with
+      | None -> assert false
+      | Some t when height t.rhs >= height t.lhs ->
+        create (create l x d t.lhs) t.key t.data t.rhs
+      | Some t -> match t.lhs with
+        | None -> assert false
+        | Some lhs ->
+          create (create l x d lhs.lhs) lhs.key lhs.data
+            (create lhs.rhs t.key t.data t.rhs)
+    else create l x d r
+
+  let rec add map key data = match map with
+    | None -> singleton key data
+    | Some t ->
+      let c = I.compare key t.key in
+      if c = 0 then bal map key data None
+      else if c < 0
+      then bal (add t.lhs key data) t.key t.data t.rhs
+      else bal t.lhs t.key t.data (add t.rhs key data)
+
+  let is_inside key p =
+    let low = I.lower key and high = I.upper key in
+    P.between ~low ~high p
+
+  let lookup start a =
+    let open Seq.Generator in
+    let rec go = function
+      | None -> return ()
+      | Some t when P.(a < t.least || a > t.greatest) -> return ()
+      | Some t ->
+        if is_inside t.key a then
+          go t.lhs >>= fun () ->
+          yield (t.key, t.data) >>= fun () ->
+          go t.rhs
+        else
+          go t.lhs >>= fun () ->
+          go t.rhs in
+    start |> go |> run
+
+  let is_dominated t r =
+    let open I in
+    P.(lower r >= lower t.key && upper r <= upper t.key)
+
+  let has_intersections t r =
+    let open I in
+    let open P in
+    let p, q = lower t.key, upper t.key
+    and x, y = lower r, upper r in
+    if p <= x
+    then x <= q && p <= y
+    else p <= y && x <= q
+
+  let can't_be_in_tree t r =
+    let open I in
+    P.(lower r > t.greatest || upper r < t.least)
+
+  let can't_be_dominated t r =
+    let open I in
+    P.(lower r < t.least || upper r > t.greatest)
+
+  let query ~skip_if ~take_if start r =
+    let open Seq.Generator in
+    let rec go = function
+      | None -> return ()
+      | Some t when skip_if t r -> return ()
+      | Some t when take_if t r ->
+        go t.lhs >>= fun () ->
+        yield (t.key, t.data) >>= fun () ->
+        go t.rhs
+      | Some t ->
+        go t.lhs >>= fun () ->
+        go t.rhs in
+    start |> go |> run
+
+  let dominators m r = query m r
+      ~skip_if:can't_be_dominated
+      ~take_if:is_dominated
+
+  let intersections m r = query m r
+      ~skip_if:can't_be_in_tree
+      ~take_if:has_intersections
+
+  let dominates m r = not (Seq.is_empty (dominators m r))
+  let intersects m r = not (Seq.is_empty (intersections m r))
+  let contains m a = not (Seq.is_empty (lookup m a))
+
+  let[@tail_mod_cons] rec map m ~f = match m with
+    | None -> None
+    | Some m -> Some {
+        m with
+        lhs = (map[@tailcall false]) m.lhs ~f;
+        data = f m.data;
+        rhs = (map[@tailcall]) m.rhs ~f;
+      }
+
+  let[@tail_mod_cons] rec mapi m ~f = match m with
+    | None -> None
+    | Some m -> Some {
+        m with
+        lhs = (mapi[@tailcall false]) m.lhs ~f;
+        data = f m.key m.data;
+        rhs = (mapi[@tailcall]) m.rhs ~f;
+      }
+
+  let rec remove_min_binding = function
+    | Some {lhs = None; rhs; _} -> rhs
+    | Some t -> bal (remove_min_binding t.lhs) t.key t.data t.rhs
+    | None -> assert false
+
+  let splice t1 t2 = match t1,t2 with
+    | None, t | t, None -> t
+    | _ -> match min_binding t2 with
+      | Some (key, data) -> bal t1 key data (remove_min_binding t2)
+      | None -> assert false
+
+  let mem_equal x y =
+    let open I in
+    P.(lower x = lower y && upper x = upper y)
+
+  let rec remove map mem = match map with
+    | None -> None
+    | Some t when mem_equal t.key mem ->
+      splice (remove t.lhs mem) (remove t.rhs mem)
+    | Some t -> match I.compare mem t.key with
+      | 1 -> bal t.lhs t.key t.data (remove t.rhs mem)
+      | _ -> bal (remove t.lhs mem) t.key t.data t.rhs
+
+  let remove_if ~leave_if ~remove_if map mem =
+    let rec remove = function
+      | None -> None
+      | Some t when leave_if t mem -> Some t
+      | Some t when remove_if t mem ->
+        splice (remove t.lhs) (remove t.rhs)
+      | Some t ->
+        bal (remove t.lhs) t.key t.data (remove t.rhs) in
+    remove map
+
+  let remove_intersections map mem = remove_if map mem
+      ~leave_if:can't_be_in_tree
+      ~remove_if:has_intersections
+
+  let remove_dominators map mem = remove_if map mem
+      ~leave_if:can't_be_dominated
+      ~remove_if:is_dominated
+
+  let filter_mapi map ~f =
+    let rec fmap = function
+      | None -> None
+      | Some t -> match f t.key t.data with
+        | None -> splice (fmap t.lhs) (fmap t.rhs)
+        | Some data ->
+          bal (fmap t.lhs) t.key data (fmap t.rhs) in
+    fmap map
+
+  let filter_map map ~f = filter_mapi map ~f:(fun _ x -> f x)
+  let filter map ~f : _ t = filter_map map ~f:(fun x -> Option.some_if (f x) x)
+
+  let to_sequence start =
+    let open Seq.Generator in
+    let rec go = function
+      | None -> return ()
+      | Some t ->
+        go t.lhs >>= fun () ->
+        yield (t.key, t.data) >>= fun () ->
+        go t.rhs in
+    start |> go |> run
+
+  module C = Container.Make(struct
+      type 'a t = 'a node option
+
+      let rec fold m ~init ~f = Option.fold m ~init:init ~f:(fun acc m ->
+          fold m.rhs ~init:(f (fold m.lhs ~init:acc ~f) m.data) ~f)
+
+      let rec iter m ~f = Option.iter m ~f:(fun m ->
+          iter m.lhs ~f;
+          f m.data;
+          iter m.rhs ~f)
+
+      let iter  = `Custom iter
+      let length = `Define_using_fold
+    end)
+
+  let fold = C.fold
+  let count = C.count
+  let sum = C.sum
+  let iter = C.iter
+  let length = C.length
+  let is_empty = C.is_empty
+  let exists = C.exists
+  let mem = C.mem
+  let for_all = C.for_all
+  let find_map = C.find_map
+  let find = C.find
+  let to_list = C.to_list
+  let to_array = C.to_array
+  let min_elt = C.min_elt
+  let max_elt = C.max_elt
+  let fold_until = C.fold_until
+  let fold_result = C.fold_result
+end
diff --git a/src/lib/interval_tree.mli b/src/lib/interval_tree.mli
new file mode 100644
index 00000000..7fde5076
--- /dev/null
+++ b/src/lib/interval_tree.mli
@@ -0,0 +1,5 @@
+open Interval_tree_intf
+
+module Make(I : Interval) : S
+  with type key := I.t
+   and type point := I.point
diff --git a/src/lib/interval_tree_intf.ml b/src/lib/interval_tree_intf.ml
new file mode 100644
index 00000000..c59e7701
--- /dev/null
+++ b/src/lib/interval_tree_intf.ml
@@ -0,0 +1,40 @@
+open Core
+open Regular.Std
+
+module type Interval = sig
+  type t [@@deriving compare, sexp]
+  type point [@@deriving compare, sexp]
+  val lower : t -> point
+  val upper : t -> point
+end
+
+module type S = sig
+  type 'a t [@@deriving sexp_of]
+  type key
+  type point
+
+  val empty : 'a t
+  val singleton : key -> 'a -> 'a t
+  val least : 'a t -> point option
+  val greatest : 'a t -> point option
+  val min_binding : 'a t -> (key * 'a) option
+  val max_binding : 'a t -> (key * 'a) option
+  val add : 'a t -> key -> 'a -> 'a t
+  val dominators : 'a t -> key -> (key * 'a) seq
+  val intersections : 'a t -> key -> (key * 'a) seq
+  val intersects : 'a t -> key -> bool
+  val dominates : 'a t -> key -> bool
+  val contains : 'a t -> point -> bool
+  val lookup : 'a t -> point -> (key * 'a) seq
+  val map : 'a t -> f:('a -> 'b) -> 'b t
+  val mapi : 'a t -> f:(key -> 'a -> 'b) -> 'b t
+  val filter : 'a t -> f:('a -> bool) -> 'a t
+  val filter_map : 'a t -> f:('a -> 'b option) -> 'b t
+  val filter_mapi : 'a t -> f:(key -> 'a -> 'b option) -> 'b t
+  val remove : 'a t -> key -> 'a t
+  val remove_intersections : 'a t -> key -> 'a t
+  val remove_dominators : 'a t -> key -> 'a t
+  val to_sequence : 'a t -> (key * 'a) seq
+
+  include Container.S1 with type 'a t := 'a t
+end
diff --git a/src/lib/isel/isel.ml b/src/lib/isel/isel.ml
index 15b45f8c..93a169ec 100644
--- a/src/lib/isel/isel.ml
+++ b/src/lib/isel/isel.ml
@@ -34,12 +34,12 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
       fn;
       node = Vec.create ();
       typs = Vec.create ();
+      id2r = Vec.create ();
       cfg;
       dom = Semi_nca.compute (module Cfg) cfg Label.pseudoentry;
       rpo = init_rpo cfg;
       blks = Func.map_of_blks fn;
       v2id = Var.Table.create ();
-      id2r = Id.Table.create ();
       insn = Label.Table.create ();
       extra = Label.Table.create ();
       frame = needs_stack_frame fn;
diff --git a/src/lib/isel/isel_builder.ml b/src/lib/isel/isel_builder.ml
index d56bf05b..4b7512ff 100644
--- a/src/lib/isel/isel_builder.ml
+++ b/src/lib/isel/isel_builder.ml
@@ -21,20 +21,14 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
         "In Isel_builder.reg: invalid register %s in function $%s"
         r (Func.name t.fn) ()
 
-  let var t x = match Hashtbl.find t.v2id x with
+  let var t x = match getvar t x with
     | Some id -> !!id
     | None ->
       C.failf
         "In Isel_builder.var: unbound variable %a in function $%s"
         Var.pp x (Func.name t.fn) ()
 
-  let new_var t x ty = Hashtbl.find_or_add t.v2id x ~default:(fun () ->
-      let v = Rv.var (regcls ty) x in
-      let id = new_node ~ty t @@ Rv v in
-      Hashtbl.set t.v2id ~key:x ~data:id;
-      Hashtbl.set t.id2r ~key:id ~data:v;
-      id)
-
+  let newvar = newvar ~f:Rv.var
   let word = (Target.word M.target :> ty)
 
   let typeof_operand t : Virtual.operand -> ty C.t = function
@@ -122,7 +116,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
       | src -> src in
     let emit ?i dst src =
       let+ ty, id = operand' t @@ rewrite src in
-      let n = N (Omove, [new_var t dst ty; id]) in
+      let n = N (Omove, [newvar t dst ty; id]) in
       ignore @@ new_node ~l t n;
       Option.iter i ~f:(fun i -> status.(i) <- `moved) in
     let rec move_one i =
@@ -168,7 +162,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
          moves to be inserted into. *)
       let* l', ld' = if br then
           let+ l' = C.Label.fresh in
-          Hashtbl.add_multi t.extra ~key:l ~data:l';
+          addextra t l l';
           l', l'
         else !!(l, ld) in
       let+ () = windmill t l' moves in
@@ -189,17 +183,38 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
     | #Virtual.global as g -> global t g
     | #Virtual.local as loc -> local ~br t l loc >>| snd
 
+  let eval_binop o a b = match a, b with
+    | `int (a, _), `int (b, _) ->
+      (Eval.binop_int o a b :> Virtual.const option)
+    | `float a, `float b ->
+      (Eval.binop_single o a b :> Virtual.const option)
+    | `double a, `double b ->
+      (Eval.binop_double o a b :> Virtual.const option)
+    | _ -> None
+
+  let eval_unop o = function
+    | `int (a, ty) ->
+      (Eval.unop_int o a ty :> Virtual.const option)
+    | `float a ->
+      (Eval.unop_single o a :> Virtual.const option)
+    | `double a ->
+      (Eval.unop_double o a :> Virtual.const option)
+    | _ -> None
+
   let binop t l x o a b =
-    let* a = operand t a in
-    let+ b = operand t b in
-    let n = N (Obinop o, [a; b]) in
     let ty = infer_ty_binop o in
-    let id = new_node ~ty t n in
+    let+ id = match eval_binop o a b with
+      | Some c -> !!(constant t c)
+      | None ->
+        let* a = operand t a in
+        let+ b = operand t b in
+        let n = N (Obinop o, [a; b]) in
+        new_node ~ty t n in
     let r = Rv.var (regcls ty) x in
     let rid = new_node ~ty t @@ Rv r in
     ignore @@ new_node ~l t @@ N (Omove, [rid; id]);
-    Hashtbl.set t.v2id ~key:x ~data:id;
-    Hashtbl.set t.id2r ~key:id ~data:r
+    setvar t x id;
+    setrv t id r
 
   let unop t l x o a =
     let* () = match o with
@@ -208,19 +223,20 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
           "In Isel_builder.unop: uitof is not supported by target %a"
           Target.pp M.target ()
       | _ -> !!() in
-    let+ a = operand t a in
     let ty = infer_ty_unop o in
-    (* Copy propagation *)
-    let id = match o with
-      | `copy _ -> a
-      | _ ->
-        let n = N (Ounop o, [a]) in
-        new_node ~ty t n in
+    let+ id = match eval_unop o a with
+      | Some c -> !!(constant t c)
+      | None -> match o with
+        | `copy _ -> operand t a (* copy propagation *)
+        | _ ->
+          let+ a = operand t a in
+          let n = N (Ounop o, [a]) in
+          new_node ~ty t n in
     let r = Rv.var (regcls ty) x in
     let rid = new_node ~ty t @@ Rv r in
     ignore @@ new_node ~l t @@ N (Omove, [rid; id]);
-    Hashtbl.set t.v2id ~key:x ~data:id;
-    Hashtbl.set t.id2r ~key:id ~data:r
+    setvar t x id;
+    setrv t id r
 
   let sel t l x ty c y n =
     let* c = var t c in
@@ -232,8 +248,8 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
     let r = Rv.var (regcls ty) x in
     let rid = new_node ~ty t @@ Rv r in
     ignore @@ new_node ~l t @@ N (Omove, [rid; id]);
-    Hashtbl.set t.v2id ~key:x ~data:id;
-    Hashtbl.set t.id2r ~key:id ~data:r
+    setvar t x id;
+    setrv t id r
 
   let call_args_stack_size t l f args =
     C.List.fold args ~init:0 ~f:(fun sz -> function
@@ -282,7 +298,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
     let+ r = reg t r in
     let ty = (ty :> ty) in
     let rid = new_node ~ty t @@ Rv (Rv.reg r) in
-    let xid = new_var t x ty in
+    let xid = newvar t x ty in
     let n = N (Omove, [xid; rid]) in
     ignore @@ new_node ~l t n
 
@@ -317,7 +333,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
     let ty' = (ty :> ty) in
     let n = N (Oload ty, [a]) in
     let lid = new_node ~ty:ty' t n in
-    let vid = new_var t x ty' in
+    let vid = newvar t x ty' in
     (* TODO: see if we can do a pessimistic alias analysis to forward
        the `Oload` node where this var appears, where possible. *)
     ignore @@ new_node ~l t @@ N (Omove, [vid; lid])
@@ -333,7 +349,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
     let+ r = reg t r in
     let ty = (ty :> ty) in
     let rid = new_node ~ty t @@ Rv (Rv.reg r) in
-    let xid = new_var t x ty in
+    let xid = newvar t x ty in
     let n = N (Omove, [xid; rid]) in
     ignore @@ new_node ~l t n
 
@@ -356,7 +372,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
   let stkargs t l x =
     assert t.frame;
     let rid = new_node ~ty:word t @@ Rv (Rv.reg R.fp) in
-    let xid = new_var t x word in
+    let xid = newvar t x word in
     let w = Target.word M.target in
     let off = Bv.(int M.stack_args_offset mod modulus (Type.sizeof_imm_base w)) in
     let oid = new_node ~ty:word t @@ N (Oint (off, (w :> Type.imm)), []) in
@@ -402,7 +418,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
   let sw t l ty i d tbl =
     let ty' = (ty :> ty) in
     let i = match i with
-      | `var x -> new_var t x ty'
+      | `var x -> newvar t x ty'
       | `sym (s, o) -> new_node ~ty:ty' t @@ N (Osym (s, o), []) in
     let* d, _ = local ~br:true t l d in
     let+ tbl =
@@ -423,7 +439,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
     let+ r = reg t r in
     let ty = (ty :> ty) in
     let rid = new_node ~ty t @@ Rv (Rv.reg r) in
-    let xid = new_var t x ty in
+    let xid = newvar t x ty in
     ignore @@ new_node ~l t @@ N (Omove, [xid; rid])
 
   let stkparam t l (x, o, ty) =
@@ -431,7 +447,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
     let w = Target.word M.target in
     let wi = (w :> Type.imm) in
     let wb = (w :> Type.basic) in
-    let xid = new_var t x ty' in
+    let xid = newvar t x ty' in
     (* Use the frame pointer. It will make our lives much easier. *)
     let rid = new_node ~ty:word t @@ Rv (Rv.reg R.fp) in
     let o' = o + M.stack_args_offset in
@@ -449,7 +465,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
      at that point we will have more information on how to lay out the stack.
   *)
   let slot t _l s =
-    let _sid = new_var t (Slot.var s) word in
+    let _sid = newvar t (Slot.var s) word in
     ()
 
   let step t l = match Label.Tree.find t.blks l with
diff --git a/src/lib/isel/isel_common.ml b/src/lib/isel/isel_common.ml
index 55db348d..654617dc 100644
--- a/src/lib/isel/isel_common.ml
+++ b/src/lib/isel/isel_common.ml
@@ -23,12 +23,12 @@ type 'r t = {
   fn    : func;
   node  : 'r node Vec.t;
   typs  : ty Uopt.t Vec.t;
+  id2r  : 'r Uopt.t Vec.t;
   cfg   : Cfg.t;
   dom   : Label.t Semi_nca.tree;
   rpo   : Label.t -> int;
   blks  : blk Label.Tree.t;
   v2id  : Id.t Var.Table.t;
-  id2r  : 'r Id.Table.t;
   insn  : Id.t list Label.Table.t;
   extra : Label.t list Label.Table.t;
   frame : bool;
@@ -41,12 +41,24 @@ let new_node ?l ?ty t n : Id.t =
   assert (id = Vec.length t.typs);
   Vec.push t.node n;
   Vec.push t.typs @@ Uopt.of_option ty;
+  Vec.push t.id2r Uopt.none;
   Option.iter l ~f:(fun key ->
       Hashtbl.add_multi t.insn ~key ~data:id);
   id
 
 let node t id = Vec.get_exn t.node id
 let typeof t id = Uopt.to_option @@ Vec.get_exn t.typs id
+let setvar t x id = Hashtbl.set t.v2id ~key:x ~data:id
+let getvar t x = Hashtbl.find t.v2id x
+let setrv t id r = Vec.set_exn t.id2r id @@ Uopt.some r
+let getrv t id = Uopt.to_option @@ Vec.get_exn t.id2r id
+let addextra t key data = Hashtbl.add_multi t.extra ~key ~data
+
+let newvar ~f t x ty = Hashtbl.find_or_add t.v2id x ~default:(fun () ->
+    let v = f (regcls ty) x in
+    let id = new_node ~ty t @@ Rv v in
+    setrv t id v;
+    id)
 
 let rec pp_node t ppr ppf id = match node t id with
   | Rv r -> Format.fprintf ppf "%a" ppr r
diff --git a/src/lib/isel/isel_match.ml b/src/lib/isel/isel_match.ml
index 2984d7a0..32703a2c 100644
--- a/src/lib/isel/isel_match.ml
+++ b/src/lib/isel/isel_match.ml
@@ -16,6 +16,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
   let wordi = (word :> Type.imm)
   let wordb = (word :> Type.basic)
 
+  type subst = Rv.t S.t
   type rule = (Rv.t, M.Insn.t) R.t
   type callback = (Rv.t, M.Insn.t) R.callback
 
@@ -65,18 +66,23 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
     (* XXX: don't try this at home! The representations are exactly the
        same, but we need to erase the type constraints on the input. *)
     let pats : (Matcher.pat * callback list) list = Obj.magic rules in
-    let prog = Matcher.compile pats ~commute:true in
+    let name = Format.asprintf "%a-isel" Target.pp M.target in
+    let prog = Matcher.compile ~name pats ~commute:true in
     prog, VM.create ()
 
   (* Translate a substitution we got from the matcher
      into one that our rule callbacks can understand. *)
-  let map_subst_terms t (s : Matcher.subst) : Rv.t S.t =
+  let map_subst_terms t (s : Matcher.subst) : subst =
     let open S in
     let regvar id r = match typeof t id with
       | Some (#Type.basic as ty) -> Regvar (r, ty)
       | Some `v128 -> Regvar_v r
       | Some `flag -> Regvar (r, wordb)
-      | None -> raise_notrace Mismatch in
+      | None ->
+        Logs.debug (fun m ->
+            m "%s: no regvar for term %d: %a%!"
+              __FUNCTION__ id (pp_node t) id);
+        raise_notrace Mismatch in
     Map.map s ~f:(fun id ->
         let tm = match node t id with
           | N (Oaddr a, []) -> Imm (a, wordi)
@@ -89,9 +95,14 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
           | Callargs rs -> Callargs rs
           | Tbl (d, tbl) -> Table (d, tbl)
           | Rv r -> regvar id r
-          | N _ -> match Hashtbl.find t.id2r id with
-            | None -> raise_notrace Mismatch
-            | Some r -> regvar id r in
+          | N _ -> match getrv t id with
+            | Some r -> regvar id r
+            | None ->
+              Logs.debug (fun m ->
+                  m "%s: no regvar for term %d: %a%!"
+                    __FUNCTION__ id (pp_node t) id);
+              raise_notrace Mismatch
+        in
         S.{id; tm})
 
   let fail_init_matcher t l id =
@@ -112,13 +123,19 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
      result of computing ?y because future instructions may want to match on
      it.
   *)
-  let check_blank_move s n (p : Matcher.pat) = match n, p with
+  let check_blank_move t (s : subst) id (p : Matcher.pat) =
+    match node t id, p with
     | N (Omove, [_; _]),
       P (Omove, [V x; V y]) ->
       begin match Map.(find s x, find s y) with
         | Some x, Some y
-          when x.S.id = y.S.id
+          when x.id = y.id
             || S.equal_term Rv.equal x.tm y.tm ->
+          Logs.debug (fun m ->
+              m "%s: term %d: blank move to x=%d: %a from y=%d: %a%!"
+                __FUNCTION__ id
+                x.id (pp_node t) x.id
+                y.id (pp_node t) y.id);
           raise_notrace Mismatch
         | _ -> ()
       end
@@ -129,13 +146,23 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
     let* () = C.unless init @@ fail_init_matcher t l id in
     let rec loop () = match VM.one vm prog with
       | None -> !!None
-      | Some y ->
-        try
+      | Some y -> try
+          Logs.debug (fun m ->
+              let s = Map.to_alist @@ Y.subst y in
+              m "%s: insn %a, term %d, yielded:\n  rule: %d\n  subst: %s\n  pat: %a%!"
+                __FUNCTION__ Label.pp l id (Y.rule y)
+                (List.to_string s ~f:(fun (x, id) ->
+                     Format.asprintf "%s=%d" x id))
+                Matcher.pp_pat (Y.pat y));
           let s = map_subst_terms t @@ Y.subst y in
-          check_blank_move s (node t id) @@ Y.pat y;
+          check_blank_move t s id @@ Y.pat y;
           R.try_ s (Y.payload y) >>= function
           | Some _ as is -> !!is
-          | None -> loop ()
+          | None ->
+            Logs.debug (fun m ->
+                m "%s: no callbacks succeeded, looping again%!"
+                  __FUNCTION__);
+            loop ()
         with Mismatch -> loop () in
     loop () >>= function
     | None -> fail_match t l id ()
@@ -200,8 +227,8 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
     >>| List.dedup_and_sort ~compare:M.Reg.compare
 
   let run t =
-    let* blks = transl_blks t in
     let* rets = transl_rets t in
+    let* blks = transl_blks t in
     let dict = Func.dict t.fn in
     let dict = if not t.frame then dict
       else Dict.set dict Pseudo.Func.Tag.needs_stack_frame () in
diff --git a/src/lib/machine/machine.ml b/src/lib/machine/machine.ml
index 65729865..df1d9ca0 100644
--- a/src/lib/machine/machine.ml
+++ b/src/lib/machine/machine.ml
@@ -1 +1,11 @@
+open Core
+
 module X86 = X86
+
+let targets = [
+  X86.Amd64_sysv.target;
+]
+
+let force_initialization () =
+  Logs.debug (fun m -> m "forcing initialization of targets%!");
+  ignore (Sys.opaque_identity targets)
diff --git a/src/lib/machine/machine.mli b/src/lib/machine/machine.mli
index d842c8f8..3a890dbd 100644
--- a/src/lib/machine/machine.mli
+++ b/src/lib/machine/machine.mli
@@ -7,3 +7,6 @@ module X86 : sig
     val target : Target.t
   end
 end
+
+(** Ensures that all targets are registered. *)
+val force_initialization : unit -> unit
diff --git a/src/lib/machine/x86/x86_amd64_common.ml b/src/lib/machine/x86/x86_amd64_common.ml
index 643228ab..828d8008 100644
--- a/src/lib/machine/x86/x86_amd64_common.ml
+++ b/src/lib/machine/x86/x86_amd64_common.ml
@@ -412,11 +412,13 @@ module Insn = struct
     | CMOVcc of cc
     | CMP
     | CVTSD2SI
+    | CVTTSD2SI
     | CVTSD2SS
     | CVTSI2SD
     | CVTSI2SS
     | CVTSS2SD
     | CVTSS2SI
+    | CVTTSS2SI
     | DIVSD
     | DIVSS
     | IMUL2
@@ -463,11 +465,13 @@ module Insn = struct
     | CMOVcc cc -> Format.fprintf ppf "cmov%a" pp_cc cc
     | CMP -> Format.fprintf ppf "cmp"
     | CVTSD2SI -> Format.fprintf ppf "cvtsd2si"
+    | CVTTSD2SI -> Format.fprintf ppf "cvttsd2si"
     | CVTSD2SS -> Format.fprintf ppf "cvtsd2ss"
     | CVTSI2SD -> Format.fprintf ppf "cvtsi2sd"
     | CVTSI2SS -> Format.fprintf ppf "cvtsi2ss"
     | CVTSS2SD -> Format.fprintf ppf "cvtss2sd"
     | CVTSS2SI -> Format.fprintf ppf "cvtss2si"
+    | CVTTSS2SI -> Format.fprintf ppf "cvttss2si"
     | DIVSD -> Format.fprintf ppf "divsd"
     | DIVSS -> Format.fprintf ppf "divss"
     | IMUL2 -> Format.fprintf ppf "imul"
@@ -608,107 +612,109 @@ module Insn = struct
     Regvar.Set.of_list @@
     List.map regs ~f:Regvar.reg
 
-  (* Registers read by an instruction. *)
-  let reads = function
-    | Two (op, a, b) ->
-      begin match op with
-        | ADD
-        | ADDSD
-        | ADDSS
-        | AND
-        | CMP
-        | DIVSD
-        | DIVSS
-        | IMUL2
-        | MULSD
-        | MULSS
-        | OR
-        | ROL
-        | ROR
-        | SAR
-        | SHL
-        | SHR
-        | SUB
-        | SUBSD
-        | SUBSS
-        | TEST_
-        | UCOMISD
-        | UCOMISS
-          -> rset [a; b]
-        | CMOVcc _
-          (* We introduce a dependency on `a` so that any previous
-             writes to it will be preserved.
-
-             This is morally equivalent to:
-
-             `x := cc ? b : a`
-          *)
-          ->
-          Set.union (rset' [`rflags])
-            (Set.union (rset_reg [a]) (rset [b]))
-        | BSF
-        | BSR
-        | CVTSD2SI
-        | CVTSD2SS
-        | CVTSI2SD
-        | CVTSI2SS
-        | CVTSS2SD
-        | CVTSS2SI
-        | LEA
-        | MOVSX
-        | MOVSXD
-        | MOVZX
-        | POPCNT
-          -> rset [b]
-        | MOV
-        | MOV_
-        | MOVD
-        | MOVDQA
-        | MOVQ
-        | MOVSD
-        | MOVSS
-          -> Set.union (rset_mem [a]) (rset [b])
-        | XOR
-        | XORPD
-        | XORPS ->
-          begin match a, b with
-            | Oreg (a, _), Oreg (b, _) when Regvar.(a = b)
-              (* Special case for XORing with self: this isn't really a use
-                 of the register, since we're just assigning 0. *)
-              -> Regvar.Set.empty
-            | _ -> rset [a; b]
-          end
+  let binop_reads op a b = match op with
+    | ADD
+    | ADDSD
+    | ADDSS
+    | AND
+    | CMP
+    | DIVSD
+    | DIVSS
+    | IMUL2
+    | MULSD
+    | MULSS
+    | OR
+    | ROL
+    | ROR
+    | SAR
+    | SHL
+    | SHR
+    | SUB
+    | SUBSD
+    | SUBSS
+    | TEST_
+    | UCOMISD
+    | UCOMISS
+      -> rset [a; b]
+    | CMOVcc _
+      (* We introduce a dependency on `a` so that any previous
+         writes to it will be preserved.
+
+         This is morally equivalent to:
+
+         `x := cc ? b : a`
+      *)
+      ->
+      Set.union (rset' [`rflags])
+        (Set.union (rset_reg [a]) (rset [b]))
+    | BSF
+    | BSR
+    | CVTSD2SI
+    | CVTTSD2SI
+    | CVTSD2SS
+    | CVTSI2SD
+    | CVTSI2SS
+    | CVTSS2SD
+    | CVTSS2SI
+    | CVTTSS2SI
+    | LEA
+    | MOVSX
+    | MOVSXD
+    | MOVZX
+    | POPCNT
+      -> rset [b]
+    | MOV
+    | MOV_
+    | MOVD
+    | MOVDQA
+    | MOVQ
+    | MOVSD
+    | MOVSS
+      -> Set.union (rset_mem [a]) (rset [b])
+    | XOR
+    | XORPD
+    | XORPS ->
+      begin match a, b with
+        | Oreg (a, _), Oreg (b, _) when Regvar.(a = b)
+          (* Special case for XORing with self: this isn't really a use
+             of the register, since we're just assigning 0. *)
+          -> Regvar.Set.empty
+        | _ -> rset [a; b]
       end
-    | One (op, a) ->
-      begin match op with
-        | CALL args ->
-          Set.union (rset' [`rsp]) @@
-          Set.union (rset_mem [a]) @@
-          Regvar.Set.of_list args
-        | DEC
-        | INC
-        | NEG
-        | NOT
-          -> rset [a]
-        | DIV
-        | IDIV ->
-          begin match a with
-            | Oreg (_, `i8)
-              -> Set.union (rset' [`rax]) (rset [a])
-            | _ 
-              -> Set.union (rset' [`rax; `rdx]) (rset [a])
-          end
-        | IMUL1
-        | MUL
+
+  let unop_reads op a = match op with
+    | CALL args ->
+      Set.union (rset' [`rsp]) @@
+      Set.union (rset [a]) @@
+      Regvar.Set.of_list args
+    | DEC
+    | INC
+    | NEG
+    | NOT
+      -> rset [a]
+    | DIV
+    | IDIV ->
+      begin match a with
+        | Oreg (_, `i8)
           -> Set.union (rset' [`rax]) (rset [a])
-        | SETcc _
-          (* SETcc will "read" the destination in the sense that
-             only the lower 8 bits are changed. *)
-          -> Set.union (rset' [`rflags]) (rset [a])
-        | POP
-        | PUSH
-          -> Set.union (rset' [`rsp]) (rset_mem [a])
+        | _
+          -> Set.union (rset' [`rax; `rdx]) (rset [a])
       end
+    | IMUL1
+    | MUL
+      -> Set.union (rset' [`rax]) (rset [a])
+    | SETcc _
+      (* SETcc will "read" the destination in the sense that
+         only the lower 8 bits are changed. *)
+      -> Set.union (rset' [`rflags]) (rset [a])
+    | POP
+    | PUSH
+      -> Set.union (rset' [`rsp]) (rset_mem [a])
+
+  (* Registers read by an instruction. *)
+  let reads = function
+    | Two (op, a, b) -> binop_reads op a b
+    | One (op, a) -> unop_reads op a
     | JMP (Jind a) -> rset [a]
     | CDQ
     | CQO
@@ -730,86 +736,88 @@ module Insn = struct
     | FP64 _
       -> Regvar.Set.empty
 
+  let binop_writes op a _b = match op with
+    | ADD
+    | AND
+    | BSF
+    | BSR
+    | IMUL2
+    | OR
+    | POPCNT
+    | ROL
+    | ROR
+    | SAR
+    | SHL
+    | SHR
+    | SUB
+    | XOR
+      -> Set.union (rset' [`rflags]) (rset_reg [a])
+    | ADDSD
+    | ADDSS
+    | CMOVcc _
+    | CVTSD2SI
+    | CVTTSD2SI
+    | CVTSD2SS
+    | CVTSI2SD
+    | CVTSI2SS
+    | CVTSS2SD
+    | CVTSS2SI
+    | CVTTSS2SI
+    | DIVSD
+    | DIVSS
+    | LEA
+    | MOV
+    | MOV_
+    | MOVD
+    | MOVDQA
+    | MOVQ
+    | MOVSD
+    | MOVSS
+    | MOVSX
+    | MOVSXD
+    | MOVZX
+    | MULSD
+    | MULSS
+    | SUBSD
+    | SUBSS
+    | XORPD
+    | XORPS
+      -> rset_reg [a]
+    | CMP
+    | TEST_
+    | UCOMISD
+    | UCOMISS
+      -> rset' [`rflags]
+
+  let unop_writes call op a = match op with
+    | DEC
+    | INC
+    | NEG
+    | NOT
+      -> Set.union (rset' [`rflags]) (rset_reg [a])
+    | SETcc _
+      -> rset_reg [a]
+    | CALL _
+      -> Set.union (rset' [`rsp; `rflags]) call
+    | PUSH
+      -> rset' [`rsp]
+    | DIV
+    | IDIV
+    | IMUL1
+    | MUL ->
+      begin match a with
+        | Oreg (_, `i8)
+          -> rset' [`rax; `rflags]
+        | _
+          -> rset' [`rax; `rdx; `rflags]
+      end
+    | POP
+      -> Set.union (rset' [`rsp]) (rset_reg [a])
+
   (* Registers written to by an instruction. *)
   let writes call = function
-    | Two (op, a, _) ->
-      begin match op with
-        | ADD
-        | AND
-        | BSF
-        | BSR
-        | IMUL2
-        | OR
-        | POPCNT
-        | ROL
-        | ROR
-        | SAR
-        | SHL
-        | SHR
-        | SUB
-        | XOR
-          -> Set.union (rset' [`rflags]) (rset_reg [a])
-        | ADDSD
-        | ADDSS
-        | CMOVcc _
-        | CVTSD2SI
-        | CVTSD2SS
-        | CVTSI2SD
-        | CVTSI2SS
-        | CVTSS2SD
-        | CVTSS2SI
-        | DIVSD
-        | DIVSS
-        | LEA
-        | MOV
-        | MOV_
-        | MOVD
-        | MOVDQA
-        | MOVQ
-        | MOVSD
-        | MOVSS
-        | MOVSX
-        | MOVSXD
-        | MOVZX
-        | MULSD
-        | MULSS
-        | SUBSD
-        | SUBSS
-        | XORPD
-        | XORPS
-          -> rset_reg [a]
-        | CMP
-        | TEST_
-        | UCOMISD
-        | UCOMISS
-          -> rset' [`rflags]
-      end
-    | One (op, a) ->
-      begin match op with
-        | DEC
-        | INC
-        | NEG
-        | NOT
-          -> Set.union (rset' [`rflags]) (rset_reg [a])
-        | SETcc _
-          -> rset_reg [a]
-        | CALL _
-          -> Set.union (rset' [`rsp; `rflags]) call
-        | PUSH
-          -> rset' [`rsp]
-        | DIV
-        | IDIV
-        | IMUL1
-        | MUL ->
-          begin match a with
-            | Oreg (_, `i8)
-              -> rset' [`rax; `rflags]
-            | _
-              -> rset' [`rax; `rdx; `rflags]
-          end
-        | POP
-          -> Set.union (rset' [`rsp]) (rset_reg [a])
-      end
+    | Two (op, a, b) -> binop_writes op a b
+    | One (op, a) -> unop_writes call op a
     | IMUL3 (a, _, _)
       -> Set.union (rset' [`rflags]) (rset_reg [a])
     | RET
@@ -837,78 +845,80 @@ module Insn = struct
     | Omem _ -> true
     | _ -> false
 
+  let binop_writes_to_memory op a _b = match op with
+    | ADD
+    | ADDSD
+    | ADDSS
+    | AND
+    | DIVSD
+    | DIVSS
+    | MOV
+    | MOV_
+    | MOVD
+    | MOVDQA
+    | MOVQ
+    | MOVSD
+    | MOVSS
+    | MULSD
+    | MULSS
+    | OR
+    | ROL
+    | ROR
+    | SAR
+    | SHL
+    | SHR
+    | SUB
+    | SUBSD
+    | SUBSS
+    | XOR
+      -> is_mem a
+    | BSF (* illegal *)
+    | BSR (* illegal *)
+    | CMOVcc _ (* illegal *)
+    | CMP
+    | CVTSD2SI (* illegal *)
+    | CVTTSD2SI (* illegal *)
+    | CVTSD2SS (* illegal *)
+    | CVTSI2SD (* illegal *)
+    | CVTSI2SS (* illegal *)
+    | CVTSS2SI (* illegal *)
+    | CVTTSS2SI (* illegal *)
+    | CVTSS2SD (* illegal *)
+    | IMUL2
+    | LEA  (* illegal *)
+    | MOVSX (* illegal *)
+    | MOVZX (* illegal *)
+    | MOVSXD (* illegal *)
+    | POPCNT (* illegal *)
+    | TEST_
+    | UCOMISD
+    | UCOMISS
+    | XORPD (* illegal *)
+    | XORPS (* illegal *)
+      -> false
+
+  let unop_writes_to_memory op a = match op with
+    | DEC
+    | INC
+    | NEG
+    | NOT
+    | POP
+    | SETcc _
+      -> is_mem a
+    | CALL _ (* writes to stack *)
+    | PUSH (* writes to stack *)
+      -> true
+    | DIV
+    | IDIV
+    | IMUL1
+    | MUL
+      -> false
+
   (* "illegal" here means that it is illegal to have a memory
      operand in the destination. *)
   let writes_to_memory = function
-    | Two (op, a, _) ->
-      begin match op with
-        | ADD
-        | ADDSD
-        | ADDSS
-        | AND
-        | DIVSD
-        | DIVSS
-        | MOV
-        | MOV_
-        | MOVD
-        | MOVDQA
-        | MOVQ
-        | MOVSD
-        | MOVSS
-        | MULSD
-        | MULSS
-        | OR
-        | ROL
-        | ROR
-        | SAR
-        | SHL
-        | SHR
-        | SUB
-        | SUBSD
-        | SUBSS
-        | XOR
-          -> is_mem a
-        | BSF (* illegal *)
-        | BSR (* illegal *)
-        | CMOVcc _ (* illegal *)
-        | CMP
-        | CVTSD2SI (* illegal *)
-        | CVTSD2SS (* illegal *)
-        | CVTSI2SD (* illegal *)
-        | CVTSI2SS (* illegal *)
-        | CVTSS2SI (* illegal *)
-        | CVTSS2SD (* illegal *)
-        | IMUL2
-        | LEA  (* illegal *)
-        | MOVSX (* illegal *)
-        | MOVZX (* illegal *)
-        | MOVSXD (* illegal *)
-        | POPCNT (* illegal *)
-        | TEST_
-        | UCOMISD
-        | UCOMISS
-        | XORPD (* illegal *)
-        | XORPS (* illegal *)
-          -> false
-      end
-    | One (op, a) ->
-      begin match op with
-        | DEC
-        | INC
-        | NEG
-        | NOT
-        | POP
-        | SETcc _
-          -> is_mem a
-        | CALL _ (* writes to stack *)
-        | PUSH (* writes to stack *)
-          -> true
-        | DIV
-        | IDIV
-        | IMUL1
-        | MUL
-          -> false
-      end
+    | Two (op, a, b) -> binop_writes_to_memory op a b
+    | One (op, a) -> unop_writes_to_memory op a
     | CDQ
     | CQO
     | CWD
@@ -963,11 +973,13 @@ module Insn = struct
     let cmov cc a b = Two (CMOVcc cc, a, b)
     let cmp a b = Two (CMP, a, b)
     let cvtsd2si a b = Two (CVTSD2SI, a, b)
+    let cvttsd2si a b = Two (CVTTSD2SI, a, b)
     let cvtsd2ss a b = Two (CVTSD2SS, a, b)
     let cvtsi2sd a b = Two (CVTSI2SD, a, b)
     let cvtsi2ss a b = Two (CVTSI2SS, a, b)
     let cvtss2sd a b = Two (CVTSS2SD, a, b)
     let cvtss2si a b = Two (CVTSS2SI, a, b)
+    let cvttss2si a b = Two (CVTTSS2SI, a, b)
     let divsd a b = Two (DIVSD, a, b)
     let divss a b = Two (DIVSS, a, b)
     let imul2 a b = Two (IMUL2, a, b)
diff --git a/src/lib/machine/x86/x86_amd64_emit.ml b/src/lib/machine/x86/x86_amd64_emit.ml
index 6182031f..e5fa631c 100644
--- a/src/lib/machine/x86/x86_amd64_emit.ml
+++ b/src/lib/machine/x86/x86_amd64_emit.ml
@@ -66,36 +66,35 @@ let emit_func ppf (name, lnk) =
   Format.fprintf ppf ".section %s\n" section;
   if Linkage.export lnk then global ppf name;
   Format.fprintf ppf ".p2align 4\n";
-  Format.fprintf ppf "%s:\n" name;
-  Format.fprintf ppf "  endbr64\n"
+  Format.fprintf ppf "%s:\n" name
 
 let emit_blk ppf (l : Label.t) =
   Format.fprintf ppf "%a:\n" label l
 
+let emit_reg t ppf r = match r, t with
+  | (#Reg.sse as r), (#Type.fp | `v128) ->
+    Format.fprintf ppf "%a" Reg.pp_sse r
+  | (#Reg.gpr as r), `i8 ->
+    Format.fprintf ppf "%a" Reg.pp_gpr8 r
+  | (#Reg.gpr as r), `i16 ->
+    Format.fprintf ppf "%a" Reg.pp_gpr16 r
+  | (#Reg.gpr as r), `i32 ->
+    Format.fprintf ppf "%a" Reg.pp_gpr32 r
+  | (#Reg.gpr as r), `i64 ->
+    Format.fprintf ppf "%a" Reg.pp_gpr r
+  | `rip, `i64 ->
+    Format.fprintf ppf "rip"
+  | _ ->
+    invalid_argf "invalid register/type combo: %s/%s"
+      (Format.asprintf "%a" Reg.pp r)
+      (match t with
+       | `v128 -> "v128"
+       | #Type.basic as t ->
+         Format.asprintf "%a" Type.pp_basic t)
+      ()
+
 let emit_regvar t ppf rv = match Regvar.which rv with
-  | First r ->
-    begin match r, t with
-      | (#Reg.sse as r), (#Type.fp | `v128) ->
-        Format.fprintf ppf "%a" Reg.pp_sse r
-      | (#Reg.gpr as r), `i8 ->
-        Format.fprintf ppf "%a" Reg.pp_gpr8 r
-      | (#Reg.gpr as r), `i16 ->
-        Format.fprintf ppf "%a" Reg.pp_gpr16 r
-      | (#Reg.gpr as r), `i32 ->
-        Format.fprintf ppf "%a" Reg.pp_gpr32 r
-      | (#Reg.gpr as r), `i64 ->
-        Format.fprintf ppf "%a" Reg.pp_gpr r
-      | `rip, `i64 ->
-        Format.fprintf ppf "rip"
-      | _ ->
-        invalid_argf "invalid register/type combo: %s/%s"
-          (Format.asprintf "%a" Reg.pp r)
-          (match t with
-           | `v128 -> "v128"
-           | #Type.basic as t ->
-             Format.asprintf "%a" Type.pp_basic t)
-          ()
-    end
+  | First r -> emit_reg t ppf r
   | Second (x, _) ->
     invalid_argf "tried to emit a variable %s"
       (Format.asprintf "%a" Var.pp x) ()
diff --git a/src/lib/machine/x86/x86_amd64_isel.ml b/src/lib/machine/x86/x86_amd64_isel.ml
index 898abb80..9ec94c4c 100644
--- a/src/lib/machine/x86/x86_amd64_isel.ml
+++ b/src/lib/machine/x86/x86_amd64_isel.ml
@@ -58,6 +58,14 @@ let can_lea_ty = function
   | `i16 | `i32 | `i64 -> true
   | _ -> false
 
+let compare_u a b =
+  Int64.(compare (a lxor min_value) (b lxor min_value))
+
+let lt_u a b = compare_u a b <  0 [@@inline] [@@ocaml.warning "-32"]
+let le_u a b = compare_u a b <= 0 [@@inline] [@@ocaml.warning "-32"]
+let gt_u a b = compare_u a b >  0 [@@inline] [@@ocaml.warning "-32"]
+let ge_u a b = compare_u a b >= 0 [@@inline] [@@ocaml.warning "-32"]
+
 (* pre: `tbl` is non-empty
 
    TODO:
@@ -75,9 +83,10 @@ let adjust_table d tbl =
   let acc = Vec.create () in
   let _ = List.fold tbl ~init:lowest ~f:(fun p (v, l) ->
       let diff = Int64.(v - p) in
-      for _ = 0 to Int64.to_int_trunc diff - 1 do
-        Vec.push acc d;
-      done;
+      let rec loop i = if lt_u i diff then
+          let () = Vec.push acc d in
+          loop @@ Int64.succ i in
+      loop 0L;
       Vec.push acc l;
       Int64.succ v) in
   Vec.to_list acc, lowest, highest
@@ -128,10 +137,10 @@ end = struct
       !!![I.movsd (Oreg (x, xt)) (Oreg (y, yt))]
     | _ -> !!None
 
-  let move_ri_x_y env =
+  let move_ri_x_y ?(zx = false) env =
     let*! x, xt = S.regvar env "x" in
     let*! y, yt = S.imm env "y" in
-    let*! () = guard @@ Type.equal_basic xt (bty yt) in
+    let*! () = guard (zx || Type.equal_basic xt (bty yt)) in
     if Bv.(y = zero) then
       !!![xor_gpr_self x xt]
     else
@@ -1738,9 +1747,9 @@ end = struct
     let xt' = ftosi_ty xt in
     match tf with
     | `f32 ->
-      !!![I.cvtss2si (Oreg (x, xt')) (Oreg (y, yt))]
+      !!![I.cvttss2si (Oreg (x, xt')) (Oreg (y, yt))]
     | `f64 ->
-      !!![I.cvtsd2si (Oreg (x, xt')) (Oreg (y, yt))]
+      !!![I.cvttsd2si (Oreg (x, xt')) (Oreg (y, yt))]
     | _ -> !!None
 
   let ftosi_rf32_x_y ti env =
@@ -1769,9 +1778,9 @@ end = struct
     let xt' = ftoui_ty xt in
     match tf with
     | `f32 ->
-      !!![I.cvtss2si (Oreg (x, xt')) (Oreg (y, yt))]
+      !!![I.cvttss2si (Oreg (x, xt')) (Oreg (y, yt))]
     | `f64 ->
-      !!![I.cvtsd2si (Oreg (x, xt')) (Oreg (y, yt))]
+      !!![I.cvttsd2si (Oreg (x, xt')) (Oreg (y, yt))]
     | _ -> !!None
 
   let ftoui_rf32_x_y ti env =
@@ -1935,11 +1944,16 @@ end = struct
       ]
     | _ -> !!None
 
-  let call_sym_x env =
+  let call_sym_x_y env =
     let*! args = S.callargs env "x" in
     let*! s, o = S.sym env "y" in
     !!![I.call args (Osym (s, o))]
 
+  let call_r_x_y env =
+    let*! args = S.callargs env "x" in
+    let*! y, yt = S.regvar env "y" in
+    !!![I.call args (Oreg (y, yt))]
+
   let jmp_tbl_x_y env =
     let*! x, xt = S.regvar env "x" in
     let*! d, tbl = S.table env "y" in
@@ -1954,6 +1968,10 @@ end = struct
       let tbl, lowest, highest = adjust_table d tbl in
       let highest' = Int64.(highest - lowest) in
       let diff = Int64.(highest - highest') in
+      let diff_fits = fits_int32_pos diff in
+      let* tdiff = if diff_fits then !!None else
+          let+ r = C.Var.fresh >>| Rv.var GPR in
+          Some r in
       let* tl, tladdr = fresh_label_addr in
       let* tbase = C.Var.fresh >>| Rv.var GPR in
       let* tidx = C.Var.fresh >>| Rv.var GPR in
@@ -1981,11 +1999,16 @@ end = struct
             I.mov (Oreg (tidx, xt)) (Oreg (x, xt))
         ];
         (* Subtract the difference from the index if needed. *)
-        ( if Int64.(diff = 1L)
-          then [I.dec (Oreg (tidx, `i64))]
-          else if Int64.(diff > 0L)
-          then [I.sub (Oreg (tidx, `i64)) (Oimm (diff, `i64))]
-          else []
+        ( match diff with
+          | 0L -> []
+          | 1L -> [I.dec (Oreg (tidx, `i64))]
+          | _ when diff_fits ->
+            [I.sub (Oreg (tidx, `i64)) (Oimm (diff, `i64))]
+          | _ ->
+            let tdiff = Option.value_exn tdiff in [
+              I.mov (Oreg (tdiff, `i64)) (Oimm (diff, `i64));
+              I.sub (Oreg (tidx, `i64)) (Oreg (tdiff, `i64));
+            ]
         );
         [ (* Compare against highest value. *)
           I.cmp (Oreg (tidx, `i64)) (Oimm (highest', `i64));
@@ -2217,7 +2240,7 @@ end = struct
 
     let zext = [
       move_rr_x_y ~zx:true;
-      move_ri_x_y;
+      move_ri_x_y ~zx:true;
     ]
 
     let fext ty = [
@@ -2318,7 +2341,8 @@ end = struct
     ]
 
     let call = [
-      call_sym_x;
+      call_sym_x_y;
+      call_r_x_y;
     ]
   end
 
diff --git a/src/lib/machine/x86/x86_amd64_peephole.ml b/src/lib/machine/x86/x86_amd64_peephole.ml
index d351eb7b..af7724dc 100644
--- a/src/lib/machine/x86/x86_amd64_peephole.ml
+++ b/src/lib/machine/x86/x86_amd64_peephole.ml
@@ -10,20 +10,22 @@ module Lset = Label.Tree_set
 
 let decomp i = Insn.label i, Insn.insn i
 
-let map_insns fn t =
-  if Ltree.is_empty t then fn
-  else Func.map_blks fn ~f:(fun b ->
-      Blk.map_insns b ~f:(fun i ->
-          match Ltree.find t @@ Insn.label i with
-          | Some insn -> Insn.with_insn i insn
-          | None -> i))
-
-let filter_not_in fn t =
-  if Lset.is_empty t then fn
-  else Func.map_blks fn ~f:(fun b ->
-      Blk.insns b |> Seq.filter ~f:(fun i ->
-          not @@ Lset.mem t @@ Insn.label i) |>
-      Seq.to_list |> Blk.with_insns b)
+let map_insns changed fn t =
+  if Ltree.is_empty t then fn else
+    let () = changed := true in
+    Func.map_blks fn ~f:(fun b ->
+        Blk.map_insns b ~f:(fun i ->
+            match Ltree.find t @@ Insn.label i with
+            | Some insn -> Insn.with_insn i insn
+            | None -> i))
+
+let filter_not_in changed fn t =
+  if Lset.is_empty t then fn else
+    let () = changed := true in
+    Func.map_blks fn ~f:(fun b ->
+        Blk.insns b |> Seq.filter ~f:(fun i ->
+            not @@ Lset.mem t @@ Insn.label i) |>
+        Seq.to_list |> Blk.with_insns b)
 
 (* Blocks that consist of a single instruction of the form:
 
@@ -46,10 +48,10 @@ let collect_singles fn =
         | _ -> acc)
 
 (* Union-find with path compression. *)
-let find_with_compression m l =
+let find_with_compression changed m l =
   let parent l = Ltree.find !m l |> Option.value ~default:l in
-  let l = ref l in
-  let p = ref @@ parent !l in
+  let l = ref l and orig = l in
+  let p = ref @@ parent orig in
   while Label.(!l <> !p) do
     let g = parent !p in
     if Label.(g <> !p) then begin
@@ -58,6 +60,7 @@ let find_with_compression m l =
     end;
     l := g
   done;
+  if Label.(!p <> orig) then changed := true;
   !p
 
 (* For blocks collected in the above analysis, thread them through to
@@ -83,10 +86,10 @@ let find_with_compression m l =
    @3:
      ...
 *)
-let jump_threading fn =
+let jump_threading changed fn =
   let singles = collect_singles fn in
   if not @@ Label.Tree.is_empty singles then
-    let find = find_with_compression @@ ref singles in
+    let find = find_with_compression changed @@ ref singles in
     Func.map_blks fn ~f:(fun b ->
         Blk.map_insns b ~f:(fun i ->
             Insn.with_insn i @@ match Insn.insn i with
@@ -96,8 +99,42 @@ let jump_threading fn =
             | insn -> insn))
   else fn
 
+let is_merge_candidate cfg b1 b2 =
+  match Seq.hd @@ Blk.insns ~rev:true b1 with
+  | Some i when is_barrier (Insn.insn i) -> false
+  | Some _ | None ->
+    let l1 = Blk.label b1 and l2 = Blk.label b2 in Seq.(
+      equal Label.equal (Cfg.Node.succs l1 cfg) (singleton l2) &&
+      equal Label.equal (Cfg.Node.preds l2 cfg) (singleton l1))
+
+(* find m l = None ==> no change
+   find m l = Some None ==> delete
+   find m l = Some (Some b) ==> replace *)
+let collect_merge_blks fn =
+  let cfg = Cfg.create ~is_barrier ~dests fn in
+  let rec go m = function
+    | [] | [_] -> m
+    | b1 :: b2 :: rest  when is_merge_candidate cfg b1 b2 ->
+      let label = Blk.label b1 in
+      let insns = Seq.(to_list @@ append (Blk.insns b1) (Blk.insns b2)) in
+      let b1' = Blk.create ~label ~insns in
+      let m = Ltree.add_exn m ~key:label ~data:(Some b1') in
+      let m = Ltree.add_exn m ~key:(Blk.label b2) ~data:None in
+      go m rest
+    | _ :: rest -> go m rest in
+  Func.blks fn |> Seq.to_list |> go Ltree.empty
+
+let merge_blks changed fn =
+  let m = collect_merge_blks fn in
+  if Ltree.is_empty m then fn else
+    let () = changed := true in
+    Func.blks fn |> Seq.filter_map ~f:(fun b ->
+        match Ltree.find m @@ Blk.label b with
+        | None -> Some b | Some b' -> b') |>
+    Seq.to_list |> Func.with_blks fn
+
 (* Remove blocks that are not reachable from the entry block. *)
-let remove_disjoint fn =
+let remove_disjoint changed fn =
   let reachable = with_return @@ fun {return} ->
     let cfg = Cfg.create ~is_barrier ~dests fn in
     let start = Func.entry fn in
@@ -106,9 +143,12 @@ let remove_disjoint fn =
       ~start_tree:(fun n s ->
           if Label.(n = start) then s else return s)
       ~enter_node:(fun _ n s -> Lset.add s n) in
-  Func.blks fn |> Seq.map ~f:Blk.label |>
-  Seq.filter ~f:(Fn.non @@ Lset.mem reachable) |>
-  Seq.to_list |> Func.remove_blks_exn fn
+  let dead =
+    Func.blks fn |> Seq.map ~f:Blk.label |>
+    Seq.filter ~f:(Fn.non @@ Lset.mem reachable) |>
+    Seq.to_list in
+  if not @@ List.is_empty dead then changed := true;
+  Func.remove_blks_exn fn dead
 
 (* Invert conditional branches based on the block layout.
 
@@ -146,8 +186,8 @@ let collect_invert_branches afters fn =
               ~key:lb ~data:(JMP (Jlbl a))
           | _ -> acc))
 
-let invert_branches afters fn =
-  map_insns fn @@ collect_invert_branches afters fn
+let invert_branches changed afters fn =
+  map_insns changed fn @@ collect_invert_branches afters fn
 
 (* Eliminate useless unconditional jumps where a fallthrough would suffice.
 
@@ -176,8 +216,8 @@ let collect_implicit_fallthroughs afters fn =
             Lset.add acc la
           | _ -> acc))
 
-let implicit_fallthroughs afters fn =
-  filter_not_in fn @@ collect_implicit_fallthroughs afters fn
+let implicit_fallthroughs changed afters fn =
+  filter_not_in changed fn @@ collect_implicit_fallthroughs afters fn
 
 (* Deallocating the stack pointer followed by a LEAVE instruction
    is redundant.
@@ -212,8 +252,8 @@ let collect_dealloc_stack_before_leave fn =
         | _ :: xs -> go acc xs in
       go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b)
 
-let dealloc_stack_before_leave fn =
-  filter_not_in fn @@ collect_dealloc_stack_before_leave fn
+let dealloc_stack_before_leave changed fn =
+  filter_not_in changed fn @@ collect_dealloc_stack_before_leave fn
 
 let collect_redundant_spill_after_reload fn =
   Func.blks fn |> Seq.fold ~init:Lset.empty ~f:(fun acc b ->
@@ -229,8 +269,8 @@ let collect_redundant_spill_after_reload fn =
         | _ :: xs -> go acc xs in
       go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b)
 
-let redundant_spill_after_reload fn =
-  filter_not_in fn @@ collect_redundant_spill_after_reload fn
+let redundant_spill_after_reload changed fn =
+  filter_not_in changed fn @@ collect_redundant_spill_after_reload fn
 
 (* If we have a LEA of the same address as a subsequent load,
    then use the result of the LEA as the address for the load.
@@ -252,8 +292,19 @@ let collect_reuse_lea fn =
         | _ :: xs -> go acc xs in
       go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b)
 
-let reuse_lea fn =
-  map_insns fn @@ collect_reuse_lea fn
+let reuse_lea changed fn =
+  map_insns changed fn @@ collect_reuse_lea fn
+
+let and_test_cc = function
+  | Ce | Cne | Cs | Cns -> true
+  | _ -> false
+
+let and_test_act = function
+  | Jcc (cc, _)
+  | One (SETcc cc, _)
+  | Two (CMOVcc cc, _, _)
+    -> and_test_cc cc
+  | _ -> false
 
 let collect_and_test fn =
   Func.blks fn |> Seq.fold ~init:Lset.empty ~f:(fun acc b ->
@@ -261,17 +312,22 @@ let collect_and_test fn =
         | [] | [_] -> acc
         | (_, Two (AND, Oreg (r1, _), _))
           :: (l, Two (TEST_, Oreg (r1', _), Oreg (r2', _)))
-          :: xs when Rv.(r1 = r1') && Rv.(r1 = r2') ->
+          :: (_, act)
+          :: xs when Rv.(r1 = r1')
+                  && Rv.(r1 = r2')
+                  && and_test_act act ->
           go (Lset.add acc l) xs
         | (_, Two (AND, Oreg (r1, _), _))
           :: (l, Two (CMP, Oreg (r1', _), Oimm (0L, _)))
-          :: xs when Rv.equal r1 r1' ->
+          :: (_, act)
+          :: xs when Rv.equal r1 r1'
+                  && and_test_act act ->
           go (Lset.add acc l) xs
         | _ :: xs -> go acc xs in
       go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b)
 
-let and_test fn =
-  filter_not_in fn @@ collect_and_test fn
+let and_test changed fn =
+  filter_not_in changed fn @@ collect_and_test fn
 
 let immty = function
   | #Type.imm as imm -> imm
@@ -293,8 +349,8 @@ let collect_lea_mov fn =
         | _ :: xs -> go acc xs in
       go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b)
 
-let lea_mov fn =
-  map_insns fn @@ collect_lea_mov fn
+let lea_mov changed fn =
+  map_insns changed fn @@ collect_lea_mov fn
 
 (* TODO: fill me in *)
 let combinable_binop = function
@@ -308,6 +364,15 @@ let combinable_binop = function
   | CMP -> true
   | _ -> false
 
+(* Combine with unary ops that don't write to their operand. *)
+let combinable_unop = function
+  | CALL _
+  | DIV
+  | IDIV
+  | IMUL1
+  | MUL -> true
+  | _ -> false
+
 let rset_mem' o l = List.exists l ~f:(Set.mem @@ rset [o])
 
 let collect_mov_op fn =
@@ -331,11 +396,18 @@ let collect_mov_op fn =
                   && not (Set.mem (rset [o]) r1) ->
           let i = Two (op, Oreg (r1', r1t), o) in
           go (Ltree.set acc ~key:l ~data:i) xs
+        | (_, Two (MOV, Oreg (r1, _), o))
+          :: (l, One (op, Oreg (r2', _)))
+          :: xs when combinable_unop op
+                  && Rv.(r1 = r2')
+                  && not (Set.mem (rset [o]) r1) ->
+          let i = One (op, o) in
+          go (Ltree.set acc ~key:l ~data:i) xs
         | _ :: xs -> go acc xs in
       go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b)
 
-let mov_op fn =
-  map_insns fn @@ collect_mov_op fn
+let mov_op changed fn =
+  map_insns changed fn @@ collect_mov_op fn
 
 let collect_mov_to_store fn =
   Func.blks fn |> Seq.fold ~init:Ltree.empty ~f:(fun acc b ->
@@ -350,20 +422,29 @@ let collect_mov_to_store fn =
         | _ :: xs -> go acc xs in
       go acc @@ Seq.to_list @@ Seq.map ~f:decomp @@ Blk.insns b)
 
-let mov_to_store fn =
-  map_insns fn @@ collect_mov_to_store fn
+let mov_to_store changed fn =
+  map_insns changed fn @@ collect_mov_to_store fn
+
+let max_rounds = 5
 
 let run fn =
-  let fn = jump_threading fn in
-  let fn = remove_disjoint fn in
-  let afters = Func.collect_afters fn in
-  let fn = invert_branches afters fn in
-  let fn = implicit_fallthroughs afters fn in
-  let fn = dealloc_stack_before_leave fn in
-  let fn = redundant_spill_after_reload fn in
-  let fn = reuse_lea fn in
-  let fn = and_test fn in
-  let fn = lea_mov fn in
-  let fn = mov_op fn in
-  let fn = mov_to_store fn in
-  fn
+  let rec loop i fn =
+    if i > max_rounds then fn else
+      let () = Logs.debug (fun m ->
+          m "%s: peephole round %d%!" __FUNCTION__ i) in
+      let changed = ref false in
+      let fn = jump_threading changed fn in
+      let fn = remove_disjoint changed fn in
+      let fn = merge_blks changed fn in
+      let afters = Func.collect_afters fn in
+      let fn = invert_branches changed afters fn in
+      let fn = implicit_fallthroughs changed afters fn in
+      let fn = dealloc_stack_before_leave changed fn in
+      let fn = redundant_spill_after_reload changed fn in
+      let fn = reuse_lea changed fn in
+      let fn = and_test changed fn in
+      let fn = lea_mov changed fn in
+      let fn = mov_op changed fn in
+      let fn = mov_to_store changed fn in
+      if !changed then loop (i + 1) fn else fn in
+  loop 1 fn
diff --git a/src/lib/machine/x86/x86_amd64_regalloc.ml b/src/lib/machine/x86/x86_amd64_regalloc.ml
index a0998ab4..ac44aad1 100644
--- a/src/lib/machine/x86/x86_amd64_regalloc.ml
+++ b/src/lib/machine/x86/x86_amd64_regalloc.ml
@@ -100,46 +100,46 @@ let substitute_operand f = function
   | Osym _ as s -> s
   | Oah -> Oah
 
+(* Attempt some peephole optimizations after the substitution.
+   XXX: what if the FLAGS register is live? *)
+let substitute_binop' o a b op = match o, op a, op b with
+  | LEA, Oreg (x, ty), Omem (Abd (y, d), _) when Regvar.(x = y) ->
+    (* lea x, [x+d] => add x, d or sub x, -d *)
+    if Int32.(d < 0l) then
+      let d = Int32.neg d in
+      let d' = Int64.(of_int32 d land 0xFFFFFFFFL) in
+      if Int64.(d' = 1L) then
+        One (DEC, Oreg (x, ty))
+      else
+        Two (SUB, Oreg (x, ty), Oimm (d', immty ty))
+    else
+      let d' = Int64.(of_int32 d land 0xFFFFFFFFL) in
+      if Int64.(d' = 1L) then
+        One (INC, Oreg (x, ty))
+      else
+        Two (ADD, Oreg (x, ty), Oimm (d', immty ty))
+  | LEA, Oreg (x, ty), Omem (Abis (y, z, S1), _) when Regvar.(x = y) ->
+    (* lea x, [x+y*1] => add x, y *)
+    Two (ADD, Oreg (x, ty), Oreg (z, ty))
+  | LEA, Oreg (x, ty), Omem (Abis (y, z, S1), _) when Regvar.(x = z) ->
+    (* lea x, [y+x*1] => add x, y *)
+    Two (ADD, Oreg (x, ty), Oreg (y, ty))
+  | LEA, Oreg (x, ty), Omem (Aisd (y, S2, 0l), _) when Regvar.(x = y) ->
+    (* lea x, [x*2] => shl x, 1 *)
+    Two (SHL, Oreg (x, ty), Oimm (1L, `i8))
+  | LEA, Oreg (x, ty), Omem (Aisd (y, S4, 0l), _) when Regvar.(x = y) ->
+    (* lea x, [x*4] => shl x, 2 *)
+    Two (SHL, Oreg (x, ty), Oimm (2L, `i8))
+  | LEA, Oreg (x, ty), Omem (Aisd (y, S8, 0l), _) when Regvar.(x = y) ->
+    (* lea x, [x*8] => shl x, 3 *)
+    Two (SHL, Oreg (x, ty), Oimm (3L, `i8))
+  | o, a, b ->
+    (* Default case. *)
+    Two (o, a, b)
+
 let substitute' i op = match i with
   | One (o, a) -> One (o, op a)
-  | Two (o, a, b) ->
-    (* Attempt some peephole optimizations after the substitution.
-       XXX: what if the FLAGS register is live? *)
-    begin match o, op a, op b with
-      | LEA, Oreg (x, ty), Omem (Abd (y, d), _) when Regvar.(x = y) ->
-        (* lea x, [x+d] => add x, d or sub x, -d *)
-        if Int32.(d < 0l) then
-          let d = Int32.neg d in
-          let d' = Int64.(of_int32 d land 0xFFFFFFFFL) in
-          if Int64.(d' = 1L) then
-            One (DEC, Oreg (x, ty))
-          else
-            Two (SUB, Oreg (x, ty), Oimm (d', immty ty))
-        else
-          let d' = Int64.(of_int32 d land 0xFFFFFFFFL) in
-          if Int64.(d' = 1L) then
-            One (INC, Oreg (x, ty))
-          else
-            Two (ADD, Oreg (x, ty), Oimm (d', immty ty))
-      | LEA, Oreg (x, ty), Omem (Abis (y, z, S1), _) when Regvar.(x = y) ->
-        (* lea x, [x+y*1] => add x, y *)
-        Two (ADD, Oreg (x, ty), Oreg (z, ty))
-      | LEA, Oreg (x, ty), Omem (Abis (y, z, S1), _) when Regvar.(x = z) ->
-        (* lea x, [y+x*1] => add x, y *)
-        Two (ADD, Oreg (x, ty), Oreg (y, ty))
-      | LEA, Oreg (x, ty), Omem (Aisd (y, S2, 0l), _) when Regvar.(x = y) ->
-        (* lea x, [x*2] => shl x, 1 *)
-        Two (SHL, Oreg (x, ty), Oimm (1L, `i8))
-      | LEA, Oreg (x, ty), Omem (Aisd (y, S4, 0l), _) when Regvar.(x = y) ->
-        (* lea x, [x*4] => shl x, 2 *)
-        Two (SHL, Oreg (x, ty), Oimm (2L, `i8))
-      | LEA, Oreg (x, ty), Omem (Aisd (y, S8, 0l), _) when Regvar.(x = y) ->
-        (* lea x, [x*8] => shl x, 3 *)
-        Two (SHL, Oreg (x, ty), Oimm (3L, `i8))
-      | o, a, b ->
-        (* Default case. *)
-        Two (o, a, b)
-    end
+  | Two (o, a, b) -> substitute_binop' o a b op
   | IMUL3 (a, b, c) -> IMUL3 (op a, op b, c)
   | JMP (Jind a) -> JMP (Jind (op a))
   | CDQ
@@ -203,88 +203,90 @@ module Typed_writes = struct
     Regvar.Map.of_alist_reduce ~f:reduce @@
     List.map l ~f:(fun (r, t) -> Regvar.reg r, t)
 
-  (* Registers written to by an instruction. *)
-  let writes call = function
-    | Two (o, a, _) ->
-      begin match o with
-        | ADD
-        | ADDSD
-        | ADDSS
-        | AND
-        | BSF
-        | BSR
-        | CMOVcc _
-        | CVTSD2SI
-        | CVTSD2SS
-        | CVTSI2SD
-        | CVTSI2SS
-        | CVTSS2SD
-        | CVTSS2SI
-        | DIVSD
-        | DIVSS
-        | IMUL2
-        | LEA
-        | MOV
-        | MOV_
-        | MOVD
-        | MOVDQA
-        | MOVQ
-        | MOVSD
-        | MOVSS
-        | MOVSX
-        | MOVSXD
-        | MOVZX
-        | MULSD
-        | MULSS
-        | OR
-        | POPCNT
-        | ROL
-        | ROR
-        | SAR
-        | SHL
-        | SHR
-        | SUB
-        | SUBSD
-        | SUBSS
-        | XOR
-        | XORPD
-        | XORPS
-          -> rmap_reg [a]
-        | CMP
-        | TEST_
-        | UCOMISD
-        | UCOMISS
-          -> Regvar.Map.empty
-      end
-    | One (o, a) ->
-      begin match o with
-        | CALL _
-          -> call
-        | DEC
-        | INC
-        | NEG
-        | NOT
-        | SETcc _
-        | POP
-          -> rmap_reg [a]
-        | PUSH
+  let binop_writes o a _b = match o with
+    | ADD
+    | ADDSD
+    | ADDSS
+    | AND
+    | BSF
+    | BSR
+    | CMOVcc _
+    | CVTSD2SI
+    | CVTTSD2SI
+    | CVTSD2SS
+    | CVTSI2SD
+    | CVTSI2SS
+    | CVTSS2SD
+    | CVTSS2SI
+    | CVTTSS2SI
+    | DIVSD
+    | DIVSS
+    | IMUL2
+    | LEA
+    | MOV
+    | MOV_
+    | MOVD
+    | MOVDQA
+    | MOVQ
+    | MOVSD
+    | MOVSS
+    | MOVSX
+    | MOVSXD
+    | MOVZX
+    | MULSD
+    | MULSS
+    | OR
+    | POPCNT
+    | ROL
+    | ROR
+    | SAR
+    | SHL
+    | SHR
+    | SUB
+    | SUBSD
+    | SUBSS
+    | XOR
+    | XORPD
+    | XORPS
+      -> rmap_reg [a]
+    | CMP
+    | TEST_
+    | UCOMISD
+    | UCOMISS
+      -> Regvar.Map.empty
+
+  let unop_writes call o a = match o with
+    | CALL _
+      -> call
+    | DEC
+    | INC
+    | NEG
+    | NOT
+    | SETcc _
+    | POP
+      -> rmap_reg [a]
+    | PUSH
+      -> Regvar.Map.empty
+    | DIV
+    | IDIV
+    | IMUL1
+    | MUL ->
+      begin match a with
+        | Oreg (_, `i8)
+          -> rmap' [`rax, `i8]
+        | Oreg (_, t)
+          -> rmap' [`rax, wty t; `rdx, wty t]
+        | Omem (_, t)
+          -> rmap' [`rax, wty t; `rdx, wty t]
+        | _
+          (* invalid forms *)
           -> Regvar.Map.empty
-        | DIV
-        | IDIV
-        | IMUL1
-        | MUL ->
-          begin match a with
-            | Oreg (_, `i8)
-              -> rmap' [`rax, `i8]
-            | Oreg (_, t)
-              -> rmap' [`rax, wty t; `rdx, wty t]
-            | Omem (_, t)
-              -> rmap' [`rax, wty t; `rdx, wty t]
-            | _
-              (* invalid forms *)
-              -> Regvar.Map.empty
-          end
       end
+
+  (* Registers written to by an instruction. *)
+  let writes call = function
+    | Two (o, a, b) -> binop_writes o a b
+    | One (o, a) -> unop_writes call o a
     | IMUL3 (a, _, _)
       -> rmap_reg [a]
     | Jcc _
@@ -318,65 +320,65 @@ module Pre_assign_slots(C : Context_intf.S) = struct
     let off' = Int32.of_int_exn off in
     let d' = Int32.(d + off') in
     if (off > 0 && Int32.(d' < d)) ||
-       (off < 0 && Int32.(d' > d)) then begin
+       (off < 0 && Int32.(d' > d)) then
       let+ x, is = freshen base off' in
       Second (x, is)
-    end else !!(First d')
+    else !!(First d')
+
+  let assign_ab find base a b = match find b with
+    | Some 0 -> !!(Ab base, [])
+    | Some o -> !!(Abd (base, Int32.of_int_exn o), [])
+    | None -> !!(a, [])
+
+  let assign_abd find base a b d = match find b with
+    | None -> !!(a, [])
+    | Some o -> add_disp base o d >>| function
+      | First d' -> Abd (base, d'), []
+      | Second (b', bi) -> Abd (b', d), bi
+
+  let assign_abis find base a b i s = match find b, find i with
+    | None, None -> !!(a, [])
+    | Some 0, None -> !!(Abis (base, i, s), [])
+    | Some o, None -> !!(Abisd (base, i, s, Int32.of_int_exn o), [])
+    | None, Some o ->
+      let+ i', ii = freshen base (Int32.of_int_exn o) in
+      Abis (b, i', s), ii
+    | Some ob, Some oi ->
+      let+ i', ii = freshen base (Int32.of_int_exn oi) in
+      Abisd (base, i', s, Int32.of_int_exn ob), ii
+
+  let assign_aisd find base a i s d = match find i with
+    | None -> !!(a, [])
+    | Some o ->
+      let+ i', ii = freshen base (Int32.of_int_exn o) in
+      Aisd (i', s, d), ii
 
   let rec assign_amode find base a = match a with
     | Albl _ | Asym _ -> !!(a, [])
-    | Ab b ->
-      begin match find b with
-        | Some 0 -> !!(Ab base, [])
-        | Some o -> !!(Abd (base, Int32.of_int_exn o), [])
-        | None -> !!(a, [])
-      end
-    | Abd (b, d) ->
-      begin match find b with
-        | None -> !!(a, [])
-        | Some o -> add_disp base o d >>| function
-          | First d' -> Abd (base, d'), []
-          | Second (b', bi) -> Abd (b', d), bi
-      end
-    | Abis (b, i, s) ->
-      begin match find b, find i with
-        | None, None -> !!(a, [])
-        | Some 0, None -> !!(Abis (base, i, s), [])
-        | Some o, None -> !!(Abisd (base, i, s, Int32.of_int_exn o), [])
-        | None, Some o ->
-          let+ i', ii = freshen base (Int32.of_int_exn o) in
-          Abis (b, i', s), ii
-        | Some ob, Some oi ->
-          let+ i', ii = freshen base (Int32.of_int_exn oi) in
-          Abisd (base, i', s, Int32.of_int_exn ob), ii
-      end
+    | Ab b -> assign_ab find base a b
+    | Abd (b, d) -> assign_abd find base a b d
+    | Abis (b, i, s) -> assign_abis find base a b i s
     | Aisd (i, S1, d) -> assign_amode find base @@ Abd (i, d)
-    | Aisd (i, s, d) ->
-      begin match find i with
-        | None -> !!(a, [])
-        | Some o ->
-          let+ i', ii = freshen base (Int32.of_int_exn o) in
-          Aisd (i', s, d), ii
-      end
-    | Abisd (b, i, s, d) ->
-      begin match find b, find i with
-        | None, None -> !!(a, [])
-        | None, Some _ when equal_scale s S1 ->
-          assign_amode find base @@ Abisd (i, b, S1, d)
-        | None, Some o ->
-          let+ i', ii = freshen base (Int32.of_int_exn o) in
-          Abisd (b, i', s, d), ii
-        | Some o, None ->
-          begin add_disp base o d >>| function
-            | First d' -> Abisd (base, i, s, d'), []
-            | Second (b', bi) -> Abisd (b', i, s, d), bi
-          end
-        | Some ob, Some oi ->
-          let* i', ii = freshen base (Int32.of_int_exn oi) in
-          add_disp base ob d >>| function
-          | First d' -> Abisd (base, i', s, d'), ii
-          | Second (b', bi) -> Abisd (b', i', s, d), bi @ ii
+    | Aisd (i, s, d) -> assign_aisd find base a i s d
+    | Abisd (b, i, s, d) -> assign_abisd find base a b i s d
+
+  and assign_abisd find base a b i s d = match find b, find i with
+    | None, None -> !!(a, [])
+    | None, Some _ when equal_scale s S1 ->
+      assign_amode find base @@ Abisd (i, b, S1, d)
+    | None, Some o ->
+      let+ i', ii = freshen base (Int32.of_int_exn o) in
+      Abisd (b, i', s, d), ii
+    | Some o, None ->
+      begin add_disp base o d >>| function
+        | First d' -> Abisd (base, i, s, d'), []
+        | Second (b', bi) -> Abisd (b', i, s, d), bi
       end
+    | Some ob, Some oi ->
+      let* i', ii = freshen base (Int32.of_int_exn oi) in
+      add_disp base ob d >>| function
+      | First d' -> Abisd (base, i', s, d'), ii
+      | Second (b', bi) -> Abisd (b', i', s, d), bi @ ii
 
   let assign_operand find base op = match op with
     | Oreg (r, t) ->
@@ -428,41 +430,41 @@ module Pre_assign_slots(C : Context_intf.S) = struct
     | FP64 _ -> !![i]
 end
 
+(* NB: this makes assumptions based on the results of `Pre_assign_slots`. *)
 module Post_assign_slots = struct
-  (* NB: this makes assumptions based on the results of `Pre_assign_slots`. *)
+  (* All spills/reloads should be using this form. *)
+  let assign_ab find base a b = match find b with
+    | Some 0 -> Ab base
+    | Some o -> Abd (base, Int32.of_int_exn o)
+    | None -> a
+
+  let assign_abd find _base a b _d = match find b with
+    | None -> a
+    | Some _ -> assert false
+
+  let assign_abis find _base a b i _s = match find b, find i with
+    | None, None -> a
+    | Some _, None -> assert false
+    | None, Some _ -> assert false
+    | Some _, Some _ -> assert false
+
+  let assign_aisd find _base a i _s _d = match find i with
+    | None -> a
+    | Some _ -> assert false
+
+  let assign_abisd find _base a b i _s _d = match find b, find i with
+    | None, None -> a
+    | None, Some _ -> assert false
+    | Some _, Some _ -> assert false
+    | Some _, None -> assert false
+
   let assign_amode find base a = match a with
     | Albl _ | Asym _ -> a
-    | Ab b ->
-      (* All spills/reloads should be using this form. *)
-      begin match find b with
-        | Some 0 -> Ab base
-        | Some o -> Abd (base, Int32.of_int_exn o)
-        | None -> a
-      end
-    | Abd (b, _d) ->
-      begin match find b with
-        | None -> a
-        | Some _ -> assert false
-      end
-    | Abis (b, i, _s) ->
-      begin match find b, find i with
-        | None, None -> a
-        | Some _, None -> assert false
-        | None, Some _ -> assert false
-        | Some _, Some _ -> assert false
-      end
-    | Aisd (i, _s, _d) ->
-      begin match find i with
-        | None -> a
-        | Some _ -> assert false
-      end
-    | Abisd (b, i, _s, _d) ->
-      begin match find b, find i with
-        | None, None -> a
-        | None, Some _ -> assert false
-        | Some _, Some _ -> assert false
-        | Some _, None -> assert false
-      end
+    | Ab b -> assign_ab find base a b
+    | Abd (b, d) -> assign_abd find base a b d
+    | Abis (b, i, s) -> assign_abis find base a b i s
+    | Aisd (i, s, d) -> assign_aisd find base a i s d
+    | Abisd (b, i, s, d) -> assign_abisd find base a b i s d
 
   let assign_operand find base op = match op with
     | Oreg (r, _) ->
diff --git a/src/lib/matcher.ml b/src/lib/matcher.ml
index 71af273e..be56c51d 100644
--- a/src/lib/matcher.ml
+++ b/src/lib/matcher.ml
@@ -260,12 +260,15 @@ module Make(M : L) = struct
   end
 
   type 'a program = {
+    name : string;
     rule : (pat * 'a) array;
     code : insn Vec.t;
     root : (op, label) Hashtbl.t;
     rmin : int array;
   }
 
+  let name p = p.name
+
   let pp_program ppf p = Vec.iteri p.code ~f:(fun l i ->
       Format.fprintf ppf "@%d: %a\n" l pp_insn i)
 
@@ -490,7 +493,8 @@ module Make(M : L) = struct
       | [] ->
         failwithf "compile_tree: empty sequence at rule %d" i ()
 
-    let compile ?(commute = false) rules =
+    let compile ?(commute = false) ~name rules =
+      let[@alert "-deprecated"] t0 = Time.now () in
       let rule = Array.of_list rules in
       let code = Vec.create () in
       let rmin, root = match rules with
@@ -506,7 +510,12 @@ module Make(M : L) = struct
                 compile_tree forest i pat);
           let root = Hashtbl.map forest ~f:(linearize code) in
           compute_rmin code, root in
-      {rule; code; root; rmin}
+      let[@alert "-deprecated"] t = Time.now () in
+      let[@alert "-deprecated"] elapsed = Time.(Span.to_sec (diff t t0)) in
+      Logs.debug (fun m ->
+          m "%s: ruleset %s: compiled %d rules to %d instructions in %g seconds, commute=%b%!"
+            __FUNCTION__ name (Array.length rule) (Vec.length code) elapsed commute);
+      {name; rule; code; root; rmin}
   end
 
   let compile = Compiler.compile
diff --git a/src/lib/matcher.mli b/src/lib/matcher.mli
index 3f78c262..c31cb7b8 100644
--- a/src/lib/matcher.mli
+++ b/src/lib/matcher.mli
@@ -72,6 +72,9 @@ module Make(M : L) : sig
   (** A compiled VM program. *)
   type 'a program
 
+  (** The name of the ruleset that produced the program. *)
+  val name : 'a program -> string
+
   (** Pretty-print the program. *)
   val pp_program : Format.formatter -> 'a program -> unit
 
@@ -89,7 +92,7 @@ module Make(M : L) : sig
       and the [VM] (see below) will prioritize yielding matches in
       this order.
   *)
-  val compile : ?commute:bool -> (pat * 'a) list -> 'a program
+  val compile : ?commute:bool -> name:string -> (pat * 'a) list -> 'a program
 
   (** Returns [true] is the program is empty. *)
   val is_empty : 'a program -> bool
diff --git a/src/lib/passes/abi_loadopt.ml b/src/lib/passes/abi_loadopt.ml
index 8c6041b9..58bf4b09 100644
--- a/src/lib/passes/abi_loadopt.ml
+++ b/src/lib/passes/abi_loadopt.ml
@@ -73,6 +73,7 @@ type t = {
   blks         : Abi.blk Label.Table.t;
   mems         : store Mem.Table.t;
   vars         : operand Var.Table.t;
+  nop          : Label.Hash_set.t;
   mutable mem  : Label.t option;
   mutable memo : operand Hashcons.t;
 }
@@ -107,8 +108,9 @@ let init fn =
   let blks = Label.Table.create () in
   let mems = Mem.Table.create () in
   let vars = Var.Table.create () in
+  let nop = Label.Hash_set.create () in
   let mem = None and memo = Hashcons.empty in
-  {reso; dom; rdom; lst; blks; mems; mem; vars; memo}
+  {reso; dom; rdom; lst; blks; mems; mem; vars; memo; nop}
 
 module Optimize = struct
   let var t x = match Hashtbl.find t.vars x with
@@ -151,10 +153,20 @@ module Optimize = struct
         | _ -> `var x in
       t.memo <- Hashcons.set t.memo ~key:op ~data
 
+  let same_store t v v' l l' =
+    equal_operand v v' && t.rdom ~parent:l' l
+
   let store t l ty v a =
     let v = operand t v in
     let a = operand t a in
     let key = {label = l; addr = a; ty} in
+    (* Mark redundant stores only if we can prove that
+       it is storing the same value to the same address. *)
+    Option.iter t.mem ~f:(fun m ->
+        match Hashtbl.find t.mems {key with label = m} with
+        | Some Value (v', l') when same_store t v v' l l' ->
+          Hash_set.add t.nop l
+        | Some _ | None -> ());
     Hashtbl.set t.mems ~key ~data:(Value (v, l));
     t.mem <- Some l;
     `store (ty, v, a)
@@ -203,17 +215,55 @@ module Optimize = struct
     t.mem <- Some l;
     `call (xs, f, args)
 
+  let eval_bop o a b = match a, b with
+    | `int (a, _), `int (b, _) ->
+      (Abi.Eval.binop_int o a b :> const option)
+    | `float a, `float b ->
+      (Abi.Eval.binop_single o a b :> const option)
+    | `double a, `double b ->
+      (Abi.Eval.binop_double o a b :> const option)
+    | _ -> None
+
+  let eval_uop o = function
+    | `int (a, ty) ->
+      (Abi.Eval.unop_int o a ty :> const option)
+    | `float a ->
+      (Abi.Eval.unop_single o a :> const option)
+    | `double a ->
+      (Abi.Eval.unop_double o a :> const option)
+    | _ -> None
+
+  let bop t l x o a b =
+    let a' = operand t a in
+    let b' = operand t b in
+    let op = `bop (x, o, a', b') in
+    begin match eval_bop o a' b' with
+      | None ->
+        let k = Op.of_insn op in
+        canonicalize t x k;
+        Op.commute k |> Option.iter ~f:(canonicalize t x)
+      | Some c ->
+        Hashtbl.set t.vars ~key:x ~data:(c :> operand);
+        Hash_set.add t.nop l
+    end;
+    op
+
+  let uop t l x o a =
+    let a' = operand t a in
+    let op = `uop (x, o, a') in
+    begin match eval_uop o a' with
+      | None ->
+        let k = Op.of_insn op in
+        canonicalize t x k
+      | Some c ->
+        Hashtbl.set t.vars ~key:x ~data:(c :> operand);
+        Hash_set.add t.nop l
+    end;
+    op
+
   let insn t l : Abi.Insn.op -> Abi.Insn.op = function
-    | `bop (x, o, a, b) ->
-      let op = `bop (x, o, operand t a, operand t b) in
-      let k = Op.of_insn op in
-      canonicalize t x k;
-      Op.commute k |> Option.iter ~f:(canonicalize t x);
-      op
-    | `uop (x, o, a) ->
-      let op = `uop (x, o, operand t a) in
-      canonicalize t x @@ Op.of_insn op;
-      op
+    | `bop (x, o, a, b) -> bop t l x o a b
+    | `uop (x, o, a) -> uop t l x o a
     | `sel (x, ty, c, y, n) -> sel t x ty c y n
     | `call (xs, f, args) -> call t l xs f args
     | `store (ty, v, a) -> store t l ty v a
@@ -279,8 +329,13 @@ let run fn =
         Semi_nca.Tree.children t.dom l |> Seq.iter ~f:(fun l ->
             Stack.push q (l, t.mem, t.memo)));
     Abi.Func.map_blks fn ~f:(fun b ->
-        Abi.Blk.label b |> Hashtbl.find t.blks |>
-        Option.value ~default:b)
+        let b =
+          Abi.Blk.label b |> Hashtbl.find t.blks |>
+          Option.value ~default:b in
+        if Hash_set.is_empty t.nop then b else
+          Abi.Blk.insns b |> Seq.filter ~f:(fun i ->
+              not @@ Hash_set.mem t.nop @@ Abi.Insn.label i) |>
+          Seq.to_list |> Abi.Blk.with_insns b)
   else
     E.failf "In Abi_loadopt: function $%s is not in SSA form"
       (Abi.Func.name fn) ()
diff --git a/src/lib/passes/coalesce_slots/coalesce_slots.ml b/src/lib/passes/coalesce_slots/coalesce_slots.ml
new file mode 100644
index 00000000..a8a510cc
--- /dev/null
+++ b/src/lib/passes/coalesce_slots/coalesce_slots.ml
@@ -0,0 +1,54 @@
+open Core
+open Monads.Std
+open Regular.Std
+open Virtual
+open Scalars
+open Coalesce_slots_impl
+open Scalars_common
+
+module E = Monad.Result.Error
+module V = Make(VL)
+module A = Make(AL)
+
+open E.Let
+
+let check_ssa msg n d =
+  if Dict.mem d Tags.ssa then Ok ()
+  else E.failf "In Coalesce_slots%s: function $%s is not in SSA form" msg n ()
+
+let apply t fn map_blks map_blk insns with_insns with_ctrl label =
+  let not_dead = not @. Lset.mem t.deads @. label in
+  if is_empty t then fn else
+    let f = if Map.is_empty t.subst then fun b ->
+        insns b |> Seq.filter ~f:not_dead |>
+        Seq.to_list |> with_insns b
+      else fun b ->
+        let is, c = map_blk t.subst b in
+        let is = if Lset.is_empty t.deads then is
+          else List.filter is ~f:not_dead in
+        with_ctrl (with_insns b is) c in
+    map_blks fn ~f
+[@@specialise]
+
+let run fn =
+  let+ () = check_ssa "" (Func.name fn) (Func.dict fn) in
+  let t = V.run fn in
+  apply t fn
+    Func.map_blks
+    Subst_mapper.map_blk
+    Blk.insns
+    Blk.with_insns
+    Blk.with_ctrl
+    Insn.label
+
+let run_abi fn =
+  let open Abi in
+  let+ () = check_ssa " (ABI)" (Func.name fn) (Func.dict fn) in
+  let t = A.run fn in
+  apply t fn
+    Func.map_blks
+    Subst_mapper_abi.map_blk
+    Blk.insns
+    Blk.with_insns
+    Blk.with_ctrl
+    Insn.label
diff --git a/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml
new file mode 100644
index 00000000..533e77b3
--- /dev/null
+++ b/src/lib/passes/coalesce_slots/coalesce_slots_impl.ml
@@ -0,0 +1,338 @@
+open Core
+open Regular.Std
+open Graphlib.Std
+open Scalars
+
+module Ltree = Label.Tree
+module Lset = Label.Tree_set
+module Slot = Virtual.Slot
+module Allen = Allen_interval_algebra
+
+type tag = Def | Use | Both [@@deriving compare, equal, sexp]
+
+let pp_tag ppf = function
+  | Def -> Format.fprintf ppf "def"
+  | Use -> Format.fprintf ppf "use"
+  | Both -> Format.fprintf ppf "both"
+
+let join_tag a b = if equal_tag a b then a else Both
+
+type range = {
+  lo : int;
+  hi : int;
+  tg : tag;
+} [@@deriving compare, equal, sexp]
+
+module Range = struct
+  type t = range [@@deriving compare, equal, sexp]
+
+  let pp ppf r = Format.fprintf ppf "%a[%d, %d]" pp_tag r.tg r.lo r.hi
+
+  let bad = {lo = Int.min_value; hi = Int.max_value; tg = Both}
+  let is_bad = equal bad
+
+  let singleton n = {lo = n; hi = n; tg = Def}
+  let size r = r.hi - r.lo
+
+  let distance x y =
+    if x.hi < y.lo then y.lo - x.hi
+    else if x.lo > y.hi then x.lo - y.hi
+    else 0
+  [@@ocaml.warning "-32"]
+
+  (* Extend the upper-bound on the live range. *)
+  let use r n = {
+    r with
+    hi = Int.max r.hi n;
+    tg = join_tag r.tg Use;
+  }
+
+  (* Shrink the lower-bound on the live range.
+
+     Also, a defintion counts as a use, because we need to
+     reference the slot, so extend the upper-bound.
+  *)
+  let def r n = {
+    lo = Int.min r.lo n;
+    hi = Int.max r.hi n;
+    tg = join_tag r.tg Def;
+  }
+
+  module Algebra = Allen.Make(struct
+      type point = int
+      type nonrec t = t
+      let lo t = t.lo [@@inline]
+      let hi t = t.hi [@@inline]
+      include Int.Replace_polymorphic_compare
+    end)
+end
+
+type candidate = {
+  var : Var.t;
+  size : int;
+  align : int;
+  range : range;
+  mutable adj : Var.Set.t;
+  mutable assigned : bool;
+}
+
+let create_candidate slots rs v =
+  let slot = Map.find_exn slots v in {
+    var = v;
+    size = Slot.size slot;
+    align = Slot.align slot;
+    range = Map.find_exn rs v;
+    adj = Var.Set.empty;
+    assigned = false;
+  }
+
+let compat_size_align x y =
+  (* The smaller slot must not have a higher alignment. *)
+  not ((x.size < y.size && x.align > y.align) ||
+       (y.size < x.size && y.align > x.align))
+
+(* Find compatible slots. Most importantly, their live ranges must
+   not interfere. *)
+let compat_range x y =
+  compat_size_align x y &&
+  let a : Allen.t = Range.Algebra.relate x.range y.range in
+  Logs.debug (fun m ->
+      m "%s: %a, %a: %a%!" __FUNCTION__
+        Var.pp x.var Var.pp y.var Allen.pp a);
+  match a with
+  | Before | After -> true
+  | _ -> false
+
+let range_priority x y =
+  (* Prefer shorter live ranges. *)
+  let c = Int.compare (Range.size x.range) (Range.size y.range) in
+  if c = 0 then
+    (* Break ties by comparing on the var. This
+       is to give a bit more determinism to the
+       algorithm. *)
+    Var.compare x.var y.var
+  else c
+
+let size_priority x y =
+  (* Assuming that the sizes and alignments are compatible,
+     just pick the biggest one. *)
+  match Int.compare x.size y.size with
+  | 0 -> Int.compare x.align y.align
+  | c -> c
+
+let candidates slots rs =
+  let vs = Vec.create ~capacity:(Map.length rs) () in
+  Map.to_sequence rs |>
+  (* Do not consider escapees. This would mess up
+     our heuristics for building the groups. *)
+  Seq.filter ~f:(not @. Range.is_bad @. snd) |>
+  Seq.map ~f:(create_candidate slots rs @. fst) |>
+  Seq.iter ~f:(Vec.push vs);
+  vs
+
+let is_subset g y = List.for_all g ~f:(fun x -> Set.mem y.adj x.var)
+
+(* Greedy partitioning algorithm. *)
+let partition slots rs =
+  let vs = candidates slots rs in
+  match Vec.length vs with
+  | 0 -> []
+  | 1 -> [[Vec.front_exn vs]]
+  | len ->
+    assert (len > 1);
+    let gs = ref [] in
+    (* Compute the adjacency sets. *)
+    for i = 0 to len - 1 do
+      let x = Vec.unsafe_get vs i in
+      for j = 0 to len - 1 do
+        if i <> j then
+          let y = Vec.unsafe_get vs j in
+          if compat_range x y then
+            x.adj <- Set.add x.adj y.var
+      done
+    done;
+    (* Use an ascending order. *)
+    Vec.sort vs ~compare:(fun x y -> range_priority y x);
+    while not @@ Vec.is_empty vs do
+      let x = Vec.pop_exn vs in
+      if not x.assigned then
+        let () = x.assigned <- true in
+        Logs.debug (fun m ->
+            m "%s: processing %a%!"
+              __FUNCTION__ Var.pp x.var);
+        let g = ref [x] in
+        for i = Vec.length vs - 1 downto 0 do
+          let y = Vec.unsafe_get vs i in
+          if not y.assigned && is_subset !g y then
+            let () = y.assigned <- true in
+            g := y :: !g
+        done;
+        gs := !g :: !gs
+    done;
+    !gs
+
+(* invariant: a group is never empty *)
+let canon_elt g = List.max_elt g ~compare:size_priority |> Option.value_exn
+
+let make_subst _slots p =
+  List.fold p ~init:Var.Map.empty
+    ~f:(fun init -> function
+        | [] | [_] -> init
+        | g ->
+          let canon = canon_elt g in
+          let data = `var canon.var in
+          List.fold g ~init ~f:(fun acc x ->
+              if Var.(x.var = canon.var) then acc
+              else Map.set acc ~key:x.var ~data))
+
+type t = {
+  subst : Subst_mapper.t; (* Map from coalesced to canonical slots *)
+  deads : Lset.t;         (* Stores to dead slots. *)
+}
+
+let empty = {
+  subst = Var.Map.empty;
+  deads = Lset.empty;
+}
+
+let is_empty t =
+  Map.is_empty t.subst && Lset.is_empty t.deads
+
+module Make(M : Scalars.L) = struct
+  open M
+
+  module Sinit = Slot_initialization.Make(M)
+
+  let mkdef s x n = Map.update s x ~f:(function
+      | None -> Range.singleton n
+      | Some r -> Range.def r n)
+
+  let mkuse s x n = Map.change s x ~f:(function
+      | Some r -> Some (Range.use r n)
+      | None -> None)
+
+  let update (si : Slot_initialization.t) acc s x n l ldst =
+    match Map.find s x with
+    | None -> acc
+    | Some Top -> Map.set acc ~key:x ~data:Range.bad
+    | Some Offset (base, _) -> match ldst with
+      | Some Store -> mkdef acc base n
+      | Some Load when Hash_set.mem si.bad l ->
+        (* Uninitialized load is UB: forbid this slot as
+           a candidate for coalescing. *)
+        Logs.debug (fun m ->
+            m "%s: uninitialized load at %a from %a: marking bad%!"
+              __FUNCTION__ Label.pp l Var.pp base);
+        Map.set acc ~key:base ~data:Range.bad
+      | Some Load | None -> mkuse acc base n
+
+  let liveness_insn si acc s ip i =
+    let l = Insn.label i and op = Insn.op i in
+    let r = Insn.free_vars op in
+    let r, w, ldst = match Insn.load_or_store_to op with
+      | None -> r, None, None
+      | Some (ptr, _, ldst) ->
+        Set.remove r ptr, Some ptr, Some ldst in
+    Option.fold w ~init:acc ~f:(fun acc x ->
+        update si acc s x ip l ldst) |> fun init ->
+    Set.fold r ~init ~f:(fun acc x ->
+        update si acc s x ip l None)
+
+  let liveness_ctrl si acc s ip l c =
+    Ctrl.free_vars c |> Set.fold ~init:acc
+      ~f:(fun acc x -> update si acc s x ip l None)
+
+  let liveness cfg blks slots t si =
+    let ip = ref 0 in
+    let nums = Vec.create () in
+    let init =
+      Hash_set.fold t.esc ~init:Var.Map.empty
+        ~f:(fun acc x -> Map.set acc ~key:x ~data:Range.bad) in
+    let acc =
+      Graphlib.reverse_postorder_traverse
+        (module Cfg) ~start:Label.pseudoentry cfg |>
+      Seq.filter_map ~f:(Ltree.find blks) |>
+      Seq.fold ~init ~f:(fun acc b ->
+          let l = Blk.label b in
+          let s = ref @@ get t l in
+          let acc = Blk.insns b |> Seq.fold ~init:acc ~f:(fun acc i ->
+              let op = Insn.op i in
+              let acc = liveness_insn si acc !s !ip i in
+              Vec.push nums (Insn.label i);
+              s := Sinit.S.transfer_op slots !s op;
+              incr ip;
+              acc) in
+          let acc = liveness_ctrl si acc !s !ip l @@ Blk.ctrl b in
+          Vec.push nums l;
+          incr ip;
+          acc) in
+    acc, nums
+
+  let collect_deads blks slots rs t =
+    Ltree.fold blks ~init:Lset.empty
+      ~f:(fun ~key ~data:b init ->
+          let s = ref @@ get t key in
+          Blk.insns b |> Seq.fold ~init ~f:(fun acc i ->
+              let op = Insn.op i in
+              let acc = match Insn.load_or_store_to op with
+                | Some (ptr, _, Store) ->
+                  begin match Map.find !s ptr with
+                    | Some Offset (base, _) ->
+                      begin match Map.find rs base with
+                        | Some {tg = Def; _} ->
+                          (* This slot is only ever stored to, so we can
+                             safely remove it. *)
+                          Lset.add acc @@ Insn.label i
+                        | _ -> acc
+                      end
+                    | _ -> acc
+                  end
+                | _ -> acc in
+              s := Sinit.S.transfer_op slots !s op;
+              acc))
+
+  let debug_show slots rs nums deads p subst =
+    Logs.debug (fun m ->
+        Map.iter_keys slots ~f:(fun x ->
+            let ppr ppf x = match Map.find rs x with
+              | None -> Format.fprintf ppf "none"
+              | Some r when Range.is_bad r ->
+                Format.fprintf ppf "bad"
+              | Some r ->
+                Format.fprintf ppf "%a (%a to %a)"
+                  Range.pp r
+                  Label.pp (Vec.get_exn nums r.lo)
+                  Label.pp (Vec.get_exn nums r.hi) in
+            m "%s: %a: %a%!" __FUNCTION__ Var.pp x ppr x));
+    Logs.debug (fun m ->
+        List.iter p ~f:(fun g ->
+            m "%s: group: %s%!" __FUNCTION__
+              (List.to_string g ~f:(fun x ->
+                   Var.to_string x.var))));
+    Logs.debug (fun m ->
+        if not @@ Lset.is_empty deads then
+          m "%s: dead stores: %a%!"
+            __FUNCTION__
+            (Format.pp_print_list
+               ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ")
+               Label.pp)
+            (Lset.to_list deads));
+    Logs.debug (fun m ->
+        Map.iteri subst ~f:(fun ~key ~data ->
+            m "%s: coalesce slot: %a => %a%!"
+              __FUNCTION__ Var.pp key Virtual.pp_operand data))
+
+  let run fn =
+    let slots = Sinit.S.collect_slots fn in
+    if Map.is_empty slots then empty else
+      let cfg = Cfg.create fn in
+      let blks = Func.map_of_blks fn in
+      let t = Sinit.S.analyze cfg blks slots in
+      let si = Sinit.analyze' t cfg blks slots in
+      let rs, nums = liveness cfg blks slots t si in
+      let p = partition slots rs in
+      let deads = collect_deads blks slots rs t in
+      let subst = make_subst slots p in
+      debug_show slots rs nums deads p subst;
+      {subst; deads}
+end
diff --git a/src/lib/passes/egraph_opt/egraph_opt.ml b/src/lib/passes/egraph_opt/egraph_opt.ml
index 36361e23..39b2f842 100644
--- a/src/lib/passes/egraph_opt/egraph_opt.ml
+++ b/src/lib/passes/egraph_opt/egraph_opt.ml
@@ -3,5 +3,5 @@
 
 module Rules = Egraph_opt_rules
 
-let run tenv fn = Egraph.run fn tenv Rules.all
-let run_no_rules tenv fn = Egraph.run fn tenv Rules.none
+let run tenv fn = Egraph.run fn tenv @@ Rules.all ()
+let run_no_rules tenv fn = Egraph.run fn tenv @@ Rules.none ()
diff --git a/src/lib/passes/egraph_opt/egraph_opt_rules.ml b/src/lib/passes/egraph_opt/egraph_opt_rules.ml
index 24d8d8da..d737a530 100644
--- a/src/lib/passes/egraph_opt/egraph_opt_rules.ml
+++ b/src/lib/passes/egraph_opt/egraph_opt_rules.ml
@@ -2041,5 +2041,14 @@ module Groups = struct
     ]
 end
 
-let all = Egraph.compile Groups.all
-let none = Egraph.compile []
+let thunked name g =
+  let r = ref None in
+  fun () -> match !r with
+    | Some p -> p
+    | None ->
+      let p = Egraph.compile ~name g in
+      r := Some p;
+      p
+
+let all = thunked "egraph-all" Groups.all
+let none = thunked "egraph-none" []
diff --git a/src/lib/passes/passes.ml b/src/lib/passes/passes.ml
index 3ff18ea9..c9d0103f 100644
--- a/src/lib/passes/passes.ml
+++ b/src/lib/passes/passes.ml
@@ -3,6 +3,7 @@ open Virtual
 open Context.Syntax
 
 module Abi_loadopt = Abi_loadopt
+module Coalesce_slots = Coalesce_slots
 module Egraph_opt = Egraph_opt
 module Lower_abi = Lower_abi
 module Promote_slots = Promote_slots
@@ -11,6 +12,7 @@ module Remove_disjoint_blks = Remove_disjoint_blks
 module Resolve_constant_blk_args = Resolve_constant_blk_args
 module Sccp = Sccp
 module Simplify_cfg = Simplify_cfg
+module Sroa = Sroa
 module Ssa = Ssa
 
 let initialize m =
@@ -21,11 +23,14 @@ let initialize m =
   !!(tenv, m)
 
 let retype tenv m =
-  Module.funs m |>
-  Seq.to_list |> Typecheck.update_fns tenv
+  Module.funs m |> Seq.to_list |> Typecheck.update_fns tenv
 
 let optimize tenv m =
   let module Cv = Context.Virtual in
+  let*? m = Module.map_funs_err m ~f:Coalesce_slots.run in
+  let*? m = Module.map_funs_err m ~f:Resolve_constant_blk_args.run in
+  let*? m = Module.map_funs_err m ~f:Remove_dead_vars.run in
+  let* m = Context.Virtual.Module.map_funs m ~f:Sroa.run in
   let*? m = Module.map_funs_err m ~f:Promote_slots.run in
   let*? tenv = retype tenv m in
   let*? m = Module.map_funs_err m ~f:(Sccp.run tenv) in
@@ -57,10 +62,18 @@ let to_abi tenv m =
     ~data:(Seq.to_list @@ Module.data m)
 
 let optimize_abi m =
+  let*? m = Abi.Module.map_funs_err m ~f:Coalesce_slots.run_abi in
+  let*? m = Abi.Module.map_funs_err m ~f:Resolve_constant_blk_args.run_abi in
+  let*? m = Abi.Module.map_funs_err m ~f:Remove_dead_vars.run_abi in
+  let* m = Context.Virtual.Module.map_funs_abi m ~f:Sroa.run_abi in
   let*? m = Abi.Module.map_funs_err m ~f:Promote_slots.run_abi in
   let*? m = Abi.Module.map_funs_err m ~f:Abi_loadopt.run in
   let m = Abi.Module.map_funs m ~f:Remove_disjoint_blks.run_abi in
   let*? m = Abi.Module.map_funs_err m ~f:Remove_dead_vars.run_abi in
+  let*? m = Abi.Module.map_funs_err m ~f:Coalesce_slots.run_abi in
+  let*? m = Abi.Module.map_funs_err m ~f:Resolve_constant_blk_args.run_abi in
+  let*? m = Abi.Module.map_funs_err m ~f:Abi_loadopt.run in
+  let*? m = Abi.Module.map_funs_err m ~f:Remove_dead_vars.run_abi in
   let* () = Context.iter_seq_err (Abi.Module.funs m) ~f:Ssa.check_abi in
   !!m
 
diff --git a/src/lib/passes/passes.mli b/src/lib/passes/passes.mli
index be95b1f8..fdc57fba 100644
--- a/src/lib/passes/passes.mli
+++ b/src/lib/passes/passes.mli
@@ -64,6 +64,16 @@ module Abi_loadopt : sig
   val run : Abi.func -> Abi.func Or_error.t
 end
 
+(** Attempts to coalesce slots of compatible size and alignment which
+    do not interfere (i.e. are never live at the same time).
+
+    Assumes the function is in SSA form.
+*)
+module Coalesce_slots : sig
+  val run : func -> func Or_error.t
+  val run_abi : Abi.func -> Abi.func Or_error.t
+end
+
 (** Uses the [Egraph] module to perform a variety of optimizations
     to a function.
 
@@ -121,6 +131,7 @@ end
 *)
 module Resolve_constant_blk_args : sig
   val run : func -> func Or_error.t
+  val run_abi : Abi.func -> Abi.func Or_error.t
 end
 
 (** Performs the classic Sparse Conditional Constant
@@ -141,6 +152,25 @@ module Simplify_cfg : sig
   val run : Typecheck.env -> func -> func Context.t
 end
 
+(** Performs Scalar Replacement of Aggregates (SROA).
+
+    This aims to analyze access patterns of stack slots
+    and break them up into individual (scalar) components.
+
+    An important detail to note is that in the non-ABI pass,
+    slots being referenced in terms of user-defined compound
+    (aggregate) types are treated as opaque, and therefore
+    not optimized, as these are often ABI-dependent.
+
+    When ABI lowering is performed, these references should
+    be desugared into access patterns on primitive types,
+    at which point the algorithm can attempt to process them.
+*)
+module Sroa : sig
+  val run : func -> func Context.t
+  val run_abi : Abi.func -> Abi.func Context.t
+end
+
 (** Transforms a function into semi-pruned SSA form. *)
 module Ssa : sig
   val run : func -> func Or_error.t
diff --git a/src/lib/passes/promote_slots/promote_slots.ml b/src/lib/passes/promote_slots/promote_slots.ml
index 1b0a5d14..4f89e36f 100644
--- a/src/lib/passes/promote_slots/promote_slots.ml
+++ b/src/lib/passes/promote_slots/promote_slots.ml
@@ -1,3 +1,4 @@
+open Core
 open Virtual
 open Promote_slots_impl
 
@@ -47,18 +48,31 @@ module A = Make(struct
 
 open E.Syntax
 
+let apply
+    (type fn)
+    (module M : Scalars.L with type Func.t = fn)
+    run ssa (fn : fn) =
+  let module Sinit = Slot_initialization.Make(M) in
+  let slots = Sinit.S.collect_slots fn in
+  let cfg = M.Cfg.create fn in
+  let blks = M.Func.map_of_blks fn in
+  let s = Sinit.analyze cfg blks slots in
+  let undef l = Hash_set.mem s.bad l in
+  run fn ~undef >>= ssa
+
 let run fn =
   if Dict.mem (Func.dict fn) Tags.ssa then
-    V.run fn >>= Ssa.run
+    apply (module Scalars_common.VL) V.run Ssa.run fn
   else
     E.failf
       "In Promote_slots: expected SSA form for function $%s"
       (Func.name fn) ()
 
 let run_abi fn =
-  if Dict.mem (Abi.Func.dict fn) Tags.ssa then
-    A.run fn >>= Ssa.run_abi
+  let open Abi in
+  if Dict.mem (Func.dict fn) Tags.ssa then
+    apply (module Scalars_common.AL) A.run Ssa.run_abi fn
   else
     E.failf
       "In Promote_slots (ABI): expected SSA form for function $%s"
-      (Abi.Func.name fn) ()
+      (Func.name fn) ()
diff --git a/src/lib/passes/promote_slots/promote_slots_impl.ml b/src/lib/passes/promote_slots/promote_slots_impl.ml
index 880c9dcd..28e7f488 100644
--- a/src/lib/passes/promote_slots/promote_slots_impl.ml
+++ b/src/lib/passes/promote_slots/promote_slots_impl.ml
@@ -101,24 +101,37 @@ module Make(M : L) = struct
           | Write (_, t') when not @@ Type.equal_basic t t' -> Bad
           | Read _ | Write _ -> Write (s, t)
 
-    let go env s =
+    let go env s ~undef =
       let x = Slot.var s in
       Resolver.uses env.reso x |>
       List.fold_until ~init:Bad ~finish:Fn.id
         ~f:(fun acc -> function
             |`blk _ -> Stop Bad
-            | `insn (i, _, _, _) -> match infer acc x i with
-              | (Read _ | Write _) as acc -> Continue acc
-              | Bad -> Stop Bad)
+            | `insn (i, _, _, _) ->
+              if undef @@ Insn.label i then Stop Bad
+              else match infer acc x i with
+                | (Read _ | Write _) as acc -> Continue acc
+                | Bad -> Stop Bad)
   end
 
-  let collect env =
+  let collect env ~undef =
     Func.slots env.fn |>
     Seq.fold ~init:Var.Map.empty ~f:(fun acc s ->
-        match Qualify.go env s with
-        | Bad -> acc
-        | Write (_, t) -> Map.set acc ~key:(Slot.var s) ~data:t
+        match Qualify.go env s ~undef with
+        | Bad ->
+          Logs.debug (fun m ->
+              m "%s: cannot promote %a%!"
+                __FUNCTION__ Var.pp (Slot.var s));
+          acc
+        | Write (_, t) ->
+          Logs.debug (fun m ->
+              m "%s: promoting %a%!"
+                __FUNCTION__ Var.pp (Slot.var s));
+          Map.set acc ~key:(Slot.var s) ~data:t
         | Read _ ->
+          Logs.debug (fun m ->
+              m "%s: slot %a is read, but never written to%!"
+                __FUNCTION__ Var.pp (Slot.var s));
           (* In this case, we read from the slot but never stored anything
              to it. It's undefined behavior, but it's also what the programmer
              intended, so we should cancel this promotion. *)
@@ -151,9 +164,9 @@ module Make(M : L) = struct
               | Some (y, t') -> replace_load env x l y t t'
               | None -> assert false))
 
-  let run fn =
+  let run fn ~undef =
     let+ env = init fn in
-    let xs = collect env in
+    let xs = collect env ~undef in
     if not @@ Map.is_empty xs then
       let fn = remove_slots fn xs in
       replace env xs;
diff --git a/src/lib/passes/remove_dead_vars/remove_dead_vars_impl.ml b/src/lib/passes/remove_dead_vars/remove_dead_vars_impl.ml
index 9b30ccf6..9522becf 100644
--- a/src/lib/passes/remove_dead_vars/remove_dead_vars_impl.ml
+++ b/src/lib/passes/remove_dead_vars/remove_dead_vars_impl.ml
@@ -16,6 +16,7 @@ module type S = sig
 
   module Insn : sig
     type t
+    val label : t -> Label.t
     val check_div_rem : t -> bool
     val is_effectful : t -> bool
     val lhs : t -> Var.t option
@@ -96,15 +97,23 @@ module Make(M : S) = struct
      function is in SSA form then keeping in them in the alive set
      shouldn't affect the results. *)
   let insn (acc, changed, alive) i = match Insn.lhs i with
-    | Some x when not @@ keep i x alive -> acc, true, alive
+    | Some x when not @@ keep i x alive ->
+      Logs.debug (fun m ->
+          m "%s: %a: %a is dead%!" __FUNCTION__
+            Label.pp (Insn.label i) Var.pp x);
+      acc, true, alive
     | Some x -> i :: acc, changed, alive -- x ++ Insn.free_vars i
     | None -> i :: acc, changed, alive ++ Insn.free_vars i
 
+  let remove_slot fn x =
+    Logs.debug (fun m -> m "%s: slot %a is dead%!" __FUNCTION__ Var.pp x);
+    Func.remove_slot fn x
+
   let finalize fn blks live =
     let ins = Live.ins live @@ Func.entry fn in
     Func.slots fn |> Seq.map ~f:Slot.var |>
     Seq.filter ~f:(Fn.non @@ Set.mem ins) |>
-    Seq.fold ~init:fn ~f:Func.remove_slot |>
+    Seq.fold ~init:fn ~f:remove_slot |>
     Fn.flip Func.update_blks' blks
 
   let rec run fn blks cfg =
diff --git a/src/lib/passes/resolve_constant_blk_args.ml b/src/lib/passes/resolve_constant_blk_args.ml
index 2c72dcac..69b062e6 100644
--- a/src/lib/passes/resolve_constant_blk_args.ml
+++ b/src/lib/passes/resolve_constant_blk_args.ml
@@ -1,34 +1,78 @@
 open Core
 open Virtual
+open Phi_values
 
-module Phis = Phi_values.Make(struct
-    (* `None` indicates that there may be many values for the phi,
-       while `Some v` indicates that `v` is its singular value.
+module D = struct
+  (* `None` indicates that there may be many values for the phi,
+     while `Some v` indicates that `v` is its singular value.
 
-       We choose `option` because filtering the substitution produced
-       by the analysis can use the identity function via `Map.filter_map`,
-       thus avoiding extra allocations.
-    *)
-    type t = operand option [@@deriving equal]
+     We choose `option` because filtering the substitution produced
+     by the analysis can use the identity function via `Map.filter_map`,
+     thus avoiding extra allocations.
+  *)
+  type t = operand option [@@deriving equal]
 
-    let one v = Some v
-    let join x y = if equal x y then x else None
-  end)
+  let one v = Some v
+  let join x y = if equal x y then x else None
+end
 
-let run fn =
-  if Dict.mem (Func.dict fn) Tags.ssa then
-    let cfg = Cfg.create fn in
-    let blks = Func.map_of_blks fn in
-    let s =
-      Map.filter_map ~f:Fn.id @@
-      Phis.analyze ~blk:(Label.Tree.find blks) cfg in
-    let fn =
-      if not @@ Map.is_empty s then Func.map_blks fn ~f:(fun b ->
-          let is, k = Subst_mapper.map_blk s b in
-          Blk.(with_ctrl (with_insns b is) k))
-      else fn in
-    Ok fn
-  else
+module V = Make(struct
+    module Ctrl = struct
+      type t = ctrl
+      let locals = (locals Ctrl.Table.enum :> t -> _)
+    end
+    module Blk = Blk
+    module Func = Func
+    module Cfg = Cfg
+  end)(D)
+
+module A = Make(struct
+    open Abi
+    module Ctrl = struct
+      type t = ctrl
+      let locals = (locals Ctrl.Table.enum :> t -> _)
+    end
+    module Blk = Blk
+    module Func = Func
+    module Cfg = Cfg
+  end)(D)
+
+let check_ssa msg n d fn f =
+  if Dict.mem (d fn) Tags.ssa then Ok (f ()) else
     Or_error.errorf
-      "In Resolve_constant_blk_args: function $%s is \
-       not in SSA form" (Func.name fn)
+      "In Resolve_constant_blk_args%s: function $%s is \
+       not in SSA form" msg (n fn)
+
+let apply fn cfg mb analyze map_blks map_blk with_ctrl with_insns =
+  let cfg = cfg fn in
+  let blks = mb fn in
+  let s = Map.filter_map ~f:Fn.id @@
+    analyze ~blk:(Label.Tree.find blks) cfg in
+  if not @@ Map.is_empty s then map_blks fn ~f:(fun b ->
+      let is, k = map_blk s b in
+      with_ctrl (with_insns b is) k)
+  else fn
+[@@specialise]
+
+let run fn =
+  check_ssa "" Func.name Func.dict fn @@ fun () ->
+  apply fn
+    Cfg.create
+    Func.map_of_blks
+    V.analyze
+    Func.map_blks
+    Subst_mapper.map_blk
+    Blk.with_ctrl
+    Blk.with_insns
+
+let run_abi fn =
+  let open Abi in
+  check_ssa " (ABI)" Func.name Func.dict fn @@ fun () ->
+  apply fn
+    Cfg.create
+    Func.map_of_blks
+    A.analyze
+    Func.map_blks
+    Subst_mapper_abi.map_blk
+    Blk.with_ctrl
+    Blk.with_insns
diff --git a/src/lib/passes/scalars_common.ml b/src/lib/passes/scalars_common.ml
new file mode 100644
index 00000000..10c273d9
--- /dev/null
+++ b/src/lib/passes/scalars_common.ml
@@ -0,0 +1,132 @@
+open Core
+open Virtual
+open Scalars
+
+let var_set_of_option = function
+  | Some x -> Var.Set.singleton x
+  | None -> Var.Set.empty
+
+(* Instructions that can produce a scalar `Offset` value. *)
+let offset = function
+  | `bop (_, `add _, `var x, `int (i, _))
+  | `bop (_, `add _, `int (i, _), `var x) ->
+    Some (x, Bv.to_int64 i)
+  | `bop (_, `sub _, `var x, `int (i, _)) ->
+    Some (x, Int64.neg @@ Bv.to_int64 i)
+  | _ -> None
+
+(* Instructions that behave like `copy` instead of causing
+   a variable to "escape".
+
+   TODO: more cases?
+*)
+let copy_of = function
+  | `uop (_, `copy _, `var x) -> Some x
+  | `bop (_, `add _, `var x, `int (i, _))
+  | `bop (_, `add _, `int (i, _), `var x)
+  | `bop (_, `sub _, `var x, `int (i, _))
+    when Bv.(i = zero) -> Some x
+  | _ -> None
+
+let escapes mty fv = function
+  | `store (ty, `var x, `var y) when mty ty -> Var.Set.of_list [x; y]
+  | `store (ty, _, `var y) when mty ty -> Var.Set.of_list [y]
+  | `store (_, `var x, _) -> Var.Set.singleton x
+  | `store _ -> Var.Set.empty
+  | `load (_, ty, `var x) when mty ty -> Var.Set.singleton x
+  | `load _ -> Var.Set.empty
+  | o when Option.is_some (offset o) -> Var.Set.empty
+  | o when Option.is_some (copy_of o) -> Var.Set.empty
+  | o -> fv o
+[@@specialise]
+
+let load_or_store_to = function
+  | `load (_, (#Type.basic as b), `var x) -> Some (x, b, Load)
+  | `store ((#Type.basic as b), _, `var x) -> Some (x, b, Store)
+  | _ -> None
+
+let replace_load_or_store_addr a = function
+  | `load (x, (#Type.basic as b), _) -> `load (x, b, `var a)
+  | `store ((#Type.basic as b), v, _) -> `store (b, v, `var a)
+  | op -> op
+
+let add x ty b o =
+  assert Int64.(o > 0L);
+  let i = Bv.(int64 o mod modulus (Type.sizeof_imm_base ty)) in
+  `bop (x, `add (ty :> Type.basic), `var b, `int (i, (ty :> Type.imm)))
+
+let is_named = function
+  | `name _ -> true
+  | _ -> false
+
+let escapes_ctrl fv = function
+  | `hlt -> Var.Set.empty
+  | `jmp `var x -> Var.Set.singleton x
+  | `jmp _ -> Var.Set.empty
+  | `br (c, `var y, `var n) -> Var.Set.of_list [c; y; n]
+  | `br (c, _, `var n) -> Var.Set.of_list [c; n]
+  | `br (c, `var y, _) -> Var.Set.of_list [c; y]
+  | `br (c, _, _) -> Var.Set.singleton c
+  | `ret _ as c -> fv c
+  | `sw (_, `var i, _, _) -> Var.Set.singleton i
+  | `sw _ -> Var.Set.empty
+[@@specialise]
+
+(* Virtual language *)
+module VL = struct
+  module Insn = struct
+    type t = Insn.t
+    type op = Insn.op
+    let create ~label op = Insn.create op ~label
+    let with_op = Insn.with_op
+    let op = Insn.op
+    let label = Insn.label
+    let lhs = var_set_of_option @. Insn.lhs_of_op
+    let offset = (offset :> op -> _)
+    let copy_of = (copy_of :> op -> _)
+    let free_vars = Insn.free_vars_of_op
+    let escapes = (escapes is_named free_vars :> op -> _)
+    let load_or_store_to = (load_or_store_to :> op -> _)
+    let replace_load_or_store_addr a = (replace_load_or_store_addr a :> op -> _)
+    let add x ty b o = (add x ty b o :> op)
+  end
+  module Ctrl = struct
+    type t = ctrl
+    let free_vars = Ctrl.free_vars
+    let escapes = (escapes_ctrl free_vars :> t -> _)
+    let locals = (Phi_values.locals Ctrl.Table.enum :> t -> _)
+  end
+  module Blk = Blk
+  module Func = Func
+  module Cfg = Cfg
+end
+
+(* Virtual.Abi language *)
+module AL = struct
+  open Abi
+  module Insn = struct
+    type t = Insn.t
+    type op = Insn.op
+    let create ~label op = Insn.create op ~label
+    let with_op = Insn.with_op
+    let op = Insn.op
+    let label = Insn.label
+    let lhs = Insn.def_of_op
+    let offset = (offset :> op -> _)
+    let copy_of = (copy_of :> op -> _)
+    let free_vars = Insn.free_vars_of_op
+    let escapes = (escapes (const false) free_vars :> op -> _)
+    let load_or_store_to = (load_or_store_to :> op -> _)
+    let replace_load_or_store_addr a = (replace_load_or_store_addr a :> op -> _)
+    let add x ty b o = (add x ty b o :> op)
+  end
+  module Ctrl = struct
+    type t = ctrl
+    let free_vars = Ctrl.free_vars
+    let escapes = (escapes_ctrl free_vars :> t -> _)
+    let locals = (Phi_values.locals Ctrl.Table.enum :> t -> _)
+  end
+  module Blk = Blk
+  module Func = Func
+  module Cfg = Cfg
+end
diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_brsel.ml b/src/lib/passes/simplify_cfg/simplify_cfg_brsel.ml
index 2f3d9cb1..362f799d 100644
--- a/src/lib/passes/simplify_cfg/simplify_cfg_brsel.ml
+++ b/src/lib/passes/simplify_cfg/simplify_cfg_brsel.ml
@@ -58,8 +58,15 @@ let collect tenv env fn =
 let run tenv env fn =
   let xs = collect tenv env fn in
   let+ () = Context.List.iter xs ~f:(fun (b, c, l, args) ->
+      Logs.debug (fun m ->
+          m "%s: block %a, simplifying br on %a to %a%!"
+            __FUNCTION__ Label.pp (Blk.label b)
+            Var.pp c Label.pp l);
       let+ sels = Context.List.map args ~f:(fun (ty, y, n) ->
           let+ x, sel = Context.Virtual.sel ty c y n in
+          Logs.debug (fun m ->
+              m "%s: inserting %a%!" __FUNCTION__
+                Insn.pp_op (Insn.op sel));
           Hashtbl.set env.typs ~key:x ~data:(ty :> Type.t);
           `var x, sel) in
       let args, sels = List.unzip sels in
diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_contract.ml b/src/lib/passes/simplify_cfg/simplify_cfg_contract.ml
index 098bca2d..2ac42523 100644
--- a/src/lib/passes/simplify_cfg/simplify_cfg_contract.ml
+++ b/src/lib/passes/simplify_cfg/simplify_cfg_contract.ml
@@ -84,20 +84,29 @@ let find_loc env s : local -> local option = function
     | #global -> assert false
     | #local as l -> l
 
-let map_dst changed env s d = match find_dst env s d with
+let map_dst key changed env s d = match find_dst env s d with
   | Some x when equal_dst d x -> x
-  | Some x -> changed := true; x
+  | Some x ->
+    Logs.debug (fun m ->
+        m "%s: block %a: contracted %a to %a%!"
+          __FUNCTION__ Label.pp key pp_dst d pp_dst x);
+    changed := true; x
   | None -> d
 
-let map_loc changed env s l = match find_loc env s l with
+let map_loc key changed env s l = match find_loc env s l with
   | Some x when equal_local l x -> x
-  | Some x -> changed := true; x
+  | Some x ->
+    Logs.debug (fun m ->
+        m "%s: block %a: contracted %a to %a%!"
+          __FUNCTION__ Label.pp key pp_local l pp_local x);
+    changed := true; x
   | None -> l
 
 let contract_blks changed env (s : singles) =
   Hashtbl.map_inplace env.blks ~f:(fun b ->
-      let dst = map_dst changed env s in
-      let loc = map_loc changed env s in
+      let key = Blk.label b in
+      let dst = map_dst key changed env s in
+      let loc = map_loc key changed env s in
       Blk.map_ctrl b ~f:(function
           | (`hlt | `ret _) as x -> x
           | `jmp d -> `jmp (dst d)
diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_duplicate_br.ml b/src/lib/passes/simplify_cfg/simplify_cfg_duplicate_br.ml
index 5b549cc0..cc952caf 100644
--- a/src/lib/passes/simplify_cfg/simplify_cfg_duplicate_br.ml
+++ b/src/lib/passes/simplify_cfg/simplify_cfg_duplicate_br.ml
@@ -75,6 +75,11 @@ let collect env =
         | `br (c, y, n) ->
           Option.merge (true_br env c y n) (false_br env c y n) ~f:merge_br |>
           Option.value_map ~default:acc ~f:(fun br ->
+              Logs.debug (fun m ->
+                  m "%s: block %a, simplifying c=%a, y=%a, n=%a to %a%!"
+                    __FUNCTION__ Label.pp (Blk.label b)
+                    Var.pp c pp_dst y pp_dst n
+                    Ctrl.pp (br :> ctrl));
               Label.Tree.set acc ~key ~data:br)
         | _ -> acc)
 
diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_merge_blks.ml b/src/lib/passes/simplify_cfg/simplify_cfg_merge_blks.ml
index ce668b8a..45c0b47e 100644
--- a/src/lib/passes/simplify_cfg/simplify_cfg_merge_blks.ml
+++ b/src/lib/passes/simplify_cfg/simplify_cfg_merge_blks.ml
@@ -49,6 +49,9 @@ and merge subst env l l' b b' =
   env.cfg <- Cfg.Node.remove l' env.cfg;
   List.iter es ~f:(fun e ->
       env.cfg <- Cfg.Edge.insert e env.cfg);
+  Logs.debug (fun m ->
+      m "%s: merged block %a into %a%!"
+        __FUNCTION__ Label.pp l' Label.pp l);
   try_merge ~child:l' subst env l
 
 let run env =
diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_merge_rets.ml b/src/lib/passes/simplify_cfg/simplify_cfg_merge_rets.ml
index 117bd9bc..90b7828e 100644
--- a/src/lib/passes/simplify_cfg/simplify_cfg_merge_rets.ml
+++ b/src/lib/passes/simplify_cfg/simplify_cfg_merge_rets.ml
@@ -15,6 +15,10 @@ let map_blk tbl rb b = match Hashtbl.find tbl @@ Blk.label b with
 
 let commit env tbl rb fn =
   let key = Blk.label rb in
+  Logs.debug (fun m ->
+      m "%s: merged returns to new block %a: %s%!"
+        __FUNCTION__ Label.pp key
+        (Hashtbl.keys tbl |> List.to_string ~f:Label.to_string));
   Hashtbl.map_inplace env.blks ~f:(map_blk tbl rb);
   Hashtbl.set env.blks ~key ~data:rb;
   env.ret <- Some key;
diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_short_circ.ml b/src/lib/passes/simplify_cfg/simplify_cfg_short_circ.ml
index eead9500..34bcfd74 100644
--- a/src/lib/passes/simplify_cfg/simplify_cfg_short_circ.ml
+++ b/src/lib/passes/simplify_cfg/simplify_cfg_short_circ.ml
@@ -98,14 +98,23 @@ let redir changed env cphi =
       | `br (c, y, n) ->
         let y' = brcond env cphi c y ~f:(fun y _ -> changed := true; y) in
         let n' = brcond env cphi c n ~f:(fun _ n -> changed := true; n) in
-        if phys_equal y y' && phys_equal n n' then b
-        else Blk.with_ctrl b @@ `br (c, y', n')
-      | `jmp `label (l, args) ->
+        if phys_equal y y' && phys_equal n n' then b else
+          let () = Logs.debug (fun m ->
+              m "%s: block %a: simplified br c=%a, y=%a, n=%a: y'=%a, n'=%a%!"
+                __FUNCTION__ Label.pp (Blk.label b)
+                Var.pp c pp_dst y pp_dst n
+                pp_dst y' pp_dst n') in
+          Blk.with_ctrl b @@ `br (c, y', n')
+      | `jmp (`label (l, args) as loc) ->
         Option.value ~default:b begin
           let* y, n, i, ne = Label.Tree.find cphi l in
           let* x = List.nth args i >>= var_of_operand in
           let+ c = if ne then Hashtbl.find env.flag x else !!x in
           changed := true;
+          Logs.debug (fun m ->
+              m "%s: block %a: simplified jmp %a: c=%a, y=%a, n=%a%!"
+                __FUNCTION__ Label.pp (Blk.label b)
+                pp_local loc Var.pp c pp_dst y pp_dst n);
           Blk.with_ctrl b @@ `br (c, y, n)
         end
       | _ -> b)
diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_tailrec.ml b/src/lib/passes/simplify_cfg/simplify_cfg_tailrec.ml
index 796c529f..6ff05548 100644
--- a/src/lib/passes/simplify_cfg/simplify_cfg_tailrec.ml
+++ b/src/lib/passes/simplify_cfg/simplify_cfg_tailrec.ml
@@ -75,4 +75,17 @@ let go env fn =
   let* h = new_entry fn e in
   let*? fn = Func.with_blks fn (h :: blks) in
   env.start <- Blk.label h;
+  Logs.debug (fun m ->
+      let pp_call ppf (l, (args, l')) =
+        Format.fprintf ppf "  %a:\n    args: %s\n    l': %a%!"
+          Label.pp l
+          (List.to_string args ~f:(fun a ->
+               Format.asprintf "%a" pp_operand a))
+          Label.pp l' in
+      m "%s: transformed $%s to tailrec, h=%a:\n%a%!"
+        __FUNCTION__ (Func.name fn) Label.pp (Blk.label h)
+        (Format.pp_print_list
+           ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n")
+           pp_call)
+        (Label.Tree.to_list calls));
   !!fn
diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_two_case_switch.ml b/src/lib/passes/simplify_cfg/simplify_cfg_two_case_switch.ml
index 2d8ef7f7..77625df6 100644
--- a/src/lib/passes/simplify_cfg/simplify_cfg_two_case_switch.ml
+++ b/src/lib/passes/simplify_cfg/simplify_cfg_two_case_switch.ml
@@ -17,6 +17,11 @@ let go fn =
             (i :> operand)
             (`int (v, t)) in
         let b = Blk.append_insn b cmp in
+        Logs.debug (fun m ->
+            m "%s: switch at %a collapsed: i=%a, c=%a, k=%a, d=%a%!"
+              __FUNCTION__ Label.pp (Blk.label b)
+              pp_operand (i :> operand)
+              Var.pp c pp_dst (k :> dst) pp_dst (d :> dst));
         Some (Blk.with_ctrl b @@ `br (c, (k :> dst), (d :> dst)))
       | _ -> !!None) >>| Seq.to_list in
   Func.update_blks_exn fn bs
diff --git a/src/lib/passes/sroa/sroa.ml b/src/lib/passes/sroa/sroa.ml
new file mode 100644
index 00000000..44f9705b
--- /dev/null
+++ b/src/lib/passes/sroa/sroa.ml
@@ -0,0 +1,21 @@
+open Core
+open Virtual
+open Sroa_impl
+open Scalars_common
+
+module V = Make(VL)
+module A = Make(AL)
+
+open Context.Syntax
+
+let run fn =
+  let* () = Context.unless (Dict.mem (Func.dict fn) Tags.ssa) @@ fun () ->
+    Context.failf "In SROA: function $%s is not in SSA form"
+      (Func.name fn) () in
+  V.run fn
+
+let run_abi fn =
+  let* () = Context.unless (Dict.mem (Abi.Func.dict fn) Tags.ssa) @@ fun () ->
+    Context.failf "In SROA (ABI): function $%s is not in SSA form"
+      (Abi.Func.name fn) () in
+  A.run fn
diff --git a/src/lib/passes/sroa/sroa.mli b/src/lib/passes/sroa/sroa.mli
new file mode 100644
index 00000000..0f02dd46
--- /dev/null
+++ b/src/lib/passes/sroa/sroa.mli
@@ -0,0 +1,19 @@
+(** Performs SROA (Scalar Replacement of Aggregates).
+
+    This pass aims to analyze the usage of slots in a function, where
+    individual fields of the slot can be separated into their own
+    unique slots themselves.
+
+    This is typical when a slot is allocated to hold a structure and
+    pointer arithmetic is used to store to and load from individual
+    fields of this structure. If these fields can become their own slots,
+    then they can possibly promoted to SSA variables, enabling more
+    optimization opportunities.
+
+    The function must be in SSA form for this pass.
+*)
+
+open Virtual
+
+val run : func -> func Context.t
+val run_abi : Abi.func -> Abi.func Context.t
diff --git a/src/lib/passes/sroa/sroa_impl.ml b/src/lib/passes/sroa/sroa_impl.ml
new file mode 100644
index 00000000..45820419
--- /dev/null
+++ b/src/lib/passes/sroa/sroa_impl.ml
@@ -0,0 +1,341 @@
+(* A conservative implementation of SROA (Scalar Replacement of Aggregates).
+
+   Most notably, we don't consider compound/aggregate types at all. Why?
+   Because they are second-class citizens in our IR, and they are highly
+   dependent on the implementation details of the target ABI.
+
+   After ABI lowering, they are desugared, after which this pass becomes
+   more useful.
+*)
+
+open Core
+open Regular.Std
+open Scalars
+
+module Slot = Virtual.Slot
+module Allen = Allen_interval_algebra
+
+type range = {
+  lo : int64;
+  hi : int64;
+}
+
+module Range = struct
+  type t = range
+  let lo r = r.lo [@@inline]
+  let hi r = r.hi [@@inline]
+  let size r = Int64.(r.hi - r.lo) [@@inline]
+  let coverage a b = Int64.(b.hi - a.lo) [@@inline]
+  let pp ppf r = Format.fprintf ppf "[0x%Lx, 0x%Lx)" r.lo r.hi
+end
+
+module Algebra = Allen.Make(struct
+    include Range
+    type point = int64
+    include Int64.Replace_polymorphic_compare
+  end)
+
+let basic_size ty = Type.sizeof_basic ty / 8 [@@inline]
+
+(* A partition of elements at a particular offset+size range. *)
+type 'a partition = {
+  off  : int64;
+  size : int64;
+  mems : 'a list;
+}
+
+type 'a partitions = 'a partition list Var.Map.t
+
+module Partition = struct
+  type 'a t = 'a partition
+
+  let cmp a b = Int64.compare a.off b.off
+
+  (* Check if a partition covers the entire slot `s`. *)
+  let is_entire_slot s p = match p.off with
+    | 0L -> Int64.(of_int (Slot.size s) = p.size)
+    | _ -> false
+  [@@inline]
+
+  let range p = {
+    lo = p.off;
+    hi = Int64.(p.off + p.size);
+  } [@@inline]
+
+  let pp ppa ppf p =
+    Format.fprintf ppf "0x%Lx:%Ld: %a"
+      p.off p.size
+      (Format.pp_print_list
+         ~pp_sep:(fun ppf () -> Format.fprintf ppf " ")
+         ppa) p.mems
+
+  let add_member x p = {p with mems = x :: p.mems} [@@inline]
+  let change off size x p = {off; size; mems = x :: p.mems} [@@inline]
+  let singleton off size x = {off; size; mems = [x]} [@@inline]
+end
+
+module Make(M : Scalars.L) : sig
+  val run : M.Func.t -> M.Func.t Context.t
+end = struct
+  open M
+
+  module Analysis = Scalars.Make(M)
+
+  (* A memory access for a slot. *)
+  type access = {
+    insn : Insn.t;
+    off  : int64;
+    ty   : Type.basic;
+    ldst : load_or_store;
+  }
+
+  type accesses = access list Var.Map.t
+
+  module Access = struct
+    type t = access [@@ocaml.warning "-34"]
+
+    let sizeof a = basic_size a.ty [@@inline]
+
+    let cmp a b =
+      match Int64.compare a.off b.off with
+      | 0 -> Int.compare (sizeof a) (sizeof b)
+      | c -> c
+
+    let range a = {
+      lo = a.off;
+      hi = Int64.(a.off + of_int (sizeof a));
+    } [@@inline]
+
+    let pp ppf a =
+      let neg = Int64.is_negative a.off in
+      let pre, off = if neg then '-', Int64.neg a.off else '+', a.off in
+      Format.fprintf ppf "%a[%a.%a,%c0x%Lx]"
+        Label.pp (Insn.label a.insn)
+        pp_load_or_store a.ldst
+        Type.pp_basic a.ty
+        pre off
+  end
+
+  let collect_accesses slots fn t : accesses =
+    (* Group all memory accesses by their corresponding slot. *)
+    Func.blks fn |> Seq.fold ~init:Var.Map.empty ~f:(fun init b ->
+        let s = ref @@ get t @@ Blk.label b in
+        Blk.insns b |> Seq.fold ~init ~f:(fun acc insn ->
+            let op = Insn.op insn in
+            let acc = match Insn.load_or_store_to op with
+              | None -> acc
+              | Some (ptr, ty, ldst) -> match Map.find !s ptr with
+                | Some Offset (base, _) when escaped t base ->
+                  (* Any slot that escaped at any time should not
+                     be considered for partitioning. *)
+                  Logs.debug (fun m ->
+                      m "%s: ignoring escaped pointer %a%!"
+                        __FUNCTION__ Var.pp base);
+                  acc
+                | Some Offset (base, off) ->
+                  Map.add_multi acc ~key:base ~data:{insn; off; ty; ldst}
+                | _ -> acc in
+            s := Analysis.transfer_op slots !s op;
+            acc)) |>
+    Map.map ~f:(List.sort ~compare:Access.cmp)
+
+  (* Sort the memory accesses into self-contained, non-overlapping
+     partitions, which are the fully-or-partially scalarized sub-objects
+     of the aggregate. *)
+  let partition_acesses : accesses -> access partitions = fun m ->
+    let rec merge acc c = function
+      | [] -> List.sort (c :: acc) ~compare:Partition.cmp
+      | x :: xs ->
+        let rc = Partition.range c in
+        let rx = Access.range x in
+        let a = Algebra.relate rc rx in
+        let acc, p = match a with
+          | Equal | Started_by | Finished_by | Contains ->
+            (* Partition subsumes the access. *)
+            acc, Partition.add_member x c
+          | During | Finishes ->
+            (* Access subsumes the partition *)
+            acc, Partition.change rx.lo (Range.size rx) x c
+          | Overlaps | Starts ->
+            (* Extend the upper bound. *)
+            acc, Partition.change rc.lo (Range.coverage rc rx) x c
+          | Overlapped_by ->
+            (* Extend the lower bound. *)
+            acc, Partition.change rx.lo (Range.coverage rx rc) x c
+          | Before | After | Meets | Met_by ->
+            (* No overlap, so we start a new partition. *)
+            let sx = Int64.of_int @@ Access.sizeof x in
+            c :: acc, Partition.singleton x.off sx x in
+        Logs.debug (fun m ->
+            m "%s: partition %a, access %a: %a%!"
+              __FUNCTION__ Range.pp rc Range.pp rx Allen.pp a);
+        merge acc p xs in
+    (* pre: each access list is sorted *)
+    Map.filter_map m ~f:(function
+        | [] -> None
+        | x :: xs ->
+          let sx = Int64.of_int @@ Access.sizeof x in
+          let p = Partition.singleton x.off sx x in
+          Some (merge [] p xs))
+
+  (* Turn each partition into a concrete slot. *)
+  let materialize_partitions slots parts : scalars Context.t =
+    Map.to_sequence parts |> Seq.filter_map ~f:(fun (base, ps) ->
+        Map.find slots base |> Option.map ~f:(fun s -> base, ps, s)) |>
+    Context.Seq.fold ~init:Scalar.Map.empty ~f:(fun init (base, ps, s) ->
+        Seq.of_list ps |> Seq.filter ~f:(not @. Partition.is_entire_slot s) |>
+        Context.Seq.fold ~init ~f:(fun acc p ->
+            let open Context.Syntax in
+            let size = Int64.to_int_exn p.size in
+            (* TODO: look through `p.mems` and see if there is a store
+               that is larger than other acesses (i.e. `st.l` followed
+               by one or more `ld.w`). If so, this partition could be
+               broken down further if we modify the store instruction(s). *)
+            let* x = Context.Var.fresh in
+            let*? s = Slot.create x ~size ~align:size in
+            Logs.debug (fun m ->
+                m "%s: new slot %a, base=%a, off=0x%Lx, size=%Ld%!"
+                  __FUNCTION__ Var.pp x Var.pp base p.off p.size);
+            !!(Map.set acc ~key:(base, p.off) ~data:s)))
+
+  (* Find the corresponding partition for [base+off, base+off+size). *)
+  let find_partition (parts : 'a partitions) base off size =
+    Map.find parts base |> Option.bind ~f:(fun ps ->
+        let r = {lo = off; hi = Int64.(off + of_int size)} in
+        List.find ps ~f:(fun (p : 'a partition) ->
+            let rp = Partition.range p in
+            let a = Algebra.relate r rp in
+            Logs.debug (fun m ->
+                m "%s: relating %a to %a: %a%!"
+                  __FUNCTION__ Range.pp r Range.pp rp Allen.pp a);
+            match a with
+            | Starts | During | Finishes | Equal -> true
+            | _ -> false))
+
+  (* Exact cover for a scalar at `base + off`. *)
+  let rewrite_insn_exact (m : scalars) i ~exact ~base ~off =
+    let open Context.Syntax in
+    Logs.debug (fun m ->
+        m "%s: exact=0x%Lx, off=0x%Lx, base=%a%!"
+          __FUNCTION__ exact off Var.pp base);
+    let op = Insn.op i in
+    let delta = Int64.(off - exact) in
+    match Map.find m (base, exact) with
+    | None ->
+      Logs.debug (fun m -> m "%s: no slot found%!" __FUNCTION__);
+      !![i]
+    | Some s when Int64.(delta = 0L) ->
+      Logs.debug (fun m ->
+          m "%s: found slot %a (base)%!"
+            __FUNCTION__ Var.pp (Slot.var s));
+      (* Store to base of new slot. *)
+      let addr = Slot.var s in
+      let op' = Insn.replace_load_or_store_addr addr op in
+      !![Insn.with_op i op']
+    | Some s ->
+      Logs.debug (fun m ->
+          m "%s: found slot %a (delta 0x%Lx)%!"
+            __FUNCTION__ Var.pp (Slot.var s) delta);
+      (* Compute offset of new slot and store to it. *)
+      let* l = Context.Label.fresh in
+      let* y = Context.Var.fresh in
+      let+ word = Context.target >>| Target.word in
+      let a = Insn.add y word (Slot.var s) delta in
+      let op' = Insn.replace_load_or_store_addr y op in
+      [Insn.create ~label:l a; Insn.with_op i op']
+
+  let debug_show_insn i ptr ty ldst =
+    Logs.debug (fun m ->
+        m "%s: %a: looking at %a.%a to %a%!"
+          __FUNCTION__
+          Label.pp (Insn.label i)
+          pp_load_or_store ldst
+          Type.pp_basic ty
+          Var.pp ptr)
+
+  let debug_show_bad_val ptr v =
+    Logs.debug (fun m ->
+        m "%s: cannot rewrite: %a is %a"
+          __FUNCTION__ Var.pp ptr
+          (Format.pp_print_option
+             ~none:pp_bot
+             pp_value)
+          v)
+
+  (* Rewrite an instruction. *)
+  let rewrite_insn parts (m : scalars) (s : state) i =
+    let open Context.Syntax in
+    let op = Insn.op i in
+    match Insn.load_or_store_to op with
+    | None -> !![i]
+    | Some (ptr, ty, ldst) ->
+      debug_show_insn i ptr ty ldst;
+      match Map.find s ptr with
+      | (Some Top | None) as v ->
+        debug_show_bad_val ptr v;
+        !![i]
+      | Some Offset (base, off) ->
+        match find_partition parts base off @@ basic_size ty with
+        | Some p -> rewrite_insn_exact m i ~exact:p.off ~base ~off
+        | None ->
+          Logs.debug (fun m -> m "%s: no parts found%!" __FUNCTION__);
+          !![i]
+
+  let rewrite_with_partitions slots fn t parts m =
+    let open Context.Syntax in
+    let* blks = Func.blks fn |> Context.Seq.map ~f:(fun b ->
+        let s = ref @@ get t @@ Blk.label b in
+        let+ insns = Blk.insns b |> Context.Seq.map ~f:(fun i ->
+            let+ is = rewrite_insn parts m !s i in
+            s := Analysis.transfer_op slots !s @@ Insn.op i; is)
+          >>| List.concat @. Seq.to_list in
+        Blk.with_insns b insns) >>| Seq.to_list in
+    Context.lift_err @@ Func.with_blks fn blks
+
+  let insert_new_slots fn m = Map.fold m ~init:fn
+      ~f:(fun ~key:_ ~data fn -> Func.insert_slot fn data)
+
+  let debug_show_parts parts =
+    Logs.debug (fun m ->
+        m "%s: partitions:\n%a%!"
+          __FUNCTION__
+          (Format.pp_print_list
+             ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n")
+             (fun ppf (key, data) ->
+                Format.fprintf ppf "%a:\n%a"
+                  Var.pp key
+                  (Format.pp_print_list
+                     ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n")
+                     (fun ppf -> Format.fprintf ppf "  %a"
+                         (Partition.pp Access.pp)))
+                  data))
+          (Map.to_alist parts))
+
+  (* XXX: allowing the analysis to propagate through block parameters
+     could possibly be done, but the current transformation isn't
+     set up to properly handle it.
+
+     The main issue arises from pointers into the slots that get
+     aliased via a block parameter. That dangling reference will
+     currently be left unchanged, leading to garbage values being
+     read from the old slot.
+  *)
+  let analyze slots fn =
+    Analysis.analyze ~blkparam:false slots fn
+
+  let run fn =
+    let open Context.Syntax in
+    let slots = Analysis.collect_slots fn in
+    if Map.is_empty slots then !!fn else
+      let cfg = Cfg.create fn in
+      let blks = Func.map_of_blks fn in
+      let t = analyze cfg blks slots in
+      let accs = collect_accesses slots fn t in
+      let parts = partition_acesses accs in
+      if Map.is_empty parts then !!fn else
+        let () = debug_show_parts parts in
+        let* m = materialize_partitions slots parts in
+        if Map.is_empty m then !!fn else
+          let fn = insert_new_slots fn m in
+          rewrite_with_partitions slots fn t parts m
+end
diff --git a/src/lib/passes/ssa/ssa_impl_rename.ml b/src/lib/passes/ssa/ssa_impl_rename.ml
index 2b58c832..35e22da7 100644
--- a/src/lib/passes/ssa/ssa_impl_rename.ml
+++ b/src/lib/passes/ssa/ssa_impl_rename.ml
@@ -15,20 +15,27 @@ end = struct
     | None -> raise_notrace @@ Missing_blk l
     | Some _ as b -> b
 
+  let fresh_version nums base =
+    let i = ref 1 in
+    Hashtbl.update nums base ~f:(function
+        | None -> !i
+        | Some n ->
+          let n = n + 1 in
+          i := n;
+          n);
+    !i
+
   let new_name stk nums x =
-    let key = Var.base x in
-    let default = 1 in
-    let n = ref default in
-    let upd x = n := x + 1; !n in
-    Hashtbl.update nums key ~f:(Option.value_map ~default ~f:upd);
-    let y = Var.with_index x !n in
-    Hashtbl.add_multi stk ~key ~data:y;
+    let base = Var.base x in
+    let idx = fresh_version nums base in
+    let y = Var.with_index x idx in
+    Hashtbl.add_multi stk ~key:base ~data:y;
     y
 
   let rename_args stk nums b =
     Blk.args b |> Seq.map ~f:(new_name stk nums) |> Seq.to_list
 
-  let map_var stk x = match Hashtbl.find stk x with
+  let map_var stk x = match Hashtbl.find stk @@ Var.base x with
     | Some [] | None -> x
     | Some (y :: _) -> y
 
diff --git a/src/lib/phi_values.ml b/src/lib/phi_values.ml
index 282bc772..7ccea306 100644
--- a/src/lib/phi_values.ml
+++ b/src/lib/phi_values.ml
@@ -3,43 +3,68 @@
 open Core
 open Regular.Std
 open Graphlib.Std
-open Virtual
+
+let table enum d ds tbl =
+  enum tbl |> Seq.map ~f:snd |>
+  Seq.map ~f:(fun (`label (l, args)) -> l, args) |>
+  Seq.to_list |> List.cons (d, ds)
+
+let locals enum =
+  let open Virtual in function
+    | `hlt -> []
+    | `jmp #global -> []
+    | `jmp `label (l, args) -> [l, args]
+    | `br (_, #global, #global) -> []
+    | `br (_, `label (y, ys), #global) -> [y, ys]
+    | `br (_, #global, `label (n, ns)) -> [n, ns]
+    | `br (_, `label (y, ys), `label (n, ns)) -> [y, ys; n, ns]
+    | `ret _ -> []
+    | `sw (_, _, `label (d, ds), tbl) -> table enum d ds tbl
 
 module type Domain = sig
   type t [@@deriving equal]
-  val one : operand -> t
+  val one : Virtual.operand -> t
   val join : t -> t -> t
 end
 
-module Make(D : Domain) = struct
+module type L = sig
+  module Ctrl : sig
+    type t
+    val locals : t -> (Label.t * Virtual.operand list) list
+  end
+  module Blk : sig
+    type t
+    val args : ?rev:bool -> t -> Var.t seq
+    val ctrl : t -> Ctrl.t
+  end
+  module Func : sig
+    type t
+  end
+  module Cfg : sig
+    include Label.Graph_s
+    val create : Func.t -> t
+  end
+end
+
+module Make(M : L)(D : Domain) = struct
+  open M
+
   type state = D.t Var.Map.t [@@deriving equal]
 
-  let local ~blk s : local -> state = function
-    | `label (l, vs) ->
-      blk l |> Option.value_map ~default:s ~f:(fun b ->
-          let args = Seq.to_list @@ Blk.args b in
-          match List.zip args vs with
-          | Unequal_lengths -> assert false
-          | Ok xs -> List.fold xs ~init:s ~f:(fun s (x, v) ->
-              Map.update s x ~f:(function
-                  | Some vs -> D.(join vs @@ one v)
-                  | None -> D.one v)))
-
-  let dst ~blk s : dst -> state = function
-    | #local as l -> local ~blk s l
-    | #global -> s
+  let local ~blk s (l, vs) : state =
+    blk l |> Option.value_map ~default:s ~f:(fun b ->
+        let args = Seq.to_list @@ Blk.args b in
+        match List.zip args vs with
+        | Unequal_lengths -> assert false
+        | Ok xs -> List.fold xs ~init:s ~f:(fun s (x, v) ->
+            Map.update s x ~f:(function
+                | Some vs -> D.(join vs @@ one v)
+                | None -> D.one v)))
 
   let transfer ~blk l s =
     blk l |> Option.value_map ~default:s ~f:(fun b ->
-        match Blk.ctrl b with
-        | `hlt | `ret _ -> s
-        | `jmp d -> dst ~blk s d
-        | `br (_, y, n) ->
-          dst ~blk (dst ~blk s y) n
-        | `sw (_, _, d, tbl) ->
-          let init = local ~blk s d in
-          Ctrl.Table.enum tbl |> Seq.fold ~init
-            ~f:(fun s (_, l') -> local ~blk s l'))
+        Blk.ctrl b |> Ctrl.locals |>
+        List.fold ~init:s ~f:(local ~blk))
 
   let merge = Map.merge_skewed ~combine:(fun ~key:_ -> D.join)
 
diff --git a/src/lib/regalloc/regalloc_irc.ml b/src/lib/regalloc/regalloc_irc.ml
index 3de447de..641fddcd 100644
--- a/src/lib/regalloc/regalloc_irc.ml
+++ b/src/lib/regalloc/regalloc_irc.ml
@@ -50,7 +50,9 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
     let label = Insn.label i in 
     let insn = Insn.insn i in
     let use = M.Insn.reads insn in
-    Set.iter use ~f:(update_cost ~loop_depth t);
+    Set.iter use ~f:(fun u ->
+        update_cost ~loop_depth t u;
+        inc_use t u);
     let def = M.Insn.writes insn in
     (* if isMoveInstruction(I) then *)
     let+ out = match M.Regalloc.is_copy insn with
@@ -82,14 +84,16 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
     Set.iter def ~f:(fun d ->
         (* forall l \in live
              AddEdge(l,d) *)
+        add_def t d label;
         Set.iter out ~f:(fun o -> add_edge t o d));
     (* live := use(I) U (live\def(I)) *)
     Set.union use (Set.diff out def)
 
   (* Build the interference graph and other initial state for the
      algorithm. *)
-  let build t live =
+  let build t =
     (* forall b \in blocks in program *)
+    let live = Option.value_exn t.live in
     Func.blks t.fn |> C.Seq.iter ~f:(fun b ->
         let l = Blk.label b in
         let loop_depth = match Loop.blk t.loop l with
@@ -98,12 +102,15 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
             (* NB: levels start at 0 *)
             (Loop.(level (get t.loop lp)) :> int) + 1 in
         (* live := liveOut(b) *)
-        let out = Live.outs live l in
+        let out = ref @@ Live.outs live l in
         (* forall I \in instructions(b) in reverse order *)
-        let+ _out =
-          Blk.insns b ~rev:true |>
-          C.Seq.fold ~init:out ~f:(build_insn ~loop_depth t) in
-        ())
+        let insns = Blk.insns b ~rev:true |> Seq.to_list in
+        let ord = ref (List.length insns - 1) in
+        C.List.iter insns ~f:(fun i ->
+            Hashtbl.set t.insn_blks ~key:(Insn.label i) ~data:(l, !ord);
+            let+ out' = build_insn ~loop_depth t !out i in
+            out := out';
+            decr ord))
 
   (* Initialize the worklists. *)
   let make_worklist t =
@@ -127,6 +134,9 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
         node_moves t n |> Lset.iter ~f:(fun m ->
             (* if m \in activeMoves then *)
             if Lset.mem t.amoves m then begin
+              Logs.debug (fun m_ ->
+                  m_ "%s: enabling move %a for node %a%!"
+                    __FUNCTION__ Label.pp m Rv.pp n);
               (* activeMoves := activeMoves \ {m} *)
               t.amoves <- Lset.remove t.amoves m;
               (* worklistMoves := worklistMoves U {m} *)
@@ -165,7 +175,10 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
        simplifyWorklist := simplifyWorklist \ {n} *)
     let n = take_one t.wsimplify in
     (* push(n, selectStack) *)
-    if can_be_colored t n then Stack.push t.select n;
+    if can_be_colored t n then begin
+      Logs.debug (fun m -> m "%s: selecting %a%!" __FUNCTION__ Rv.pp n);
+      Stack.push t.select n;
+    end;
     (* forall m \in Adjacent(n) *)
     adjacent t n |> Set.iter ~f:(decrement_degree t)
 
@@ -185,12 +198,17 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
     end
 
   let ok t a r =
-    (* t \in precolored *)
-    exclude_from_coloring t a ||
-    (* degree[t] < K *)
-    degree t a < Regs.node_k a ||
-    (* (a,r) \in adjSet *)
-    has_edge t a r
+    let res =
+      (* t \in precolored *)
+      exclude_from_coloring t a ||
+      (* degree[t] < K *)
+      degree t a < Regs.node_k a ||
+      (* (a,r) \in adjSet *)
+      has_edge t a r in
+    Logs.debug (fun m ->
+        m "%s: %a, %a: %b%!"
+          __FUNCTION__ Rv.pp a Rv.pp r res);
+    res
 
   (* forall t \in Adjacent(v), OK(t,u)  *)
   let all_adjacent_ok t u v =
@@ -203,16 +221,20 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
        if degree[n] >= k then k := k + 1
      return (k < K)
   *)
-  let conservative t nk nodes =
-    let k = Set.fold nodes ~init:0 ~f:(fun k n ->
-        if degree t n >= Regs.node_k n then k + 1 else k) in
-    k < nk
+  let conservative t nodes =
+    Set.fold nodes ~init:0 ~f:(fun k n ->
+        if degree t n >= Regs.node_k n then k + 1 else k)
 
   (* Conservative(Adjacent(u) U Adjacent(v)) *)
   let conservative_adj t u v =
     assert (Regs.same_class_node u v);
     let nodes = Set.union (adjacent t u) (adjacent t v) in
-    conservative t (Regs.node_k u) nodes
+    let nk = Regs.node_k u in
+    let k = conservative t nodes in
+    Logs.debug (fun m ->
+        m "%s: u=%a, v=%a, k=%d, nk=%d%!"
+          __FUNCTION__ Rv.pp u Rv.pp v k nk);
+    k < nk
 
   (* XXX: the algorithm in the paper does:
 
@@ -227,7 +249,11 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
      of `u`, leaving the degree of `t` unchanged.
   *)
   let combine_edge t u v =
-    if has_edge t u v then
+    let e = has_edge t u v in
+    Logs.debug (fun m ->
+        m "%s: combining edge u=%a, v=%a, has_edge=%b%!"
+          __FUNCTION__ Rv.pp u Rv.pp v e);
+    if e then
       decrement_degree t v
     else begin
       add_adjlist t u v;
@@ -238,6 +264,9 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
   (* Combine `v` with `u` in the interference graph, where
      `u` is the destination. *)
   let combine t u v =
+    Logs.debug (fun m ->
+        m "%s: combining u=%a with v=%a%!"
+          __FUNCTION__ Rv.pp u Rv.pp v);
     (* if v \in freezeWorklist *)
     if Hash_set.mem t.wfreeze v then
       (* freezeWorklist := freezeWorklist \ {v} *)
@@ -259,12 +288,43 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
     if (* degree[u] >= K *)
       degree t u >= Regs.node_k u &&
       (* u \in freezeWorklist *)
-      Hash_set.mem t.wfreeze u then begin
+      Hash_set.mem t.wfreeze u
+    then
       (* freezeWorklist := freezeWorklist \ {u} *)
-      Hash_set.remove t.wfreeze u;
+      let () = Hash_set.remove t.wfreeze u in
       (* spillWorklist := spillWorklist U {u} *)
       add_spill t u
-    end
+
+  (* We can bypass the Briggs conservative heuristic if this
+     move is trivially coalescable.
+
+     Criteria:
+
+     1. The source node is not precolored.
+     2. This move is its only use.
+     3. All definitions dominate the use.
+     4. The source node is not live-out.
+  *)
+  let is_trivial_du t m v =
+    can_be_colored t v && num_uses t v = 1 &&
+    match Hashtbl.find t.insn_blks m with
+    | None -> false
+    | Some (bm, om) ->
+      let live = Option.value_exn t.live in
+      let out = Live.outs live bm in
+      not (Set.mem out v) &&
+      Hashtbl.find t.defs v |>
+      Option.value ~default:Lset.empty |>
+      Lset.to_sequence |> Seq.for_all ~f:(fun d ->
+          match Hashtbl.find t.insn_blks d with
+          | None -> false
+          | Some (bd, od) when Label.(bm = bd) ->
+            Logs.debug (fun m ->
+                m "%s: bm=%a, om=%d, bd=%a, od=%d%!"
+                  __FUNCTION__ Label.pp bm om Label.pp bd od);
+            od < om
+          | Some (bd, _) ->
+            Semi_nca.Tree.is_descendant_of t.dom ~parent:bd bm)
 
   (* P(u,v) = S(u,v) * (W(u,v) / (1 + D'(u) + D'(v)))
 
@@ -297,6 +357,9 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
       let dv = effective_degree c.src pv in
       let w = Float.of_int @@ Int.pow 10 c.loop in
       let p = w /. Float.of_int (1 + du + dv) in
+      (* If if this move is trivially coalescable, then bump the weight a
+         bit so that it can coalesce earlier. *)
+      let p = if is_trivial_du t m c.src then p *. 2.0 else p in
       (* If one of the nodes is pre-colored, then this coalesce will be
          much riskier. If both are pre-colored, then we should avoid it
          at all costs (see topmost condition). *)
@@ -308,12 +371,12 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
     Lset.to_sequence t.wmoves |>
     Seq.map ~f:(fun m -> m, move_priority t m) |>
     Seq.max_elt ~compare:(fun (_, a) (_, b) ->  Float.compare a b) |>
-    Option.value_exn |> fst
+    Option.value_exn
 
   (* pre: wmoves is not empty *)
   let coalesce t =
     (* let m_(=copy(x,y)) \in worklistMoves *)
-    let m = pick_move t in
+    let m, score = pick_move t in
     let c = Hashtbl.find_exn t.copies m in
     (* x := GetAlias(x) *)
     let x = alias t c.dst in
@@ -324,39 +387,48 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
        else
          let (u,v) = (x,y) *)
     let u, v = if exclude_from_coloring t y then y, x else x, y in
+    Logs.debug (fun m_ ->
+        m_ "%s: looking at move %a, score=%g, u=%a, v=%a%!"
+          __FUNCTION__ Label.pp m score Rv.pp u Rv.pp v);
     (* worklistMoves := worklistMoves \ {m} *)
     t.wmoves <- Lset.remove t.wmoves m;
     (* if u = v then *)
-    if Rv.(u = v) then begin
+    if Rv.(u = v) then
       (* coalescedMoves := coalescedMoves U {m} *)
-      t.cmoves <- Lset.add t.cmoves m;
+      let () = t.cmoves <- Lset.add t.cmoves m in
+      Logs.debug (fun m -> m "%s: already coalesced%!" __FUNCTION__);
       (* AddWorkList(u) *)
       add_worklist t u
-    end else if
+    else if
       (* v \in precolored *)
       exclude_from_coloring t v ||
       (* (u,v) \in adjSet *)
-      has_edge t u v then begin
+      has_edge t u v
+    then
       (* constrainedMoves := constrainedMoves U {m} *)
-      t.kmoves <- Lset.add t.kmoves m;
+      let () = t.kmoves <- Lset.add t.kmoves m in
+      Logs.debug (fun m_ ->
+          m_ "%s: constraining %a%!" __FUNCTION__ Label.pp m);
       (* addWorkList(u) *)
       add_worklist t u;
       (* addWorkList(v) *)
       add_worklist t v
-    end else if
+    else if
       (* u \in precolored ^ (\forall t \in Adjacent(v), OK(t,u)) *)
       (exclude_from_coloring t u && all_adjacent_ok t u v) ||
       (* u \notin precolored ^ Conservative(Adjacent(u), Adjacent(v)) *)
-      (can_be_colored t u && conservative_adj t u v) then begin
+      (can_be_colored t u && (is_trivial_du t m v || conservative_adj t u v))
+    then
       (* coalescedMoves := coalescedMoves U {m} *)
-      t.cmoves <- Lset.add t.cmoves m;
+      let () = t.cmoves <- Lset.add t.cmoves m in
       (* Combine(u,v) *)
       combine t u v;
       (* AddWorkList(u) *)
       add_worklist t u
-    end else
+    else
       (* activeMoves := activeMoves U {m} *)
-      t.amoves <- Lset.add t.amoves m
+      let () = t.amoves <- Lset.add t.amoves m in
+      Logs.debug (fun m -> m "%s: adding to active moves%!" __FUNCTION__)
 
   (* pre: m \in copies
 
@@ -381,6 +453,9 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
               (* worklistMoves := worklistMoves \ {m} *)
               t.wmoves <- Lset.remove t.wmoves m;
             (* frozenMoves := frozenMoves U {m} *)
+            Logs.debug (fun m_ ->
+                m_ "%s: freezing move %a, v=%a"
+                  __FUNCTION__ Label.pp m Rv.pp v);
             t.fmoves <- Lset.add t.fmoves m;
             (* if NodeMoves(v) = {} ^ degree[v] < K *)
             if Lset.is_empty (node_moves t v)
@@ -396,6 +471,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
     (* let u \in freezeWorklist
        freezeWorklist := freezeWorklist \ {u} *)
     let u = take_one t.wfreeze in
+    Logs.debug (fun m -> m "%s: frozen node u=%a%!" __FUNCTION__ Rv.pp u);
     (* simplifyWorklist := simplifyWorklist U {u} *)
     Hash_set.add t.wsimplify u;
     (* FreezeMoves(u) *)
@@ -403,6 +479,9 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
 
   let select_spill t =
     Pairing_heap.pop t.wspill |> Option.iter ~f:(fun m ->
+        Logs.debug (fun m_ ->
+            m_ "%s: selecting spill node %a%!"
+              __FUNCTION__ Rv.pp m);
         (* spillWorklist := spillWorklist \ {m} *)
         Hashtbl.remove t.wspill_elts m;
         (* simplifyWorklist := simplifyworklist U {m} *)
@@ -419,7 +498,7 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
     Seq.filter ~f:(fun w -> Rv.is_reg w || Hash_set.mem t.colored w) |>
     (* okColors := okColors \ {color[GetAlias(w)]} *)
     Seq.filter_map ~f:(color t) |>
-    Seq.iter ~f:(fun c -> cs := Z.(!cs land ~!(one lsl c)))
+    Seq.iter ~f:(fun c -> cs := Bitset.Id.clear !cs c)
 
   let assign_colors t =
     (* while SelectStack is not empty
@@ -428,19 +507,19 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
         assert (can_be_colored t n);
         (* okColors := {0,...,K-1} *)
         let k = Regs.node_k n in
-        let cs = ref Z.(pred (one lsl k)) in
+        let cs = ref @@ Bitset.Id.init k in
         eliminate_colors t n cs;
         (* if okColors = {} then *)
-        if Z.(equal !cs zero) then
+        match Bitset.Id.min_elt !cs with
+        | None ->
           (* spilledNodes := spilledNodes U {n} *)
           t.spilled <- Set.add t.spilled n
-        else begin
+        | Some c ->
           (* coloredNodes := coloredNodes U {n} *)
           Hash_set.add t.colored n;
           (* let c \in okColors
              color[n] := c *)
-          set_color t n @@ Z.trailing_zeros !cs
-        end);
+          set_color t n c);
     (* forall n \in coalescedNodes *)
     Hash_set.iter t.coalesced ~f:(fun n ->
         (* color[n] := color[GetAlias(n)] *)
@@ -470,9 +549,15 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
             let r = Rv.var GPR v in
             match find_reusable_slot t m n size with
             | Some r' ->
+              Logs.debug (fun m ->
+                  m "%s: re-using slot %a for spilled node %a%!"
+                    __FUNCTION__ Rv.pp r' Rv.pp r);
               Hashtbl.set t.slots ~key:r ~data:r';
               acc, m
             | None ->
+              Logs.debug (fun m ->
+                  m "%s: spilling %a to new slot%!"
+                    __FUNCTION__ Rv.pp r);
               let s = Virtual.Slot.create_exn v ~size ~align:size in
               Hashtbl.set t.slots ~key:r ~data:r;
               s :: acc, Map.add_multi m ~key:size ~data:r) in
@@ -599,10 +684,14 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
 
   (* Clear the relevant state for the next round. *)
   let new_round t =
+    t.live <- None;
     Hashtbl.clear t.adjlist;
     Hashtbl.clear t.degree;
     Hashtbl.clear t.copies;
+    Hashtbl.clear t.nuse;
+    Hashtbl.clear t.defs;
     Hashtbl.clear t.moves;
+    Hashtbl.clear t.insn_blks;
     Hashtbl.clear t.spill_cost;
     (* This doesn't seem to happen in the paper, but we should discard
        the previous coloring since we introduced new spill/reload code.
@@ -617,9 +706,12 @@ module Make(M : Machine_intf.S)(C : Context_intf.S) = struct
     let* () = C.when_ (round > max_rounds) @@ fun () ->
       C.failf "In Regalloc.main: maximum rounds reached (%d) with no \
                convergence on spilling" max_rounds () in
+    Logs.debug (fun m ->
+        m "%s: $%s: round %d of %d"
+          __FUNCTION__ (Func.name t.fn) round max_rounds);
     (* Build the interference graph. *)
-    let live = Live.compute ~keep:t.keep t.fn in
-    let* () = build t live in
+    t.live <- Some (Live.compute ~keep:t.keep t.fn);
+    let* () = build t in
     make_worklist t;
     (* Process the worklists. *)
     let continue = ref true in
diff --git a/src/lib/regalloc/regalloc_irc_state.ml b/src/lib/regalloc/regalloc_irc_state.ml
index 2c9c00cb..394bbfc4 100644
--- a/src/lib/regalloc/regalloc_irc_state.ml
+++ b/src/lib/regalloc/regalloc_irc_state.ml
@@ -48,7 +48,10 @@ module Make(M : Machine_intf.S) = struct
     adjlist         : Rv.Set.t Rv.Table.t;
     degree          : int Rv.Table.t;
     moves           : Lset.t Rv.Table.t;
+    insn_blks       : (Label.t * int) Label.Table.t;
     copies          : copy Label.Table.t;
+    nuse            : int Rv.Table.t;
+    defs            : Lset.t Rv.Table.t;
     mutable wmoves  : Lset.t; (* worklist moves *)
     mutable amoves  : Lset.t; (* active moves *)
     mutable cmoves  : Lset.t; (* coalesced moves *)
@@ -72,6 +75,7 @@ module Make(M : Machine_intf.S) = struct
     loop            : Loop.t;
     spill_cost      : int Rv.Table.t;
     dom             : Label.t Semi_nca.tree;
+    mutable live    : Live.t option;
   }
 
   (* Explicit registers and variables that correspond to stack slots
@@ -123,7 +127,10 @@ module Make(M : Machine_intf.S) = struct
       adjlist = Rv.Table.create ();
       degree;
       moves = Rv.Table.create ();
+      insn_blks = Label.Table.create ();
       copies = Label.Table.create ();
+      nuse = Rv.Table.create ();
+      defs = Rv.Table.create ();
       wmoves = Lset.empty;
       amoves = Lset.empty;
       cmoves = Lset.empty;
@@ -147,11 +154,15 @@ module Make(M : Machine_intf.S) = struct
       loop;
       spill_cost;
       dom;
+      live = None;
     }
 
   let add_spill t n =
     if can_be_colored t n && not (Hashtbl.mem t.wspill_elts n) then
       let elt = Pairing_heap.add_removable t.wspill n in
+      Logs.debug (fun m ->
+          m "%s: adding %a to spill worklist%!"
+            __FUNCTION__ Rv.pp n);
       Hashtbl.set t.wspill_elts ~key:n ~data:elt
 
   let remove_spill t n = Hashtbl.change t.wspill_elts n ~f:(function
@@ -216,6 +227,19 @@ module Make(M : Machine_intf.S) = struct
   let move_related t n =
     not @@ Lset.is_empty @@ node_moves t n
 
+  let inc_use t n =
+    Hashtbl.update t.nuse n ~f:(function
+        | Some n -> n + 1
+        | None -> 1)
+
+  let num_uses t n =
+    Hashtbl.find t.nuse n |> Option.value ~default:0
+
+  let add_def t n l =
+    Hashtbl.update t.defs n ~f:(function
+        | None -> Lset.singleton l
+        | Some s -> Lset.add s l)
+
   (* if n \in coalescedNodes then
        GetAlias(alias[n])
      else n *)
@@ -228,7 +252,11 @@ module Make(M : Machine_intf.S) = struct
     | Second _ -> Hashtbl.find t.colors n
     | First r -> Regs.reg_color r
 
-  let set_color t n c = Hashtbl.set t.colors ~key:n ~data:c
+  let set_color t n c =
+    Logs.debug (fun m ->
+        m "%s: assigning color %a=%d%!"
+          __FUNCTION__ Rv.pp n c);
+    Hashtbl.set t.colors ~key:n ~data:c
 
   let add_move t label n =
     Hashtbl.update t.moves n ~f:(function
diff --git a/src/lib/scalars.ml b/src/lib/scalars.ml
new file mode 100644
index 00000000..5d5f720e
--- /dev/null
+++ b/src/lib/scalars.ml
@@ -0,0 +1,267 @@
+open Core
+open Regular.Std
+open Graphlib.Std
+
+let (@.) = Fn.compose
+let (@<) = Fn.flip
+
+module Slot = Virtual.Slot
+
+(* A scalar access. *)
+module Scalar = struct
+  module T = struct
+    type t = Var.t * int64 [@@deriving compare, equal, hash, sexp]
+  end
+  include T
+  module Map = Map.Make(T)
+  module Table = Hashtbl.Make(T)
+end
+
+type scalar = Scalar.t [@@deriving compare, equal, hash, sexp]
+type scalars = Slot.t Scalar.Map.t
+
+(* Lattice of scalar accesses.
+
+   [Top]: the access is inconsistent or escapes
+
+   [Offset (s, o)]: access to slot [s] at offset [o]
+*)
+type value =
+  | Top
+  | Offset of scalar
+[@@deriving equal, sexp]
+
+let pp_value ppf = function
+  | Top -> Format.fprintf ppf "\u{22a4}"
+  | Offset (slot, offset) ->
+    let neg = Int64.is_negative offset in
+    let pre, off = if neg then '-', Int64.neg offset else '+', offset in
+    Format.fprintf ppf "%a%c0x%Lx" Var.pp slot pre off
+
+let pp_bot ppf () = Format.fprintf ppf "\u{22a5}" [@@ocaml.warning "-32"]
+
+module Value = struct
+  type t = value [@@deriving equal, sexp]
+  let merge a b = match a, b with
+    | Offset s1, Offset s2 when equal_scalar s1 s2 -> a
+    | _ -> Top
+end
+
+type slots = Slot.t Var.Map.t
+
+module State : sig
+  type t = value Var.Map.t [@@deriving equal, sexp]
+  val empty : t
+  val merge : t -> t -> t
+  val derive : slots -> t -> Var.t -> int64 -> value option
+end = struct
+  (* NB: the keys are the LHS of a given instruction *)
+  type t = value Var.Map.t [@@deriving equal, sexp]
+
+  let empty = Var.Map.empty
+
+  let merge a b = Map.merge_skewed a b
+      ~combine:(fun ~key:_ a b -> Value.merge a b)
+
+  let is_bad slots ptr offset =
+    Int64.(offset < 0L) || match Map.find slots ptr with
+    | Some s -> Int64.(offset >= of_int (Slot.size s))
+    | None -> false
+
+  (* Normalize the scalar referred to by `ptr` and `offset`. *)
+  let derive slots s ptr offset = match Map.find s ptr with
+    | (Some Top | None) as v -> v
+    | Some Offset (ptr', offset') ->
+      let offset'' = Int64.(offset + offset') in
+      (* Out of bounds offset to a slot should be undefined. *)
+      let value =
+        if is_bad slots ptr' offset'' then Top
+        else Offset (ptr', offset'') in
+      Some value
+end
+
+type state = State.t [@@deriving equal, sexp]
+
+let pp_state ppf s =
+  let pp_sep ppf () = Format.fprintf ppf "@ " in
+  let pp_elt ppf (x, v) = Format.fprintf ppf "(%a@ %a)" Var.pp x pp_value v in
+  let pp_elts = Format.pp_print_list ~pp_sep pp_elt in
+  Format.fprintf ppf "@[<hov 0>%a@]" pp_elts @@ Map.to_alist s
+[@@ocaml.warning "-32"]
+
+type solution = (Label.t, state) Solution.t
+
+let empty_solution = Solution.create Label.Map.empty State.empty
+
+type load_or_store = Load | Store
+
+let pp_load_or_store ppf = function
+  | Load -> Format.fprintf ppf "load"
+  | Store -> Format.fprintf ppf "store"
+
+let is_store = function
+  | Load -> false
+  | Store -> true
+
+module type L = sig
+  module Insn : sig
+    type t
+    type op
+
+    val create : label:Label.t -> op -> t
+
+    (* General accessors. *)
+    val op : t -> op
+    val label : t -> Label.t
+
+    (* Used during analysis. *)
+    val lhs : op -> Var.Set.t
+    val offset : op -> scalar option
+    val copy_of : op -> Var.t option
+    val free_vars : op -> Var.Set.t
+    val escapes : op -> Var.Set.t
+
+    (* Used during replacement. *)
+    val load_or_store_to : op -> (Var.t * Type.basic * load_or_store) option
+    val replace_load_or_store_addr : Var.t -> op -> op
+    val with_op : t -> op -> t
+    val add : Var.t -> Type.imm_base -> Var.t -> int64 -> op
+  end
+
+  module Ctrl : sig
+    type t
+    val free_vars : t -> Var.Set.t
+    val escapes : t -> Var.Set.t
+    val locals : t -> (Label.t * Virtual.operand list) list
+  end
+
+  module Blk : sig
+    type t
+    val label : t -> Label.t
+    val args : ?rev:bool -> t -> Var.t seq
+    val insns : ?rev:bool -> t -> Insn.t seq
+    val ctrl : t -> Ctrl.t
+    val with_insns : t -> Insn.t list -> t
+  end
+
+  module Func : sig
+    type t
+    val slots : ?rev:bool -> t -> Slot.t seq
+    val blks : ?rev:bool -> t -> Blk.t seq
+    val map_of_blks : t -> Blk.t Label.Tree.t
+    val with_blks : t -> Blk.t list -> t Or_error.t
+    val insert_slot : t -> Slot.t -> t
+  end
+
+  module Cfg : sig
+    include Label.Graph_s
+    val create : Func.t -> t
+  end
+end
+
+type t = {
+  soln : solution;       (* Dataflow solution *)
+  esc  : Var.Hash_set.t; (* All slots that escaped, globally *)
+}
+
+let get t l = Solution.get t.soln l
+let escaped t x = Hash_set.mem t.esc x
+
+module Make(M : L) = struct
+  open M
+
+  (* Set all known scalars to `Top` according to `f`, which is the
+     set of variables that escape. *)
+  let escaping ?esc f x s =
+    Set.fold (f x) ~init:s ~f:(fun s v ->
+        match Map.find s v with
+        | Some Offset (ptr, _) ->
+          Option.iter esc ~f:(fun t ->
+              Hash_set.strict_add t ptr |>
+              Or_error.iter ~f:(fun () ->
+                  Logs.debug (fun m ->
+                      m "%s: %a escapes via %a%!"
+                        __FUNCTION__ Var.pp ptr Var.pp v)));
+          Map.set s ~key:ptr ~data:Top
+        | Some _ | None -> s)
+
+  (* Transfer function for a single instruction. *)
+  let transfer_op ?esc slots s op =
+    let value = match Insn.offset op with
+      | Some (ptr, offset) -> State.derive slots s ptr offset
+      | None -> Insn.copy_of op |> Option.bind ~f:(Map.find s) in
+    let s = match value with
+      | None -> s
+      | Some v ->
+        Insn.lhs op |> Set.fold ~init:s
+          ~f:(fun s key -> Map.set s ~key ~data:v) in
+    escaping ?esc Insn.escapes op s
+
+  let merge_blkarg acc src dst = match src with
+    | `var src when Var.(src = dst) -> acc
+    | `var src ->
+      begin match Map.find acc src with
+        | None -> acc
+        | Some v -> Map.update acc dst ~f:(function
+            | Some v' -> Value.merge v v'
+            | None -> v)
+      end
+    | _ -> acc
+
+  let blkargs blks s (l, xs) =
+    Label.Tree.find blks l |>
+    Option.value_map ~default:s ~f:(fun b ->
+        let args = Seq.to_list @@ Blk.args b in
+        List.fold2 xs args ~init:s ~f:merge_blkarg |> function
+        | Ok s -> s
+        | Unequal_lengths ->
+          Logs.warn (fun m ->
+              m "%s: unequal lengths (%d vs %d) for %a%!"
+                __FUNCTION__ (List.length xs) (List.length args) Label.pp l);
+          s)
+
+  let ctrl_esc blkparam =
+    if blkparam then Ctrl.escapes else Ctrl.free_vars
+  [@@inline]
+
+  (* Transfer for control-flow instruction. *)
+  let transfer_ctrl ?(blkparam = true) ?esc blks s c =
+    let init = escaping ?esc (ctrl_esc blkparam) c s in
+    (* Propagate the block parameters we are passing. *)
+    Ctrl.locals c |> List.fold ~init ~f:(blkargs blks)
+  [@@specialise]
+
+  (* Transfer function for a block. *)
+  let transfer ?(blkparam = true) ?esc slots blks l s =
+    Label.Tree.find blks l |>
+    Option.value_map ~default:s ~f:(fun b ->
+        Blk.insns b |> Seq.map ~f:Insn.op |>
+        Seq.fold ~init:s ~f:(transfer_op ?esc slots) |>
+        transfer_ctrl ~blkparam ?esc blks @< Blk.ctrl b)
+  [@@specialise]
+
+  (* Initial constraints. *)
+  let initialize slots _blks =
+    (* Set all slots to point to their own base address. *)
+    let init = Map.mapi slots ~f:(fun ~key ~data:_ -> Offset (key, 0L)) in
+    Label.Map.singleton Label.pseudoentry init |>
+    Solution.create @< State.empty
+  [@@specialise]
+
+  (* All slots mapped to their names. *)
+  let collect_slots fn = Func.slots fn |> Seq.fold ~init:Var.Map.empty
+      ~f:(fun acc s -> Map.set acc ~key:(Slot.var s) ~data:s)
+
+  (* Run the dataflow analysis. *)
+  let analyze ?(blkparam = true) cfg blks slots =
+    let esc = Var.Hash_set.create () in
+    let s =
+      Graphlib.fixpoint (module Cfg) cfg
+        ~init:(initialize slots blks)
+        ~start:Label.pseudoentry
+        ~equal:State.equal
+        ~merge:State.merge
+        ~f:(transfer ~blkparam ~esc slots blks) in
+    {soln = s; esc}
+  [@@specialise]
+end
diff --git a/src/lib/slot_initialization.ml b/src/lib/slot_initialization.ml
new file mode 100644
index 00000000..a8656095
--- /dev/null
+++ b/src/lib/slot_initialization.ml
@@ -0,0 +1,177 @@
+open Core
+open Regular.Std
+open Graphlib.Std
+
+module Slot = Virtual.Slot
+
+type interval = {
+  lo : int;
+  hi : int;
+} [@@deriving compare, sexp]
+
+module Interval = struct
+  type point = int [@@deriving compare, sexp]
+  type t = interval [@@deriving compare, sexp]
+
+  let lower t = t.lo
+  let upper t = t.hi
+
+  let pp ppf t = Format.fprintf ppf "[%d,%d]" t.lo t.hi
+
+  let from_access off ty =
+    let lo = Int64.to_int_exn off in
+    let sz = Type.sizeof_basic ty / 8 in
+    {lo; hi = lo + sz - 1}
+
+  let extended t = {
+    lo = t.lo - 1;
+    hi = t.hi + 1;
+  }
+end
+
+module Tree = Interval_tree.Make(Interval)
+
+(* For each slot, we have a set of intervals corresponding to
+   relative byte offsets that were initialized by a store. *)
+type state = unit Tree.t Var.Map.t
+
+let equal_state s1 s2 =
+  Map.equal (fun t1 t2 ->
+      Seq.equal (fun (i1, _) (i2, _) ->
+          Interval.compare i1 i2 = 0)
+        (Tree.to_sequence t1)
+        (Tree.to_sequence t2))
+    s1 s2
+
+let empty_state : state = Var.Map.empty
+
+(* Starting constraint has the entry block with no incoming
+   initializations. *)
+let init_constraints : state Label.Map.t =
+  Label.Map.singleton Label.pseudoentry empty_state
+
+(* Our top element, which is every slot having been initialized. *)
+let top_state slots : state =
+  Map.map slots ~f:(fun s ->
+      let i = {lo = 0; hi = Slot.size s - 1} in
+      Tree.singleton i ())
+
+(* Coalesce `i` with any overlapping or adjacent intervals in `t`. *)
+let normalize_add t i =
+  let lo, hi, t =
+    Interval.extended i |> Tree.intersections t |> Seq.map ~f:fst |>
+    Seq.fold ~init:(i.lo, i.hi, t) ~f:(fun (lo, hi, t) i ->
+        min lo i.lo, max hi i.hi, Tree.remove t i) in
+  Tree.add t {lo; hi} ()
+
+(* Intersect the intervals (and also normalize them). *)
+let merge_tree t1 t2 =
+  Tree.to_sequence t1 |> Seq.map ~f:fst |>
+  Seq.fold ~init:Tree.empty ~f:(fun init i1 ->
+      Tree.intersections t2 i1 |> Seq.map ~f:fst |>
+      Seq.fold ~init ~f:(fun acc i2 ->
+          let lo = max i1.lo i2.lo in
+          let hi = min i1.hi i2.hi in
+          if lo <= hi then normalize_add acc {lo; hi} else acc))
+
+(* Since this is a "must" forward-flow analysis, incoming
+   predecessor states must intersect. *)
+let merge_state s1 s2 =
+  Map.merge s1 s2 ~f:(fun ~key:_ -> function
+      | `Both (t1, t2) -> Some (merge_tree t1 t2)
+      | `Left _ | `Right _ -> None)
+
+type solution = (Label.t, state) Solution.t
+
+type t = {
+  soln : solution;
+  bad  : Label.Hash_set.t;
+}
+
+(* If the slot is not always initialized by the
+   time we reach the load, then we have UB. *)
+let is_uninitialized acc base off ty =
+  match Map.find acc base with
+  | None -> true
+  | Some t ->
+    let i = Interval.from_access off ty in
+    not (Tree.dominates t i)
+
+let transfer_store esc acc ptr ty (s : Scalars.state) =
+  match Map.find s ptr with
+  | Some Offset (base, off) ->
+    (* If `base` ever escaped, then don't ever consider
+       it initialized. *)
+    if Hash_set.mem esc base then
+      let () = Logs.debug (fun m ->
+          m "%s: ignoring escaped slot %a%!"
+            __FUNCTION__ Var.pp base) in
+      acc
+    else
+      let i = Interval.from_access off ty in
+      Map.update acc base ~f:(function
+          | None -> Tree.singleton i ()
+          | Some t when Tree.dominates t i -> t
+          | Some t -> normalize_add t i)
+  | _ -> acc
+
+let transfer_load bad acc l ptr ty (s : Scalars.state) =
+  match Map.find s ptr with
+  | Some Offset (base, off) ->
+    if is_uninitialized acc base off ty then
+      Hash_set.add bad l;
+    acc
+  | _ -> acc
+
+let debug_dump blks bad s =
+  Logs.debug (fun m ->
+      Label.Tree.iter blks ~f:(fun ~key ~data:_ ->
+          let s = Solution.get s key in
+          let pp_tree ppf (x, t) =
+            Tree.to_sequence t |> Seq.to_list |>
+            List.to_string ~f:(fun (i, ()) ->
+                Format.asprintf "%a" Interval.pp i) |>
+            Format.fprintf ppf "%a:%s" Var.pp x in
+          m "%s: %a: incoming must-initialize: %s%!"
+            __FUNCTION__ Label.pp key
+            (Map.to_alist s |>
+             List.to_string ~f:(Format.asprintf "%a" pp_tree))));
+  Logs.debug (fun m ->
+      Hash_set.iter bad ~f:(fun l ->
+          m "%s: load at %a is potentially uninitialized%!"
+            __FUNCTION__ Label.pp l))
+
+module Make(M : Scalars.L) = struct
+  open M
+
+  module S = Scalars.Make(M)
+
+  let transfer bad t blks slots l st =
+    match Label.Tree.find blks l with
+    | None -> st
+    | Some b ->
+      let s = ref @@ Scalars.get t l in
+      Blk.insns b |> Seq.fold ~init:st ~f:(fun acc i ->
+          let op = Insn.op i and l = Insn.label i in
+          let acc = match Insn.load_or_store_to op with
+            | Some (ptr, ty, Store) -> transfer_store t.esc acc ptr ty !s
+            | Some (ptr, ty, Load) -> transfer_load bad acc l ptr ty !s
+            | _ -> acc in
+          s := S.transfer_op slots !s op;
+          acc)
+
+  let analyze' t cfg blks slots =
+    let bad = Label.Hash_set.create () in
+    let s = Graphlib.fixpoint (module Cfg) cfg
+        ~init:(Solution.create init_constraints @@ top_state slots)
+        ~start:Label.pseudoentry
+        ~equal:equal_state
+        ~merge:merge_state
+        ~f:(transfer bad t blks slots) in
+    debug_dump blks bad s;
+    {soln = s; bad}
+
+  let analyze cfg blks slots =
+    let t = S.analyze cfg blks slots in
+    analyze' t cfg blks slots
+end
diff --git a/src/lib/subst_mapper_abi.ml b/src/lib/subst_mapper_abi.ml
new file mode 100644
index 00000000..f43f81ec
--- /dev/null
+++ b/src/lib/subst_mapper_abi.ml
@@ -0,0 +1,74 @@
+(* Same as `Subst_mapper` with as much re-use as possible. *)
+
+open Core
+open Regular.Std
+open Virtual
+open Abi
+open Subst_mapper
+
+type t = operand Var.Map.t
+
+let map_callarg subst : Insn.callarg -> Insn.callarg = function
+  | `reg (a, r) -> `reg (map_arg subst a, r)
+  | `stk (a, o) -> `stk (map_arg subst a, o)
+
+let map_op subst (op : Insn.op) =
+  let arg = map_arg subst in
+  match op with
+  | `bop (x, b, l, r) -> `bop (x, b, arg l, arg r)
+  | `uop (x, u, a) -> `uop (x, u, arg a)
+  | `sel (x, t, c, l, r) -> map_sel subst x t c l r
+  | `call (x, f, args) ->
+    let f = map_global subst f in
+    let args = List.map args ~f:(map_callarg subst) in
+    `call (x, f, args)
+  | `load (x, t, a) -> `load (x, t, arg a)
+  | `store (t, v, a) -> `store (t, arg v, arg a)
+  | `regcopy _ -> op
+  | `regstore (r, a) -> `regstore (r, arg a)
+  | `regassign (r, a) -> `regassign (r, arg a)
+  | `stkargs _ -> op
+
+let map_insn subst i =
+  Insn.with_op i @@ map_op subst @@ Insn.op i
+
+let map_tbl_entry subst i l = i, map_local subst l
+
+let map_sw subst t i d tbl =
+  let d = map_local subst d in
+  let tbl = Ctrl.Table.map_exn tbl ~f:(map_tbl_entry subst) in
+  match i with
+  | `sym _ -> `sw (t, i, d, tbl)
+  | `var x -> match Map.find subst x with
+    | Some (#Ctrl.swindex as i) -> `sw (t, i, d, tbl)
+    | Some `int (i, _) ->
+      let d = Ctrl.Table.find tbl i |> Option.value ~default:d in
+      `jmp (d :> dst)
+    | Some o -> invalid "sw" o
+    | None -> `sw (t, i, d, tbl)
+
+let map_ctrl subst : ctrl -> ctrl = function
+  | `hlt -> `hlt
+  | `jmp d -> `jmp (map_dst subst d)
+  | `br (c, y, n) -> map_br subst c y n
+  | `ret xs -> `ret (List.map xs ~f:(fun (r, a) -> r, map_arg subst a))
+  | `sw (t, i, d, tbl) -> map_sw subst t i d tbl
+
+let map_blk subst b =
+  let insns = Blk.insns b |> Seq.map ~f:(map_insn subst) in
+  Seq.to_list insns, map_ctrl subst (Blk.ctrl b)
+
+(* TODO: should we enable more than just the `jmp` case? With
+   `br` and `switch` we can have different applications of the
+   substitution for the same destination. *)
+let map_blk_args subst b l = match Blk.ctrl b with
+  | `jmp `label (l', args) when Label.(l = l') ->
+    Some (List.map args ~f:(map_arg subst))
+  | _ -> None
+
+let blk_extend subst b b' =
+  Blk.label b' |> map_blk_args subst b |> Option.bind ~f:(fun args ->
+      Blk.args b' |> Seq.to_list |> List.zip args |> function
+      | Ok l -> Option.some @@ List.fold l ~init:subst
+          ~f:(fun subst (o, x) -> Map.set subst ~key:x ~data:o)
+      | Unequal_lengths -> None)
diff --git a/src/lib/target.ml b/src/lib/target.ml
index 7cb56d66..80864f72 100644
--- a/src/lib/target.ml
+++ b/src/lib/target.ml
@@ -24,7 +24,7 @@ let declare ~name ~word ~little () =
     targets := m;
     t
 
-let find = Map.find !targets
+let find name = Map.find !targets name
 
 let name t = t.name
 let word t = t.word
diff --git a/src/lib/virtual/abi/abi.ml b/src/lib/virtual/abi/abi.ml
index 5a2a384a..d9114623 100644
--- a/src/lib/virtual/abi/abi.ml
+++ b/src/lib/virtual/abi/abi.ml
@@ -15,3 +15,4 @@ module Module = Abi_module
 module Cfg = Abi_cfg
 module Live = Abi_live
 module Resolver = Abi_resolver
+module Eval = Abi_eval
diff --git a/src/lib/virtual/abi/abi_eval.ml b/src/lib/virtual/abi/abi_eval.ml
new file mode 100644
index 00000000..5550ec36
--- /dev/null
+++ b/src/lib/virtual/abi/abi_eval.ml
@@ -0,0 +1 @@
+include Virtual_eval
diff --git a/src/lib/virtual/abi/abi_insn.ml b/src/lib/virtual/abi/abi_insn.ml
index b4bdf4b2..c2f3d18e 100644
--- a/src/lib/virtual/abi/abi_insn.ml
+++ b/src/lib/virtual/abi/abi_insn.ml
@@ -171,7 +171,7 @@ module Tag = struct
       "non-tail" (module Unit)
 end
 
-let def i = match i.op with
+let def_of_op = function
   | `bop (x, _, _, _)
   | `uop (x, _, _)
   | `sel (x, _, _, _, _)
@@ -180,3 +180,5 @@ let def i = match i.op with
   | `stkargs x -> Var.Set.singleton x
   | `store _ | `regstore _ | `regassign _ -> Var.Set.empty
   | `call (xs, _, _) -> Var.Set.of_list @@ List.map xs ~f:fst3
+
+let def i = def_of_op i.op
diff --git a/src/lib/virtual/virtual.mli b/src/lib/virtual/virtual.mli
index 23a3aee7..d2588ee7 100644
--- a/src/lib/virtual/virtual.mli
+++ b/src/lib/virtual/virtual.mli
@@ -231,53 +231,7 @@ end
 type insn = Insn.t [@@deriving bin_io, compare, equal, sexp]
 
 (** Evaluation of instructions. *)
-module Eval : sig
-  (** [binop_int o x y] evaluates a binary operator [o] over integers
-      [x] and [y], and returns the result if it is defined. *)
-  val binop_int :
-    Insn.binop ->
-    Bv.t ->
-    Bv.t ->
-    [`bool of bool | `int of Bv.t * Type.imm] option
-
-  (** [binop_single o x y] evaluates a binary operator [o] over 32-bit
-      floats [x] and [y], and returns the result if it is defined. *)
-  val binop_single :
-    Insn.binop ->
-    Float32.t ->
-    Float32.t ->
-    [`bool of bool | `float of Float32.t] option
-
-  (** [binop_double o x y] evaluates a binary operator [o] over 64-bit
-      floats [x] and [y], and returns the result if it is defined. *)
-  val binop_double :
-    Insn.binop ->
-    float ->
-    float ->
-    [`bool of bool | `double of float] option
-
-  (** [unop_int o x ty] evaluates a unary operator [o] over the integer
-      [x] with type [ty], and returns the result if it is defined. *)
-  val unop_int :
-    Insn.unop ->
-    Bv.t ->
-    Type.imm ->
-    [`double of float | `float of Float32.t | `int of Bv.t * Type.imm] option
-
-  (** [unop_single o x] evaluates a unary operator [o] over the 32-bit float
-      [x], and returns the result if it is defined. *)
-  val unop_single :
-    Insn.unop ->
-    Float32.t ->
-    [`double of float | `float of Float32.t | `int of Bv.t * Type.imm] option
-
-  (** [unop_double o x] evaluates a unary operator [o] over the 64-bit float
-      [x], and returns the result if it is defined. *)
-  val unop_double :
-    Insn.unop ->
-    float ->
-    [`double of float | `int of Bv.t * Type.imm] option
-end
+module Eval : Virtual_eval_intf.S
 
 (** Control-flow-effectful instructions. *)
 module Ctrl : Virtual_ctrl_intf.S
@@ -693,7 +647,10 @@ module Abi : sig
     (** Transforms the underlying operation. *)
     val map : t -> f:(op -> op) -> t
 
-    (** Returns the set of defined variables of the instruction. *)
+    (** Returns the set of defined variables of the underlying op. *)
+    val def_of_op : op -> Var.Set.t
+
+    (** Equivalent to [def_of_op (op i)] *)
     val def : t -> Var.Set.t
 
     (** Same as [pp_op]. *)
@@ -708,6 +665,8 @@ module Abi : sig
 
   type insn = Insn.t [@@deriving bin_io, compare, equal, sexp]
 
+  module Eval : Virtual_eval_intf.S
+
   module Ctrl : Virtual_ctrl_intf.S
     with type operand := operand
      and type local := local
diff --git a/src/lib/virtual/virtual_eval_intf.ml b/src/lib/virtual/virtual_eval_intf.ml
new file mode 100644
index 00000000..9f63915e
--- /dev/null
+++ b/src/lib/virtual/virtual_eval_intf.ml
@@ -0,0 +1,49 @@
+module Insn = Virtual_insn
+
+module type S = sig
+  (** [binop_int o x y] evaluates a binary operator [o] over integers
+      [x] and [y], and returns the result if it is defined. *)
+  val binop_int :
+    Insn.binop ->
+    Bv.t ->
+    Bv.t ->
+    [`bool of bool | `int of Bv.t * Type.imm] option
+
+  (** [binop_single o x y] evaluates a binary operator [o] over 32-bit
+      floats [x] and [y], and returns the result if it is defined. *)
+  val binop_single :
+    Insn.binop ->
+    Float32.t ->
+    Float32.t ->
+    [`bool of bool | `float of Float32.t] option
+
+  (** [binop_double o x y] evaluates a binary operator [o] over 64-bit
+      floats [x] and [y], and returns the result if it is defined. *)
+  val binop_double :
+    Insn.binop ->
+    float ->
+    float ->
+    [`bool of bool | `double of float] option
+
+  (** [unop_int o x ty] evaluates a unary operator [o] over the integer
+      [x] with type [ty], and returns the result if it is defined. *)
+  val unop_int :
+    Insn.unop ->
+    Bv.t ->
+    Type.imm ->
+    [`double of float | `float of Float32.t | `int of Bv.t * Type.imm] option
+
+  (** [unop_single o x] evaluates a unary operator [o] over the 32-bit float
+      [x], and returns the result if it is defined. *)
+  val unop_single :
+    Insn.unop ->
+    Float32.t ->
+    [`double of float | `float of Float32.t | `int of Bv.t * Type.imm] option
+
+  (** [unop_double o x] evaluates a unary operator [o] over the 64-bit float
+      [x], and returns the result if it is defined. *)
+  val unop_double :
+    Insn.unop ->
+    float ->
+    [`double of float | `int of Bv.t * Type.imm] option
+end
diff --git a/src/lib/virtual_lexer.mll b/src/lib/virtual_lexer.mll
index 6cd8ab2c..8333a289 100644
--- a/src/lib/virtual_lexer.mll
+++ b/src/lib/virtual_lexer.mll
@@ -2,8 +2,6 @@
   open Lexing
   open Virtual_parser
 
-  let string_buff = Buffer.create 256
-
   let char_for_backslash = function
     | 'n' -> '\010'
     | 'r' -> '\013'
@@ -56,7 +54,7 @@ let exp = ('E' | 'e') (integer | ninteger)
 let inf = ("INF" | "inf" | "INFINITY" | "infinity")
 let nan = ("NAN" | "NaN" | "nan")
 let flt = ('-'? digit+ '.' digit+ exp?) | ('-'? inf) | ('-'? nan)
-let backslash_escapes = ['\\' '\'' '"' 'n' 't' 'b' 'r' ' ']
+let backslash_escapes = ['\\' '\'' '"' 'n' 't' 'b' 'r']
 let imm = ['w' 'l' 'b' 'h']
 let imm_base = ['w' 'l']
 let fp = ['s' 'd']
@@ -65,6 +63,7 @@ let special = ['m' 'f']
 let typ = (basic | special)
 
 rule token = parse
+  | "(;" { block_comment 0 lexbuf; token lexbuf }
   | ';' { line_comment lexbuf; token lexbuf }
   | '\n' { newline lexbuf; token lexbuf }
   | space { token lexbuf }
@@ -72,13 +71,13 @@ rule token = parse
   | '-' { MINUS }
   | ':' (ident as id) { TYPENAME id }
   | ':' { COLON }
+  | '@' (integer as id) { LABEL id }
   | '$' (ident as id) { SYM id }
   | '@' (ident as id) { LABEL id }
-  | '@' (integer as id) { LABEL id }
-  | '%' (ident as id) '.' (integer as i) { VAR (id, int_of_string i) }
-  | '%' (ident as id) { IDENT id }
   | '%' (integer as id) { TEMP id }
   | '%' (integer as id) '.' (integer as i) { TEMPI (id, int_of_string i) }
+  | '%' (ident as id) '.' (integer as i) { VAR (id, int_of_string i) }
+  | '%' (ident as id) { IDENT id }
   | "module" space+ (ident as id) { MODULE id }
   | "align" { ALIGN }
   | "const" { CONST () }
@@ -92,7 +91,7 @@ rule token = parse
   | ',' { COMMA }
   | '=' { EQUALS }
   | "->" { ARROW }
-  | "..." { ELIPSIS }
+  | "..." { ELLIPSIS }
   | "sb" { SB }
   | "sh" { SH }
   | "sw" { SW }
@@ -184,9 +183,9 @@ rule token = parse
   | "section" { SECTION }
   | "noreturn" { NORETURN }
   | '"' {
-    Buffer.clear string_buff;
-    string lexbuf;
-    STRING (Buffer.contents string_buff)
+    let buf = Buffer.create 64 in
+    string buf lexbuf;
+    STRING (Buffer.contents buf)
   }
   | eof { EOF }
   | (posints as i) '_' (imm as t) {
@@ -204,17 +203,26 @@ rule token = parse
   | "false" { BOOL false }
   | _ { raise Error }
 
-and string = parse
+and string buf = parse
   | '"' { () }
   | '\\' (backslash_escapes as c) {
-    Buffer.add_char string_buff (char_for_backslash c);
-    string lexbuf
+    Buffer.add_char buf (char_for_backslash c);
+    string buf lexbuf
   }
+  | eof { raise Error }
   | _ as c {
-    Buffer.add_char string_buff c;
-    string lexbuf
+    Buffer.add_char buf c;
+    string buf lexbuf
   }
 
+and block_comment depth = parse
+  | "(;" { block_comment (depth + 1) lexbuf }
+  | ";)" { if depth > 0 then block_comment (depth - 1) lexbuf }
+  | '\n' { newline lexbuf; block_comment depth lexbuf }
+  | eof { raise Error }
+  | _ { block_comment depth lexbuf }
+
 and line_comment = parse
-  | '\n' { () }
+  | '\n' { newline lexbuf }
+  | eof { () }
   | _ { line_comment lexbuf }
diff --git a/src/lib/virtual_parser.mly b/src/lib/virtual_parser.mly
index 2e94e8ea..2a91d792 100644
--- a/src/lib/virtual_parser.mly
+++ b/src/lib/virtual_parser.mly
@@ -120,7 +120,7 @@
 %token COMMA
 %token EQUALS
 %token ARROW
-%token ELIPSIS
+%token ELLIPSIS
 %token SB SH SW W L B H S D Z
 %token <Type.basic> ADD DIV MUL SUB NEG
 %token <Type.imm> REM MULH UMULH UDIV UREM AND OR ASR LSL LSR ROL ROR XOR NOT
@@ -268,7 +268,7 @@ func:
     { make_fn slots blks args l name return true }
 
 func_args:
-  | ELIPSIS { !!([], true) }
+  | ELLIPSIS { !!([], true) }
   | t = type_arg x = var { let+ x = x in [x, t], false }
   | t = type_arg x = var COMMA rest = func_args
     {
@@ -456,7 +456,7 @@ call_args:
       let+ a = a and+ rest = rest in
       Arg a :: rest
     }
-  | a = operand COMMA ELIPSIS COMMA vargs = separated_nonempty_list(COMMA, operand)
+  | a = operand COMMA ELLIPSIS COMMA vargs = separated_nonempty_list(COMMA, operand)
     {
       let+ a = a and+ vargs = Context.List.all vargs in
       Arg a :: Core.List.map vargs ~f:(fun a -> Varg a)
diff --git a/src/test/data/opt/ackermann.driver.sysv.amd64.c b/src/test/data/opt/ackermann.driver.sysv.amd64.c
index a984836e..2edb4408 100644
--- a/src/test/data/opt/ackermann.driver.sysv.amd64.c
+++ b/src/test/data/opt/ackermann.driver.sysv.amd64.c
@@ -2,7 +2,8 @@
 
 extern int foo(int m, int n);
 
-int main() {
+int
+main() {
   assert(foo(0, 0) == 1);
   assert(foo(1, 3) == 5);
   assert(foo(2, 2) == 7);
diff --git a/src/test/data/opt/analyze_array.driver.sysv.amd64.c b/src/test/data/opt/analyze_array.driver.sysv.amd64.c
index 2548db80..7bbb43a4 100644
--- a/src/test/data/opt/analyze_array.driver.sysv.amd64.c
+++ b/src/test/data/opt/analyze_array.driver.sysv.amd64.c
@@ -3,7 +3,8 @@
 extern int analyze_array(int *arr, int n);
 extern int analyze_array_inl(int *arr, int n);
 
-int main() {
+int
+main() {
   int arr1[] = {3, 1, 4, 1, 5, 9, 2};
   assert(analyze_array(arr1, 7) == 229);
   assert(analyze_array_inl(arr1, 7) == 229);
diff --git a/src/test/data/opt/analyze_array.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/analyze_array.vir.opt.sysv.amd64.regalloc
index 7b584c3e..9e6e62a7 100644
--- a/src/test/data/opt/analyze_array.vir.opt.sysv.amd64.regalloc
+++ b/src/test/data/opt/analyze_array.vir.opt.sysv.amd64.regalloc
@@ -68,11 +68,10 @@ export function $analyze_array { ; returns: rax
   mov ecx, r13d ; @75
   sub ecx, ebx ; @122
   add eax, dword ptr [rbp - 0x10] ; @77
-  add ecx, eax ; @78
-  lea eax, qword ptr [r14 + 0x1] ; @79
-  mov dword ptr [rbp - 0x10], ecx ; @186
+  add eax, ecx ; @78
+  inc r14d ; @79
+  mov dword ptr [rbp - 0x10], eax ; @186
   mov edi, r13d ; @119
-  mov r14d, eax ; @120
   jmp @13 ; @121
 }
 
diff --git a/src/test/data/opt/and_test.driver.sysv.amd64.c b/src/test/data/opt/and_test.driver.sysv.amd64.c
index 306ee73a..2bb8ea11 100644
--- a/src/test/data/opt/and_test.driver.sysv.amd64.c
+++ b/src/test/data/opt/and_test.driver.sysv.amd64.c
@@ -4,7 +4,8 @@ extern int foo(int x, int y);
 extern int bar(int x, int y);
 extern long baz(long x);
 
-int main() {
+int
+main() {
   assert(foo(3, 4) == 3);
   assert(foo(4, 4) == 4);
   assert(foo(1, 4) == 1);
diff --git a/src/test/data/opt/badload1.vir b/src/test/data/opt/badload1.vir
new file mode 100644
index 00000000..9fcc5a48
--- /dev/null
+++ b/src/test/data/opt/badload1.vir
@@ -0,0 +1,21 @@
+module badload1
+
+export function w $foo(w %x) {
+  %a = slot 4, align 4
+  %b = slot 4, align 4
+@start:
+  %c = slt.w %x, 0_w
+  br %c, @neg, @nonneg
+@neg:
+  %v = ld.w %a
+  jmp @stuff
+@nonneg:
+  st.w 5_w, %a
+  %v = ld.w %a
+  jmp @stuff
+@stuff:
+  st.w 4_w, %b
+  %w = ld.w %b
+  %r = add.w %v, %w
+  ret %r
+}
diff --git a/src/test/data/opt/badload1.vir.opt b/src/test/data/opt/badload1.vir.opt
new file mode 100644
index 00000000..c0937b1a
--- /dev/null
+++ b/src/test/data/opt/badload1.vir.opt
@@ -0,0 +1,17 @@
+module badload1
+
+export function w $foo(w %x) {
+  %a = slot 4, align 4
+@2:
+  %0 = slt.w %x, 0x0_w ; @13
+  br %0, @3, @4
+@3:
+  %v.3 = ld.w %a ; @7
+  jmp @6(%v.3)
+@4:
+  st.w 0x5_w, %a ; @8
+  jmp @6(0x5_w)
+@6(%v.1):
+  %1 = add.w %v.1, 0x4_w ; @14
+  ret %1
+}
diff --git a/src/test/data/opt/badload2.vir b/src/test/data/opt/badload2.vir
new file mode 100644
index 00000000..1308e2d0
--- /dev/null
+++ b/src/test/data/opt/badload2.vir
@@ -0,0 +1,20 @@
+module badload2
+
+export function w $foo(w %x) {
+  %a = slot 4, align 4
+  %b = slot 4, align 4
+@start:
+  %c = slt.w %x, 0_w
+  br %c, @neg, @nonneg
+@neg:
+  jmp @stuff
+@nonneg:
+  st.w 5_w, %a
+  jmp @stuff
+@stuff:
+  %v = ld.w %a
+  st.w 4_w, %b
+  %w = ld.w %b
+  %r = add.w %v, %w
+  ret %r
+}
diff --git a/src/test/data/opt/badload2.vir.opt b/src/test/data/opt/badload2.vir.opt
new file mode 100644
index 00000000..1cd43f63
--- /dev/null
+++ b/src/test/data/opt/badload2.vir.opt
@@ -0,0 +1,15 @@
+module badload2
+
+export function w $foo(w %x) {
+  %a = slot 4, align 4
+@2:
+  %0 = slt.w %x, 0x0_w ; @12
+  br %0, @6, @4
+@4:
+  st.w 0x5_w, %a ; @7
+  jmp @6
+@6:
+  %v.1 = ld.w %a ; @8
+  %1 = add.w %v.1, 0x4_w ; @13
+  ret %1
+}
diff --git a/src/test/data/opt/bsearch.driver.sysv.amd64.c b/src/test/data/opt/bsearch.driver.sysv.amd64.c
new file mode 100644
index 00000000..a4d5efa6
--- /dev/null
+++ b/src/test/data/opt/bsearch.driver.sysv.amd64.c
@@ -0,0 +1,40 @@
+#include <stdio.h>
+
+extern void *bsearch(const void *key, const void *base, size_t nel,
+                     size_t width, int (*cmp)(const void *, const void *));
+
+extern int intcmp(const void *a, const void *b);
+
+static void
+test_bsearch(const int *arr, size_t n, int key) {
+  const int *res = bsearch(&key, arr, n, sizeof(int), intcmp);
+  if (res) {
+    printf("found %d at index %ld, value %d\n", key, (long)(res - arr), *res);
+  } else {
+    printf("%d not found\n", key);
+  }
+}
+
+int
+main() {
+  int arr[] = {-10, -3, -1, 0, 1, 2, 4, 7, 10, 25, 100};
+
+  size_t n = sizeof(arr) / sizeof(arr[0]);
+
+  /* found */
+  test_bsearch(arr, n, -10);
+  test_bsearch(arr, n, -1);
+  test_bsearch(arr, n, 0);
+  test_bsearch(arr, n, 4);
+  test_bsearch(arr, n, 25);
+  test_bsearch(arr, n, 100);
+
+  /* not found */
+  test_bsearch(arr, n, -11);
+  test_bsearch(arr, n, -2);
+  test_bsearch(arr, n, 3);
+  test_bsearch(arr, n, 5);
+  test_bsearch(arr, n, 8);
+  test_bsearch(arr, n, 99);
+  test_bsearch(arr, n, 101);
+}
diff --git a/src/test/data/opt/bsearch.driver.sysv.amd64.output b/src/test/data/opt/bsearch.driver.sysv.amd64.output
new file mode 100644
index 00000000..a80e8202
--- /dev/null
+++ b/src/test/data/opt/bsearch.driver.sysv.amd64.output
@@ -0,0 +1,13 @@
+found -10 at index 0, value -10
+found -1 at index 2, value -1
+found 0 at index 3, value 0
+found 4 at index 6, value 4
+found 25 at index 9, value 25
+found 100 at index 10, value 100
+-11 not found
+-2 not found
+3 not found
+5 not found
+8 not found
+99 not found
+101 not found
diff --git a/src/test/data/opt/bsearch.vir b/src/test/data/opt/bsearch.vir
new file mode 100644
index 00000000..2ef348a8
--- /dev/null
+++ b/src/test/data/opt/bsearch.vir
@@ -0,0 +1,40 @@
+module bsearch
+
+export function w $intcmp(l %a, l %b) {
+@start:
+  %ia = ld.w %a
+  %ib = ld.w %b
+  %k = sub.w %ia, %ib
+  ret %k
+}
+
+export function l $bsearch(l %key, l %base, l %nel, l %width, l %cmp) {
+@start:
+  jmp @hdr
+@hdr:
+  %c = gt.l %nel, 0_l
+  br %c, @cmp, @null
+@cmp:
+  %nel2 = udiv.l %nel, 2_l
+  %off = mul.l %width, %nel2
+  %try = add.l %base, %off
+  %sign = call.w %cmp(%key, %try)
+  %c = slt.w %sign, 0_w
+  br %c, @less, @notless
+@less:
+  %nel = udiv.l %nel, 2_l
+  jmp @hdr
+@notless:
+  %c = sgt.w %sign, 0_w
+  br %c, @greater, @equal
+@greater:
+  %base = add.l %try, %width
+  %ne2 = udiv.l %nel, 2_l
+  %ne2 = add.l %ne2, 1_l
+  %nel = sub.l %nel, %ne2
+  jmp @hdr
+@equal:
+  ret %try
+@null:
+  ret 0_l
+}
diff --git a/src/test/data/opt/bsearch.vir.opt b/src/test/data/opt/bsearch.vir.opt
new file mode 100644
index 00000000..acb1c8f1
--- /dev/null
+++ b/src/test/data/opt/bsearch.vir.opt
@@ -0,0 +1,34 @@
+module bsearch
+
+export function w $intcmp(l %a, l %b) {
+@2:
+  %ia.1 = ld.w %a ; @3
+  %ib.1 = ld.w %b ; @4
+  %1 = sub.w %ia.1, %ib.1 ; @27
+  ret %1
+}
+
+export function l $bsearch(l %key, l %base, l %nel, l %width, l %cmp) {
+@6:
+  jmp @7(%nel, %base)
+@7(%nel.1, %base.1):
+  %2 = ne.l %nel.1, 0x0_l ; @28
+  br %2, @8, @26(0x0_l)
+@8:
+  %3 = lsr.l %nel.1, 0x1_l ; @29
+  %4 = mul.l %width, %3 ; @30
+  %5 = add.l %base.1, %4 ; @31
+  %sign.1 = call.w %cmp(%key, %5) ; @16
+  %6 = slt.w %sign.1, 0x0_w ; @32
+  br %6, @7(%3, %base.1), @12
+@12:
+  %7 = sgt.w %sign.1, 0x0_w ; @33
+  br %7, @19, @26(%5)
+@19:
+  %8 = add.l %5, %width ; @34
+  %9 = add.l %3, 0x1_l ; @35
+  %10 = sub.l %nel.1, %9 ; @36
+  jmp @7(%10, %8)
+@26(%0):
+  ret %0
+}
diff --git a/src/test/data/opt/bsearch.vir.opt.sysv b/src/test/data/opt/bsearch.vir.opt.sysv
new file mode 100644
index 00000000..1595ba27
--- /dev/null
+++ b/src/test/data/opt/bsearch.vir.opt.sysv
@@ -0,0 +1,34 @@
+module bsearch
+
+export function $intcmp(l %a/rdi, l %b/rsi) {
+@2:
+  %ia.1 = ld.w %a ; @3
+  %ib.1 = ld.w %b ; @4
+  %1 = sub.w %ia.1, %ib.1 ; @27
+  ret rax/%1
+}
+
+export function $bsearch(l %key/rdi, l %base/rsi, l %nel/rdx, l %width/rcx, l %cmp/r8) {
+@6:
+  jmp @7(%nel, %base)
+@7(%nel.1, %base.1):
+  %2 = ne.l %nel.1, 0x0_l ; @28
+  br %2, @8, @26(0x0_l)
+@8:
+  %3 = lsr.l %nel.1, 0x1_l ; @29
+  %4 = mul.l %width, %3 ; @30
+  %5 = add.l %base.1, %4 ; @31
+  %sign.1/w/rax = call %cmp(%key/rdi, %5/rsi) ; @16
+  %6 = slt.w %sign.1, 0x0_w ; @32
+  br %6, @7(%3, %base.1), @12
+@12:
+  %7 = sgt.w %sign.1, 0x0_w ; @33
+  br %7, @19, @26(%5)
+@19:
+  %8 = add.l %5, %width ; @34
+  %9 = add.l %3, 0x1_l ; @35
+  %10 = sub.l %nel.1, %9 ; @36
+  jmp @7(%10, %8)
+@26(%0):
+  ret rax/%0
+}
diff --git a/src/test/data/opt/bsearch.vir.opt.sysv.amd64 b/src/test/data/opt/bsearch.vir.opt.sysv.amd64
new file mode 100644
index 00000000..9c5c5a7d
--- /dev/null
+++ b/src/test/data/opt/bsearch.vir.opt.sysv.amd64
@@ -0,0 +1,66 @@
+module bsearch
+
+export function $intcmp { ; returns: rax
+@2:
+  mov %a:l, rdi ; @3
+  mov %b:l, rsi ; @40
+  mov %ia.1:w, dword ptr [%a] ; @41
+  mov %ib.1:w, dword ptr [%b] ; @4
+  mov %1:w, %ia.1:w ; @27
+  sub %1:w, %ib.1:w ; @39
+  mov eax, %1:w ; @37
+  ret ; @38
+}
+
+export function $bsearch { ; returns: rax
+@6:
+  mov %key:l, rdi ; @77
+  mov %base:l, rsi ; @78
+  mov %nel:l, rdx ; @79
+  mov %width:l, rcx ; @80
+  mov %cmp:l, r8 ; @81
+  mov %nel.1:l, %nel:l ; @82
+  mov %base.1:l, %base:l ; @83
+  jmp @7 ; @84
+@7:
+  test %nel.1:l, %nel.1:l ; @72
+  jne @8 ; @73
+  jmp @42 ; @74
+@42:
+  xor %0:w, %0:w ; @70
+  jmp @26 ; @71
+@8:
+  mov %3:l, %nel.1:l ; @29
+  shr %3:l, 0x1_b ; @69
+  mov %4:l, %width:l ; @30
+  imul %4:l, %3:l ; @68
+  lea %5:l, qword ptr [%base.1 + %4*1] ; @31
+  mov rdi, %key:l ; @16
+  mov rsi, %5:l ; @65
+  call %cmp:l ; rdi rsi ; @66
+  mov %sign.1:w, eax ; @67
+  test %sign.1:w, %sign.1:w ; @60
+  js @43 ; @61
+  jmp @12 ; @62
+@43:
+  mov %nel.1:l, %3:l ; @58
+  jmp @7 ; @59
+@12:
+  cmp %sign.1:w, 0x0_w ; @53
+  jg @19 ; @54
+  jmp @44 ; @55
+@44:
+  mov %0:l, %5:l ; @51
+  jmp @26 ; @52
+@26:
+  mov rax, %0:l ; @49
+  ret ; @50
+@19:
+  lea %8:l, qword ptr [%5 + %width*1] ; @34
+  lea %9:l, qword ptr [%3 + 0x1] ; @35
+  mov %10:l, %nel.1:l ; @36
+  sub %10:l, %9:l ; @48
+  mov %nel.1:l, %10:l ; @45
+  mov %base.1:l, %8:l ; @46
+  jmp @7 ; @47
+}
diff --git a/src/test/data/opt/bsearch.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/bsearch.vir.opt.sysv.amd64.regalloc
new file mode 100644
index 00000000..a717a761
--- /dev/null
+++ b/src/test/data/opt/bsearch.vir.opt.sysv.amd64.regalloc
@@ -0,0 +1,65 @@
+module bsearch
+
+export function $intcmp { ; returns: rax
+@2:
+  mov eax, dword ptr [rdi] ; @41
+  sub eax, dword ptr [rsi] ; @39
+  ret ; @38
+}
+
+export function $bsearch { ; returns: rax
+@6:
+  push rbp ; @89
+  mov rbp, rsp ; @90
+  sub rsp, 0x18_l ; @91
+  push r12 ; @92
+  push r13 ; @93
+  push r14 ; @94
+  push r15 ; @95
+  push rbx ; @96
+  mov qword ptr [rbp - 0x18], rdi ; @85
+  mov r15, rsi ; @78
+  mov r13, rdx ; @79
+  mov r14, rcx ; @80
+  mov qword ptr [rbp - 0x10], r8 ; @86
+@7:
+  test r13, r13 ; @72
+  jne @8 ; @73
+@42:
+  xor eax, eax ; @70
+  jmp @26 ; @71
+@8:
+  mov rbx, r13 ; @29
+  shr rbx, 0x1_b ; @69
+  mov rax, r14 ; @30
+  imul rax, rbx ; @68
+  lea r12, qword ptr [r15 + rax*1] ; @31
+  mov rdi, qword ptr [rbp - 0x18] ; @87
+  mov rsi, r12 ; @65
+  call qword ptr [rbp - 0x10] ; rdi rsi ; @66
+  test eax, eax ; @60
+  jns @12 ; @61
+@43:
+  mov r13, rbx ; @58
+  jmp @7 ; @59
+@12:
+  cmp eax, 0x0_w ; @53
+  jg @19 ; @54
+@44:
+  mov rax, r12 ; @51
+@26:
+  pop rbx ; @97
+  pop r15 ; @98
+  pop r14 ; @99
+  pop r13 ; @100
+  pop r12 ; @101
+  leave ; @102
+  ret ; @50
+@19:
+  lea r15, qword ptr [r12 + r14*1] ; @34
+  lea rcx, qword ptr [rbx + 0x1] ; @35
+  mov rax, r13 ; @36
+  sub rax, rcx ; @48
+  mov r13, rax ; @45
+  jmp @7 ; @47
+}
diff --git a/src/test/data/opt/clz_ctz_8.driver.sysv.amd64.c b/src/test/data/opt/clz_ctz_8.driver.sysv.amd64.c
index 1532619b..8bdd5ca2 100644
--- a/src/test/data/opt/clz_ctz_8.driver.sysv.amd64.c
+++ b/src/test/data/opt/clz_ctz_8.driver.sysv.amd64.c
@@ -1,10 +1,10 @@
-#include <stdio.h>
 #include <assert.h>
 
 extern unsigned char clz8(unsigned char n);
 extern unsigned char ctz8(unsigned char n);
 
-int main() {
+int
+main() {
   assert(clz8(0b00001111) == 4);
   assert(ctz8(0b00001111) == 0);
 
diff --git a/src/test/data/opt/coalesce1.vir b/src/test/data/opt/coalesce1.vir
new file mode 100644
index 00000000..35efae18
--- /dev/null
+++ b/src/test/data/opt/coalesce1.vir
@@ -0,0 +1,36 @@
+module coalesce1
+
+;; %a can coalesce with either %b or %c, but not both
+export function w $f(w %x, w %y, w %z) {
+  %a = slot 8, align 8
+  %b = slot 8, align 8
+  %c = slot 8, align 8
+@start:
+  st.w %x, %a
+  jmp @b1
+@b1:
+  %u1 = ld.w %a
+  jmp @b2
+@b2:
+  jmp @b3
+@b3:
+  st.w %y, %b
+  jmp @b4
+@b4:
+  %u2 = ld.w %b
+  jmp @b5
+@b5:
+  st.w %z, %c
+  jmp @b6
+@b6:
+  %u3 = ld.w %c
+  jmp @b7
+@b7:
+  %u4 = ld.w %b
+  jmp @b8
+@b8:
+  %r = add.w %u1, %u2
+  %r = add.w %r, %u3
+  %r = add.w %r, %u4
+  ret %r
+}
diff --git a/src/test/data/opt/coalesce1.vir.opt b/src/test/data/opt/coalesce1.vir.opt
new file mode 100644
index 00000000..7bc292e4
--- /dev/null
+++ b/src/test/data/opt/coalesce1.vir.opt
@@ -0,0 +1,34 @@
+module coalesce1
+
+export function w $f(w %x, w %y, w %z) {
+  %b = slot 8, align 8
+  %c = slot 8, align 8
+@2:
+  st.w %x, %c ; @4
+  jmp @3
+@3:
+  %u1.1 = ld.w %c ; @6
+  jmp @5
+@5:
+  jmp @7
+@7:
+  st.w %y, %b ; @9
+  jmp @8
+@8:
+  %u2.1 = ld.w %b ; @11
+  jmp @10
+@10:
+  st.w %z, %c ; @13
+  jmp @12
+@12:
+  %u3.1 = ld.w %c ; @15
+  jmp @14
+@14:
+  %u4.1 = ld.w %b ; @17
+  jmp @16
+@16:
+  %r.1 = add.w %u1.1, %u2.1 ; @18
+  %r.2 = add.w %r.1, %u3.1 ; @19
+  %r.3 = add.w %r.2, %u4.1 ; @20
+  ret %r.3
+}
diff --git a/src/test/data/opt/coalesce1a.vir b/src/test/data/opt/coalesce1a.vir
new file mode 100644
index 00000000..f7b6ae9c
--- /dev/null
+++ b/src/test/data/opt/coalesce1a.vir
@@ -0,0 +1,36 @@
+module coalesce1a
+
+;; same as coalesce1, but let all of the opts run
+export function w $f(w %x, w %y, w %z) {
+  %a = slot 8, align 8
+  %b = slot 8, align 8
+  %c = slot 8, align 8
+@start:
+  st.w %x, %a
+  jmp @b1
+@b1:
+  %u1 = ld.w %a
+  jmp @b2
+@b2:
+  jmp @b3
+@b3:
+  st.w %y, %b
+  jmp @b4
+@b4:
+  %u2 = ld.w %b
+  jmp @b5
+@b5:
+  st.w %z, %c
+  jmp @b6
+@b6:
+  %u3 = ld.w %c
+  jmp @b7
+@b7:
+  %u4 = ld.w %b
+  jmp @b8
+@b8:
+  %r = add.w %u1, %u2
+  %r = add.w %r, %u3
+  %r = add.w %r, %u4
+  ret %r
+}
diff --git a/src/test/data/opt/coalesce1a.vir.opt b/src/test/data/opt/coalesce1a.vir.opt
new file mode 100644
index 00000000..9234fd83
--- /dev/null
+++ b/src/test/data/opt/coalesce1a.vir.opt
@@ -0,0 +1,9 @@
+module coalesce1a
+
+export function w $f(w %x, w %y, w %z) {
+@2:
+  %2 = add.w %x, %y ; @21
+  %4 = add.w %z, %y ; @23
+  %5 = add.w %2, %4 ; @24
+  ret %5
+}
diff --git a/src/test/data/opt/collatz.driver.sysv.amd64.c b/src/test/data/opt/collatz.driver.sysv.amd64.c
index 3dd398dd..3a1e3450 100644
--- a/src/test/data/opt/collatz.driver.sysv.amd64.c
+++ b/src/test/data/opt/collatz.driver.sysv.amd64.c
@@ -2,7 +2,8 @@
 
 extern int foo(int n);
 
-int main() {
+int
+main() {
   assert(foo(1) == 0);
   assert(foo(2) == 1);
   assert(foo(3) == 7);
diff --git a/src/test/data/opt/collatz_rec.driver.sysv.amd64.c b/src/test/data/opt/collatz_rec.driver.sysv.amd64.c
index 61ce7be7..ed1030e0 100644
--- a/src/test/data/opt/collatz_rec.driver.sysv.amd64.c
+++ b/src/test/data/opt/collatz_rec.driver.sysv.amd64.c
@@ -2,11 +2,13 @@
 
 extern int foo(int n, int count);
 
-int bar(int n) {
+int
+bar(int n) {
   return foo(n, 0);
 }
 
-int main() {
+int
+main() {
   assert(bar(1) == 0);
   assert(bar(2) == 1);
   assert(bar(3) == 7);
diff --git a/src/test/data/opt/cpyarray.driver.sysv.amd64.c b/src/test/data/opt/cpyarray.driver.sysv.amd64.c
index b5e44d25..49735158 100644
--- a/src/test/data/opt/cpyarray.driver.sysv.amd64.c
+++ b/src/test/data/opt/cpyarray.driver.sysv.amd64.c
@@ -2,7 +2,8 @@
 
 extern void foo(int *dst, int *src, unsigned int n);
 
-int main() {
+int
+main() {
   int dst[3];
   int src[3] = {1, 2, 3};
   foo(dst, src, 3);
diff --git a/src/test/data/opt/esc1.vir b/src/test/data/opt/esc1.vir
new file mode 100644
index 00000000..225c0a35
--- /dev/null
+++ b/src/test/data/opt/esc1.vir
@@ -0,0 +1,27 @@
+module esc1
+
+export function w $foo(w %x) {
+  %a = slot 8, align 8
+  %b = slot 8, align 8
+@start:
+  %x1 = add.w %x, 1_w
+  %x2 = add.w %x, 2_w
+  %x3 = add.w %x, 3_w
+  st.w %x, %a
+  %a4 = add.l %a, 4_l
+  st.w %x1, %a4
+  st.w %x2, %b
+  %b4 = add.l %b, 4_l
+  st.w %x3, %b4
+  %c = slt.w %x, 0_w
+  br %c, @yes, @no
+@yes:
+  %p = copy.l %b4
+  jmp @done
+@no:
+  %p = copy.l %a4
+  jmp @done
+@done:
+  %r = ld.w %p
+  ret %r
+}
diff --git a/src/test/data/opt/esc1.vir.opt b/src/test/data/opt/esc1.vir.opt
new file mode 100644
index 00000000..6d33a0e6
--- /dev/null
+++ b/src/test/data/opt/esc1.vir.opt
@@ -0,0 +1,20 @@
+module esc1
+
+export function w $foo(w %x) {
+  %a = slot 8, align 8
+  %b = slot 8, align 8
+@2:
+  %0 = add.w %x, 0x1_w ; @19
+  %1 = add.w %x, 0x2_w ; @20
+  %2 = add.w %x, 0x3_w ; @21
+  st.w %x, %a ; @8
+  %3 = add.l %a, 0x4_l ; @22
+  st.w %0, %3 ; @10
+  st.w %1, %b ; @11
+  %4 = add.l %b, 0x4_l ; @23
+  st.w %2, %4 ; @13
+  %5 = slt.w %x, 0x0_w ; @24
+  %6 = sel.l %5, %4, %3 ; @25
+  %r.1 = ld.w %6 ; @18
+  ret %r.1
+}
diff --git a/src/test/data/opt/gcdext.driver.sysv.amd64.c b/src/test/data/opt/gcdext.driver.sysv.amd64.c
index ed6172a4..1cd1b678 100644
--- a/src/test/data/opt/gcdext.driver.sysv.amd64.c
+++ b/src/test/data/opt/gcdext.driver.sysv.amd64.c
@@ -8,13 +8,15 @@ struct S {
 
 extern struct S gcd(int a, int b);
 
-void test(int a, int b, int x) {
+void
+test(int a, int b, int x) {
   struct S s = gcd(a, b);
   assert(s.gcd == x);
   assert(a * s.x + b * s.y == s.gcd);
 }
 
-int main() {
+int
+main() {
   test(12, 18, 6);
   test(30, 20, 10);
   test(101, 103, 1);
diff --git a/src/test/data/opt/gcdext.vir.opt.sysv b/src/test/data/opt/gcdext.vir.opt.sysv
index d13e608d..8a62f120 100644
--- a/src/test/data/opt/gcdext.vir.opt.sysv
+++ b/src/test/data/opt/gcdext.vir.opt.sysv
@@ -1,56 +1,38 @@
 module gcdext
 
 export function $gcd(w %a/rdi, w %b/rsi) {
-  %res = slot 16, align 8
-  %r = slot 16, align 8
-  %8 = slot 16, align 8
-  %13 = slot 16, align 8
-  %18 = slot 16, align 8
+  %32 = slot 8, align 8
+  %33 = slot 8, align 8
+  %34 = slot 8, align 8
 @2:
-  %1 = eq.w %a, 0x0_w ; @30
-  %2 = add.l %res, 0x8_l ; @31
-  %3 = add.l %res, 0x4_l ; @32
-  br %1, @3, @4
+  %1.1 = eq.w %a, 0x0_w ; @30
+  br %1.1, @3, @4
 @3:
-  st.w %b, %res ; @6
-  st.w 0x0_w, %3 ; @8
-  st.w 0x1_w, %2 ; @10
-  %19 = ld.l %res ; @49
-  st.l %19, %18 ; @50
-  %21 = ld.l %2 ; @52
-  %22 = add.l %18, 0x8_l ; @53
-  st.l %21, %22 ; @54
-  jmp @29(%18)
+  st.w %b, %33 ; @6
+  %35.1 = add.l %33, 0x4_l ; @61
+  st.w 0x0_w, %35.1 ; @8
+  st.w 0x1_w, %34 ; @10
+  %19.1 = ld.l %33 ; @49
+  %21.1 = ld.l %34 ; @52
+  jmp @29(%21.1, %19.1)
 @4:
   %m.1 = rem.w %b, %a ; @12
-  %27/l/rax, %28/l/rdx = call $gcd(%m.1/rdi, %a/rsi) ; @13
-  %26 = add.l %8, 0x8_l ; @58
-  st.l %27, %8 ; @59
-  st.l %28, %26 ; @60
-  %9 = ld.l %8 ; @37
-  st.l %9, %r ; @38
-  %11 = ld.l %26 ; @40
-  %12 = add.l %r, 0x8_l ; @41
-  st.l %11, %12 ; @42
-  %rg.1 = ld.w %r ; @15
-  %4 = add.l %r, 0x4_l ; @33
-  %rx.1 = ld.w %4 ; @17
-  %ry.1 = ld.w %12 ; @19
-  st.w %rg.1, %res ; @20
+  %27.1/l/rax, %28.1/l/rdx = call $gcd(%m.1/rdi, %a/rsi) ; @13
+  st.l %27.1, %33 ; @38
+  st.l %28.1, %32 ; @42
+  %rg.1 = ld.w %33 ; @15
+  %36.1 = add.l %33, 0x4_l ; @62
+  %rx.1 = ld.w %36.1 ; @17
+  %ry.1 = ld.w %32 ; @19
+  st.w %rg.1, %33 ; @20
   %nx.1 = div.w %b, %a ; @21
-  %6 = mul.w %nx.1, %rx.1 ; @35
-  %7 = sub.w %ry.1, %6 ; @36
-  st.w %7, %3 ; @25
-  st.w %rx.1, %2 ; @27
-  %14 = ld.l %res ; @43
-  st.l %14, %13 ; @44
-  %16 = ld.l %2 ; @46
-  %17 = add.l %13, 0x8_l ; @47
-  st.l %16, %17 ; @48
-  jmp @29(%13)
-@29(%0):
-  %23 = ld.l %0 ; @55
-  %24 = add.l %0, 0x8_l ; @56
-  %25 = ld.l %24 ; @57
-  ret rax/%23, rdx/%25
+  %6.1 = mul.w %nx.1, %rx.1 ; @35
+  %7.1 = sub.w %ry.1, %6.1 ; @36
+  st.w %7.1, %36.1 ; @25
+  st.w %rx.1, %34 ; @27
+  %14.1 = ld.l %33 ; @43
+  %16.1 = ld.l %34 ; @46
+  jmp @29(%16.1, %14.1)
+@29(%30.1, %29.1):
+  ret rax/%29.1, rdx/%30.1
 }
diff --git a/src/test/data/opt/int_pow.driver.sysv.amd64.c b/src/test/data/opt/int_pow.driver.sysv.amd64.c
index 764a9398..2a37a2cb 100644
--- a/src/test/data/opt/int_pow.driver.sysv.amd64.c
+++ b/src/test/data/opt/int_pow.driver.sysv.amd64.c
@@ -4,7 +4,8 @@
 extern int64_t int_pow(int64_t base, int64_t exponent);
 extern int64_t int_pow_alt(int64_t base, int64_t exponent);
 
-int main() {
+int
+main() {
   assert(int_pow(2, 10) == 1024);
   assert(int_pow(5, 3) == 125);
   assert(int_pow(7, 0) == 1);
diff --git a/src/test/data/opt/int_pow.vir.opt b/src/test/data/opt/int_pow.vir.opt
index b279f5cc..f57d0cfe 100644
--- a/src/test/data/opt/int_pow.vir.opt
+++ b/src/test/data/opt/int_pow.vir.opt
@@ -11,19 +11,23 @@ export function l $int_pow(l %base, l %exponent) {
   %2 = mul.l %1, %1 ; @57
   %3 = mul.l %2, %1 ; @58
   %4 = and.l %exponent.1, 0x3_l ; @59
-  %5 = mul.l %ret.2, %1 ; @60
-  switch.l %4, @21(%ret.2) [0x1_l -> @21(%5),
+  switch.l %4, @21(%ret.2) [0x1_l -> @14,
                             0x2_l -> @15,
                             0x3_l -> @16]
+@14:
+  %11 = mul.l %ret.2, %1 ; @66
+  jmp @21(%11)
 @15:
-  %9 = mul.l %5, %1 ; @64
-  jmp @21(%9)
+  %9 = mul.l %ret.2, %1 ; @64
+  %10 = mul.l %9, %1 ; @65
+  jmp @21(%10)
 @16:
-  %7 = mul.l %5, %2 ; @62
+  %6 = mul.l %ret.2, %1 ; @61
+  %7 = mul.l %6, %2 ; @62
   jmp @21(%7)
 @21(%ret.3):
-  %6 = asr.l %exponent.1, 0x2_l ; @61
-  jmp @3(%ret.3, %3, %1, %6)
+  %5 = asr.l %exponent.1, 0x2_l ; @60
+  jmp @3(%ret.3, %3, %1, %5)
 @10:
   ret %ret.2
 }
@@ -32,31 +36,31 @@ export function l $int_pow_alt(l %base, l %exponent) {
   %mul = slot 32, align 8
 @27:
   st.l 0x1_l, %mul ; @30
-  %10 = add.l %mul, 0x8_l ; @65
-  st.l %base, %10 ; @32
-  %11 = add.l %mul, 0x18_l ; @66
-  st.l 0x1_l, %11 ; @34
-  %12 = add.l %mul, 0x10_l ; @67
+  %12 = add.l %mul, 0x8_l ; @67
+  st.l %base, %12 ; @32
+  %13 = add.l %mul, 0x18_l ; @68
+  st.l 0x1_l, %13 ; @34
+  %14 = add.l %mul, 0x10_l ; @69
   jmp @28(0x1_l, %exponent)
 @28(%ret.2, %exponent.1):
-  %13 = ne.l %exponent.1, 0x0_l ; @68
-  br %13, @35, @36
+  %15 = ne.l %exponent.1, 0x0_l ; @70
+  br %15, @35, @36
 @35:
-  %mul1.1 = ld.l %10 ; @41
-  %mul3.1 = ld.l %11 ; @42
-  %14 = mul.l %mul1.1, %mul3.1 ; @69
-  st.l %14, %10 ; @44
-  %15 = mul.l %14, %14 ; @70
-  st.l %15, %12 ; @46
-  %16 = mul.l %15, %14 ; @71
-  st.l %16, %11 ; @48
-  %17 = and.l %exponent.1, 0x3_l ; @72
-  %18 = lsl.l %17, 0x3_l ; @73
-  %19 = add.l %mul, %18 ; @74
-  %mule.1 = ld.l %19 ; @52
-  %20 = mul.l %ret.2, %mule.1 ; @75
-  %21 = asr.l %exponent.1, 0x2_l ; @76
-  jmp @28(%20, %21)
+  %mul1.1 = ld.l %12 ; @41
+  %mul3.1 = ld.l %13 ; @42
+  %16 = mul.l %mul1.1, %mul3.1 ; @71
+  st.l %16, %12 ; @44
+  %17 = mul.l %16, %16 ; @72
+  st.l %17, %14 ; @46
+  %18 = mul.l %17, %16 ; @73
+  st.l %18, %13 ; @48
+  %19 = and.l %exponent.1, 0x3_l ; @74
+  %20 = lsl.l %19, 0x3_l ; @75
+  %21 = add.l %mul, %20 ; @76
+  %mule.1 = ld.l %21 ; @52
+  %22 = mul.l %ret.2, %mule.1 ; @77
+  %23 = asr.l %exponent.1, 0x2_l ; @78
+  jmp @28(%22, %23)
 @36:
   ret %ret.2
 }
diff --git a/src/test/data/opt/int_pow.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/int_pow.vir.opt.sysv.amd64.regalloc
index d20e0a65..364833bc 100644
--- a/src/test/data/opt/int_pow.vir.opt.sysv.amd64.regalloc
+++ b/src/test/data/opt/int_pow.vir.opt.sysv.amd64.regalloc
@@ -2,75 +2,73 @@ module int_pow
 
 export function $int_pow { ; returns: rax
 @2:
-  mov rcx, rdi ; @119
-  mov r8d, 0x1_w ; @121
-  mov r9d, 0x1_w ; @122
+  mov rdx, rsi ; @123
+  mov r8d, 0x1_w ; @124
+  mov esi, 0x1_w ; @125
 @3:
-  test rsi, rsi ; @114
-  jne @9 ; @115
+  test rdx, rdx ; @117
+  jne @9 ; @118
 @10:
-  mov rax, r8 ; @112
-  ret ; @113
+  mov rax, r8 ; @115
+  ret ; @116
 @9:
-  imul rcx, r9 ; @111
-  mov rdx, rcx ; @57
-  imul rdx, rcx ; @110
-  mov r9, rdx ; @58
-  imul r9, rcx ; @109
-  mov rax, rsi ; @59
-  and rax, 0x3_l ; @108
-  mov rdi, r8 ; @60
-  imul rdi, rcx ; @107
-  test rax, rax ; @96
-  je @77 ; @97
-  dec rax ; @99
-  cmp rax, 0x2_l ; @100
-  ja @77 ; @101
-  lea r10, qword ptr [rip + @95] ; @102
-  movsxd rax, dword ptr [r10 + rax*4] ; @103
-  add rax, r10 ; @104
-  jmp rax ; @105
-  .tbl @95 [@21, @15, @16] ; @106
-@77:
-  mov rdi, r8 ; @93
-  jmp @21 ; @94
+  imul rdi, rsi ; @114
+  mov rcx, rdi ; @57
+  imul rcx, rdi ; @113
+  mov rsi, rcx ; @58
+  imul rsi, rdi ; @112
+  mov rax, rdx ; @59
+  and rax, 0x3_l ; @111
+  je @21 ; @101
+  dec rax ; @103
+  cmp rax, 0x2_l ; @104
+  ja @21 ; @105
+  lea r9, qword ptr [rip + @99] ; @106
+  movsxd rax, dword ptr [r9 + rax*4] ; @107
+  add rax, r9 ; @108
+  jmp rax ; @109
+  .tbl @99 [@14, @15, @16] ; @110
 @16:
-  imul rdi, rdx ; @90
-  jmp @21 ; @89
+  imul r8, rdi ; @96
+  imul r8, rcx ; @95
+  jmp @21 ; @94
 @15:
-  imul rdi, rcx ; @87
+  imul r8, rdi ; @92
+  imul r8, rdi ; @91
+  jmp @21 ; @90
+@14:
+  imul r8, rdi ; @88
 @21:
-  sar rsi, 0x2_b ; @84
-  mov r8, rdi ; @79
-  jmp @3 ; @83
+  sar rdx, 0x2_b ; @85
+  jmp @3 ; @84
 }
 
 export function $int_pow_alt { ; returns: rax
 @27:
-  sub rsp, 0x28_l ; @148
-  mov qword ptr [rsp], 0x1_l ; @147
+  sub rsp, 0x28_l ; @151
+  mov qword ptr [rsp], 0x1_l ; @150
   mov qword ptr [rsp + 0x8], rdi ; @32
   mov qword ptr [rsp + 0x18], 0x1_l ; @34
-  mov eax, 0x1_w ; @143
+  mov eax, 0x1_w ; @146
 @28:
-  test rsi, rsi ; @138
-  jne @35 ; @139
+  test rsi, rsi ; @141
+  jne @35 ; @142
 @36:
-  add rsp, 0x28_l ; @149
-  ret ; @137
+  add rsp, 0x28_l ; @152
+  ret ; @140
 @35:
   mov rdx, qword ptr [rsp + 0x8] ; @41
-  imul rdx, qword ptr [rsp + 0x18] ; @135
+  imul rdx, qword ptr [rsp + 0x18] ; @138
   mov qword ptr [rsp + 0x8], rdx ; @44
-  mov rcx, rdx ; @70
-  imul rcx, rdx ; @134
+  mov rcx, rdx ; @72
+  imul rcx, rdx ; @137
   mov qword ptr [rsp + 0x10], rcx ; @46
-  imul rcx, rdx ; @133
+  imul rcx, rdx ; @136
   mov qword ptr [rsp + 0x18], rcx ; @48
-  mov rcx, rsi ; @72
-  and rcx, 0x3_l ; @132
+  mov rcx, rsi ; @74
+  and rcx, 0x3_l ; @135
   mov rcx, qword ptr [rsp + rcx*8] ; @52
-  imul rax, rcx ; @130
-  sar rsi, 0x2_b ; @129
-  jmp @28 ; @128
+  imul rax, rcx ; @133
+  sar rsi, 0x2_b ; @132
+  jmp @28 ; @131
 }
diff --git a/src/test/data/opt/nosink.driver.sysv.amd64.c b/src/test/data/opt/nosink.driver.sysv.amd64.c
index c272d3f2..bed0c02d 100644
--- a/src/test/data/opt/nosink.driver.sysv.amd64.c
+++ b/src/test/data/opt/nosink.driver.sysv.amd64.c
@@ -2,7 +2,8 @@
 
 extern int foo(int x);
 
-int main() {
+int
+main() {
   assert(foo(0) == 0);
   assert(foo(1) == 9);
   assert(foo(2) == 15);
diff --git a/src/test/data/opt/palindrome.driver.sysv.amd64.c b/src/test/data/opt/palindrome.driver.sysv.amd64.c
index 3e1ec13e..80cef3f2 100644
--- a/src/test/data/opt/palindrome.driver.sysv.amd64.c
+++ b/src/test/data/opt/palindrome.driver.sysv.amd64.c
@@ -3,7 +3,8 @@
 
 bool palindrome(int n);
 
-int main() {
+int
+main() {
   assert(palindrome(0));
   assert(palindrome(1));
   assert(palindrome(12321));
diff --git a/src/test/data/opt/popcnt.driver.sysv.amd64.c b/src/test/data/opt/popcnt.driver.sysv.amd64.c
index f2cc0f4f..65b3711b 100644
--- a/src/test/data/opt/popcnt.driver.sysv.amd64.c
+++ b/src/test/data/opt/popcnt.driver.sysv.amd64.c
@@ -5,7 +5,8 @@ extern unsigned short popcnt16(unsigned short n);
 extern unsigned int popcnt32(unsigned int n);
 extern unsigned long popcnt64(unsigned long n);
 
-int main() {
+int
+main() {
   assert(popcnt8(0xFF) == 8);
   assert(popcnt8(0x0F) == 4);
 
diff --git a/src/test/data/opt/prime.driver.sysv.amd64.c b/src/test/data/opt/prime.driver.sysv.amd64.c
index 4ca232e7..35abd58a 100644
--- a/src/test/data/opt/prime.driver.sysv.amd64.c
+++ b/src/test/data/opt/prime.driver.sysv.amd64.c
@@ -2,7 +2,8 @@
 
 extern int foo(unsigned int x);
 
-unsigned int nth(int n) {
+unsigned int
+nth(int n) {
   unsigned int x = 1;
   unsigned int i = 0;
   while (1) {
@@ -15,7 +16,8 @@ unsigned int nth(int n) {
   }
 }
 
-int main() {
+int
+main() {
   for (unsigned int i = 1; i <= 20; ++i) {
     printf("%d\n", nth(i));
   }
diff --git a/src/test/data/opt/prime_main_licm.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/prime_main_licm.vir.opt.sysv.amd64.regalloc
index d5905974..f2cbb6ea 100644
--- a/src/test/data/opt/prime_main_licm.vir.opt.sysv.amd64.regalloc
+++ b/src/test/data/opt/prime_main_licm.vir.opt.sysv.amd64.regalloc
@@ -15,7 +15,7 @@ export function $main { ; returns: rax
   push rbx ; @69
   mov rdi, qword ptr [rsi + 0x8] ; @5
   call $atoi ; rdi ; @60
-  mov r14d, eax ; @56
+  mov r14d, eax ; @61
   mov r13d, 0x1_w ; @57
   mov eax, 0x1_w ; @58
 @3:
@@ -38,10 +38,8 @@ export function $main { ; returns: rax
   mov edx, ebx ; @36
   xor al, al ; @37
   call $printf ; rax rdi rdx rsi ; @38
-  lea ecx, qword ptr [r14 - 0x1] ; @25
-  lea eax, qword ptr [r13 + 0x1] ; @26
-  mov r14d, ecx ; @31
-  mov r13d, eax ; @32
+  dec r14d ; @25
+  inc r13d ; @26
   mov eax, r12d ; @33
   jmp @3 ; @34
 @9:
diff --git a/src/test/data/opt/promote2-partial.driver.sysv.amd64.c b/src/test/data/opt/promote2-partial.driver.sysv.amd64.c
index b0c7ebcd..c15d4734 100644
--- a/src/test/data/opt/promote2-partial.driver.sysv.amd64.c
+++ b/src/test/data/opt/promote2-partial.driver.sysv.amd64.c
@@ -2,12 +2,14 @@
 
 extern int foo(int a, int b);
 
-void test(int a, int b, int x) {
+void
+test(int a, int b, int x) {
   int gcd = foo(a, b);
   assert(gcd == x);
 }
 
-int main() {
+int
+main() {
   test(12, 18, 6);
   test(30, 20, 10);
   test(101, 103, 1);
diff --git a/src/test/data/opt/qsort.driver.sysv.amd64.c b/src/test/data/opt/qsort.driver.sysv.amd64.c
index b02d498e..13e3c46d 100644
--- a/src/test/data/opt/qsort.driver.sysv.amd64.c
+++ b/src/test/data/opt/qsort.driver.sysv.amd64.c
@@ -3,7 +3,8 @@
 
 extern void qsort(int *arr, long low, long high);
 
-int main() {
+int
+main() {
   int arr[] = {10, 7, 8, 9, 1, 5, 22, 59, 6, 17, 54};
   int n = sizeof(arr) / sizeof(arr[0]);
   qsort(arr, 0, n - 1);
diff --git a/src/test/data/opt/qsort.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/qsort.vir.opt.sysv.amd64.regalloc
index aa502040..2c542c94 100644
--- a/src/test/data/opt/qsort.vir.opt.sysv.amd64.regalloc
+++ b/src/test/data/opt/qsort.vir.opt.sysv.amd64.regalloc
@@ -20,13 +20,13 @@ function $partition { ; returns: rax
   push r15 ; @132
   push rbx ; @133
   mov r12, rdi ; @49
-  mov rcx, rsi ; @100
-  lea rax, qword ptr [r12 + rdx*4] ; @50
+  mov rcx, rdx ; @101
+  lea rax, qword ptr [r12 + rcx*4] ; @50
   mov qword ptr [rbp - 0x10], rax ; @124
-  mov r14d, dword ptr [r12 + rdx*4] ; @11
-  lea rax, qword ptr [rcx - 0x1] ; @51
-  lea r15, qword ptr [rdx - 0x1] ; @52
-  mov rbx, rcx ; @97
+  mov r14d, dword ptr [r12 + rcx*4] ; @11
+  lea rax, qword ptr [rsi - 0x1] ; @51
+  lea r15, qword ptr [rcx - 0x1] ; @52
+  mov rbx, rsi ; @97
 @8:
   cmp rbx, r15 ; @92
   jle @14 ; @93
@@ -44,18 +44,17 @@ function $partition { ; returns: rax
   leave ; @139
   ret ; @88
 @14:
-  lea rcx, qword ptr [r12 + rbx*4] ; @58
-  mov edx, dword ptr [rcx] ; @22
-  cmp edx, r14d ; @81
+  lea rsi, qword ptr [r12 + rbx*4] ; @58
+  mov ecx, dword ptr [rsi] ; @22
+  cmp ecx, r14d ; @81
   jg @19 ; @82
 @18:
   lea r13, qword ptr [rax + 0x1] ; @61
   lea rdi, qword ptr [r12 + r13*4] ; @63
-  mov rsi, rcx ; @76
   call $swap ; rdi rsi ; @77
   mov rax, r13 ; @74
 @19:
-  inc rbx ; @71
+  inc rbx ; @60
   jmp @8 ; @73
 }
 
@@ -68,8 +67,8 @@ export function $qsort {
   push r14 ; @144
   push rbx ; @145
   mov r14, rdi ; @119
+  mov rbx, rsi ; @120
   mov r13, rdx ; @121
-  mov rbx, rsi ; @122
 @38:
   cmp rbx, r13 ; @114
   jl @39 ; @115
@@ -90,7 +89,6 @@ export function $qsort {
   mov rdi, r14 ; @44
   mov rsi, rbx ; @106
   call $qsort ; rdi rdx rsi ; @108
-  lea rax, qword ptr [r12 + 0x1] ; @66
-  mov rbx, rax ; @104
+  lea rbx, qword ptr [r12 + 0x1] ; @66
   jmp @38 ; @105
 }
diff --git a/src/test/data/opt/qsort_inline_swap.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/qsort_inline_swap.vir.opt.sysv.amd64.regalloc
index b345194d..ab3f3f24 100644
--- a/src/test/data/opt/qsort_inline_swap.vir.opt.sysv.amd64.regalloc
+++ b/src/test/data/opt/qsort_inline_swap.vir.opt.sysv.amd64.regalloc
@@ -38,8 +38,8 @@ export function $qsort {
   push r14 ; @122
   push rbx ; @123
   mov r14, rdi ; @113
+  mov rbx, rsi ; @114
   mov r13, rdx ; @115
-  mov rbx, rsi ; @116
 @39:
   cmp rbx, r13 ; @108
   jl @40 ; @109
@@ -60,7 +60,6 @@ export function $qsort {
   mov rdi, r14 ; @45
   mov rsi, rbx ; @100
   call $qsort ; rdi rdx rsi ; @102
-  lea rax, qword ptr [r12 + 0x1] ; @67
-  mov rbx, rax ; @98
+  lea rbx, qword ptr [r12 + 0x1] ; @67
   jmp @39 ; @99
 }
diff --git a/src/test/data/opt/retmem.vir.opt.sysv b/src/test/data/opt/retmem.vir.opt.sysv
index 5514c598..096623cd 100644
--- a/src/test/data/opt/retmem.vir.opt.sysv
+++ b/src/test/data/opt/retmem.vir.opt.sysv
@@ -1,25 +1,13 @@
 module retmem
 
 export function $foo(l %3/rdi, l %a/rsi, l %b/rdx, l %c/rcx, l %d/r8) {
-  %x = slot 32, align 8
 @2:
-  st.l %a, %x ; @3
-  %0 = add.l %x, 0x8_l ; @11
-  st.l %b, %0 ; @5
-  %1 = add.l %x, 0x10_l ; @12
-  st.l %c, %1 ; @7
-  %2 = add.l %x, 0x18_l ; @13
-  st.l %d, %2 ; @9
-  %4 = ld.l %x ; @14
-  st.l %4, %3 ; @15
-  %6 = ld.l %0 ; @17
-  %7 = add.l %3, 0x8_l ; @18
-  st.l %6, %7 ; @19
-  %9 = ld.l %1 ; @21
-  %10 = add.l %3, 0x10_l ; @22
-  st.l %9, %10 ; @23
-  %12 = ld.l %2 ; @25
-  %13 = add.l %3, 0x18_l ; @26
-  st.l %12, %13 ; @27
+  st.l %a, %3 ; @15
+  %7.1 = add.l %3, 0x8_l ; @18
+  st.l %b, %7.1 ; @19
+  %10.1 = add.l %3, 0x10_l ; @22
+  st.l %c, %10.1 ; @23
+  %13.1 = add.l %3, 0x18_l ; @26
+  st.l %d, %13.1 ; @27
   ret rax/%3
 }
diff --git a/src/test/data/opt/sink1.driver.sysv.amd64.c b/src/test/data/opt/sink1.driver.sysv.amd64.c
new file mode 100644
index 00000000..a8ba4927
--- /dev/null
+++ b/src/test/data/opt/sink1.driver.sysv.amd64.c
@@ -0,0 +1,13 @@
+#include <assert.h>
+
+extern int foo(int x);
+
+int
+main() {
+  assert(foo(0) == 1);
+  assert(foo(1) == 0);
+  assert(foo(-1) == 0);
+  assert(foo(2) == 0);
+  assert(foo(1234) == 0);
+  assert(foo(-999) == 0);
+}
diff --git a/src/test/data/opt/sink1.vir b/src/test/data/opt/sink1.vir
new file mode 100644
index 00000000..4d261a19
--- /dev/null
+++ b/src/test/data/opt/sink1.vir
@@ -0,0 +1,15 @@
+module sink1
+
+export function w $foo(w %x) {
+@start:
+  %y = add.w %x, 1_w
+  %c = eq.w %x, 0_w
+  br %c, @zero, @notzero
+@zero:
+  %c = eq.w %y, 1_w
+  br %c, @one, @notzero
+@one:
+  ret 1_w
+@notzero:
+  ret 0_w
+}
diff --git a/src/test/data/opt/sink1.vir.opt b/src/test/data/opt/sink1.vir.opt
new file mode 100644
index 00000000..8d310f66
--- /dev/null
+++ b/src/test/data/opt/sink1.vir.opt
@@ -0,0 +1,14 @@
+module sink1
+
+export function w $foo(w %x) {
+@2:
+  %2 = eq.w %x, 0x0_w ; @11
+  br %2, @3, @9(0x0_w)
+@3:
+  %3 = add.w %x, 0x1_w ; @12
+  %4 = eq.w %3, 0x1_w ; @13
+  %5 = flag.w %4 ; @14
+  jmp @9(%5)
+@9(%0):
+  ret %0
+}
diff --git a/src/test/data/opt/spill1.driver.sysv.amd64.c b/src/test/data/opt/spill1.driver.sysv.amd64.c
index cb7ec92d..3bf27f70 100644
--- a/src/test/data/opt/spill1.driver.sysv.amd64.c
+++ b/src/test/data/opt/spill1.driver.sysv.amd64.c
@@ -3,7 +3,8 @@
 // NB: do not call this with 0
 extern int foo(int n);
 
-int main() {
+int
+main() {
   assert(foo(1) == 136);
   assert(foo(2) == 272);
   assert(foo(3) == 408);
diff --git a/src/test/data/opt/spill1.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/spill1.vir.opt.sysv.amd64.regalloc
index 4e696f0a..e5a3c04c 100644
--- a/src/test/data/opt/spill1.vir.opt.sysv.amd64.regalloc
+++ b/src/test/data/opt/spill1.vir.opt.sysv.amd64.regalloc
@@ -35,48 +35,48 @@ export function $foo { ; returns: rax
   add eax, 0x3_w ; @59
   mov dword ptr [rsp + 0x40], eax ; @233
   mov eax, dword ptr [rsp + 0x30] ; @235
-  lea r8d, qword ptr [rax + 0x4] ; @60
+  lea r10d, qword ptr [rax + 0x4] ; @60
   mov eax, dword ptr [rsp + 0x2c] ; @236
-  lea esi, qword ptr [rax + 0x5] ; @61
+  lea r12d, qword ptr [rax + 0x5] ; @61
   mov eax, dword ptr [rsp + 0x28] ; @237
-  lea r13d, qword ptr [rax + 0x6] ; @62
+  lea esi, qword ptr [rax + 0x6] ; @62
   mov eax, dword ptr [rsp + 0x24] ; @238
-  lea r15d, qword ptr [rax + 0x7] ; @63
+  lea r11d, qword ptr [rax + 0x7] ; @63
   mov eax, dword ptr [rsp + 0x20] ; @239
-  lea r11d, qword ptr [rax + 0x8] ; @64
+  lea edx, qword ptr [rax + 0x8] ; @64
   mov eax, dword ptr [rsp + 0x1c] ; @240
-  lea ecx, qword ptr [rax + 0x9] ; @65
+  lea r9d, qword ptr [rax + 0x9] ; @65
   mov eax, dword ptr [rsp + 0x18] ; @241
-  lea r12d, qword ptr [rax + 0xa] ; @66
-  mov eax, dword ptr [rsp + 0x14] ; @242
-  lea r9d, qword ptr [rax + 0xb] ; @67
-  mov eax, dword ptr [rsp + 0x10] ; @243
-  lea r10d, qword ptr [rax + 0xc] ; @68
-  mov eax, dword ptr [rsp + 0xc] ; @244
-  lea ebx, qword ptr [rax + 0xd] ; @69
-  mov eax, dword ptr [rsp + 0x8] ; @245
-  lea edx, qword ptr [rax + 0xe] ; @70
-  mov eax, dword ptr [rsp + 0x4] ; @246
-  lea r14d, qword ptr [rax + 0xf] ; @71
-  mov eax, dword ptr [rsp] ; @247
-  add eax, 0x10_w ; @72
+  add eax, 0xa_w ; @66
+  mov ecx, dword ptr [rsp + 0x14] ; @242
+  lea r13d, qword ptr [rcx + 0xb] ; @67
+  mov ecx, dword ptr [rsp + 0x10] ; @243
+  add ecx, 0xc_w ; @68
+  mov r8d, dword ptr [rsp + 0xc] ; @244
+  lea ebx, qword ptr [r8 + 0xd] ; @69
+  mov r8d, dword ptr [rsp + 0x8] ; @245
+  lea r14d, qword ptr [r8 + 0xe] ; @70
+  mov r8d, dword ptr [rsp + 0x4] ; @246
+  lea r15d, qword ptr [r8 + 0xf] ; @71
+  mov r8d, dword ptr [rsp] ; @247
+  add r8d, 0x10_w ; @72
   dec edi ; @73
   test edi, edi ; @189
   je @21 ; @190
 @168:
-  mov dword ptr [rsp], eax ; @248
-  mov dword ptr [rsp + 0x4], r14d ; @249
-  mov dword ptr [rsp + 0x8], edx ; @250
+  mov dword ptr [rsp], r8d ; @248
+  mov dword ptr [rsp + 0x4], r15d ; @249
+  mov dword ptr [rsp + 0x8], r14d ; @250
   mov dword ptr [rsp + 0xc], ebx ; @251
-  mov dword ptr [rsp + 0x10], r10d ; @252
-  mov dword ptr [rsp + 0x14], r9d ; @253
-  mov dword ptr [rsp + 0x18], r12d ; @254
-  mov dword ptr [rsp + 0x1c], ecx ; @255
-  mov dword ptr [rsp + 0x20], r11d ; @256
-  mov dword ptr [rsp + 0x24], r15d ; @257
-  mov dword ptr [rsp + 0x28], r13d ; @258
-  mov dword ptr [rsp + 0x2c], esi ; @259
-  mov dword ptr [rsp + 0x30], r8d ; @260
+  mov dword ptr [rsp + 0x10], ecx ; @252
+  mov dword ptr [rsp + 0x14], r13d ; @253
+  mov dword ptr [rsp + 0x18], eax ; @254
+  mov dword ptr [rsp + 0x1c], r9d ; @255
+  mov dword ptr [rsp + 0x20], edx ; @256
+  mov dword ptr [rsp + 0x24], r11d ; @257
+  mov dword ptr [rsp + 0x28], esi ; @258
+  mov dword ptr [rsp + 0x2c], r12d ; @259
+  mov dword ptr [rsp + 0x30], r10d ; @260
   mov eax, dword ptr [rsp + 0x40] ; @261
   mov dword ptr [rsp + 0x34], eax ; @262
   mov eax, dword ptr [rsp + 0x44] ; @263
diff --git a/src/test/data/opt/spill2.driver.sysv.amd64.c b/src/test/data/opt/spill2.driver.sysv.amd64.c
index 2f8984d0..02451abf 100644
--- a/src/test/data/opt/spill2.driver.sysv.amd64.c
+++ b/src/test/data/opt/spill2.driver.sysv.amd64.c
@@ -2,7 +2,8 @@
 
 extern int foo(int *a, int *b, int *c, int *d, int n);
 
-int main() {
+int
+main() {
   int a0[] = {};
   int b0[] = {};
   int c0[] = {};
diff --git a/src/test/data/opt/spill2.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/spill2.vir.opt.sysv.amd64.regalloc
index 66ef99ef..2eccda13 100644
--- a/src/test/data/opt/spill2.vir.opt.sysv.amd64.regalloc
+++ b/src/test/data/opt/spill2.vir.opt.sysv.amd64.regalloc
@@ -12,11 +12,11 @@ export function $foo { ; returns: rax
   mov qword ptr [rsp], rsi ; @99
   mov r15, rdx ; @92
   mov r14, rcx ; @93
-  mov r10d, r8d ; @94
+  mov r11d, r8d ; @94
   xor eax, eax ; @95
   xor r13d, r13d ; @96
 @3:
-  cmp r13d, r10d ; @85
+  cmp r13d, r11d ; @85
   jl @6 ; @86
 @7:
   add rsp, 0x10_l ; @108
@@ -37,29 +37,27 @@ export function $foo { ; returns: rax
   mov r12d, eax ; @79
   xor eax, eax ; @80
 @9:
-  cmp eax, r10d ; @74
+  cmp eax, r11d ; @74
   jl @21 ; @75
 @22:
   lea eax, qword ptr [rsi + rdi*1] ; @48
   lea ecx, qword ptr [r8 + r9*1] ; @49
   imul eax, ecx ; @73
   add eax, r12d ; @51
-  inc r13d ; @71
+  inc r13d ; @52
   jmp @3 ; @72
 @21:
-  lea edx, qword ptr [rsi + rax*1] ; @53
-  lea ecx, qword ptr [rdi + rax*1] ; @54
+  lea ecx, qword ptr [rsi + rax*1] ; @53
+  lea edx, qword ptr [rdi + rax*1] ; @54
   lea ebx, qword ptr [r8 + rax*1] ; @55
-  lea r11d, qword ptr [r9 + rax*1] ; @56
-  imul edx, ecx ; @69
-  mov ecx, ebx ; @58
-  imul ecx, r11d ; @68
-  add edx, ebx ; @59
-  add r11d, ecx ; @60
-  mov ecx, edx ; @61
-  xor ecx, r11d ; @67
-  add ecx, r12d ; @62
+  lea r10d, qword ptr [r9 + rax*1] ; @56
+  imul ecx, edx ; @69
+  mov edx, ebx ; @58
+  imul edx, r10d ; @68
+  add ecx, ebx ; @59
+  add edx, r10d ; @60
+  xor ecx, edx ; @67
+  add r12d, ecx ; @62
   inc eax ; @63
-  mov r12d, ecx ; @64
   jmp @9 ; @66
 }
diff --git a/src/test/data/opt/sroa.vir b/src/test/data/opt/sroa.vir
new file mode 100644
index 00000000..8227d9eb
--- /dev/null
+++ b/src/test/data/opt/sroa.vir
@@ -0,0 +1,22 @@
+module sroa
+
+export function w $foo(w %a, w %b, w %c, w %d) {
+  %s = slot 16, align 4
+@start:
+  %p1 = add.l %s, 0_l
+  st.w %a, %p1
+  %p2 = add.l %s, 4_l
+  st.w %b, %p2
+  %p3 = add.l %s, 8_l
+  st.w %c, %p3
+  %p4 = add.l %s, 12_l
+  st.w %d, %p4
+  %a = ld.w %p1
+  %b = ld.w %p2
+  %c = ld.w %p3
+  %d = ld.w %p4
+  %x = add.w %a, %b
+  %x = add.w %x, %c
+  %x = add.w %x, %d
+  ret %x
+}
diff --git a/src/test/data/opt/sroa.vir.opt b/src/test/data/opt/sroa.vir.opt
new file mode 100644
index 00000000..ed03491b
--- /dev/null
+++ b/src/test/data/opt/sroa.vir.opt
@@ -0,0 +1,9 @@
+module sroa
+
+export function w $foo(w %a, w %b, w %c, w %d) {
+@2:
+  %4 = add.w %a, %b ; @18
+  %6 = add.w %c, %d ; @20
+  %7 = add.w %4, %6 ; @21
+  ret %7
+}
diff --git a/src/test/data/opt/sumphi.driver.sysv.amd64.c b/src/test/data/opt/sumphi.driver.sysv.amd64.c
new file mode 100644
index 00000000..4fb55013
--- /dev/null
+++ b/src/test/data/opt/sumphi.driver.sysv.amd64.c
@@ -0,0 +1,26 @@
+#include <assert.h>
+
+struct t {
+  int a;
+  int b;
+};
+
+int sumphi(struct t a, struct t b, int x);
+
+int
+main() {
+  struct t s1 = {3, 4};
+  struct t s2 = {10, -2};
+  int res1 = sumphi(s1, s2, -1);
+  assert(res1 == 7);
+  int res2 = sumphi(s1, s2, 0);
+  assert(res2 == 8);
+
+  struct t s3 = {5, 6};
+  struct t s4 = {100, 200};
+  int res3 = sumphi(s3, s4, -5);
+  int res4 = sumphi(s3, s4, 42);
+  assert(res3 == 11);
+  assert(res4 == 300);
+  return 0;
+}
diff --git a/src/test/data/opt/sumphi.vir.opt.sysv b/src/test/data/opt/sumphi.vir.opt.sysv
index 30bdfc89..2e38fb39 100644
--- a/src/test/data/opt/sumphi.vir.opt.sysv
+++ b/src/test/data/opt/sumphi.vir.opt.sysv
@@ -3,31 +3,23 @@ module sumphi
 function $sum(l %s/rdi) {
   %p = slot 8, align 8
 @2:
-  st.l %s, %p ; @24
+  st.l %s, %p ; @22
   %a.1 = ld.w %p ; @4
-  %0.1 = add.l %p, 0x4_l ; @19
-  %b.1 = ld.w %0.1 ; @6
-  %1.1 = add.w %a.1, %b.1 ; @20
-  ret rax/%1.1
+  %0 = add.l %p, 0x4_l ; @19
+  %b.1 = ld.w %0 ; @6
+  %1 = add.w %a.1, %b.1 ; @20
+  ret rax/%1
 }
 
 export function $sumphi(l %a/rdi, l %b/rsi, w %x/rdx) {
-  %ra = slot 8, align 8
-  %9 = slot 8, align 8
-  %11 = slot 8, align 8
 @8:
-  st.l %a, %ra ; @28
   %2.1 = slt.w %x, 0x0_w ; @21
   br %2.1, @9, @10
 @9:
-  st.l %a, %11 ; @34
-  jmp @14(%11)
+  jmp @14(%a)
 @10:
-  st.l %b, %9 ; @32
-  jmp @14(%9)
-@14(%u.1):
-  st.w 0x5_w, %ra ; @17
-  %13.1 = ld.l %u.1 ; @35
-  %s.1/w/rax = call $sum(%13.1/rdi) ; @18
+  jmp @14(%b)
+@14(%rb.3):
+  %s.1/w/rax = call $sum(%rb.3/rdi) ; @18
   ret rax/%s.1
 }
diff --git a/src/test/data/opt/sumphi.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/sumphi.vir.opt.sysv.amd64.regalloc
new file mode 100644
index 00000000..058c6ada
--- /dev/null
+++ b/src/test/data/opt/sumphi.vir.opt.sysv.amd64.regalloc
@@ -0,0 +1,27 @@
+module sumphi
+
+function $sum { ; returns: rax
+@2:
+  sub rsp, 0x8_l ; @55
+  mov qword ptr [rsp], rdi ; @38
+  mov eax, dword ptr [rsp + 0x4] ; @6
+  add eax, dword ptr [rsp] ; @20
+  add rsp, 0x8_l ; @56
+  ret ; @37
+}
+
+export function $sumphi { ; returns: rax
+@8:
+  push rbp ; @57
+  mov rbp, rsp ; @58
+  mov rax, rdi ; @21
+  mov rdi, rsi ; @50
+  test edx, edx ; @47
+  jns @14 ; @48
+@9:
+  mov rdi, rax ; @43
+@14:
+  call $sum ; rdi ; @41
+  leave ; @59
+  ret ; @40
+}
diff --git a/src/test/data/opt/switchcaseprop.vir b/src/test/data/opt/switchcaseprop.vir
index 6ef49f61..1c80f737 100644
--- a/src/test/data/opt/switchcaseprop.vir
+++ b/src/test/data/opt/switchcaseprop.vir
@@ -17,3 +17,22 @@ export function w $foo(w %x) {
   %x = add.w %x, 1_w
   ret %x
 }
+
+export function w $bar(w %x) {
+@start:
+  switch.w %x, @default [0x2_w -> @one,
+                         0x3_w -> @two,
+                         0x4_w -> @three,
+                         0x5_w -> @three]
+@default:
+  ret %x
+@one:
+  %x = add.w %x, 1_w
+  ret %x
+@two:
+  %x = add.w %x, 1_w
+  ret %x
+@three:
+  %x = add.w %x, 1_w
+  ret %x
+}
diff --git a/src/test/data/opt/switchcaseprop.vir.opt b/src/test/data/opt/switchcaseprop.vir.opt
index e75f970d..249a0cfb 100644
--- a/src/test/data/opt/switchcaseprop.vir.opt
+++ b/src/test/data/opt/switchcaseprop.vir.opt
@@ -2,9 +2,22 @@ module foo
 
 export function w $foo(w %x) {
 @2:
-  switch.w %x, @10(%x) [0x1_w -> @10(0x2_w),
-                        0x2_w -> @10(0x3_w),
-                        0x3_w -> @10(0x4_w)]
-@10(%0):
+  switch.w %x, @18(%x) [0x1_w -> @18(0x2_w),
+                        0x2_w -> @18(0x3_w),
+                        0x3_w -> @18(0x4_w)]
+@18(%0):
   ret %0
 }
+
+export function w $bar(w %x) {
+@10:
+  switch.w %x, @19(%x) [0x2_w -> @19(0x3_w),
+                        0x3_w -> @19(0x4_w),
+                        0x4_w -> @14,
+                        0x5_w -> @14]
+@14:
+  %2 = add.w %x, 0x1_w ; @20
+  jmp @19(%2)
+@19(%1):
+  ret %1
+}
diff --git a/src/test/data/opt/switchcaseprop.vir.opt.sysv.amd64 b/src/test/data/opt/switchcaseprop.vir.opt.sysv.amd64
index c71df64e..4311036e 100644
--- a/src/test/data/opt/switchcaseprop.vir.opt.sysv.amd64
+++ b/src/test/data/opt/switchcaseprop.vir.opt.sysv.amd64
@@ -2,31 +2,63 @@ module foo
 
 export function $foo { ; returns: rax
 @2:
-  mov %x:w, edi ; @26
-  test %x:w, %x:w ; @27
-  je @11 ; @28
-  mov %2:w, %x:w ; zx ; @29
-  dec %2:l ; @30
-  cmp %2:l, 0x2_l ; @31
-  ja @11 ; @32
-  lea %1:l, qword ptr [rip + @25] ; @33
-  movsxd %3:l, dword ptr [%1 + %2*4] ; @34
-  add %3:l, %1:l ; @35
-  jmp %3:l ; @36
-  .tbl @25 [@12, @13, @14] ; @37
-@11:
-  mov %0:w, %x:w ; @23
-  jmp @10 ; @24
-@12:
-  mov %0:w, 0x2_w ; @21
-  jmp @10 ; @22
-@13:
-  mov %0:w, 0x3_w ; @19
-  jmp @10 ; @20
-@14:
-  mov %0:w, 0x4_w ; @17
-  jmp @10 ; @18
+  mov %x:w, edi ; @36
+  test %x:w, %x:w ; @37
+  je @21 ; @38
+  mov %4:w, %x:w ; zx ; @39
+  dec %4:l ; @40
+  cmp %4:l, 0x2_l ; @41
+  ja @21 ; @42
+  lea %3:l, qword ptr [rip + @35] ; @43
+  movsxd %5:l, dword ptr [%3 + %4*4] ; @44
+  add %5:l, %3:l ; @45
+  jmp %5:l ; @46
+  .tbl @35 [@22, @23, @24] ; @47
+@21:
+  mov %0:w, %x:w ; @33
+  jmp @18 ; @34
+@22:
+  mov %0:w, 0x2_w ; @31
+  jmp @18 ; @32
+@23:
+  mov %0:w, 0x3_w ; @29
+  jmp @18 ; @30
+@24:
+  mov %0:w, 0x4_w ; @27
+  jmp @18 ; @28
+@18:
+  mov eax, %0:w ; @25
+  ret ; @26
+}
+
+export function $bar { ; returns: rax
 @10:
-  mov eax, %0:w ; @15
-  ret ; @16
+  mov %x:w, edi ; @62
+  cmp %x:w, 0x2_w ; @63
+  jb @48 ; @64
+  mov %7:w, %x:w ; zx ; @65
+  sub %7:l, 0x2_l ; @66
+  cmp %7:l, 0x3_l ; @67
+  ja @48 ; @68
+  lea %6:l, qword ptr [rip + @61] ; @69
+  movsxd %8:l, dword ptr [%6 + %7*4] ; @70
+  add %8:l, %6:l ; @71
+  jmp %8:l ; @72
+  .tbl @61 [@49, @50, @14, @14] ; @73
+@48:
+  mov %1:w, %x:w ; @59
+  jmp @19 ; @60
+@49:
+  mov %1:w, 0x3_w ; @57
+  jmp @19 ; @58
+@50:
+  mov %1:w, 0x4_w ; @55
+  jmp @19 ; @56
+@14:
+  lea %2:w, qword ptr [%x + 0x1] ; @20
+  mov %1:w, %2:w ; @53
+  jmp @19 ; @54
+@19:
+  mov eax, %1:w ; @51
+  ret ; @52
 }
diff --git a/src/test/data/opt/unref.driver.sysv.amd64.c b/src/test/data/opt/unref.driver.sysv.amd64.c
new file mode 100644
index 00000000..75604f07
--- /dev/null
+++ b/src/test/data/opt/unref.driver.sysv.amd64.c
@@ -0,0 +1,42 @@
+#include <assert.h>
+
+struct t {
+  int a;
+  int b;
+};
+
+extern int sump(struct t *);
+extern struct t mkt(int, int);
+extern int sumt(int, int);
+
+int
+main() {
+  struct t t1 = mkt(1, 2);
+  struct t t2 = mkt(3, 4);
+  struct t t3 = mkt(5, 6);
+  struct t t4 = mkt(7, 8);
+  struct t t5 = mkt(9, 10);
+
+  assert(t1.a == 1);
+  assert(t1.b == 2);
+  assert(t2.a == 3);
+  assert(t2.b == 4);
+  assert(t3.a == 5);
+  assert(t3.b == 6);
+  assert(t4.a == 7);
+  assert(t4.b == 8);
+  assert(t5.a == 9);
+  assert(t5.b == 10);
+
+  assert(sump(&t1) == 3);
+  assert(sump(&t2) == 7);
+  assert(sump(&t3) == 11);
+  assert(sump(&t4) == 15);
+  assert(sump(&t5) == 19);
+
+  assert(sumt(1, 2) == 3);
+  assert(sumt(3, 4) == 7);
+  assert(sumt(5, 6) == 11);
+  assert(sumt(7, 8) == 15);
+  assert(sumt(9, 10) == 19);
+}
diff --git a/src/test/data/opt/unref.vir.opt.sysv b/src/test/data/opt/unref.vir.opt.sysv
index f92df03c..3e6cf750 100644
--- a/src/test/data/opt/unref.vir.opt.sysv
+++ b/src/test/data/opt/unref.vir.opt.sysv
@@ -3,12 +3,12 @@ module unref
 function $sum(l %s/rdi) {
   %p = slot 8, align 8
 @2:
-  st.l %s, %p ; @24
+  st.l %s, %p ; @22
   %a.1 = ld.w %p ; @4
-  %0.1 = add.l %p, 0x4_l ; @19
-  %b.1 = ld.w %0.1 ; @6
-  %1.1 = add.w %a.1, %b.1 ; @20
-  ret rax/%1.1
+  %0 = add.l %p, 0x4_l ; @19
+  %b.1 = ld.w %0 ; @6
+  %1 = add.w %a.1, %b.1 ; @20
+  ret rax/%1
 }
 
 export function $sump(l %p/rdi) {
@@ -22,10 +22,11 @@ export function $mkt(w %a/rdi, w %b/rsi) {
   %r = slot 8, align 8
 @11:
   st.w %a, %r ; @12
-  %2.1 = add.l %r, 0x4_l ; @21
-  st.w %b, %2.1 ; @14
-  %9.1 = ld.l %r ; @28
-  ret rax/%9.1
+  %2 = add.l %r, 0x4_l ; @21
+  st.w %b, %2 ; @14
+  %9 = ld.l %r ; @28
+  st.l %9, %r ; @29
+  ret rax/%9
 }
 
 export function $sumt(w %a/rdi, w %b/rsi) {
diff --git a/src/test/data/opt/uremby7.driver.sysv.amd64.c b/src/test/data/opt/uremby7.driver.sysv.amd64.c
index 9c1e081a..a63d8329 100644
--- a/src/test/data/opt/uremby7.driver.sysv.amd64.c
+++ b/src/test/data/opt/uremby7.driver.sysv.amd64.c
@@ -2,7 +2,8 @@
 
 extern unsigned int foo(unsigned int x);
 
-int main() {
+int
+main() {
   assert(foo(0) == 0);
   assert(foo(1) == 1);
   assert(foo(2) == 2);
diff --git a/src/test/data/opt/vaarg1.driver.sysv.amd64.c b/src/test/data/opt/vaarg1.driver.sysv.amd64.c
index 41584fa9..305ffc92 100644
--- a/src/test/data/opt/vaarg1.driver.sysv.amd64.c
+++ b/src/test/data/opt/vaarg1.driver.sysv.amd64.c
@@ -7,7 +7,35 @@ struct S {
 
 extern long foo(long i, ...);
 
-int main() {
-  struct S s = {0.0, 1};
-  assert(foo(1, s) == 3);
+int
+main() {
+  struct S s1 = {0.0, 1};
+  assert(foo(1, s1) == 3);
+
+  struct S s2 = {1.0, 1};
+  assert(foo(1, s2) == 4);
+
+  struct S s3 = {2.0, 5};
+  assert(foo(10, s3) == 18);
+
+  struct S s4 = {-2.5, 100};
+  assert(foo(-4, s4) == 95);
+
+  struct S s5 = {12.75, -20};
+  assert(foo(0, s5) == -7);
+
+  struct S s6 = {0.999, 0};
+  assert(foo(1, s6) == 3);
+
+  struct S s7 = {-0.1, 7};
+  assert(foo(3, s7) == 11);
+
+  struct S s8 = {-1.234, 42};
+  assert(foo(0, s8) == 42);
+
+  struct S s9 = {1000.5, 5000};
+  assert(foo(1000, s9) == 7001);
+
+  struct S s10 = {-100.9, -50};
+  assert(foo(10, s10) == -139);
 }
diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv b/src/test/data/opt/vaarg1.vir.opt.sysv
index 8c6f33d9..46807d32 100644
--- a/src/test/data/opt/vaarg1.vir.opt.sysv
+++ b/src/test/data/opt/vaarg1.vir.opt.sysv
@@ -1,92 +1,72 @@
 module vaarg1
 
 export function $foo(b %11/rax, l %i/rdi, ...) {
-  %ap = slot 24, align 8
-  %r = slot 16, align 8
   %5 = slot 176, align 16
-  %21 = slot 16, align 8
+  %52 = slot 8, align 8
 @19:
-  %6 = add.l %5, 0x8_l ; @20
-  regstore rsi, %6 ; @21
-  %7 = add.l %5, 0x10_l ; @22
-  regstore rdx, %7 ; @23
-  %8 = add.l %5, 0x18_l ; @24
-  regstore rcx, %8 ; @25
-  %9 = add.l %5, 0x20_l ; @26
-  regstore r8, %9 ; @27
-  %10 = add.l %5, 0x28_l ; @28
-  regstore r9, %10 ; @29
-  %12 = eq.b %11, 0x0_b ; @30
-  br %12, @2, @18
+  %6.1 = add.l %5, 0x8_l ; @20
+  regstore rsi, %6.1 ; @21
+  %7.1 = add.l %5, 0x10_l ; @22
+  regstore rdx, %7.1 ; @23
+  %8.1 = add.l %5, 0x18_l ; @24
+  regstore rcx, %8.1 ; @25
+  %9.1 = add.l %5, 0x20_l ; @26
+  regstore r8, %9.1 ; @27
+  %10.1 = add.l %5, 0x28_l ; @28
+  regstore r9, %10.1 ; @29
+  %12.1 = eq.b %11, 0x0_b ; @30
+  br %12.1, @2, @18
 @18:
-  %13 = add.l %5, 0x30_l ; @31
-  regstore xmm0, %13 ; @32
-  %14 = add.l %5, 0x40_l ; @33
-  regstore xmm1, %14 ; @34
-  %15 = add.l %5, 0x50_l ; @35
-  regstore xmm2, %15 ; @36
-  %16 = add.l %5, 0x60_l ; @37
-  regstore xmm3, %16 ; @38
-  %17 = add.l %5, 0x70_l ; @39
-  regstore xmm4, %17 ; @40
-  %18 = add.l %5, 0x80_l ; @41
-  regstore xmm5, %18 ; @42
-  %19 = add.l %5, 0x90_l ; @43
-  regstore xmm6, %19 ; @44
-  %20 = add.l %5, 0xa0_l ; @45
-  regstore xmm7, %20 ; @46
+  %13.1 = add.l %5, 0x30_l ; @31
+  regstore xmm0, %13.1 ; @32
+  %14.1 = add.l %5, 0x40_l ; @33
+  regstore xmm1, %14.1 ; @34
+  %15.1 = add.l %5, 0x50_l ; @35
+  regstore xmm2, %15.1 ; @36
+  %16.1 = add.l %5, 0x60_l ; @37
+  regstore xmm3, %16.1 ; @38
+  %17.1 = add.l %5, 0x70_l ; @39
+  regstore xmm4, %17.1 ; @40
+  %18.1 = add.l %5, 0x80_l ; @41
+  regstore xmm5, %18.1 ; @42
+  %19.1 = add.l %5, 0x90_l ; @43
+  regstore xmm6, %19.1 ; @44
+  %20.1 = add.l %5, 0xa0_l ; @45
+  regstore xmm7, %20.1 ; @46
   jmp @2
 @2:
-  st.w 0x8_w, %ap ; @53
-  %26 = add.l %ap, 0x4_l ; @54
-  st.w 0x30_w, %26 ; @55
-  %27 = stkargs ; @56
-  %28 = add.l %ap, 0x8_l ; @57
-  st.l %27, %28 ; @58
-  %29 = add.l %ap, 0x10_l ; @59
-  st.l %5, %29 ; @60
+  st.w 0x8_w, %52 ; @53
+  %57.1 = add.l %52, 0x4_l ; @92
+  st.w 0x30_w, %57.1 ; @55
+  %27.1 = stkargs ; @56
   jmp @62
 @62:
-  %31 = ld.w %26 ; @68
-  %32 = le.w %31, 0xa0_w ; @69
-  br %32, @63, @65
+  jmp @63
 @63:
-  %33 = ld.w %ap ; @70
-  %34 = le.w %33, 0x28_w ; @71
-  br %34, @64, @65
+  %33.1 = ld.w %52 ; @70
+  %34.1 = le.w %33.1, 0x28_w ; @71
+  br %34.1, @64, @65
 @64:
-  %37 = zext.l %31 ; @74
-  %38 = add.l %5, %37 ; @75
-  %39 = zext.l %33 ; @76
-  %40 = add.l %5, %39 ; @77
-  %41 = add.w %31, 0x10_l ; @78
-  %42 = add.w %33, 0x8_l ; @79
-  st.w %41, %26 ; @80
-  st.w %42, %ap ; @81
-  jmp @66(%38, %40)
+  %38.1 = add.l %5, 0x30_l ; @75
+  %39.1 = zext.l %33.1 ; @76
+  %40.1 = add.l %5, %39.1 ; @77
+  %42.1 = add.w %33.1, 0x8_l ; @79
+  st.w 0x40_w, %57.1 ; @80
+  st.w %42.1, %52 ; @81
+  jmp @66(%38.1, %40.1)
 @65:
-  %44 = ld.l %28 ; @83
-  %45 = add.l %44, 0x8_l ; @84
-  %46 = add.l %44, 0x10_l ; @85
-  st.l %46, %28 ; @86
-  jmp @66(%44, %45)
-@66(%47, %48):
-  %49 = ld.l %47 ; @87
-  st.l %49, %21 ; @88
-  %50 = ld.l %48 ; @89
-  %51 = add.l %21, 0x8_l ; @90
-  st.l %50, %51 ; @91
+  %45.1 = add.l %27.1, 0x8_l ; @84
+  jmp @66(%27.1, %45.1)
+@66(%47.1, %48.1):
+  %49.1 = ld.l %47.1 ; @87
+  st.l %49.1, %52 ; @88
+  %50.1 = ld.l %48.1 ; @89
   jmp @61
 @61:
-  %22 = ld.l %21 ; @47
-  st.l %22, %r ; @48
-  %24 = ld.l %51 ; @50
-  %25 = add.l %r, 0x8_l ; @51
-  st.l %24, %25 ; @52
-  %f1.1 = ld.d %r ; @6
-  %0 = add.d %f1.1, 1.234_d ; @13
-  %2 = ftosi.d.l %0 ; @15
-  %3 = add.l %24, %2 ; @16
-  %4 = add.l %3, %i ; @17
-  ret rax/%4
+  %f1.1 = ld.d %52 ; @6
+  %0.1 = add.d %f1.1, 1.234_d ; @13
+  %2.1 = ftosi.d.l %0.1 ; @15
+  %3.1 = add.l %50.1, %2.1 ; @16
+  %4.1 = add.l %3.1, %i ; @17
+  ret rax/%4.1
 }
diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64 b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64
index 6320d075..6c098966 100644
--- a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64
+++ b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64
@@ -1,21 +1,19 @@
 module vaarg1
 
 export function $foo { ; returns: rax
-  %ap = slot 24, align 8
-  %r = slot 16, align 8
   %5 = slot 176, align 16
-  %21 = slot 16, align 8
+  %52 = slot 8, align 8
 @19:
   mov %11:b, al ; @20
-  mov %i:l, rdi ; @121
+  mov %i:l, rdi ; @120
   mov qword ptr [%5 + 0x8], rsi ; @21
   mov qword ptr [%5 + 0x10], rdx ; @23
   mov qword ptr [%5 + 0x18], rcx ; @25
   mov qword ptr [%5 + 0x20], r8 ; @27
   mov qword ptr [%5 + 0x28], r9 ; @29
-  test %11:b, %11:b ; @116
-  je @2 ; @117
-  jmp @18 ; @118
+  test %11:b, %11:b ; @115
+  je @2 ; @116
+  jmp @18 ; @117
 @18:
   movdqa xmmword ptr [%5 + 0x30], xmm0 ; @32
   movdqa xmmword ptr [%5 + 0x40], xmm1 ; @34
@@ -25,62 +23,47 @@ export function $foo { ; returns: rax
   movdqa xmmword ptr [%5 + 0x80], xmm5 ; @42
   movdqa xmmword ptr [%5 + 0x90], xmm6 ; @44
   movdqa xmmword ptr [%5 + 0xa0], xmm7 ; @46
-  jmp @2 ; @115
+  jmp @2 ; @114
 @2:
-  mov dword ptr [%ap], 0x8_w ; @53
-  mov dword ptr [%ap + 0x4], 0x30_w ; @55
-  lea %27:l, qword ptr [rbp + 0x10] ; @56
-  mov qword ptr [%ap + 0x8], %27:l ; @58
-  mov qword ptr [%ap + 0x10], %5:l ; @60
-  jmp @62 ; @114
+  mov dword ptr [%52], 0x8_w ; @53
+  mov dword ptr [%52 + 0x4], 0x30_w ; @55
+  lea %27.1:l, qword ptr [rbp + 0x10] ; @56
+  jmp @62 ; @113
 @62:
-  mov %31:w, dword ptr [%ap + 0x4] ; @68
-  cmp %31:w, 0xa0_w ; @109
-  jbe @63 ; @110
-  jmp @65 ; @111
+  jmp @63 ; @112
 @63:
-  mov %33:w, dword ptr [%ap] ; @70
-  cmp %33:w, 0x28_w ; @104
-  jbe @64 ; @105
-  jmp @65 ; @106
+  mov %33.1:w, dword ptr [%52] ; @70
+  cmp %33.1:w, 0x28_w ; @107
+  jbe @64 ; @108
+  jmp @65 ; @109
 @65:
-  mov %44:l, qword ptr [%ap + 0x8] ; @83
-  lea %45:l, qword ptr [%44 + 0x8] ; @84
-  lea %46:l, qword ptr [%44 + 0x10] ; @85
-  mov qword ptr [%ap + 0x8], %46:l ; @86
-  mov %47:l, %44:l ; @101
-  mov %48:l, %45:l ; @102
-  jmp @66 ; @103
+  lea %45.1:l, qword ptr [%27.1 + 0x8] ; @84
+  mov %47.1:l, %27.1:l ; @104
+  mov %48.1:l, %45.1:l ; @105
+  jmp @66 ; @106
 @64:
-  mov %37:w, %31:w ; @74
-  lea %38:l, qword ptr [%5 + %37*1] ; @75
-  mov %39:w, %33:w ; @76
-  lea %40:l, qword ptr [%5 + %39*1] ; @77
-  lea %41:w, qword ptr [%31 + 0x10] ; @78
-  lea %42:w, qword ptr [%33 + 0x8] ; @79
-  mov dword ptr [%ap + 0x4], %41:w ; @80
-  mov dword ptr [%ap], %42:w ; @81
-  mov %47:l, %38:l ; @98
-  mov %48:l, %40:l ; @99
-  jmp @66 ; @100
+  lea %38.1:l, qword ptr [%5 + 0x30] ; @75
+  mov %39.1:w, %33.1:w ; @76
+  lea %40.1:l, qword ptr [%5 + %39.1*1] ; @77
+  lea %42.1:w, qword ptr [%33.1 + 0x8] ; @79
+  mov dword ptr [%52 + 0x4], 0x40_w ; @80
+  mov dword ptr [%52], %42.1:w ; @81
+  mov %47.1:l, %38.1:l ; @101
+  mov %48.1:l, %40.1:l ; @102
+  jmp @66 ; @103
 @66:
-  mov %49:l, qword ptr [%47] ; @87
-  mov qword ptr [%21], %49:l ; @88
-  mov %50:l, qword ptr [%48] ; @89
-  mov qword ptr [%21 + 0x8], %50:l ; @91
-  jmp @61 ; @97
+  mov %49.1:l, qword ptr [%47.1] ; @87
+  mov qword ptr [%52], %49.1:l ; @88
+  mov %50.1:l, qword ptr [%48.1] ; @89
+  jmp @61 ; @100
 @61:
-  mov %22:l, qword ptr [%21] ; @47
-  mov qword ptr [%r], %22:l ; @48
-  mov %24:l, qword ptr [%21 + 0x8] ; @50
-  mov qword ptr [%r + 0x8], %24:l ; @52
-  movsd %f1.1:d, qword ptr [%r] ; @6
-  movsd %0:d, %f1.1:d ; @13
-  addsd %0:d, qword ptr [rip + @94] ; @95
-  .fp64 @94, 1.234 ; @96
-  cvtsd2si %2:l, %0:d ; @15
-  lea %3:l, qword ptr [%24 + %2*1] ; @16
-  lea %4:l, qword ptr [%3 + %i*1] ; @17
-  mov rax, %4:l ; @92
-  ret ; @93
+  movsd %f1.1:d, qword ptr [%52] ; @6
+  movsd %0.1:d, %f1.1:d ; @13
+  addsd %0.1:d, qword ptr [rip + @97] ; @98
+  .fp64 @97, 1.234 ; @99
+  cvttsd2si %2.1:l, %0.1:d ; @15
+  lea %3.1:l, qword ptr [%50.1 + %2.1*1] ; @16
+  lea %4.1:l, qword ptr [%3.1 + %i*1] ; @17
+  mov rax, %4.1:l ; @95
+  ret ; @96
 }
diff --git a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc
index 0815c0cb..eeafde73 100644
--- a/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc
+++ b/src/test/data/opt/vaarg1.vir.opt.sysv.amd64.regalloc
@@ -2,69 +2,51 @@ module vaarg1
 
 export function $foo { ; returns: rax
 @19:
-  push rbp ; @124
-  mov rbp, rsp ; @125
-  sub rsp, 0xf0_l ; @126
-  mov qword ptr [rbp - 0xe8], rsi ; @21
-  mov qword ptr [rbp - 0xe0], rdx ; @23
-  mov qword ptr [rbp - 0xd8], rcx ; @25
-  mov qword ptr [rbp - 0xd0], r8 ; @27
-  mov qword ptr [rbp - 0xc8], r9 ; @29
-  test al, al ; @116
-  je @2 ; @117
+  push rbp ; @122
+  mov rbp, rsp ; @123
+  sub rsp, 0xc0_l ; @124
+  mov qword ptr [rbp - 0xb8], rsi ; @21
+  mov qword ptr [rbp - 0xb0], rdx ; @23
+  mov qword ptr [rbp - 0xa8], rcx ; @25
+  mov qword ptr [rbp - 0xa0], r8 ; @27
+  mov qword ptr [rbp - 0x98], r9 ; @29
+  test al, al ; @115
+  je @2 ; @116
 @18:
-  movdqa xmmword ptr [rbp - 0xc0], xmm0 ; @32
-  movdqa xmmword ptr [rbp - 0xb0], xmm1 ; @34
-  movdqa xmmword ptr [rbp - 0xa0], xmm2 ; @36
-  movdqa xmmword ptr [rbp - 0x90], xmm3 ; @38
-  movdqa xmmword ptr [rbp - 0x80], xmm4 ; @40
-  movdqa xmmword ptr [rbp - 0x70], xmm5 ; @42
-  movdqa xmmword ptr [rbp - 0x60], xmm6 ; @44
-  movdqa xmmword ptr [rbp - 0x50], xmm7 ; @46
+  movdqa xmmword ptr [rbp - 0x90], xmm0 ; @32
+  movdqa xmmword ptr [rbp - 0x80], xmm1 ; @34
+  movdqa xmmword ptr [rbp - 0x70], xmm2 ; @36
+  movdqa xmmword ptr [rbp - 0x60], xmm3 ; @38
+  movdqa xmmword ptr [rbp - 0x50], xmm4 ; @40
+  movdqa xmmword ptr [rbp - 0x40], xmm5 ; @42
+  movdqa xmmword ptr [rbp - 0x30], xmm6 ; @44
+  movdqa xmmword ptr [rbp - 0x20], xmm7 ; @46
 @2:
-  mov dword ptr [rbp - 0x40], 0x8_w ; @53
-  mov dword ptr [rbp - 0x3c], 0x30_w ; @55
+  mov dword ptr [rbp - 0x10], 0x8_w ; @53
+  mov dword ptr [rbp - 0xc], 0x30_w ; @55
   lea rax, qword ptr [rbp + 0x10] ; @56
-  mov qword ptr [rbp - 0x38], rax ; @58
-  lea rax, qword ptr [rbp - 0xf0] ; @60
-  mov qword ptr [rbp - 0x30], rax ; @123
-@62:
-  mov esi, dword ptr [rbp - 0x3c] ; @68
-  cmp esi, 0xa0_w ; @109
-  ja @65 ; @110
-@63:
-  mov edx, dword ptr [rbp - 0x40] ; @70
-  cmp edx, 0x28_w ; @104
-  jbe @64 ; @105
+  mov ecx, dword ptr [rbp - 0x10] ; @70
+  cmp ecx, 0x28_w ; @107
+  jbe @64 ; @108
 @65:
-  mov rcx, qword ptr [rbp - 0x38] ; @83
-  lea rax, qword ptr [rcx + 0x8] ; @84
-  lea rdx, qword ptr [rcx + 0x10] ; @85
-  mov qword ptr [rbp - 0x38], rdx ; @86
-  jmp @66 ; @103
+  lea rdx, qword ptr [rax + 0x8] ; @84
+  jmp @66 ; @106
 @64:
-  lea rcx, qword ptr [rbp + rsi*1 - 0xf0] ; @75
-  lea rax, qword ptr [rbp + rdx*1 - 0xf0] ; @77
-  add esi, 0x10_w ; @78
-  add edx, 0x8_w ; @79
-  mov dword ptr [rbp - 0x3c], esi ; @80
-  mov dword ptr [rbp - 0x40], edx ; @81
+  lea rax, qword ptr [rbp - 0x90] ; @75
+  lea rdx, qword ptr [rbp + rcx*1 - 0xc0] ; @77
+  add ecx, 0x8_w ; @79
+  mov dword ptr [rbp - 0xc], 0x40_w ; @80
+  mov dword ptr [rbp - 0x10], ecx ; @81
 @66:
-  mov rcx, qword ptr [rcx] ; @87
-  mov qword ptr [rbp - 0x18], rcx ; @88
-  mov rax, qword ptr [rax] ; @89
-  mov qword ptr [rbp - 0x10], rax ; @91
-@61:
-  mov rax, qword ptr [rbp - 0x18] ; @47
-  mov qword ptr [rbp - 0x28], rax ; @48
-  mov rcx, qword ptr [rbp - 0x10] ; @50
-  mov qword ptr [rbp - 0x20], rcx ; @52
-  movsd xmm0, qword ptr [rbp - 0x28] ; @6
-  addsd xmm0, qword ptr [rip + @94] ; @95
-  .fp64 @94, 1.234 ; @96
-  cvtsd2si rax, xmm0 ; @15
+  mov rax, qword ptr [rax] ; @87
+  mov qword ptr [rbp - 0x10], rax ; @88
+  mov rcx, qword ptr [rdx] ; @89
+  movsd xmm0, qword ptr [rbp - 0x10] ; @6
+  addsd xmm0, qword ptr [rip + @97] ; @98
+  .fp64 @97, 1.234 ; @99
+  cvttsd2si rax, xmm0 ; @15
   add rax, rcx ; @16
   add rax, rdi ; @17
-  leave ; @127
-  ret ; @93
+  leave ; @125
+  ret ; @96
 }
diff --git a/src/test/data/opt/vaarg2.driver.sysv.amd64.c b/src/test/data/opt/vaarg2.driver.sysv.amd64.c
new file mode 100644
index 00000000..54bb762d
--- /dev/null
+++ b/src/test/data/opt/vaarg2.driver.sysv.amd64.c
@@ -0,0 +1,17 @@
+#include <assert.h>
+
+extern long foo(long i, ...);
+extern char bar(char b, ...); /* NB: bar actually has no named arguments */
+
+struct t {
+  long l1;
+  long l2;
+};
+
+int
+main() {
+  struct t s = {10, 32};
+  assert(foo(5, s) == 47);
+  assert(!bar(0));
+  assert(bar(1));
+}
diff --git a/src/test/data/opt/vaarg2.vir.opt.sysv b/src/test/data/opt/vaarg2.vir.opt.sysv
index c6f36e3d..5a781bc6 100644
--- a/src/test/data/opt/vaarg2.vir.opt.sysv
+++ b/src/test/data/opt/vaarg2.vir.opt.sysv
@@ -1,148 +1,112 @@
 module vaarg2
 
 export function $foo(b %9/rax, l %i/rdi, ...) {
-  %ap = slot 24, align 8
-  %r = slot 16, align 8
   %3 = slot 176, align 16
-  %19 = slot 16, align 8
+  %76 = slot 8, align 8
 @18:
-  %4 = add.l %3, 0x8_l ; @19
-  regstore rsi, %4 ; @20
-  %5 = add.l %3, 0x10_l ; @21
-  regstore rdx, %5 ; @22
-  %6 = add.l %3, 0x18_l ; @23
-  regstore rcx, %6 ; @24
-  %7 = add.l %3, 0x20_l ; @25
-  regstore r8, %7 ; @26
-  %8 = add.l %3, 0x28_l ; @27
-  regstore r9, %8 ; @28
-  %10 = eq.b %9, 0x0_b ; @29
-  br %10, @2, @17
+  %4.1 = add.l %3, 0x8_l ; @19
+  regstore rsi, %4.1 ; @20
+  %5.1 = add.l %3, 0x10_l ; @21
+  regstore rdx, %5.1 ; @22
+  %6.1 = add.l %3, 0x18_l ; @23
+  regstore rcx, %6.1 ; @24
+  %7.1 = add.l %3, 0x20_l ; @25
+  regstore r8, %7.1 ; @26
+  %8.1 = add.l %3, 0x28_l ; @27
+  regstore r9, %8.1 ; @28
+  %10.1 = eq.b %9, 0x0_b ; @29
+  br %10.1, @2, @17
 @17:
-  %11 = add.l %3, 0x30_l ; @30
-  regstore xmm0, %11 ; @31
-  %12 = add.l %3, 0x40_l ; @32
-  regstore xmm1, %12 ; @33
-  %13 = add.l %3, 0x50_l ; @34
-  regstore xmm2, %13 ; @35
-  %14 = add.l %3, 0x60_l ; @36
-  regstore xmm3, %14 ; @37
-  %15 = add.l %3, 0x70_l ; @38
-  regstore xmm4, %15 ; @39
-  %16 = add.l %3, 0x80_l ; @40
-  regstore xmm5, %16 ; @41
-  %17 = add.l %3, 0x90_l ; @42
-  regstore xmm6, %17 ; @43
-  %18 = add.l %3, 0xa0_l ; @44
-  regstore xmm7, %18 ; @45
+  %11.1 = add.l %3, 0x30_l ; @30
+  regstore xmm0, %11.1 ; @31
+  %12.1 = add.l %3, 0x40_l ; @32
+  regstore xmm1, %12.1 ; @33
+  %13.1 = add.l %3, 0x50_l ; @34
+  regstore xmm2, %13.1 ; @35
+  %14.1 = add.l %3, 0x60_l ; @36
+  regstore xmm3, %14.1 ; @37
+  %15.1 = add.l %3, 0x70_l ; @38
+  regstore xmm4, %15.1 ; @39
+  %16.1 = add.l %3, 0x80_l ; @40
+  regstore xmm5, %16.1 ; @41
+  %17.1 = add.l %3, 0x90_l ; @42
+  regstore xmm6, %17.1 ; @43
+  %18.1 = add.l %3, 0xa0_l ; @44
+  regstore xmm7, %18.1 ; @45
   jmp @2
 @2:
-  st.w 0x8_w, %ap ; @52
-  %24 = add.l %ap, 0x4_l ; @53
-  st.w 0x30_w, %24 ; @54
-  %25 = stkargs ; @55
-  %26 = add.l %ap, 0x8_l ; @56
-  st.l %25, %26 ; @57
-  %27 = add.l %ap, 0x10_l ; @58
-  st.l %3, %27 ; @59
+  st.w 0x8_w, %76 ; @52
+  %79.1 = add.l %76, 0x4_l ; @139
+  st.w 0x30_w, %79.1 ; @54
+  %25.1 = stkargs ; @55
   jmp @61
 @61:
-  %28 = ld.w %ap ; @65
-  %29 = le.w %28, 0x20_w ; @66
-  br %29, @62, @63
+  %28.1 = ld.w %76 ; @65
+  %29.1 = le.w %28.1, 0x20_w ; @66
+  br %29.1, @62, @63
 @62:
-  %32 = zext.l %28 ; @69
-  %33 = add.l %3, %32 ; @70
-  %34 = add.w %28, 0x10_w ; @71
-  st.w %34, %ap ; @72
-  jmp @64(%33)
+  %32.1 = zext.l %28.1 ; @69
+  %33.1 = add.l %3, %32.1 ; @70
+  %34.1 = add.w %28.1, 0x10_w ; @71
+  st.w %34.1, %76 ; @72
+  jmp @64(%33.1)
 @63:
-  %36 = ld.l %26 ; @74
-  %37 = add.l %36, 0x10_l ; @75
-  st.l %37, %26 ; @76
-  jmp @64(%36)
-@64(%38):
-  %39 = ld.l %38 ; @77
-  st.l %39, %19 ; @78
-  %40 = add.l %38, 0x8_l ; @79
-  %41 = ld.l %40 ; @80
-  %42 = add.l %19, 0x8_l ; @81
-  st.l %41, %42 ; @82
+  jmp @64(%25.1)
+@64(%38.1):
+  %39.1 = ld.l %38.1 ; @77
+  %40.1 = add.l %38.1, 0x8_l ; @79
+  %41.1 = ld.l %40.1 ; @80
   jmp @60
 @60:
-  %20 = ld.l %19 ; @46
-  st.l %20, %r ; @47
-  %22 = ld.l %42 ; @49
-  %23 = add.l %r, 0x8_l ; @50
-  st.l %22, %23 ; @51
-  %f1.1 = ld.l %r ; @6
-  %1 = add.l %f1.1, %22 ; @15
-  %2 = add.l %1, %i ; @16
-  ret rax/%2
+  st.l %39.1, %76 ; @47
+  %1.1 = add.l %39.1, %41.1 ; @15
+  %2.1 = add.l %1.1, %i ; @16
+  ret rax/%2.1
 }
 
 export function $bar(b %49/rax, ...) {
-  %ap = slot 24, align 8
   %43 = slot 176, align 16
 @84:
   regstore rdi, %43 ; @85
-  %44 = add.l %43, 0x8_l ; @86
-  regstore rsi, %44 ; @87
-  %45 = add.l %43, 0x10_l ; @88
-  regstore rdx, %45 ; @89
-  %46 = add.l %43, 0x18_l ; @90
-  regstore rcx, %46 ; @91
-  %47 = add.l %43, 0x20_l ; @92
-  regstore r8, %47 ; @93
-  %48 = add.l %43, 0x28_l ; @94
-  regstore r9, %48 ; @95
-  %50 = eq.b %49, 0x0_b ; @96
-  br %50, @11, @83
+  %44.1 = add.l %43, 0x8_l ; @86
+  regstore rsi, %44.1 ; @87
+  %45.1 = add.l %43, 0x10_l ; @88
+  regstore rdx, %45.1 ; @89
+  %46.1 = add.l %43, 0x18_l ; @90
+  regstore rcx, %46.1 ; @91
+  %47.1 = add.l %43, 0x20_l ; @92
+  regstore r8, %47.1 ; @93
+  %48.1 = add.l %43, 0x28_l ; @94
+  regstore r9, %48.1 ; @95
+  %50.1 = eq.b %49, 0x0_b ; @96
+  br %50.1, @11, @83
 @83:
-  %51 = add.l %43, 0x30_l ; @97
-  regstore xmm0, %51 ; @98
-  %52 = add.l %43, 0x40_l ; @99
-  regstore xmm1, %52 ; @100
-  %53 = add.l %43, 0x50_l ; @101
-  regstore xmm2, %53 ; @102
-  %54 = add.l %43, 0x60_l ; @103
-  regstore xmm3, %54 ; @104
-  %55 = add.l %43, 0x70_l ; @105
-  regstore xmm4, %55 ; @106
-  %56 = add.l %43, 0x80_l ; @107
-  regstore xmm5, %56 ; @108
-  %57 = add.l %43, 0x90_l ; @109
-  regstore xmm6, %57 ; @110
-  %58 = add.l %43, 0xa0_l ; @111
-  regstore xmm7, %58 ; @112
+  %51.1 = add.l %43, 0x30_l ; @97
+  regstore xmm0, %51.1 ; @98
+  %52.1 = add.l %43, 0x40_l ; @99
+  regstore xmm1, %52.1 ; @100
+  %53.1 = add.l %43, 0x50_l ; @101
+  regstore xmm2, %53.1 ; @102
+  %54.1 = add.l %43, 0x60_l ; @103
+  regstore xmm3, %54.1 ; @104
+  %55.1 = add.l %43, 0x70_l ; @105
+  regstore xmm4, %55.1 ; @106
+  %56.1 = add.l %43, 0x80_l ; @107
+  regstore xmm5, %56.1 ; @108
+  %57.1 = add.l %43, 0x90_l ; @109
+  regstore xmm6, %57.1 ; @110
+  %58.1 = add.l %43, 0xa0_l ; @111
+  regstore xmm7, %58.1 ; @112
   jmp @11
 @11:
-  st.w 0x0_w, %ap ; @113
-  %59 = add.l %ap, 0x4_l ; @114
-  st.w 0x30_w, %59 ; @115
-  %60 = stkargs ; @116
-  %61 = add.l %ap, 0x8_l ; @117
-  st.l %60, %61 ; @118
-  %62 = add.l %ap, 0x10_l ; @119
-  st.l %43, %62 ; @120
   jmp @122
 @122:
-  %63 = ld.w %ap ; @126
-  %64 = le.w %63, 0x28_w ; @127
-  br %64, @123, @124
+  jmp @123
 @123:
-  %67 = zext.l %63 ; @130
-  %68 = add.l %43, %67 ; @131
-  %69 = add.w %63, 0x8_w ; @132
-  st.w %69, %ap ; @133
-  jmp @125(%68)
-@124:
-  %71 = ld.l %61 ; @135
-  %72 = add.l %71, 0x8_l ; @136
-  st.l %72, %61 ; @137
-  jmp @125(%71)
-@125(%73):
-  %t.1 = ld.b %73 ; @138
+  %68.1 = add.l %43, 0x0_l ; @131
+  jmp @125
+@125:
+  %t.1 = ld.b %68.1 ; @138
   jmp @121
 @121:
   ret rax/%t.1
diff --git a/src/test/data/opt/vasum.driver.sysv.amd64.c b/src/test/data/opt/vasum.driver.sysv.amd64.c
index 74579f54..e2ee8244 100644
--- a/src/test/data/opt/vasum.driver.sysv.amd64.c
+++ b/src/test/data/opt/vasum.driver.sysv.amd64.c
@@ -2,6 +2,7 @@
 
 extern int twenty_eight(void);
 
-int main() {
+int
+main() {
   assert(twenty_eight() == 28);
 }
diff --git a/src/test/data/opt/vasum.vir.opt.sysv b/src/test/data/opt/vasum.vir.opt.sysv
index 1183d6fd..3406b829 100644
--- a/src/test/data/opt/vasum.vir.opt.sysv
+++ b/src/test/data/opt/vasum.vir.opt.sysv
@@ -1,79 +1,66 @@
 module vasum
 
 export function $sum(b %9/rax, w %n/rdi, ...) {
-  %ap = slot 24, align 8
   %3 = slot 176, align 16
 @19:
-  %4 = add.l %3, 0x8_l ; @20
-  regstore rsi, %4 ; @21
-  %5 = add.l %3, 0x10_l ; @22
-  regstore rdx, %5 ; @23
-  %6 = add.l %3, 0x18_l ; @24
-  regstore rcx, %6 ; @25
-  %7 = add.l %3, 0x20_l ; @26
-  regstore r8, %7 ; @27
-  %8 = add.l %3, 0x28_l ; @28
-  regstore r9, %8 ; @29
-  %10 = eq.b %9, 0x0_b ; @30
-  br %10, @2, @18
+  %4.1 = add.l %3, 0x8_l ; @20
+  regstore rsi, %4.1 ; @21
+  %5.1 = add.l %3, 0x10_l ; @22
+  regstore rdx, %5.1 ; @23
+  %6.1 = add.l %3, 0x18_l ; @24
+  regstore rcx, %6.1 ; @25
+  %7.1 = add.l %3, 0x20_l ; @26
+  regstore r8, %7.1 ; @27
+  %8.1 = add.l %3, 0x28_l ; @28
+  regstore r9, %8.1 ; @29
+  %10.1 = eq.b %9, 0x0_b ; @30
+  br %10.1, @2, @18
 @18:
-  %11 = add.l %3, 0x30_l ; @31
-  regstore xmm0, %11 ; @32
-  %12 = add.l %3, 0x40_l ; @33
-  regstore xmm1, %12 ; @34
-  %13 = add.l %3, 0x50_l ; @35
-  regstore xmm2, %13 ; @36
-  %14 = add.l %3, 0x60_l ; @37
-  regstore xmm3, %14 ; @38
-  %15 = add.l %3, 0x70_l ; @39
-  regstore xmm4, %15 ; @40
-  %16 = add.l %3, 0x80_l ; @41
-  regstore xmm5, %16 ; @42
-  %17 = add.l %3, 0x90_l ; @43
-  regstore xmm6, %17 ; @44
-  %18 = add.l %3, 0xa0_l ; @45
-  regstore xmm7, %18 ; @46
+  %11.1 = add.l %3, 0x30_l ; @31
+  regstore xmm0, %11.1 ; @32
+  %12.1 = add.l %3, 0x40_l ; @33
+  regstore xmm1, %12.1 ; @34
+  %13.1 = add.l %3, 0x50_l ; @35
+  regstore xmm2, %13.1 ; @36
+  %14.1 = add.l %3, 0x60_l ; @37
+  regstore xmm3, %14.1 ; @38
+  %15.1 = add.l %3, 0x70_l ; @39
+  regstore xmm4, %15.1 ; @40
+  %16.1 = add.l %3, 0x80_l ; @41
+  regstore xmm5, %16.1 ; @42
+  %17.1 = add.l %3, 0x90_l ; @43
+  regstore xmm6, %17.1 ; @44
+  %18.1 = add.l %3, 0xa0_l ; @45
+  regstore xmm7, %18.1 ; @46
   jmp @2
 @2:
-  st.w 0x8_w, %ap ; @47
-  %19 = add.l %ap, 0x4_l ; @48
-  st.w 0x30_w, %19 ; @49
-  %20 = stkargs ; @50
-  %21 = add.l %ap, 0x8_l ; @51
-  st.l %20, %21 ; @52
-  %22 = add.l %ap, 0x10_l ; @53
-  st.l %3, %22 ; @54
-  jmp @3(0x0_w, 0x0_w)
-@3(%x.2, %i.2):
-  %0 = lt.w %i.2, %n ; @15
-  br %0, @7, @8
+  %20.1 = stkargs ; @50
+  jmp @3(%20.1, 0x8_w, 0x0_w, 0x0_w)
+@3(%36.2, %34.2, %x.1, %i.1):
+  %0.1 = lt.w %i.1, %n ; @15
+  br %0.1, @7, @8
 @7:
   jmp @56
 @56:
-  %23 = ld.w %ap ; @60
-  %24 = le.w %23, 0x28_w ; @61
-  br %24, @57, @58
+  %24.1 = le.w %34.2, 0x28_w ; @61
+  br %24.1, @57, @58
 @57:
-  %26 = ld.l %22 ; @63
-  %27 = zext.l %23 ; @64
-  %28 = add.l %26, %27 ; @65
-  %29 = add.w %23, 0x8_w ; @66
-  st.w %29, %ap ; @67
-  jmp @59(%28)
+  %27.1 = zext.l %34.2 ; @64
+  %28.1 = add.l %3, %27.1 ; @65
+  %29.1 = add.w %34.2, 0x8_w ; @66
+  jmp @59(%36.2, %29.1, %28.1)
 @58:
-  %31 = ld.l %21 ; @69
-  %32 = add.l %31, 0x8_l ; @70
-  st.l %32, %21 ; @71
-  jmp @59(%31)
-@59(%33):
-  %y.1 = ld.w %33 ; @72
+  %32.1 = add.l %36.2, 0x8_l ; @70
+  jmp @59(%32.1, %34.2, %36.2)
+@59(%36.3, %34.3, %33.1):
+  %y.1 = ld.w %33.1 ; @72
   jmp @55
 @55:
-  %1 = add.w %x.2, %y.1 ; @16
-  %2 = add.w %i.2, 0x1_w ; @17
-  jmp @3(%1, %2)
+  %1.1 = add.w %x.1, %y.1 ; @16
+  %2.1 = add.w %i.1, 0x1_w ; @17
+  jmp @3(%36.3, %34.3, %1.1, %2.1)
 @8:
-  ret rax/%x.2
+  ret rax/%x.1
 }
 
 export function $twenty_eight() {
diff --git a/src/test/data/opt/vasum.vir.opt.sysv.amd64 b/src/test/data/opt/vasum.vir.opt.sysv.amd64
index 887fe9c4..cd1095a8 100644
--- a/src/test/data/opt/vasum.vir.opt.sysv.amd64
+++ b/src/test/data/opt/vasum.vir.opt.sysv.amd64
@@ -1,19 +1,18 @@
 module vasum
 
 export function $sum { ; returns: rax
-  %ap = slot 24, align 8
   %3 = slot 176, align 16
 @19:
   mov %9:b, al ; @20
-  mov %n:w, edi ; @103
+  mov %n:w, edi ; @111
   mov qword ptr [%3 + 0x8], rsi ; @21
   mov qword ptr [%3 + 0x10], rdx ; @23
   mov qword ptr [%3 + 0x18], rcx ; @25
   mov qword ptr [%3 + 0x20], r8 ; @27
   mov qword ptr [%3 + 0x28], r9 ; @29
-  test %9:b, %9:b ; @98
-  je @2 ; @99
-  jmp @18 ; @100
+  test %9:b, %9:b ; @106
+  je @2 ; @107
+  jmp @18 ; @108
 @18:
   movdqa xmmword ptr [%3 + 0x30], xmm0 ; @32
   movdqa xmmword ptr [%3 + 0x40], xmm1 ; @34
@@ -23,70 +22,69 @@ export function $sum { ; returns: rax
   movdqa xmmword ptr [%3 + 0x80], xmm5 ; @42
   movdqa xmmword ptr [%3 + 0x90], xmm6 ; @44
   movdqa xmmword ptr [%3 + 0xa0], xmm7 ; @46
-  jmp @2 ; @97
+  jmp @2 ; @105
 @2:
-  mov dword ptr [%ap], 0x8_w ; @47
-  mov dword ptr [%ap + 0x4], 0x30_w ; @49
-  lea %20:l, qword ptr [rbp + 0x10] ; @50
-  mov qword ptr [%ap + 0x8], %20:l ; @52
-  mov qword ptr [%ap + 0x10], %3:l ; @54
-  xor %x.2:w, %x.2:w ; @94
-  xor %i.2:w, %i.2:w ; @95
-  jmp @3 ; @96
+  lea %20.1:l, qword ptr [rbp + 0x10] ; @50
+  mov %36.2:l, %20.1:l ; @100
+  mov %34.2:w, 0x8_w ; @101
+  xor %x.1:w, %x.1:w ; @102
+  xor %i.1:w, %i.1:w ; @103
+  jmp @3 ; @104
 @3:
-  cmp %i.2:w, %n:w ; @89
-  jb @7 ; @90
-  jmp @8 ; @91
+  cmp %i.1:w, %n:w ; @95
+  jb @7 ; @96
+  jmp @8 ; @97
 @8:
-  mov eax, %x.2:w ; @87
-  ret ; @88
+  mov eax, %x.1:w ; @93
+  ret ; @94
 @7:
-  jmp @56 ; @86
+  jmp @56 ; @92
 @56:
-  mov %23:w, dword ptr [%ap] ; @60
-  cmp %23:w, 0x28_w ; @81
-  jbe @57 ; @82
-  jmp @58 ; @83
+  cmp %34.2:w, 0x28_w ; @87
+  jbe @57 ; @88
+  jmp @58 ; @89
 @58:
-  mov %31:l, qword ptr [%ap + 0x8] ; @69
-  lea %32:l, qword ptr [%31 + 0x8] ; @70
-  mov qword ptr [%ap + 0x8], %32:l ; @71
-  mov %33:l, %31:l ; @79
-  jmp @59 ; @80
+  lea %32.1:l, qword ptr [%36.2 + 0x8] ; @70
+  mov %36.3:l, %32.1:l ; @83
+  mov %34.3:w, %34.2:w ; @84
+  mov %33.1:l, %36.2:l ; @85
+  jmp @59 ; @86
 @57:
-  mov %26:l, qword ptr [%ap + 0x10] ; @63
-  mov %27:w, %23:w ; @64
-  lea %28:l, qword ptr [%26 + %27*1] ; @65
-  lea %29:w, qword ptr [%23 + 0x8] ; @66
-  mov dword ptr [%ap], %29:w ; @67
-  mov %33:l, %28:l ; @77
-  jmp @59 ; @78
+  mov %27.1:w, %34.2:w ; @64
+  lea %28.1:l, qword ptr [%3 + %27.1*1] ; @65
+  lea %29.1:w, qword ptr [%34.2 + 0x8] ; @66
+  mov %36.3:l, %36.2:l ; @79
+  mov %34.3:w, %29.1:w ; @80
+  mov %33.1:l, %28.1:l ; @81
+  jmp @59 ; @82
 @59:
-  mov %y.1:w, dword ptr [%33] ; @72
-  jmp @55 ; @76
+  mov %y.1:w, dword ptr [%33.1] ; @72
+  jmp @55 ; @78
 @55:
-  lea %1:w, qword ptr [%x.2 + %y.1*1] ; @16
-  lea %2:w, qword ptr [%i.2 + 0x1] ; @17
-  mov %x.2:w, %1:w ; @73
-  mov %i.2:w, %2:w ; @74
-  jmp @3 ; @75
+  lea %1.1:w, qword ptr [%x.1 + %y.1*1] ; @16
+  lea %2.1:w, qword ptr [%i.1 + 0x1] ; @17
+  mov %36.2:l, %36.3:l ; @73
+  mov %34.2:w, %34.3:w ; @74
+  mov %x.1:w, %1.1:w ; @75
+  mov %i.1:w, %2.1:w ; @76
+  jmp @3 ; @77
 }
 
 export function $twenty_eight { ; returns: rax
 @13:
   mov edi, 0x7_w ; @14
-  mov esi, 0x1_w ; @107
-  mov edx, 0x2_w ; @108
-  mov ecx, 0x3_w ; @109
-  mov r8d, 0x4_w ; @110
-  mov r9d, 0x5_w ; @111
-  sub rsp, 0x10_l ; @112
-  mov dword ptr [rsp], 0x6_w ; @113
-  mov dword ptr [rsp + 0x8], 0x7_w ; @114
-  xor al, al ; @115
-  call $sum ; r8 r9 rax rcx rdi rdx rsi ; @116
-  add rsp, 0x10_l ; @117
-  mov %x.1:w, eax ; @118
-  mov eax, %x.1:w ; @105
-  ret ; @106
+  mov esi, 0x1_w ; @115
+  mov edx, 0x2_w ; @116
+  mov ecx, 0x3_w ; @117
+  mov r8d, 0x4_w ; @118
+  mov r9d, 0x5_w ; @119
+  sub rsp, 0x10_l ; @120
+  mov dword ptr [rsp], 0x6_w ; @121
+  mov dword ptr [rsp + 0x8], 0x7_w ; @122
+  xor al, al ; @123
+  call $sum ; r8 r9 rax rcx rdi rdx rsi ; @124
+  add rsp, 0x10_l ; @125
+  mov %x.1:w, eax ; @126
+  mov eax, %x.1:w ; @113
+  ret ; @114
 }
diff --git a/src/test/data/opt/vasum.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/vasum.vir.opt.sysv.amd64.regalloc
new file mode 100644
index 00000000..7e857eef
--- /dev/null
+++ b/src/test/data/opt/vasum.vir.opt.sysv.amd64.regalloc
@@ -0,0 +1,72 @@
+module vasum
+
+export function $sum { ; returns: rax
+@19:
+  push rbp ; @127
+  mov rbp, rsp ; @128
+  sub rsp, 0xc0_l ; @129
+  mov r10d, edi ; @111
+  mov qword ptr [rbp - 0xa8], rsi ; @21
+  mov qword ptr [rbp - 0xa0], rdx ; @23
+  mov qword ptr [rbp - 0x98], rcx ; @25
+  mov qword ptr [rbp - 0x90], r8 ; @27
+  mov qword ptr [rbp - 0x88], r9 ; @29
+  test al, al ; @106
+  je @2 ; @107
+@18:
+  movdqa xmmword ptr [rbp - 0x80], xmm0 ; @32
+  movdqa xmmword ptr [rbp - 0x70], xmm1 ; @34
+  movdqa xmmword ptr [rbp - 0x60], xmm2 ; @36
+  movdqa xmmword ptr [rbp - 0x50], xmm3 ; @38
+  movdqa xmmword ptr [rbp - 0x40], xmm4 ; @40
+  movdqa xmmword ptr [rbp - 0x30], xmm5 ; @42
+  movdqa xmmword ptr [rbp - 0x20], xmm6 ; @44
+  movdqa xmmword ptr [rbp - 0x10], xmm7 ; @46
+@2:
+  lea rax, qword ptr [rbp + 0x10] ; @50
+  mov r8d, 0x8_w ; @101
+  xor edx, edx ; @102
+  xor ecx, ecx ; @103
+@3:
+  cmp ecx, r10d ; @95
+  jb @56 ; @96
+@8:
+  mov eax, edx ; @93
+  leave ; @130
+  ret ; @94
+@56:
+  cmp r8d, 0x28_w ; @87
+  jbe @57 ; @88
+@58:
+  lea rsi, qword ptr [rax + 0x8] ; @70
+  mov rdi, rax ; @85
+  jmp @59 ; @86
+@57:
+  lea rdi, qword ptr [rbp + r8*1 - 0xb0] ; @65
+  add r8d, 0x8_w ; @66
+  mov rsi, rax ; @79
+@59:
+  add edx, dword ptr [rdi] ; @16
+  inc ecx ; @17
+  mov rax, rsi ; @73
+  jmp @3 ; @77
+}
+
+export function $twenty_eight { ; returns: rax
+@13:
+  push rbp ; @131
+  mov rbp, rsp ; @132
+  mov edi, 0x7_w ; @14
+  mov esi, 0x1_w ; @115
+  mov edx, 0x2_w ; @116
+  mov ecx, 0x3_w ; @117
+  mov r8d, 0x4_w ; @118
+  mov r9d, 0x5_w ; @119
+  sub rsp, 0x10_l ; @120
+  mov dword ptr [rsp], 0x6_w ; @121
+  mov dword ptr [rsp + 0x8], 0x7_w ; @122
+  xor al, al ; @123
+  call $sum ; r8 r9 rax rcx rdi rdx rsi ; @124
+  leave ; @133
+  ret ; @114
+}
diff --git a/src/test/dune b/src/test/dune
index c4688699..a2d5bace 100644
--- a/src/test/dune
+++ b/src/test/dune
@@ -1,6 +1,34 @@
 (tests
- (names test_bv_interval test_opt test_type)
+ (names test_allen test_opt test_type)
  (libraries ocamldiff ounit2 cgen shexp.process)
+ (modules test_allen test_opt test_type)
  (ocamlopt_flags -O2)
  (deps
   (glob_files_rec data/*)))
+
+(library
+ (name test_float32)
+ (modules test_float32)
+ (libraries cgen)
+ (ocamlopt_flags -O2)
+ (inline_tests)
+ (preprocess
+  (pps ppx_jane)))
+
+(library
+ (name test_bv_interval)
+ (modules test_bv_interval)
+ (libraries cgen)
+ (ocamlopt_flags -O2)
+ (inline_tests)
+ (preprocess
+  (pps ppx_jane)))
+
+(library
+ (name test_bitset)
+ (modules test_bitset)
+ (libraries cgen)
+ (ocamlopt_flags -O2)
+ (inline_tests)
+ (preprocess
+  (pps ppx_jane)))
diff --git a/src/test/test_allen.ml b/src/test/test_allen.ml
new file mode 100644
index 00000000..99727129
--- /dev/null
+++ b/src/test/test_allen.ml
@@ -0,0 +1,47 @@
+open Core
+open OUnit2
+open Cgen
+open Allen_interval_algebra
+
+type range = {
+  lo : int;
+  hi : int;
+}
+
+let (--) lo hi = {lo; hi}
+
+module A = Make(struct
+    type point = int
+    type t = range
+    let lo t = t.lo
+    let hi t = t.hi
+    include Int.Replace_polymorphic_compare
+  end)
+
+let printer = Format.asprintf "%a" pp
+
+let test expect a b _ =
+  let got = A.relate a b in
+  assert_equal ~printer expect got;
+  let expect' = converse expect in
+  let got' = A.relate b a in
+  assert_equal ~msg:"Converse" ~printer expect' got'
+
+let suite =
+  "Allen interval algebra" >::: [
+    "before" >:: test Before (1--3) (5--7);
+    "meets" >:: test Meets (1--3) (3--8);
+    "overlaps" >:: test Overlaps (1--5) (3--8);
+    "finished_by" >:: test Finished_by (1--10) (4--10);
+    "contains" >:: test Contains (1--10) (3--7);
+    "starts" >:: test Starts (1--3) (1--10);
+    "equal" >:: test Equal (2--7) (2--7);
+    "started_by" >:: test Started_by (1--10) (1--3);
+    "during" >:: test During (4--6) (1--10);
+    "finishes" >:: test Finishes (4--10) (1--10);
+    "overlapped_by" >:: test Overlapped_by (5--10) (1--7);
+    "met_by" >:: test Met_by (10--15) (5--10);
+    "after" >:: test After (10--15) (1--7);
+  ]
+
+let () = run_test_tt_main suite
diff --git a/src/test/test_bitset.ml b/src/test/test_bitset.ml
new file mode 100644
index 00000000..a7121fc3
--- /dev/null
+++ b/src/test/test_bitset.ml
@@ -0,0 +1,112 @@
+open Core
+open Cgen
+
+module Seq = Sequence
+module B = Bitset.Id
+module Q = Quickcheck
+module G = Q.Generator
+module S = Q.Shrinker
+module O = Q.Observer
+module T = Base_quickcheck.Test
+
+(* Keys must be positive, but if we include really large ones
+   in our tests then we might OOM. *)
+let key_ok i = i >= 0 && i <= 2000
+let gen_key = Int.quickcheck_generator |> G.filter ~f:key_ok
+
+let gen_set =
+  G.(list (tuple2 gen_key bool)) |> G.map ~f:(fun bits ->
+      List.fold bits ~init:B.empty ~f:(fun acc (k, on) ->
+          if on then B.set acc k else acc))
+
+let shr_set = S.create @@ fun s ->
+  B.enum s |> Seq.map ~f:(B.clear s)
+
+let obs_set = O.unmap [%observer : int list]
+    ~f:(Fn.compose Seq.to_list B.enum)
+
+module Single = struct
+  type t = B.t [@@deriving sexp]
+  let quickcheck_generator = gen_set
+  let quickcheck_observer = obs_set
+  let quickcheck_shrinker = shr_set
+end
+
+module Pair = struct
+  type t = B.t * B.t [@@deriving sexp]
+  let quickcheck_generator = G.tuple2 gen_set gen_set
+  let quickcheck_observer = O.tuple2 obs_set obs_set
+  let quickcheck_shrinker = S.tuple2 shr_set shr_set
+end
+
+module Triple = struct
+  type t = B.t * B.t * B.t [@@deriving sexp]
+  let quickcheck_generator = G.tuple3 gen_set gen_set gen_set
+  let quickcheck_observer = O.tuple3 obs_set obs_set obs_set
+  let quickcheck_shrinker = S.tuple3 shr_set shr_set shr_set
+end
+
+module Key = struct
+  type t = int [@@deriving sexp]
+  let quickcheck_generator = gen_key
+  let quickcheck_observer = [%observer : int]
+  let quickcheck_shrinker = S.filter Int.quickcheck_shrinker ~f:key_ok
+end
+
+module With_key = struct
+  type t = B.t * int [@@deriving sexp]
+  let quickcheck_generator = G.tuple2 gen_set gen_key
+  let quickcheck_observer = O.tuple2 obs_set [%observer : int]
+  let quickcheck_shrinker = S.tuple2 shr_set [%shrinker : int]
+end
+
+let%test_unit "union is commutative" =
+  T.run_exn (module Pair) ~f:(fun (a, b) ->
+      [%test_eq : B.t] (B.union a b) (B.union b a))
+
+let%test_unit "union is associative" =
+  T.run_exn (module Triple) ~f:(fun (a, b, c) ->
+      [%test_eq : B.t]
+        (B.union a (B.union b c))
+        (B.union (B.union a b) c))
+
+let%test_unit "inter is commutative" =
+  T.run_exn (module Pair) ~f:(fun (a, b) ->
+      [%test_eq : B.t] (B.inter a b) (B.inter b a))
+
+let%test_unit "inter is associative" =
+  T.run_exn (module Triple) ~f:(fun (a, b, c) ->
+      [%test_eq : B.t]
+        (B.inter a (B.inter b c))
+        (B.inter (B.inter a b) c))
+
+let%test_unit "set / mem consistency" =
+  T.run_exn (module With_key) ~f:(fun (s, k) ->
+      let s' = B.set s k in
+      assert (B.mem s' k))
+
+let%test_unit "clear / mem consistency" =
+  T.run_exn (module With_key) ~f:(fun (s, k) ->
+      let s' = B.clear s k in
+      assert (not (B.mem s' k)))
+
+let%test_unit "singleton has exactly one element" =
+  T.run_exn (module Key) ~f:(fun k ->
+      [%test_result : int list]
+        (Seq.to_list (B.enum (B.singleton k)))
+        ~expect:[k])
+
+let%test_unit "init n = [0..n-1]" =
+  T.run_exn (module Key) ~f:(fun n ->
+      [%test_result : int list]
+        (Seq.to_list (B.enum (B.init n)))
+        ~expect:(List.init n ~f:Fn.id))
+
+let%test_unit "min_elt matches smallest element in enum" =
+  T.run_exn (module Single) ~f:(fun s ->
+      match B.min_elt s with
+      | None ->
+        [%test_eq : int list] (Seq.to_list (B.enum s)) []
+      | Some k ->
+        let hd = List.hd_exn (Seq.to_list (B.enum s)) in
+        [%test_eq : int] k hd)
diff --git a/src/test/test_bv_interval.ml b/src/test/test_bv_interval.ml
index 1dcb6363..313817bc 100644
--- a/src/test/test_bv_interval.ml
+++ b/src/test/test_bv_interval.ml
@@ -1,28 +1,447 @@
 open Core
-open OUnit2
 open Cgen
 
 module I = Bv_interval
+module Q = Quickcheck
 
-let expect x i =
-  let size = I.size i in
-  let i' = match x with
-    | `empty -> I.create_empty ~size
-    | `full -> I.create_full ~size
-    | `def (lo, hi) -> I.create ~lo ~hi ~size in
-  let msg = Format.asprintf "Expected %a, got %a" I.pp i' I.pp i in
-  assert_bool msg (I.equal i i')
-
-let test_1 _ =
-  let size = 32 in
-  let module B = (val Bv.modular size) in
-  let i1 = I.create ~lo:Bv.one ~hi:B.(int 5) ~size in
-  let i2 = I.create ~lo:B.(int 3) ~hi:B.(int 7) ~size in
-  expect (`def (B.int 1, B.int 7)) @@ I.union i1 i2;
-  expect (`def (B.int 3, B.int 5)) @@ I.intersect i1 i2
-
-let suite = "Test BV intervals" >::: [
-    "Test 1" >:: test_1;
-  ]
-
-let () = run_test_tt_main suite
+let gen_size =
+  Q.Generator.small_positive_int |>
+  Q.Generator.filter ~f:(fun n -> n > 0 && n <= 64)
+
+let gen_small_size =
+  Q.Generator.small_positive_int |>
+  Q.Generator.filter ~f:(fun n -> n > 0 && n <= 8)
+
+let gen_bv size =
+  Q.Generator.map Int64.quickcheck_generator ~f:(fun x ->
+      Bv.(int64 x mod modulus size))
+
+let gen_interval size =
+  Q.Generator.map (gen_bv size) ~f:(fun bv ->
+      I.create_single ~value:bv ~size)
+
+let gen_sized_interval =
+  Q.Generator.bind gen_size ~f:(fun size ->
+      gen_interval size |> Q.Generator.map
+        ~f:(fun t -> size, t))
+
+let gen_interval_pair =
+  Q.Generator.bind gen_size ~f:(fun size ->
+      Q.Generator.both (gen_interval size) (gen_interval size) |>
+      Q.Generator.map ~f:(fun p -> size, p))
+
+let gen_small_sized_interval =
+  Q.Generator.bind gen_small_size ~f:(fun size ->
+      gen_interval size |> Q.Generator.map
+        ~f:(fun t -> size, t))
+
+let gen_small_interval_pair =
+  Q.Generator.bind gen_small_size ~f:(fun size ->
+      Q.Generator.both (gen_interval size) (gen_interval size) |>
+      Q.Generator.map ~f:(fun p -> size, p))
+
+let all_bvs size =
+  List.init (1 lsl size) ~f:(fun n -> Bv.(int n mod modulus size))
+
+let concrete_values size t =
+  all_bvs size |> List.filter ~f:(I.contains_value t)
+
+let oracle_sound_binop ~size ~interval_op ~concrete_op t1 t2 =
+  let vs1 = concrete_values size t1 in
+  let vs2 = concrete_values size t2 in
+  let results = List.bind vs1 ~f:(fun a ->
+      let ia = Bv.to_int a in
+      List.filter_map vs2 ~f:(fun b ->
+          let ib = Bv.to_int b in
+          concrete_op size ia ib |>
+          Option.map ~f:(fun n ->
+              Bv.(int n mod modulus size)))) in
+  List.is_empty results ||
+  let r_int = interval_op t1 t2 in
+  List.for_all results ~f:(I.contains_value r_int)
+
+let oracle_sound_unop ~size ~interval_op ~concrete_op t =
+  let vs = concrete_values size t in
+  let results = List.filter_map vs ~f:(fun v ->
+      let i = Bv.to_int v in
+      concrete_op size i |> Option.map ~f:(fun r ->
+          Bv.(int r mod modulus size))) in
+  List.is_empty results ||
+  let t_int = interval_op t in
+  List.for_all results ~f:(I.contains_value t_int)
+
+let prop_size_preserved (size, t) = Int.equal size (I.size t)
+
+let prop_bounds_valid (_size, t) =
+  let lo = I.lower t in
+  let hi = I.upper t in
+  I.is_full t || I.is_empty t || not (Bv.equal lo hi)
+
+let prop_empty_singleton_consistency (_size, t) =
+  not (I.is_empty t) || begin
+    Option.is_none (I.single_of t) &&
+    not (I.is_single t)
+  end
+
+let prop_full_contains_all size =
+  let t = I.create_full ~size in
+  Q.test (gen_bv size)
+    ~trials:100
+    ~f:(fun bv -> assert (I.contains_value t bv));
+  true
+
+let prop_single_roundtrip size =
+  Q.test (gen_bv size)
+    ~trials:50
+    ~f:(fun bv ->
+        let t = I.create_single ~value:bv ~size in
+        match I.single_of t with
+        | Some bv' -> assert (Bv.equal bv bv')
+        | None -> assert false);
+  true
+
+let prop_negative_nonnegative_disjoint (_size, t) =
+  not (I.is_all_negative t && I.is_all_non_negative t)
+
+let prop_binop_size_stable op (size, (t1, t2)) =
+  try
+    let r = op t1 t2 in
+    Int.equal (I.size r) size
+  with _ -> true
+
+let prop_singleton_contains (_size, t) =
+  match I.single_of t with
+  | Some v -> I.contains_value t v
+  | None -> true
+
+let prop_contains_implies_single_contains (_size, (t1, t2)) =
+  not (I.contains t1 t2) || match I.single_of t2 with
+  | None -> true
+  | Some v -> I.contains_value t1 v
+
+let prop_intersect_comm (_size, (t1, t2)) =
+  let r1 = I.intersect t1 t2 in
+  let r2 = I.intersect t2 t1 in
+  I.equal r1 r2
+
+let prop_union_sound (_size, (t1, t2)) =
+  let u1 = I.union t1 t2 in
+  let u2 = I.union t2 t1 in
+  I.contains u1 t1 &&
+  I.contains u1 t2 &&
+  I.contains u2 t1 &&
+  I.contains u2 t2
+
+let prop_inverse_involutive (_size, t) =
+  I.equal (I.inverse (I.inverse t)) t
+
+let prop_extract_shrinks (size,t) =
+  if size > 1 then
+    let hi = size - 1 in
+    let lo = size / 2 in
+    let t' = I.extract t ~hi ~lo in
+    I.size t' = hi - lo + 1
+  else true
+
+let prop_concat_size ((size1, t1), (size2, t2)) =
+  let t = I.concat t1 t2 in
+  Int.equal (I.size t) (size1 + size2)
+
+let mask_of_size size = (1 lsl size) - 1
+let add_mod size a b = (a + b) land mask_of_size size
+let sub_mod size a b = (a - b) land mask_of_size size
+let mul_mod size a b = (a * b) land mask_of_size size
+let udiv_opt _size a b = if b = 0 then None else Some (a / b)
+let urem_opt _size a b = if b = 0 then None else Some (a mod b)
+
+let to_signed size n =
+  let sign_bit = 1 lsl (size - 1) in
+  if (n land sign_bit) = 0 then n
+  else n - (1 lsl size)
+
+let sdiv_opt size a b =
+  if b = 0 then None else
+    let sa = to_signed size a in
+    let sb = to_signed size b in
+    if sb = 0 then None else
+      let q = sa / sb in
+      (* re-wrap as unsigned *)
+      let res = q land mask_of_size size in
+      Some res
+
+let srem_opt size a b =
+  if b = 0 then None else
+    let sa = to_signed size a in
+    let sb = to_signed size b in
+    if sb = 0 then None else
+      let m = Bv.modulus size in
+      let ba = Bv.(int sa mod m) in
+      let bb = Bv.(int sb mod m) in
+      let r = Bv.(srem ba bb mod m) in
+      Some (Bv.to_int r)
+
+let logand_mod _size a b = a land b
+let logor_mod  _size a b = a lor b
+let logxor_mod _size a b = a lxor b
+
+let shl_opt size a b =
+  if b >= size then None else Some ((a lsl b) land mask_of_size size)
+
+let lshr_opt size a b =
+  if b >= size then None else Some (a lsr b)
+
+let ashr_opt size a b =
+  if b >= size then None else
+    let sa = to_signed size a in
+    let shifted = sa asr b in
+    Some (shifted land mask_of_size size)
+
+let popcount size a =
+  let a = a land mask_of_size size in
+  let rec loop x acc = if x = 0 then acc
+    else loop (x land (x - 1)) (acc + 1) in
+  loop a 0
+
+let bv_clz x n =
+  let x = Bv.to_bigint x in
+  let rec aux i =
+    if i < 0 then n
+    else if Z.testbit x i then n - i - 1
+    else aux (i - 1) in
+  aux (n - 1)
+
+let bv_ctz x n =
+  let x = Bv.to_bigint x in
+  let rec aux i =
+    if i >= n then n
+    else if Z.testbit x i then i + 1
+    else aux (i + 1) in
+  aux 0
+
+let clz size a = bv_clz Bv.(int a mod modulus size) size
+let ctz size a = bv_ctz Bv.(int a mod modulus size) size
+
+let prop_add_sound (size, (t1, t2)) =
+  oracle_sound_binop
+    ~size
+    ~interval_op:I.add
+    ~concrete_op:(fun size a b -> Some (add_mod size a b))
+    t1 t2
+
+let prop_sub_sound (size, (t1, t2)) =
+  oracle_sound_binop
+    ~size
+    ~interval_op:I.sub
+    ~concrete_op:(fun size a b -> Some (sub_mod size a b))
+    t1 t2
+
+let prop_mul_sound (size, (t1, t2)) =
+  oracle_sound_binop
+    ~size
+    ~interval_op:I.mul
+    ~concrete_op:(fun size a b -> Some (mul_mod size a b))
+    t1 t2
+
+let prop_udiv_sound (size, (t1, t2)) =
+  oracle_sound_binop
+    ~size
+    ~interval_op:I.udiv
+    ~concrete_op:udiv_opt
+    t1 t2
+
+let prop_urem_sound (size, (t1, t2)) =
+  oracle_sound_binop
+    ~size
+    ~interval_op:I.urem
+    ~concrete_op:urem_opt
+    t1 t2
+
+let prop_sdiv_sound (size, (t1, t2)) =
+  oracle_sound_binop
+    ~size
+    ~interval_op:I.sdiv
+    ~concrete_op:sdiv_opt
+    t1 t2
+
+let prop_srem_sound (size, (t1, t2)) =
+  oracle_sound_binop
+    ~size
+    ~interval_op:I.srem
+    ~concrete_op:srem_opt
+    t1 t2
+
+let prop_logand_sound (size, (t1, t2)) =
+  oracle_sound_binop
+    ~size
+    ~interval_op:I.logand
+    ~concrete_op:(fun size a b -> Some (logand_mod size a b))
+    t1 t2
+
+let prop_logor_sound (size, (t1, t2)) =
+  oracle_sound_binop
+    ~size
+    ~interval_op:I.logor
+    ~concrete_op:(fun size a b -> Some (logor_mod size a b))
+    t1 t2
+
+let prop_logxor_sound (size, (t1, t2)) =
+  oracle_sound_binop
+    ~size
+    ~interval_op:I.logxor
+    ~concrete_op:(fun size a b -> Some (logxor_mod size a b))
+    t1 t2
+
+let prop_lsl_sound (size, (t1, t2)) =
+  oracle_sound_binop
+    ~size
+    ~interval_op:I.logical_shift_left
+    ~concrete_op:shl_opt
+    t1 t2
+
+let prop_lsr_sound (size, (t1, t2)) =
+  oracle_sound_binop
+    ~size
+    ~interval_op:I.logical_shift_right
+    ~concrete_op:lshr_opt
+    t1 t2
+
+let prop_asr_sound (size, (t1, t2)) =
+  oracle_sound_binop
+    ~size
+    ~interval_op:I.arithmetic_shift_right
+    ~concrete_op:ashr_opt
+    t1 t2
+
+let prop_lnot_sound (size, t) =
+  oracle_sound_unop
+    ~size
+    ~interval_op:I.lnot
+    ~concrete_op:(fun size a -> Some ((lnot a) land mask_of_size size))
+    t
+
+let prop_neg_sound (size, t) =
+  oracle_sound_unop
+    ~size
+    ~interval_op:I.neg
+    ~concrete_op:(fun size a -> Some ((0 - a) land ((1 lsl size) - 1)))
+    t
+
+let prop_clz_sound (size, t) =
+  oracle_sound_unop
+    ~size
+    ~interval_op:(I.clz ~zero_is_poison:false)
+    ~concrete_op:(fun size a -> Some (clz size a))
+    t
+
+let prop_ctz_sound (size, t) =
+  oracle_sound_unop
+    ~size
+    ~interval_op:(I.ctz ~zero_is_poison:false)
+    ~concrete_op:(fun size a -> Some (ctz size a))
+    t
+
+let prop_popcnt_sound (size, t) =
+  oracle_sound_unop
+    ~size
+    ~interval_op:I.popcnt
+    ~concrete_op:(fun size a -> Some (popcount size a))
+    t
+
+let qc1 sexp name gen prop =
+  match
+    Q.test_or_error gen
+      ~seed:(`Deterministic name)
+      ~trials:3000
+      ~f:(fun x -> if prop x then Ok () else
+             Or_error.error_s Sexp.(List [Atom "failed"; sexp x]))
+  with Ok () -> () | Error e -> Error.raise e
+
+let sexp_binop x = [%sexp (x : int * (I.t * I.t))]
+let sexp_unop x = [%sexp (x : int * I.t)]
+
+let%test_unit "add preserves size" =
+  qc1 sexp_binop "add-size" gen_interval_pair (prop_binop_size_stable I.add)
+
+let%test_unit "sub preserves size" =
+  qc1 sexp_binop "sub-size" gen_interval_pair (prop_binop_size_stable I.sub)
+
+let%test_unit "mul preserves size" =
+  qc1 sexp_binop "mul-size" gen_interval_pair (prop_binop_size_stable I.mul)
+
+let%test_unit "size preserved" =
+  qc1 sexp_unop "size" gen_sized_interval prop_size_preserved
+
+let%test_unit "bounds valid" =
+  qc1 sexp_unop "bounds" gen_sized_interval prop_bounds_valid
+
+let%test_unit "singleton roundtrip" =
+  qc1 sexp_of_int "single" gen_size prop_single_roundtrip
+
+let%test_unit "empty singleton consistency" =
+  qc1 sexp_unop "empty-single" gen_sized_interval prop_empty_singleton_consistency
+
+let%test_unit "intersect commutative" =
+  qc1 sexp_binop "intersect-comm" gen_interval_pair prop_intersect_comm
+
+let%test_unit "union commutative" =
+  qc1 sexp_binop "union-comm" gen_interval_pair prop_union_sound
+
+let%test_unit "inverse involutive" =
+  qc1 sexp_unop "inverse" gen_sized_interval prop_inverse_involutive
+
+let%test_unit "contains → contains single" =
+  qc1 sexp_binop "contains-single" gen_interval_pair prop_contains_implies_single_contains
+
+let%test_unit "add sound" =
+  qc1 sexp_binop "bv-add" gen_small_interval_pair prop_add_sound
+
+let%test_unit "sub sound" =
+  qc1 sexp_binop "bv-sub" gen_small_interval_pair prop_sub_sound
+
+let%test_unit "mul sound" =
+  qc1 sexp_binop "bv-mul" gen_small_interval_pair prop_mul_sound
+
+let%test_unit "udiv sound" =
+  qc1 sexp_binop "bv-udiv" gen_small_interval_pair prop_udiv_sound
+
+let%test_unit "urem sound" =
+  qc1 sexp_binop "bv-urem" gen_small_interval_pair prop_urem_sound
+
+let%test_unit "sdiv sound" =
+  qc1 sexp_binop "bv-sdiv" gen_small_interval_pair prop_sdiv_sound
+
+let%test_unit "srem sound" =
+  qc1 sexp_binop "bv-srem" gen_small_interval_pair prop_srem_sound
+
+let%test_unit "logand sound" =
+  qc1 sexp_binop "bv-logand" gen_small_interval_pair prop_logand_sound
+
+let%test_unit "logor sound" =
+  qc1 sexp_binop "bv-logor" gen_small_interval_pair prop_logor_sound
+
+let%test_unit "logxor sound" =
+  qc1 sexp_binop "bv-logxor" gen_small_interval_pair prop_logxor_sound
+
+let%test_unit "lsl sound" =
+  qc1 sexp_binop "bv-lsl" gen_small_interval_pair prop_lsl_sound
+
+let%test_unit "lsr sound" =
+  qc1 sexp_binop "bv-lsr" gen_small_interval_pair prop_lsr_sound
+
+let%test_unit "asr sound" =
+  qc1 sexp_binop "bv-asr" gen_small_interval_pair prop_asr_sound
+
+let%test_unit "neg sound" =
+  qc1 sexp_unop "bv-neg" gen_small_sized_interval prop_neg_sound
+
+let%test_unit "lnot sound" =
+  qc1 sexp_unop "bv-lnot" gen_small_sized_interval prop_lnot_sound
+
+let%test_unit "clz sound" =
+  qc1 sexp_unop "bv-clz" gen_small_sized_interval prop_clz_sound
+
+let%test_unit "ctz sound" =
+  qc1 sexp_unop "bv-ctz" gen_small_sized_interval prop_ctz_sound
+
+let%test_unit "popcnt sound" =
+  qc1 sexp_unop "bv-popcnt" gen_small_sized_interval prop_popcnt_sound
diff --git a/src/test/test_float32.ml b/src/test/test_float32.ml
new file mode 100644
index 00000000..00051b95
--- /dev/null
+++ b/src/test/test_float32.ml
@@ -0,0 +1,136 @@
+open Core
+open Cgen.Float32
+
+module Q = Quickcheck
+
+let gen_float32 = Q.Generator.map Int32.quickcheck_generator ~f:of_bits
+let gen_pair = Q.Generator.both gen_float32 gen_float32
+
+let f64 x = to_float x
+let near ?(eps = 1e-5) a b = Float.(abs (f64 a - f64 b) <= eps)
+
+let prop_add_approx a b =
+  is_nan a || is_nan b || is_inf a || is_inf b ||
+  let r = add a b in
+  let e = of_float Float.(f64 a + f64 b) in
+  is_nan r || is_inf r || is_nan e || is_inf e || near r e
+
+let prop_sub_approx a b =
+  is_nan a || is_nan b || is_inf a || is_inf b ||
+  let r = sub a b in
+  let e = of_float Float.(f64 a - f64 b) in
+  is_nan r || is_inf r || is_nan e || is_inf e || near r e
+
+let prop_mul_approx a b =
+  is_nan a || is_nan b || is_inf a || is_inf b ||
+  let r = mul a b in
+  let e = of_float Float.(f64 a * f64 b) in
+  is_nan r || is_inf r || is_nan e || is_inf e || near r e
+
+let prop_div_approx a b =
+  is_nan a || is_nan b || is_inf a || is_inf b || Float.(f64 b = 0.0) ||
+  let r = div a b in
+  let e = of_float Float.(f64 a / f64 b) in
+  is_nan r || is_inf r || is_nan e || is_inf e || near r e
+
+let prop_neg_approx x =
+  let r = neg x in
+  let fx = to_float x in
+  let e = of_float Float.(-fx) in
+  if is_nan x then is_nan r
+  else if is_inf x
+  then is_inf r && Bool.equal (is_negative r) (not @@ is_negative x)
+  else near r e
+
+let prop_compare_consistent a b =
+  is_unordered a b ||
+  let c = compare a b in
+  let eq = Int.(c = 0) in
+  let lt = Int.(c < 0) in
+  let gt = Int.(c > 0) in
+  Bool.equal eq (a = b) &&
+  Bool.equal (not eq) (a <> b) &&
+  Bool.equal lt (a < b) &&
+  Bool.equal (lt || eq) (a <= b) &&
+  Bool.equal gt (a > b) &&
+  Bool.equal (gt || eq) (a >= b)
+
+let prop_is_zero x = Bool.equal (is_zero x) Float.(f64 x = 0.0)
+
+let prop_is_negative x =
+  let sign_bit = Int32.(bits x land 0x8000_0000l <> 0l) in
+  Bool.equal (is_negative x) sign_bit
+
+let prop_is_inf x = Bool.equal (is_inf x) (Float.is_inf @@ f64 x)
+let prop_is_nan x = Bool.equal (is_nan x) (Float.is_nan @@ f64 x)
+
+let prop_bits_roundtrip x =
+  let b = bits x in
+  let x' = of_bits b in
+  Int32.equal b (bits x')
+
+let prop_string_roundtrip x =
+  let s = to_string x in
+  let x' = of_string s in
+  if is_nan x then is_nan x'
+  else if is_inf x
+  then is_inf x' && Bool.(is_negative x = is_negative x')
+  else not @@ is_nan x'
+
+let prop_int_conversions x =
+  ignore (to_int8   x  : int);
+  ignore (to_int16  x  : int);
+  ignore (to_int32  x  : int32);
+  ignore (to_int64  x  : int64);
+  ignore (to_uint8  x  : int);
+  ignore (to_uint16 x  : int);
+  ignore (to_uint32 x  : int32);
+  ignore (to_uint64 x  : int64);
+  ignore (of_int8   0  : t);
+  ignore (of_int16  0  : t);
+  ignore (of_int32  0l : t);
+  ignore (of_int64  0L : t);
+  ignore (of_uint8  0  : t);
+  ignore (of_uint16 0  : t);
+  ignore (of_uint32 0l : t);
+  ignore (of_uint64 0L : t);
+  true
+
+(* prints failing inputs! *)
+let qc1 name gen prop =
+  match
+    Q.test_or_error gen
+      ~seed:(`Deterministic name)
+      ~trials:5000
+      ~f:(fun x ->
+          if prop x then Ok ()
+          else Or_error.error_s [%sexp "failed", (x : t)])
+  with
+  | Ok () -> ()
+  | Error e -> Error.raise e
+
+let qc2 name gen prop =
+  match
+    Q.test_or_error gen
+      ~seed:(`Deterministic name)
+      ~trials:5000
+      ~f:(fun (a, b) ->
+          if prop a b then Ok ()
+          else Or_error.error_s [%sexp "failed", (a : t), (b : t)])
+  with
+  | Ok () -> ()
+  | Error e -> Error.raise e
+
+let%test_unit "bits roundtrip" = qc1 "bits" gen_float32 prop_bits_roundtrip
+let%test_unit "add approx" = qc2 "add" gen_pair prop_add_approx
+let%test_unit "sub approx" = qc2 "sub" gen_pair prop_sub_approx
+let%test_unit "mul approx" = qc2 "mul" gen_pair prop_mul_approx
+let%test_unit "div approx" = qc2 "div" gen_pair prop_div_approx
+let%test_unit "neg approx" = qc1 "neg" gen_float32 prop_neg_approx
+let%test_unit "compare consistency" = qc2 "cmp" gen_pair prop_compare_consistent
+let%test_unit "is_zero" = qc1 "zero" gen_float32 prop_is_zero
+let%test_unit "is_negative" = qc1 "neg" gen_float32 prop_is_negative
+let%test_unit "is_inf" = qc1 "inf" gen_float32 prop_is_inf
+let%test_unit "is_nan" = qc1 "nan" gen_float32 prop_is_nan
+let%test_unit "int conversions" = qc1 "int" gen_float32 prop_int_conversions
+let%test_unit "string roundtrip" = qc1 "str" gen_float32 prop_string_roundtrip
diff --git a/src/test/test_opt.ml b/src/test/test_opt.ml
index 08b24787..c6a57f0e 100644
--- a/src/test/test_opt.ml
+++ b/src/test/test_opt.ml
@@ -2,17 +2,6 @@ open Core
 open OUnit2
 open Cgen
 
-let fmt s =
-  (* Ignore lines starting with a double comment *)
-  let s =
-    String.split_lines s |> List.filter ~f:(fun ln ->
-        not @@ String.is_prefix ln ~prefix:";;") |>
-    String.concat in
-  (* Ignore returns/newlines/tabs/spaces *)
-  String.filter s ~f:(function
-      | '\r' | '\n' | '\t' | ' ' -> false
-      | _ -> true)
-
 let from_file filename =
   let open Context.Syntax in
   let* m = Parse.Virtual.from_file filename in
@@ -22,16 +11,19 @@ let from_file filename =
 (* Toggle this to overwrite cases that differ. *)
 let overwrite = false
 
-let compare_outputs filename' expected p' =
-  if String.(fmt p' <> fmt expected) then
+let compare_outputs ?(chop_end = true) filename' expected p' =
+  let expected' = if chop_end
+    then String.chop_suffix_if_exists expected ~suffix:"\n"
+    else expected in
+  if String.(p' <> expected') then
     if overwrite then
       (* Assume we're being tested via `dune test`, which runs with
          "_build/default/test/" as the CWD. *)
       Out_channel.write_all ("../../../test/" ^ filename') ~data:(p' ^ "\n")
     else
-      let expected = String.chop_suffix_if_exists expected ~suffix:"\n" in
-      let diff = Odiff.strings_diffs expected p' in
-      let msg = Format.sprintf "Diff:\n\n%s" (Odiff.string_of_diffs diff) in
+      let diff = Odiff.strings_diffs expected' p' in
+      let msg = Format.sprintf "Diff (%s):\n\n%s"
+          filename' (Odiff.string_of_diffs diff) in
       assert_failure msg
 
 let from_file_abi filename =
@@ -41,20 +33,29 @@ let from_file_abi filename =
   let* () = Context.iter_seq_err (Virtual.Abi.Module.funs m) ~f:Passes.Ssa.check_abi in
   Passes.optimize_abi m
 
-let test name _ =
+let test ?(f = from_file) name _ =
   let filename = Format.sprintf "data/opt/%s.vir" name in
   let filename' = filename ^ ".opt" in
   let expected = In_channel.read_all filename' in
   Context.init Machine.X86.Amd64_sysv.target |>
   Context.eval begin
     let open Context.Syntax in
-    let* _, m = from_file filename in
+    let* _, m = f filename in
     let* () = Virtual.Module.funs m |> Context.iter_seq_err ~f:Passes.Ssa.check in
     !!(Format.asprintf "%a" Virtual.Module.pp m)
   end |> function
   | Ok p' -> compare_outputs filename' expected p'
   | Error err -> assert_failure @@ Format.asprintf "%a" Error.pp err
 
+let coalesce_only filename =
+  let open Context.Syntax in
+  let* m = Parse.Virtual.from_file filename in
+  let* tenv, m = Passes.initialize m in
+  let*? m = Virtual.Module.map_funs_err m ~f:Passes.Coalesce_slots.run in
+  let*? m = Virtual.Module.map_funs_err m ~f:Passes.Resolve_constant_blk_args.run in
+  let*? m = Virtual.Module.map_funs_err m ~f:Passes.Remove_dead_vars.run in
+  !!(tenv, m)
+
 let test_abi target ext name _ =
   let filename = Format.sprintf "data/opt/%s.vir" name in
   let filename' = Format.sprintf "%s.opt.%s" filename ext in
@@ -175,14 +176,7 @@ let test_native target abi ext name _ =
     end;
     if Shexp_process.(eval @@ file_exists driver_output) then
       let contents = In_channel.read_all driver_output in
-      let msg = Format.asprintf
-          "Unequal output\n\
-           ---------------------------\n\
-           Got:\n%s\n\
-           ---------------------------\n\
-           Expected:\n%s\n"
-          p.stdout contents in
-      assert_bool msg @@ String.(contents = p.stdout)
+      compare_outputs ~chop_end:false driver_output contents p.stdout
 
 (* Specific ABI lowering tests. *)
 let test_sysv = test_abi Machine.X86.Amd64_sysv.target "sysv"
@@ -307,6 +301,14 @@ let opt_suite = "Test optimizations" >::: [
     "No sinking" >:: test "nosink";
     "Spill test 2" >:: test "spill2";
     "Parallel moves" >:: test "parallel";
+    "SROA" >:: test "sroa";
+    "Sink 1" >:: test "sink1";
+    "Escape 1" >:: test "esc1";
+    "Slot coalesce 1 (no other opts)" >:: test ~f:coalesce_only "coalesce1";
+    "Slot coalesce 1 (full opts)" >:: test "coalesce1a";
+    "Bad load 1" >:: test "badload1";
+    "Bad load 2" >:: test "badload2";
+    "Binary search" >:: test "bsearch";
   ]
 
 let abi_suite = "Test ABI lowering" >::: [
@@ -339,6 +341,7 @@ let abi_suite = "Test ABI lowering" >::: [
     "Collatz recursive (SysV)" >:: test_sysv "collatz_rec";
     "Ackermann (SysV)" >:: test_sysv "ackermann";
     "Quicksort (SysV)" >:: test_sysv "qsort";
+    "Binary search (SysV)" >:: test "bsearch";
   ]
 
 let isel_suite = "Test instruction selection" >::: [
@@ -371,6 +374,7 @@ let isel_suite = "Test instruction selection" >::: [
     "Ackermann (SysV AMD64)" >:: test_sysv_amd64 "ackermann";
     "Quicksort (SysV AMD64)" >:: test_sysv_amd64 "qsort";
     "Parallel moves (SysV AMD64)" >:: test_sysv_amd64 "parallel";
+    "Binary search (SysV AMD64)" >:: test_sysv_amd64 "bsearch";
   ]
 
 let regalloc_suite = "Test register allocation" >::: [
@@ -413,6 +417,9 @@ let regalloc_suite = "Test register allocation" >::: [
     "Analyze array (SysV AMD64)" >:: test_sysv_amd64_regalloc "analyze_array";
     "Slot promotion 2 (GCD, partial) (SysV AMD64)" >:: test_sysv_amd64_regalloc "promote2-partial";
     "Parallel moves (SysV AMD64)" >:: test_sysv_amd64_regalloc "parallel";
+    "Struct in a block argument (SysV AMD64)" >:: test_sysv_amd64_regalloc "sumphi";
+    "Variadic sum (SysV AMD64)" >:: test_sysv_amd64_regalloc "vasum";
+    "Binary search (SysV AMD64)" >:: test_sysv_amd64_regalloc "bsearch";
   ]
 
 let native_suite = "Test native code" >::: [
@@ -430,6 +437,7 @@ let native_suite = "Test native code" >::: [
     "Quicksort, swap inlined (SysV AMD64)" >:: test_sysv_amd64_native "qsort_inline_swap";
     "Spill test 1 (SysV AMD64)" >:: test_sysv_amd64_native "spill1";
     "Variadic function arguments 1 (SysV AMD64)" >:: test_sysv_amd64_native "vaarg1";
+    "Variadic function arguments 2 (SysV AMD64)" >:: test_sysv_amd64_native "vaarg2";
     "Palindrome (SysV AMD64)" >:: test_sysv_amd64_native "palindrome";
     "Integer pow (SysV AMD64)" >:: test_sysv_amd64_native "int_pow";
     "AND test (SysV AMD64)" >:: test_sysv_amd64_native "and_test";
@@ -438,6 +446,10 @@ let native_suite = "Test native code" >::: [
     "Analyze array (SysV AMD64)" >:: test_sysv_amd64_native "analyze_array";
     "Unsigned remainder by 7 (SysV AMD64)" >:: test_sysv_amd64_native "uremby7";
     "Slot promotion 2 (GCD, partial) (SysV AMD64)" >:: test_sysv_amd64_native "promote2-partial";
+    "Struct in a block argument (SysV AMD64)" >:: test_sysv_amd64_native "sumphi";
+    "Returning, passing, and dereferencing a struct (SysV AMD64)" >:: test_sysv_amd64_native "unref";
+    "Sink 1 (SysV AMD64)" >:: test_sysv_amd64_native "sink1";
+    "Binary search (SysV AMD64)" >:: test_sysv_amd64_native "bsearch";
   ]
 
 let () = run_test_tt_main @@ test_list [
diff --git a/src/test/test_type.ml b/src/test/test_type.ml
index a2e230ba..93c1c75d 100644
--- a/src/test/test_type.ml
+++ b/src/test/test_type.ml
@@ -47,6 +47,45 @@ let c9 : Type.compound = `compound ("c9", None, [
     `elt (`i32, 1);
   ])
 
+let c10 : Type.compound = `compound ("c10", None, [
+    `name ("c1", 2);
+  ])
+
+let c11 : Type.compound = `compound ("c11", None, [
+    `elt (`i16, 3);
+    `elt (`i32, 1);
+  ])
+
+let c12 : Type.compound = `compound ("c12", Some 8, [
+    `elt (`i32, 1);
+  ])
+
+let c13 : Type.compound = `compound ("c13", None, [
+    `elt (`i8, 3);
+  ])
+
+let c14 : Type.compound = `compound ("c14", Some 8, [
+    `elt (`i16, 1);
+  ])
+
+let c15 : Type.compound = `compound ("c15", Some 8, [
+    `elt (`i8, 1);
+    `elt (`i16, 1);
+    `elt (`i32, 1);
+    `elt (`i8, 1);
+  ])
+
+let opaque4 : Type.compound = `opaque ("opaque4", 4, 4)
+
+let c16 : Type.compound = `compound ("c16", Some 4, [
+    `name ("opaque4", 1);
+    `name ("opaque4", 1);
+  ])
+
+let c1arr : Type.compound = `compound ("c1arr", None, [
+    `name ("c1", 3);
+  ])
+
 let sexp = Fn.compose Type.Layout.t_of_sexp Sexp.of_string
 
 let l1 = sexp "((align 4) (data (i32 i32 i32 i32 i8 i8 (pad 2))))"
@@ -58,6 +97,15 @@ let l6 = sexp "((align 8) (data ()))"
 let l7 = sexp "((align 8) (data (i32 (pad 4) i32 (pad 4))))"
 let l8 = sexp "((align 1) (data ()))"
 let l9 = sexp "((align 4) (data (f32 i8 i8 (pad 2) i32)))"
+let l10 = sexp "((align 4) (data (i32 i32 i32 i32 i8 i8 (pad 2) i32 i32 i32 i32 i8 i8 (pad 2))))"
+let l11 = sexp "((align 4) (data (i16 i16 i16 (pad 2) i32)))"
+let l12 = sexp "((align 8) (data (i32 (pad 4))))"
+let l13 = sexp "((align 1) (data (i8 i8 i8)))"
+let l14 = sexp "((align 8) (data (i16 (pad 6))))"
+let l15 = sexp "((align 8) (data (i8 (pad 1) i16 i32 i8 (pad 7))))"
+let lopaque4 = sexp "((align 4) (data ((opaque 4))))"
+let l16 = sexp "((align 4) (data ((opaque 8))))"
+let l1arr = sexp "((align 4) (data (i32 i32 i32 i32 i8 i8 (pad 2) i32 i32 i32 i32 i8 i8 (pad 2) i32 i32 i32 i32 i8 i8 (pad 2))))"
 
 let gamma = function
   | "c1" -> l1
@@ -69,6 +117,15 @@ let gamma = function
   | "c7" -> l7
   | "c8" -> l8
   | "c9" -> l9
+  | "c10" -> l10
+  | "c11" -> l11
+  | "c12" -> l12
+  | "c13" -> l13
+  | "c14" -> l14
+  | "c15" -> l15
+  | "opaque4" -> lopaque4
+  | "c16" -> l16
+  | "c1arr" -> l1arr
   | s -> failwithf "gamma: %s is undefined" s ()
 
 let _sexp_of_layout l = Sexp.List (List.map l ~f:Type.sexp_of_datum)
@@ -97,6 +154,15 @@ let test_c6 _ = test_sizeof_compound c6 ~expected:0
 let test_c7 _ = test_sizeof_compound c7 ~expected:16
 let test_c8 _ = test_sizeof_compound c8 ~expected:0
 let test_c9 _ = test_sizeof_compound c9 ~expected:12
+let test_c10 _ = test_sizeof_compound c10 ~expected:40
+let test_c11 _ = test_sizeof_compound c11 ~expected:12
+let test_c12 _ = test_sizeof_compound c12 ~expected:8
+let test_c13 _ = test_sizeof_compound c13 ~expected:3
+let test_c14 _ = test_sizeof_compound c14 ~expected:8
+let test_c15 _ = test_sizeof_compound c15 ~expected:16
+let test_opaque4 _ = test_sizeof_compound opaque4 ~expected:4
+let test_c16 _ = test_sizeof_compound c16 ~expected:8
+let test_c1arr _ = test_sizeof_compound c1arr ~expected:60
 
 let suite = "Test types" >::: [
     "Test sizeof compound 1" >:: test_c1;
@@ -108,6 +174,15 @@ let suite = "Test types" >::: [
     "Test sizeof compound 7" >:: test_c7;
     "Test sizeof compound 8" >:: test_c8;
     "Test sizeof compound 9" >:: test_c9;
+    "Test sizeof compound 10" >:: test_c10;
+    "Test sizeof compound 11" >:: test_c11;
+    "Test sizeof compound 12" >:: test_c12;
+    "Test sizeof compound 13" >:: test_c13;
+    "Test sizeof compound 14" >:: test_c14;
+    "Test sizeof compound 15" >:: test_c15;
+    "Test sizeof compound opaque4" >:: test_opaque4;
+    "Test sizeof compound 16" >:: test_c16;
+    "Test sizeof compound 1 (array)" >:: test_c1arr;
   ]
 
 let () = run_test_tt_main suite
diff --git a/tools/clang-format-all.sh b/tools/clang-format-all.sh
new file mode 100755
index 00000000..516180d1
--- /dev/null
+++ b/tools/clang-format-all.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+# C/C++ sources
+files_to_indent='.*\.\(c\|h\|cc\|cpp\|cxx\|hpp\)$'
+
+VERSION=$(clang-format --version 2>/dev/null)
+
+if [ -z "$VERSION" ]; then
+    echo "Please install clang-format"
+    exit 1
+else
+    echo "Reindenting with $VERSION"
+fi
+
+git ls-files | grep -e "$files_to_indent" | while read file
+do
+    clang-format -i "$file"
+done
diff --git a/tools/ocp-indent-all.sh b/tools/ocp-indent-all.sh
index 1020d842..a3564f73 100755
--- a/tools/ocp-indent-all.sh
+++ b/tools/ocp-indent-all.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-# a regex for files to indent
+# OCaml sources
 files_to_indent='.*\.\(ml\|mli\)$'
 
 VERSION=$(ocp-indent --version)