diff --git a/src/lib/egraph/extractor/extractor_core.ml b/src/lib/egraph/extractor/extractor_core.ml index 7c61a188..04b72215 100644 --- a/src/lib/egraph/extractor/extractor_core.ml +++ b/src/lib/egraph/extractor/extractor_core.ml @@ -91,6 +91,13 @@ let rec pp_ext ppf = function Format.fprintf ppf "(%a %a %a)" pp_prov p Enode.pp_op op (Format.pp_print_list ~pp_sep pp_ext) args +(* Pure cost of an operation. + + It's important to note that some of these costs are relative + to what we can rewrite the operation into. For example, + integer division/remainder has an enormous cost because we + want to incentivize rewrites a la Hacker's Delight. +*) let op_cost : Enode.op -> cost = function | Oint (i, t) -> (* In practice, a negative constant might need some work to @@ -116,7 +123,7 @@ let op_cost : Enode.op -> cost = function | Ovastart _ -> Cost.pure 0 | Obr | Ovar _ -> Cost.pure 2 | Osw _ | Obinop #Insn.bitwise_binop | Ounop _ -> Cost.pure 3 - | Obinop (`div _ | `udiv _ | `rem _ | `urem _) -> Cost.pure 90 + | Obinop (`div _ | `udiv _ | `rem _ | `urem _) -> Cost.pure 97 | Obinop (`mul _) -> Cost.pure 42 | Obinop (`mulh _ | `umulh _) -> Cost.pure 11 | Obinop _ -> Cost.pure 4 diff --git a/src/lib/machine/x86/x86_amd64_common.ml b/src/lib/machine/x86/x86_amd64_common.ml index 82b377a2..f62bae29 100644 --- a/src/lib/machine/x86/x86_amd64_common.ml +++ b/src/lib/machine/x86/x86_amd64_common.ml @@ -631,7 +631,7 @@ module Insn = struct This is morally equivalent to: - `x := cc ? b : a` + `a := cc ? 
b : a` *) -> Set.union (rset' [`rflags]) diff --git a/src/lib/magic_div.ml b/src/lib/magic_div.ml index 96c102ad..5ed3e9ca 100644 --- a/src/lib/magic_div.ml +++ b/src/lib/magic_div.ml @@ -67,10 +67,11 @@ let signed d t = then B.(q2 + one, r2 - ad) else q2, r2 in let delta = B.(ad - r2) in - if Bv.(q1 < delta || (q1 = delta && r1 = zero)) then + if Bv.(q1 < delta || (q1 = delta && r1 = zero)) + then loop p q1 r1 q2 r2 + else let m = B.(q2 + one) in - (if B.msb d then B.neg m else m), p - sz - else loop p q1 r1 q2 r2 in + (if B.msb d then B.neg m else m), p - sz in let q1 = B.(mins / anc) in let r1 = B.(mins - q1 * anc) in let q2 = B.(mins / ad) in diff --git a/src/lib/passes/simplify_cfg/simplify_cfg.ml b/src/lib/passes/simplify_cfg/simplify_cfg.ml index 1b96cfd3..a8f39041 100644 --- a/src/lib/passes/simplify_cfg/simplify_cfg.ml +++ b/src/lib/passes/simplify_cfg/simplify_cfg.ml @@ -2,6 +2,7 @@ open Core open Virtual open Simplify_cfg_common +module Ifc = Simplify_cfg_ifc module Brsel = Simplify_cfg_brsel module Merge_blks = Simplify_cfg_merge_blks module Contract = Simplify_cfg_contract @@ -26,6 +27,7 @@ let run tenv fn = || Contract.run env || Short_circ.run env || Duplicate_br.run env + || Ifc.run tenv env fn || brsel then loop @@ recompute_cfg env @@ update_fn env fn else !!fn in diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_common.ml b/src/lib/passes/simplify_cfg/simplify_cfg_common.ml index 7cc347b6..3e5239c2 100644 --- a/src/lib/passes/simplify_cfg/simplify_cfg_common.ml +++ b/src/lib/passes/simplify_cfg/simplify_cfg_common.ml @@ -42,14 +42,9 @@ let is_ret env l = match env.ret with | None -> false let update_fn env fn = - Func.blks fn |> - Seq.fold ~init:[] ~f:(fun acc b -> - let l = Blk.label b in - if Hashtbl.mem env.blks l then acc - else l :: acc) |> - Func.remove_blks_exn fn |> - Func.map_blks ~f:(fun b -> - Hashtbl.find_exn env.blks @@ Blk.label b) + Func.blks fn |> Seq.filter_map ~f:(fun b -> + Hashtbl.find env.blks @@ Blk.label b) |> + 
Seq.to_list |> Func.with_blks_exn fn let not_pseudo = Fn.non Label.is_pseudo diff --git a/src/lib/passes/simplify_cfg/simplify_cfg_ifc.ml b/src/lib/passes/simplify_cfg/simplify_cfg_ifc.ml new file mode 100644 index 00000000..a53edd29 --- /dev/null +++ b/src/lib/passes/simplify_cfg/simplify_cfg_ifc.ml @@ -0,0 +1,182 @@ +(* If-conversion canonicalization. + + For the `Brsel` pass, we handle the "degenerate diamond" + case for if-conversion, where the join block is targeted + by both arms of the branch for our header block. + + For other cases (e.g. a proper diamond, or a triangle), + we want to canonicalize the shape enough for other passes + to fire, so that `Brsel` can do the final part of if-conversion. +*) + +open Core +open Regular.Std +open Virtual +open Simplify_cfg_common + +module BR = Simplify_cfg_brsel + +type join = + | Diamond of { + pred1 : Label.t; + pred2 : Label.t; + hdr : Label.t; + } + | Triangle of { + pred : Label.t; + hdr : Label.t; + } + +let diamond p1 p2 h = Diamond {pred1 = p1; pred2 = p2; hdr = h} +let triangle p h = Triangle {pred = p; hdr = h} + +let pp_join ppf = function + | Diamond {pred1; pred2; hdr} -> + Format.fprintf ppf "(diamond (hdr %a) (pred1 %a) (pred2 %a))" + Label.pp hdr Label.pp pred1 Label.pp pred2 + | Triangle {pred; hdr} -> + Format.fprintf ppf "(triangle (hdr %a) (pred %a))" + Label.pp hdr Label.pp pred + +exception Not_hoistable + +let insn_cost : Insn.op -> int = function + (* Provably non-trapping div/rem gets a high cost. *) + | `bop (_, (`div #Type.imm | `rem #Type.imm), _, `int (i, _)) + when Bv.(i <> zero) -> 4 + (* Effectful or potentially trapping instructions cannot + be hoisted. *) + | `bop (_, (`div #Type.imm | `rem #Type.imm), _, _) + | `load _ + | `store _ + | #Insn.variadic + | `call _ + -> raise_notrace Not_hoistable + (* Cheap operations. 
*) + | `uop (_, #Insn.copy, _) + | `bop (_, #Insn.arith_binop, _, _) + | `bop (_, #Insn.bitwise_binop, _, _) + | `bop (_, #Insn.cmp, _, _) + | `uop (_, #Insn.arith_unop, _) + | `uop (_, #Insn.bitwise_unop, _) + -> 1 + (* Moderate operations. *) + | `uop (_, #Insn.cast, _) + | `sel _ + -> 2 + +let blk_cost b = + Blk.insns b |> Seq.map ~f:Insn.op |> + Seq.sum (module Int) ~f:insn_cost + +let max_join_args = 2 +let max_pred_cost = 4 + +let check_args tenv env fn b = + let args = Seq.to_list @@ Blk.args b in + List.length args <= max_join_args && try + List.iter args ~f:(fun x -> + ignore @@ BR.basicty tenv env fn x); + true + with BR.Non_basic -> false + +let (.?[]) env l = Hashtbl.find env.blks l +let (.![]) env l = Hashtbl.find_exn env.blks l + +let check_blk env l j = match env.?[l] with + | None -> false + | Some b -> + try match Blk.ctrl b with + | `jmp `label (l', args) when Label.(l' = j) -> + (* Ensure that this block has no parameters. + We have an invariant that, in its canonical + form, the only time a block can have parameters + is if it exists as a join point. *) + Seq.is_empty (Blk.args b) && + (* Ensure correct arity *) + Seq.length (Blk.args env.![j]) = List.length args && + (* Ensure that the block isn't too expensive to copy + to the header. 
*) + blk_cost b <= max_pred_cost + | _ -> false + with Not_hoistable -> false + +let check_hdr env l s1 s2 = match env.?[l] with + | None -> false + | Some b -> match Blk.ctrl b with + | `br (_, `label (l1, _), `label (l2, _)) -> + Label.((l1 = s1 && l2 = s2) || (l1 = s2 && l2 = s1)) + | _ -> false + +let plist env l = Seq.to_list @@ Cfg.Node.preds l env.cfg + +let find_join tenv env fn = + with_return @@ fun {return} -> + Hashtbl.iteri env.blks ~f:(fun ~key ~data:b -> + let found j = return @@ Some (key, j) in + if check_args tenv env fn b then match plist env key with + | [p1; p2] when Label.(p1 <> p2) -> + begin match plist env p1, plist env p2 with + | [p1'], _ when Label.(p1' = p2) -> + if check_blk env p1 key + && check_hdr env p2 p1 key + then found @@ triangle p1 p2 + | _, [p2'] when Label.(p2' = p1) -> + if check_blk env p2 key + && check_hdr env p1 p2 key + then found @@ triangle p2 p1 + | [p1'], [p2'] when Label.(p1' = p2') -> + if check_blk env p1 key + && check_blk env p2 key + && check_hdr env p1' p1 p2 + then found @@ diamond p1 p2 p1' + | _ -> () + end + | _ -> ()); + None + +let canonicalize_diamond env p1 p2 hdr = + let b1 = env.![p1] and b2 = env.![p2] and h = env.![hdr] in + let ctrl = match Blk.(ctrl h, ctrl b1, ctrl b2) with + | `br (c, `label (l1, _), `label (l2, _)), + `jmp (`label _ as d1), + `jmp (`label _ as d2) -> + if Label.(l1 = p1 && l2 = p2) then + `br (c, d1, d2) + else if Label.(l1 = p2 && l2 = p1) then + `br (c, d2, d1) + else assert false + | _ -> assert false in + let i1 = Seq.append (Blk.insns b1) (Blk.insns b2) in + let h' = Blk.append_insns h @@ Seq.to_list i1 in + let h' = Blk.with_ctrl h' ctrl in + Hashtbl.set env.blks ~key:hdr ~data:h' + +let canonicalize_triangle env key pred hdr = + let b = env.![pred] and h = env.![hdr] in + let ctrl = match Blk.(ctrl h, ctrl b) with + | `br (c, `label (l1, a1), `label (l2, a2)), + `jmp (`label _ as d) -> + if Label.(key = l1 && pred = l2) then + `br (c, `label (l1, a1), d) + else if 
Label.(key = l2 && pred = l1) then + `br (c, d, `label (l2, a2)) + else assert false + | _ -> assert false in + let h' = Blk.append_insns h @@ Seq.to_list @@ Blk.insns b in + let h' = Blk.with_ctrl h' ctrl in + Hashtbl.set env.blks ~key:hdr ~data:h' + +let canonicalize env key = function + | Diamond {pred1; pred2; hdr} -> canonicalize_diamond env pred1 pred2 hdr + | Triangle {pred; hdr} -> canonicalize_triangle env key pred hdr + +let run tenv env fn = match find_join tenv env fn with + | None -> false + | Some (key, data) -> + canonicalize env key data; + Logs.debug (fun m -> + m "%s: join in $%s at %a: %a%!" + __FUNCTION__ (Func.name fn) + Label.pp key pp_join data); + true diff --git a/src/test/data/opt/and_test.vir.opt b/src/test/data/opt/and_test.vir.opt index 29a7af3b..34f715ea 100644 --- a/src/test/data/opt/and_test.vir.opt +++ b/src/test/data/opt/and_test.vir.opt @@ -2,45 +2,40 @@ module and_test export function w $foo(w %x, w %y) { @2: - %0 = and.w %x, 0x3_w ; @29 - %1 = eq.w %0, 0x0_w ; @30 - %11 = sel.w %1, %y, %x ; @40 - ret %11 + %2 = and.w %x, 0x3_w ; @31 + %3 = eq.w %2, 0x0_w ; @32 + %4 = sel.w %3, %y, %x ; @33 + ret %4 } export function w $bar(w %x, w %y) { @10: - %2 = and.w %x, 0x3_w ; @31 - %3 = eq.w %2, 0x0_w ; @32 - br %3, @11, @12 -@11: - %4 = add.w %y, 0x1_w ; @33 - jmp @15(%4) -@12: - %5 = add.w %x, 0x1_w ; @34 - jmp @15(%5) -@15(%r.1): - ret %r.1 + %5 = and.w %x, 0x3_w ; @34 + %6 = eq.w %5, 0x0_w ; @35 + %7 = add.w %y, 0x1_w ; @36 + %8 = add.w %x, 0x1_w ; @37 + %9 = sel.w %6, %7, %8 ; @38 + ret %9 } export function l $baz(l %x) { @18: - %6 = and.l %x, 0x3_l ; @35 - switch.l %6, @19 [0x1_l -> @20, - 0x2_l -> @21, - 0x3_l -> @22] + %10 = and.l %x, 0x3_l ; @39 + switch.l %10, @19 [0x1_l -> @20, + 0x2_l -> @21, + 0x3_l -> @22] @19: - %7 = add.l %x, 0x1_l ; @36 - jmp @24(%7) + %11 = add.l %x, 0x1_l ; @40 + jmp @24(%11) @20: - %8 = add.l %x, 0x2_l ; @37 - jmp @24(%8) + %12 = add.l %x, 0x2_l ; @41 + jmp @24(%12) @21: - %9 = add.l %x, 0x3_l ; @38 - 
jmp @24(%9) + %13 = add.l %x, 0x3_l ; @42 + jmp @24(%13) @22: - %10 = add.l %x, 0x4_l ; @39 - jmp @24(%10) + %14 = add.l %x, 0x4_l ; @43 + jmp @24(%14) @24(%r.1): ret %r.1 } diff --git a/src/test/data/opt/and_test.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/and_test.vir.opt.sysv.amd64.regalloc index db19282c..488618c3 100644 --- a/src/test/data/opt/and_test.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/and_test.vir.opt.sysv.amd64.regalloc @@ -2,50 +2,48 @@ module and_test export function $foo { ; returns: rax @2: - mov eax, edi ; @48 - and eax, 0x3_w ; @49 - cmove edi, esi ; @44 - mov eax, edi ; @41 - ret ; @42 + mov eax, edi ; @51 + and eax, 0x3_w ; @52 + cmove edi, esi ; @47 + mov eax, edi ; @44 + ret ; @45 } export function $bar { ; returns: rax @10: - test edi, 0x3_w ; @56 - je @11 ; @57 -@12: - lea eax, qword ptr [rdi + 0x1] ; @34 - jmp @15 ; @55 -@11: - lea eax, qword ptr [rsi + 0x1] ; @33 -@15: - ret ; @51 + mov ecx, edi ; @60 + and ecx, 0x3_w ; @61 + lea edx, qword ptr [rsi + 0x1] ; @36 + lea eax, qword ptr [rdi + 0x1] ; @37 + cmp ecx, 0x0_w ; @55 + cmove eax, edx ; @56 + ret ; @54 } export function $baz { ; returns: rax @18: - mov rax, rdi ; @86 - and rax, 0x3_l ; @87 - je @19 ; @76 - dec rax ; @78 - cmp rax, 0x2_l ; @79 - ja @19 ; @80 - lea rcx, qword ptr [rip + @74] ; @81 - movsxd rax, dword ptr [rcx + rax*4] ; @82 - add rax, rcx ; @83 - jmp rax ; @84 - .tbl @74 [@20, @21, @22] ; @85 + mov rax, rdi ; @84 + and rax, 0x3_l ; @85 + je @19 ; @74 + dec rax ; @76 + cmp rax, 0x2_l ; @77 + ja @19 ; @78 + lea rcx, qword ptr [rip + @72] ; @79 + movsxd rax, dword ptr [rcx + rax*4] ; @80 + add rax, rcx ; @81 + jmp rax ; @82 + .tbl @72 [@20, @21, @22] ; @83 @22: - lea rax, qword ptr [rdi + 0x4] ; @39 - jmp @24 ; @73 -@21: - lea rax, qword ptr [rdi + 0x3] ; @38 + lea rax, qword ptr [rdi + 0x4] ; @43 jmp @24 ; @71 -@20: - lea rax, qword ptr [rdi + 0x2] ; @37 +@21: + lea rax, qword ptr [rdi + 0x3] ; @42 jmp @24 ; @69 +@20: + lea rax, qword ptr [rdi + 0x2] ; 
@41 + jmp @24 ; @67 @19: - lea rax, qword ptr [rdi + 0x1] ; @36 + lea rax, qword ptr [rdi + 0x1] ; @40 @24: - ret ; @65 + ret ; @63 } diff --git a/src/test/data/opt/branchless.vir.opt b/src/test/data/opt/branchless.vir.opt index a7a6d766..bf48e885 100644 --- a/src/test/data/opt/branchless.vir.opt +++ b/src/test/data/opt/branchless.vir.opt @@ -2,7 +2,7 @@ module foo export function w $foo(w %x, w %y, w %z) { @2: - %0 = ne.w %x, 0x0_w ; @15 - %1 = sel.w %0, %y, %z ; @16 - ret %1 + %2 = ne.w %x, 0x0_w ; @17 + %3 = sel.w %2, %y, %z ; @18 + ret %3 } diff --git a/src/test/data/opt/collatz.vir.opt b/src/test/data/opt/collatz.vir.opt index 0d30d4d2..df7a2132 100644 --- a/src/test/data/opt/collatz.vir.opt +++ b/src/test/data/opt/collatz.vir.opt @@ -4,23 +4,18 @@ export function w $foo(w %n) { @2: jmp @3(%n, 0x0_w) @3(%x.2, %count.2): - %0 = gt.w %x.2, 0x1_w ; @27 - br %0, @6, @7 + %1 = gt.w %x.2, 0x1_w ; @28 + br %1, @6, @7 @6: - %1 = and.w %x.2, 0x1_w ; @28 - %2 = eq.w %1, 0x0_w ; @29 - br %2, @10, @11 -@10: + %2 = and.w %x.2, 0x1_w ; @29 + %3 = eq.w %2, 0x0_w ; @30 %4 = lsr.w %x.2, 0x1_w ; @31 - jmp @15(%4) -@11: %5 = lsl.w %x.2, 0x1_w ; @32 %6 = add.w %5, %x.2 ; @33 %7 = add.w %6, 0x1_w ; @34 - jmp @15(%7) -@15(%x.3): - %3 = add.w %count.2, 0x1_w ; @30 - jmp @3(%x.3, %3) + %8 = sel.w %3, %4, %7 ; @35 + %9 = add.w %count.2, 0x1_w ; @36 + jmp @3(%8, %9) @7: ret %count.2 } diff --git a/src/test/data/opt/collatz.vir.opt.sir b/src/test/data/opt/collatz.vir.opt.sir index 5bbdd8f2..491c3182 100644 --- a/src/test/data/opt/collatz.vir.opt.sir +++ b/src/test/data/opt/collatz.vir.opt.sir @@ -5,19 +5,16 @@ start: %x.2 = copy.w %n; %count.2 = copy.w 0x0_w; while gt.w %x.2, 0x1_w { - %1 = and.w %x.2, 0x1_w; - if eq.w %1, 0x0_w { - %4 = lsr.w %x.2, 0x1_w; - %x.3 = copy.w %4 - } else { - %5 = lsl.w %x.2, 0x1_w; - %6 = add.w %5, %x.2; - %7 = add.w %6, 0x1_w; - %x.3 = copy.w %7 - }; - %3 = add.w %count.2, 0x1_w; - %x.2 = copy.w %x.3; - %count.2 = copy.w %3 + %2 = and.w %x.2, 0x1_w; + %3 = 
eq.w %2, 0x0_w; + %4 = lsr.w %x.2, 0x1_w; + %5 = lsl.w %x.2, 0x1_w; + %6 = add.w %5, %x.2; + %7 = add.w %6, 0x1_w; + %8 = sel.w %3, %4, %7; + %9 = add.w %count.2, 0x1_w; + %x.2 = copy.w %8; + %count.2 = copy.w %9 }; ret %count.2 } diff --git a/src/test/data/opt/collatz.vir.opt.sysv b/src/test/data/opt/collatz.vir.opt.sysv index fd38f3a8..cc94b534 100644 --- a/src/test/data/opt/collatz.vir.opt.sysv +++ b/src/test/data/opt/collatz.vir.opt.sysv @@ -4,23 +4,18 @@ export function $foo(w %n/rdi) { @2: jmp @3(%n, 0x0_w) @3(%x.2, %count.2): - %0 = gt.w %x.2, 0x1_w ; @27 - br %0, @6, @7 + %1 = gt.w %x.2, 0x1_w ; @28 + br %1, @6, @7 @6: - %1 = and.w %x.2, 0x1_w ; @28 - %2 = eq.w %1, 0x0_w ; @29 - br %2, @10, @11 -@10: + %2 = and.w %x.2, 0x1_w ; @29 + %3 = eq.w %2, 0x0_w ; @30 %4 = lsr.w %x.2, 0x1_w ; @31 - jmp @15(%4) -@11: %5 = lsl.w %x.2, 0x1_w ; @32 %6 = add.w %5, %x.2 ; @33 %7 = add.w %6, 0x1_w ; @34 - jmp @15(%7) -@15(%x.3): - %3 = add.w %count.2, 0x1_w ; @30 - jmp @3(%x.3, %3) + %8 = sel.w %3, %4, %7 ; @35 + %9 = add.w %count.2, 0x1_w ; @36 + jmp @3(%8, %9) @7: ret rax/%count.2 } diff --git a/src/test/data/opt/collatz.vir.opt.sysv.amd64 b/src/test/data/opt/collatz.vir.opt.sysv.amd64 index f651c79e..4bc3dcd5 100644 --- a/src/test/data/opt/collatz.vir.opt.sysv.amd64 +++ b/src/test/data/opt/collatz.vir.opt.sysv.amd64 @@ -2,33 +2,28 @@ module foo export function $foo { ; returns: rax @2: - mov %n:w, edi ; @56 - mov %x.2:w, %n:w ; @57 - xor %count.2:w, %count.2:w ; @58 - jmp @3 ; @59 + mov %n:w, edi ; @53 + mov %x.2:w, %n:w ; @54 + xor %count.2:w, %count.2:w ; @55 + jmp @3 ; @56 @3: - cmp %x.2:w, 0x1_w ; @51 - ja @6 ; @52 - jmp @7 ; @53 + cmp %x.2:w, 0x1_w ; @48 + ja @6 ; @49 + jmp @7 ; @50 @7: - mov eax, %count.2:w ; @49 - ret ; @50 + mov eax, %count.2:w ; @46 + ret ; @47 @6: - test %x.2:w, 0x1_w ; @43 - je @10 ; @44 - jmp @11 ; @45 -@11: - lea %7:w, qword ptr [%x.2 + %x.2*2 + 0x1] ; @34 - mov %x.3:w, %7:w ; @41 - jmp @15 ; @42 -@10: + mov %2:w, %x.2:w ; @29 + and %2:w, 
0x1_w ; @45 mov %4:w, %x.2:w ; @31 - shr %4:w, 0x1_b ; @40 - mov %x.3:w, %4:w ; @38 - jmp @15 ; @39 -@15: - lea %3:w, qword ptr [%count.2 + 0x1] ; @30 - mov %x.2:w, %x.3:w ; @35 - mov %count.2:w, %3:w ; @36 - jmp @3 ; @37 + shr %4:w, 0x1_b ; @42 + lea %7:w, qword ptr [%x.2 + %x.2*2 + 0x1] ; @34 + mov %8:w, %7:w ; @35 + cmp %2:w, 0x0_w ; @40 + cmove %8:w, %4:w ; @41 + lea %9:w, qword ptr [%count.2 + 0x1] ; @36 + mov %x.2:w, %8:w ; @37 + mov %count.2:w, %9:w ; @38 + jmp @3 ; @39 } diff --git a/src/test/data/opt/collatz.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/collatz.vir.opt.sysv.amd64.regalloc index a0cd04de..788c2a34 100644 --- a/src/test/data/opt/collatz.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/collatz.vir.opt.sysv.amd64.regalloc @@ -2,21 +2,20 @@ module foo export function $foo { ; returns: rax @2: - xor eax, eax ; @58 + xor eax, eax ; @55 @3: - cmp edi, 0x1_w ; @51 - ja @6 ; @52 + cmp edi, 0x1_w ; @48 + ja @6 ; @49 @7: - ret ; @50 + ret ; @47 @6: - test edi, 0x1_w ; @43 - je @10 ; @44 -@11: + mov edx, edi ; @29 + and edx, 0x1_w ; @45 + mov ecx, edi ; @31 + shr ecx, 0x1_b ; @42 lea edi, qword ptr [rdi + rdi*2 + 0x1] ; @34 - jmp @15 ; @42 -@10: - shr edi, 0x1_b ; @40 -@15: - inc eax ; @30 - jmp @3 ; @37 + cmp edx, 0x0_w ; @40 + cmove edi, ecx ; @41 + inc eax ; @36 + jmp @3 ; @39 } diff --git a/src/test/data/opt/contractsel.vir.opt b/src/test/data/opt/contractsel.vir.opt index 48755380..f5a98f58 100644 --- a/src/test/data/opt/contractsel.vir.opt +++ b/src/test/data/opt/contractsel.vir.opt @@ -3,11 +3,8 @@ module foo function w $foo(w %x) { @2: %0 = sgt.w %x, 0x2a_w ; @14 - br %0, @6(0x5_w), @4 -@4: %1 = slt.w %x, 0x0_w ; @15 %2 = sel.w %1, 0x3_w, %x ; @16 - jmp @6(%2) -@6(%y.1): - ret %y.1 + %3 = sel.w %0, 0x5_w, %2 ; @17 + ret %3 } diff --git a/src/test/data/opt/contractsel.vir.opt.sysv.amd64 b/src/test/data/opt/contractsel.vir.opt.sysv.amd64 index fed727aa..4d338f6f 100644 --- a/src/test/data/opt/contractsel.vir.opt.sysv.amd64 +++ 
b/src/test/data/opt/contractsel.vir.opt.sysv.amd64 @@ -3,19 +3,12 @@ module foo function $foo { ; returns: rax @2: mov %x:w, edi ; @14 - cmp %x:w, 0x2a_w ; @28 - jg @17 ; @29 - jmp @4 ; @30 -@17: - mov %y.1:w, 0x5_w ; @26 - jmp @6 ; @27 -@4: mov %2:w, 0x3_w ; @16 cmp %x:w, 0x0_w ; @22 cmovge %2:w, %x:w ; @23 - mov %y.1:w, %2:w ; @20 - jmp @6 ; @21 -@6: - mov eax, %y.1:w ; @18 + mov %3:w, 0x5_w ; @17 + cmp %x:w, 0x2a_w ; @20 + cmovle %3:w, %2:w ; @21 + mov eax, %3:w ; @18 ret ; @19 } diff --git a/src/test/data/opt/contractsel.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/contractsel.vir.opt.sysv.amd64.regalloc index 92b029f2..356588fd 100644 --- a/src/test/data/opt/contractsel.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/contractsel.vir.opt.sysv.amd64.regalloc @@ -2,15 +2,11 @@ module foo function $foo { ; returns: rax @2: - cmp edi, 0x2a_w ; @28 - jle @4 ; @29 -@17: - mov eax, 0x5_w ; @26 - jmp @6 ; @27 -@4: - mov eax, 0x3_w ; @16 + mov ecx, 0x3_w ; @16 cmp edi, 0x0_w ; @22 - cmovge eax, edi ; @23 -@6: + cmovge ecx, edi ; @23 + mov eax, 0x5_w ; @17 + cmp edi, 0x2a_w ; @20 + cmovle eax, ecx ; @21 ret ; @19 } diff --git a/src/test/data/opt/csehoistandmerge2.vir.opt b/src/test/data/opt/csehoistandmerge2.vir.opt index 3e46773d..1c922dce 100644 --- a/src/test/data/opt/csehoistandmerge2.vir.opt +++ b/src/test/data/opt/csehoistandmerge2.vir.opt @@ -2,9 +2,9 @@ module foo export function w $foo(w %x, w %y) { @2: - %0 = slt.w %y, 0x0_w ; @11 - %1 = add.w %x, 0x1_w ; @12 - %3 = sel.w %0, %1, 0x5_w ; @14 - %2 = add.w %3, %1 ; @13 - ret %2 + %1 = slt.w %y, 0x0_w ; @12 + %2 = add.w %x, 0x1_w ; @13 + %3 = sel.w %1, %2, 0x5_w ; @14 + %4 = add.w %3, %2 ; @15 + ret %4 } diff --git a/src/test/data/opt/csehoistandmerge3.vir.opt b/src/test/data/opt/csehoistandmerge3.vir.opt index 79de6569..5b54e671 100644 --- a/src/test/data/opt/csehoistandmerge3.vir.opt +++ b/src/test/data/opt/csehoistandmerge3.vir.opt @@ -2,13 +2,10 @@ module foo export function w $foo(w %x, w %y) { @2: - 
%0 = slt.w %y, 0x0_w ; @11 - %1 = add.w %x, 0x1_w ; @12 - br %0, @6(%1), @4 -@4: + %1 = slt.w %y, 0x0_w ; @12 + %2 = add.w %x, 0x1_w ; @13 %3 = sub.w %x, 0x1_w ; @14 - jmp @6(%3) -@6(%a.1): - %2 = add.w %a.1, %1 ; @13 - ret %2 + %4 = sel.w %1, %2, %3 ; @15 + %5 = add.w %4, %2 ; @16 + ret %5 } diff --git a/src/test/data/opt/csehoistandmergecommute.vir.opt b/src/test/data/opt/csehoistandmergecommute.vir.opt index 9a98d14e..d8aa7d99 100644 --- a/src/test/data/opt/csehoistandmergecommute.vir.opt +++ b/src/test/data/opt/csehoistandmergecommute.vir.opt @@ -2,6 +2,6 @@ module foo export function w $foo(w %x, w %y) { @2: - %1 = add.w %y, %x ; @10 - ret %1 + %2 = add.w %x, %y ; @11 + ret %2 } diff --git a/src/test/data/opt/esc1.vir.opt b/src/test/data/opt/esc1.vir.opt index 6d33a0e6..a4781860 100644 --- a/src/test/data/opt/esc1.vir.opt +++ b/src/test/data/opt/esc1.vir.opt @@ -4,17 +4,17 @@ export function w $foo(w %x) { %a = slot 8, align 8 %b = slot 8, align 8 @2: - %0 = add.w %x, 0x1_w ; @19 - %1 = add.w %x, 0x2_w ; @20 - %2 = add.w %x, 0x3_w ; @21 + %1 = add.w %x, 0x1_w ; @20 + %2 = add.w %x, 0x2_w ; @21 + %3 = add.w %x, 0x3_w ; @22 st.w %x, %a ; @8 - %3 = add.l %a, 0x4_l ; @22 - st.w %0, %3 ; @10 - st.w %1, %b ; @11 - %4 = add.l %b, 0x4_l ; @23 - st.w %2, %4 ; @13 - %5 = slt.w %x, 0x0_w ; @24 - %6 = sel.l %5, %4, %3 ; @25 - %r.1 = ld.w %6 ; @18 + %4 = add.l %a, 0x4_l ; @23 + st.w %1, %4 ; @10 + st.w %2, %b ; @11 + %5 = add.l %b, 0x4_l ; @24 + st.w %3, %5 ; @13 + %6 = slt.w %x, 0x0_w ; @25 + %7 = sel.l %6, %5, %4 ; @26 + %r.1 = ld.w %7 ; @18 ret %r.1 } diff --git a/src/test/data/opt/evenodd.vir.opt b/src/test/data/opt/evenodd.vir.opt index 20bdda3a..c824fcdf 100644 --- a/src/test/data/opt/evenodd.vir.opt +++ b/src/test/data/opt/evenodd.vir.opt @@ -2,37 +2,34 @@ module evenodd function w $abs(w %n) { @2: - %3 = slt.w %n, 0x0_w ; @24 - br %3, @3, @21(%n) -@3: - %4 = neg.w %n ; @25 - jmp @21(%4) -@21(%0): - ret %0 + %4 = slt.w %n, 0x0_w ; @25 + %5 = neg.w %n ; @26 + %6 = 
sel.w %4, %5, %n ; @27 + ret %6 } export function b $even(w %n) { @7: %n.1 = call.w $abs(%n) ; @10 - %5 = eq.w %n.1, 0x0_w ; @26 - br %5, @22(0x1_b), @9 + %7 = eq.w %n.1, 0x0_w ; @28 + br %7, @23(0x1_b), @9 @9: - %6 = sub.w %n.1, 0x1_w ; @27 - %b.1 = call.b $odd(%6) ; @13 - jmp @22(%b.1) -@22(%1): - ret %1 + %8 = sub.w %n.1, 0x1_w ; @29 + %b.1 = call.b $odd(%8) ; @13 + jmp @23(%b.1) +@23(%2): + ret %2 } export function b $odd(w %n) { @14: %n.1 = call.w $abs(%n) ; @17 - %7 = eq.w %n.1, 0x0_w ; @28 - br %7, @23(0x0_b), @16 + %9 = eq.w %n.1, 0x0_w ; @30 + br %9, @24(0x0_b), @16 @16: - %8 = sub.w %n.1, 0x1_w ; @29 - %b.1 = call.b $even(%8) ; @20 - jmp @23(%b.1) -@23(%2): - ret %2 + %10 = sub.w %n.1, 0x1_w ; @31 + %b.1 = call.b $even(%10) ; @20 + jmp @24(%b.1) +@24(%3): + ret %3 } diff --git a/src/test/data/opt/evenodd.vir.opt.sysv b/src/test/data/opt/evenodd.vir.opt.sysv index cb1d7432..16d865e5 100644 --- a/src/test/data/opt/evenodd.vir.opt.sysv +++ b/src/test/data/opt/evenodd.vir.opt.sysv @@ -2,37 +2,34 @@ module evenodd function $abs(w %n/rdi) { @2: - %3 = slt.w %n, 0x0_w ; @24 - br %3, @3, @21(%n) -@3: - %4 = neg.w %n ; @25 - jmp @21(%4) -@21(%0): - ret rax/%0 + %4 = slt.w %n, 0x0_w ; @25 + %5 = neg.w %n ; @26 + %6 = sel.w %4, %5, %n ; @27 + ret rax/%6 } export function $even(w %n/rdi) { @7: %n.1/w/rax = call $abs(%n/rdi) ; @10 - %5 = eq.w %n.1, 0x0_w ; @26 - br %5, @22(0x1_b), @9 + %7 = eq.w %n.1, 0x0_w ; @28 + br %7, @23(0x1_b), @9 @9: - %6 = sub.w %n.1, 0x1_w ; @27 - %b.1/b/rax = call $odd(%6/rdi) ; @13 - jmp @22(%b.1) -@22(%1): - ret rax/%1 + %8 = sub.w %n.1, 0x1_w ; @29 + %b.1/b/rax = call $odd(%8/rdi) ; @13 + jmp @23(%b.1) +@23(%2): + ret rax/%2 } export function $odd(w %n/rdi) { @14: %n.1/w/rax = call $abs(%n/rdi) ; @17 - %7 = eq.w %n.1, 0x0_w ; @28 - br %7, @23(0x0_b), @16 + %9 = eq.w %n.1, 0x0_w ; @30 + br %9, @24(0x0_b), @16 @16: - %8 = sub.w %n.1, 0x1_w ; @29 - %b.1/b/rax = call $even(%8/rdi) ; @20 - jmp @23(%b.1) -@23(%2): - ret rax/%2 + %10 = sub.w 
%n.1, 0x1_w ; @31 + %b.1/b/rax = call $even(%10/rdi) ; @20 + jmp @24(%b.1) +@24(%3): + ret rax/%3 } diff --git a/src/test/data/opt/evenodd.vir.opt.sysv.amd64 b/src/test/data/opt/evenodd.vir.opt.sysv.amd64 index b095ecec..545b965b 100644 --- a/src/test/data/opt/evenodd.vir.opt.sysv.amd64 +++ b/src/test/data/opt/evenodd.vir.opt.sysv.amd64 @@ -2,67 +2,60 @@ module evenodd function $abs { ; returns: rax @2: - mov %n:w, edi ; @24 - test %n:w, %n:w ; @38 - js @3 ; @39 - jmp @30 ; @40 -@30: - mov %0:w, %n:w ; @36 - jmp @21 ; @37 -@3: - mov %4:w, %n:w ; @25 - neg %4:w ; @35 - mov %0:w, %4:w ; @33 - jmp @21 ; @34 -@21: - mov eax, %0:w ; @31 - ret ; @32 + mov %n:w, edi ; @25 + mov %5:w, %n:w ; @26 + neg %5:w ; @36 + mov %6:w, %n:w ; @27 + cmp %n:w, 0x0_w ; @34 + cmovl %6:w, %5:w ; @35 + mov eax, %6:w ; @32 + ret ; @33 } export function $even { ; returns: rax @7: mov %n:w, edi ; @10 - mov edi, %n:w ; @58 - call $abs ; rdi ; @59 - mov %n.1:w, eax ; @60 - test %n.1:w, %n.1:w ; @53 - je @44 ; @54 - jmp @9 ; @55 -@44: - mov %1:b, 0x1_b ; @51 - jmp @22 ; @52 + mov edi, %n:w ; @54 + call $abs ; rdi ; @55 + mov %n.1:w, eax ; @56 + test %n.1:w, %n.1:w ; @49 + je @40 ; @50 + jmp @9 ; @51 +@40: + mov %2:b, 0x1_b ; @47 + jmp @23 ; @48 @9: - lea %6:w, qword ptr [%n.1 - 0x1] ; @27 - mov edi, %6:w ; @13 - call $odd ; rdi ; @49 - mov %b.1:b, al ; @50 - mov %1:b, %b.1:b ; @47 - jmp @22 ; @48 -@22: - mov al, %1:b ; @45 - ret ; @46 + lea %8:w, qword ptr [%n.1 - 0x1] ; @29 + mov edi, %8:w ; @13 + call $odd ; rdi ; @45 + mov %b.1:b, al ; @46 + mov %2:b, %b.1:b ; @43 + jmp @23 ; @44 +@23: + mov al, %2:b ; @41 + ret ; @42 } export function $odd { ; returns: rax @14: mov %n:w, edi ; @17 - mov edi, %n:w ; @75 - call $abs ; rdi ; @76 - mov %n.1:w, eax ; @77 - test %n.1:w, %n.1:w ; @70 - je @61 ; @71 - jmp @16 ; @72 -@61: - xor %2:b, %2:b ; @68 - jmp @23 ; @69 + mov edi, %n:w ; @71 + call $abs ; rdi ; @72 + mov %n.1:w, eax ; @73 + test %n.1:w, %n.1:w ; @66 + je @57 ; @67 + jmp @16 ; @68 +@57: + xor 
%3:b, %3:b ; @64 + jmp @24 ; @65 @16: - lea %8:w, qword ptr [%n.1 - 0x1] ; @29 - mov edi, %8:w ; @20 - call $even ; rdi ; @66 - mov %b.1:b, al ; @67 - mov %2:b, %b.1:b ; @64 - jmp @23 ; @65 -@23: - mov al, %2:b ; @62 - ret ; @63 + lea %10:w, qword ptr [%n.1 - 0x1] ; @31 + mov edi, %10:w ; @20 + call $even ; rdi ; @62 + mov %b.1:b, al ; @63 + mov %3:b, %b.1:b ; @60 + jmp @24 ; @61 +@24: + mov al, %3:b ; @58 + ret ; @59 } diff --git a/src/test/data/opt/evenodd.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/evenodd.vir.opt.sysv.amd64.regalloc index 503dbb1e..405c2107 100644 --- a/src/test/data/opt/evenodd.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/evenodd.vir.opt.sysv.amd64.regalloc @@ -2,47 +2,46 @@ module evenodd function $abs { ; returns: rax @2: - test edi, edi ; @38 - jns @21 ; @39 -@3: - neg edi ; @35 -@21: - mov eax, edi ; @31 - ret ; @32 + mov eax, edi ; @26 + neg eax ; @36 + cmp edi, 0x0_w ; @34 + cmovl edi, eax ; @35 + mov eax, edi ; @32 + ret ; @33 } export function $even { ; returns: rax @7: - push rbp ; @78 - mov rbp, rsp ; @79 - call $abs ; rdi ; @59 - test eax, eax ; @53 - jne @9 ; @54 -@44: - mov al, 0x1_b ; @51 - jmp @22 ; @52 + push rbp ; @74 + mov rbp, rsp ; @75 + call $abs ; rdi ; @55 + test eax, eax ; @49 + jne @9 ; @50 +@40: + mov al, 0x1_b ; @47 + jmp @23 ; @48 @9: - lea edi, qword ptr [rax - 0x1] ; @27 - call $odd ; rdi ; @49 -@22: - leave ; @80 - ret ; @46 + lea edi, qword ptr [rax - 0x1] ; @29 + call $odd ; rdi ; @45 +@23: + leave ; @76 + ret ; @42 } export function $odd { ; returns: rax @14: - push rbp ; @81 - mov rbp, rsp ; @82 - call $abs ; rdi ; @76 - test eax, eax ; @70 - jne @16 ; @71 -@61: - xor al, al ; @68 - jmp @23 ; @69 + push rbp ; @77 + mov rbp, rsp ; @78 + call $abs ; rdi ; @72 + test eax, eax ; @66 + jne @16 ; @67 +@57: + xor al, al ; @64 + jmp @24 ; @65 @16: - lea edi, qword ptr [rax - 0x1] ; @29 - call $even ; rdi ; @66 -@23: - leave ; @83 - ret ; @63 + lea edi, qword ptr [rax - 0x1] ; @31 + call $even ; rdi ; @62 
+@24: + leave ; @79 + ret ; @59 } diff --git a/src/test/data/opt/ifc1.vir b/src/test/data/opt/ifc1.vir new file mode 100644 index 00000000..c2a54be6 --- /dev/null +++ b/src/test/data/opt/ifc1.vir @@ -0,0 +1,202 @@ +module ifc1 + +;; a simple diamond where both blocks are cheap +export function w $simple_diamond(w %x) { +@start: + %c = slt.w %x, 0_w + br %c, @neg, @pos +@neg: + %y = add.w %x, 5_w + jmp @join +@pos: + %y = sub.w %x, 5_w + jmp @join +@join: + ret %y +} + +;; simple triangle +export function w $right_triangle(w %x) { +@start: + %c = slt.w %x, 0_w + br %c, @join, @pos +@pos: + %x = sub.w %x, 5_w + jmp @join +@join: + ret %x +} + +;; other case of a simple triangle +export function w $left_triangle(w %x) { +@start: + %c = slt.w %x, 0_w + br %c, @neg, @join +@neg: + %x = add.w %x, 5_w + jmp @join +@join: + ret %x +} + +;; diamond where both blocks are too expensive +export function w $expensive_diamond(w %x) { +@start: + %c = slt.w %x, 0_w + br %c, @neg, @pos +@neg: + %y = add.w %x, 5_w + %y = mul.w %y, 13_w + jmp @join +@pos: + %y = sub.w %x, 5_w + %y = div.w %y, 3_w + jmp @join +@join: + ret %y +} + +;; diamond with one cheap block and one expensive block +export function w $mixed_cost(w %x) { +@start: + %c = slt.w %x, 0_w + br %c, @cheap, @expensive +@cheap: + %y = add.w %x, 1_w + jmp @join +@expensive: + %y = mul.w %x, 97_w + %y = div.w %y, 7_w + jmp @join +@join: + ret %y +} + +;; TODO: only inner triangle should canonicalize, +;; but currently @join1 has more than two predecessors +export function w $nested_tri(w %x) { +@start: + %c1 = slt.w %x, 0_w + br %c1, @join1, @mid +@mid: + %c2 = eq.w %x, 10_w + br %c2, @join1, @body +@body: + %x = add.w %x, 1_w + jmp @join1 +@join1: + ret %x +} + +;; back-to-back diamonds: make sure that +;; the canonicalization happens in the +;; correct order +export function w $chain(w %x) { +@start: + %c1 = slt.w %x, 0_w + br %c1, @neg1, @pos1 +@neg1: + %x = add.w %x, 1_w + jmp @mid +@pos1: + %x = sub.w %x, 1_w + jmp 
@mid +@mid: + %c2 = slt.w %x, 10_w + br %c2, @neg2, @pos2 +@neg2: + %x = add.w %x, 2_w + jmp @join +@pos2: + %x = sub.w %x, 2_w + jmp @join +@join: + ret %x +} + +;; GVN should collapse %y to %x +export function w $redundant_phi(w %x) { +@start: + %c = slt.w %x, 0_w + br %c, @neg, @pos +@neg: + %y = add.w %x, 0_w + jmp @join +@pos: + %y = add.w %x, 0_w + jmp @join +@join: + ret %y +} + +;; this should canonicalize to a triangle +;; before if-conversion happens +export function w $dead_arm(w %x) { +@start: + %c = slt.w %x, 0_w + br %c, @neg, @pos +@neg: + jmp @join +@pos: + %x = add.w %x, 1_w + jmp @join +@join: + ret %x +} + +;; TODO: the @loop header has more than two +;; predecessors, so we don't canonicalize +export function w $loop_if(w %x) { +@start: + jmp @loop +@loop: + %c1 = slt.w %x, 100_w + br %c1, @body, @exit +@body: + %c2 = slt.w %x, 0_w + br %c2, @neg, @pos +@neg: + %x = add.w %x, 1_w + jmp @join +@pos: + %x = sub.w %x, 1_w + jmp @join +@join: + jmp @loop +@exit: + ret %x +} + +;; %y's dominance of %z must be preserved +export function w $dominance(w %x) { +@start: + %c = slt.w %x, 0_w + br %c, @neg, @pos +@neg: + %y = add.w %x, 1_w + jmp @join +@pos: + %y = add.w %x, 2_w + jmp @join +@join: + %z = mul.w %y, %x + ret %z +} + +;; TODO: handle switch?
+export function w $switch_like(w %x) { +@start: + switch.w %x, @dflt [0_w -> @c0, + 1_w -> @c1] +@c0: + %y = add.w %x, 1_w + jmp @join +@c1: + %y = sub.w %x, 1_w + jmp @join +@dflt: + %y = mul.w %x, 2_w + jmp @join +@join: + ret %y +} diff --git a/src/test/data/opt/ifc1.vir.opt b/src/test/data/opt/ifc1.vir.opt new file mode 100644 index 00000000..8b30df5e --- /dev/null +++ b/src/test/data/opt/ifc1.vir.opt @@ -0,0 +1,145 @@ +module ifc1 + +export function w $simple_diamond(w %x) { +@2: + %8 = slt.w %x, 0x0_w ; @104 + %9 = add.w %x, 0x5_w ; @105 + %10 = sub.w %x, 0x5_w ; @106 + %11 = sel.w %8, %9, %10 ; @107 + ret %11 +} + +export function w $right_triangle(w %x) { +@9: + %12 = slt.w %x, 0x0_w ; @108 + %13 = sub.w %x, 0x5_w ; @109 + %14 = sel.w %12, %x, %13 ; @110 + ret %14 +} + +export function w $left_triangle(w %x) { +@14: + %15 = slt.w %x, 0x0_w ; @111 + %16 = add.w %x, 0x5_w ; @112 + %17 = sel.w %15, %16, %x ; @113 + ret %17 +} + +export function w $expensive_diamond(w %x) { +@19: + %18 = slt.w %x, 0x0_w ; @114 + br %18, @20, @21 +@20: + %19 = add.w %x, 0x5_w ; @115 + %20 = lsl.w %19, 0x4_w ; @116 + %21 = lsl.w %19, 0x1_w ; @117 + %22 = sub.w %20, %21 ; @118 + %23 = sub.w %22, %19 ; @119 + jmp @23(%23) +@21: + %25 = sub.w %x, 0x5_w ; @121 + %26 = mulh.w %25, 0x55555556_w ; @122 + %27 = lsr.w %26, 0x1f_w ; @123 + %28 = add.w %26, %27 ; @124 + jmp @23(%28) +@23(%y.1): + ret %y.1 +} + +export function w $mixed_cost(w %x) { +@28: + %29 = slt.w %x, 0x0_w ; @125 + br %29, @29, @30 +@29: + %30 = add.w %x, 0x1_w ; @126 + jmp @32(%30) +@30: + %31 = lsl.w %x, 0x7_w ; @127 + %32 = lsl.w %x, 0x5_w ; @128 + %33 = sub.w %32, %x ; @129 + %34 = sub.w %31, %33 ; @130 + %35 = div.w %34, 0x7_w ; @131 + jmp @32(%35) +@32(%y.1): + ret %y.1 +} + +export function w $nested_tri(w %x) { +@36: + %41 = slt.w %x, 0x0_w ; @137 + br %41, @37(%x), @38 +@38: + %42 = eq.w %x, 0xa_w ; @138 + br %42, @37(%x), @40 +@40: + %43 = add.w %x, 0x1_w ; @139 + jmp @37(%43) +@37(%x.1): + ret %x.1 +} + 
+export function w $chain(w %x) { +@43: + %44 = slt.w %x, 0x0_w ; @140 + %45 = add.w %x, 0x1_w ; @141 + %46 = sub.w %x, 0x1_w ; @142 + %47 = sel.w %44, %45, %46 ; @143 + %48 = slt.w %47, 0xa_w ; @144 + %49 = add.w %47, 0x2_w ; @145 + %50 = sub.w %47, 0x2_w ; @146 + %51 = sel.w %48, %49, %50 ; @147 + ret %51 +} + +export function w $redundant_phi(w %x) { +@56: + ret %x +} + +export function w $dead_arm(w %x) { +@63: + %53 = slt.w %x, 0x0_w ; @149 + %54 = add.w %x, 0x1_w ; @150 + %55 = sel.w %53, %x, %54 ; @151 + ret %55 +} + +export function w $loop_if(w %x) { +@69: + jmp @70(%x) +@70(%x.1): + %56 = slt.w %x.1, 0x64_w ; @152 + br %56, @71, @72 +@71: + %57 = slt.w %x.1, 0x0_w ; @153 + br %57, @74, @75 +@74: + %58 = add.w %x.1, 0x1_w ; @154 + jmp @70(%58) +@75: + %59 = sub.w %x.1, 0x1_w ; @155 + jmp @70(%59) +@72: + ret %x.1 +} + +export function w $dominance(w %x) { +@80: + %60 = slt.w %x, 0x0_w ; @156 + %61 = add.w %x, 0x1_w ; @157 + %62 = add.w %x, 0x2_w ; @158 + %63 = sel.w %60, %61, %62 ; @159 + %64 = mul.w %63, %x ; @160 + ret %64 +} + +export function w $switch_like(w %x) { +@88: + switch.w %x, @89 [0x0_w -> @92(0x1_w), + 0x1_w -> @92(0x0_w)] +@89: + %65 = lsl.w %x, 0x1_w ; @161 + jmp @92(%65) +@92(%y.1): + ret %y.1 +} diff --git a/src/test/data/opt/palindrome.vir.opt b/src/test/data/opt/palindrome.vir.opt index b2b1df22..3bcc02eb 100644 --- a/src/test/data/opt/palindrome.vir.opt +++ b/src/test/data/opt/palindrome.vir.opt @@ -7,18 +7,19 @@ export function b $palindrome(w %n) { %0 = ne.w %n.1, 0x0_w ; @15 br %0, @6, @7 @6: - %1 = mulh.w %n.1, 0x1999999a_w ; @16 - %2 = lsr.w %1, 0x1f_w ; @17 - %3 = add.w %1, %2 ; @18 - %4 = mul.w %3, 0xa_w ; @19 - %5 = sub.w %n.1, %4 ; @20 - %11 = lsl.w %rev.2, 0x3_w ; @26 - %12 = lsl.w %rev.2, 0x1_w ; @27 - %13 = add.w %11, %12 ; @28 - %14 = add.w %13, %5 ; @29 - jmp @3(%14, %3) + %1 = mulh.w %n.1, 0x66666667_w ; @16 + %2 = asr.w %1, 0x2_w ; @17 + %3 = lsr.w %1, 0x1f_w ; @18 + %4 = add.w %2, %3 ; @19 + %5 = mul.w %4, 0xa_w ; @20 
+ %6 = sub.w %n.1, %5 ; @21 + %12 = lsl.w %rev.2, 0x3_w ; @27 + %13 = lsl.w %rev.2, 0x1_w ; @28 + %14 = add.w %12, %13 ; @29 + %15 = add.w %14, %6 ; @30 + jmp @3(%15, %4) @7: - %17 = eq.w %n, %rev.2 ; @32 - %18 = flag.b %17 ; @33 - ret %18 + %18 = eq.w %n, %rev.2 ; @33 + %19 = flag.b %18 ; @34 + ret %19 } diff --git a/src/test/data/opt/palindrome.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/palindrome.vir.opt.sysv.amd64.regalloc index b703877a..770918d8 100644 --- a/src/test/data/opt/palindrome.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/palindrome.vir.opt.sysv.amd64.regalloc @@ -2,29 +2,30 @@ module palindrome export function $palindrome { ; returns: rax @2: - mov ecx, edi ; @50 - xor eax, eax ; @51 - mov edx, ecx ; @52 + mov ecx, edi ; @52 + xor eax, eax ; @53 + mov edx, ecx ; @54 @3: - test edx, edx ; @45 - jne @6 ; @46 + test edx, edx ; @47 + jne @6 ; @48 @7: - xor edx, edx ; @32 - cmp ecx, eax ; @43 - sete dl ; @44 - mov al, dl ; @41 - ret ; @42 + xor edx, edx ; @33 + cmp ecx, eax ; @45 + sete dl ; @46 + mov al, dl ; @43 + ret ; @44 @6: - imul rsi, rdx, 0x1999999a ; @16 - shr rsi, 0x20_b ; @40 - mov edi, esi ; @17 - shr edi, 0x1f_b ; @39 - add esi, edi ; @18 - imul edi, esi, 0xa ; @19 - sub edx, edi ; @38 - lea edi, qword ptr [rax + rax*1] ; @27 - lea eax, qword ptr [rdi + rax*8] ; @28 - add eax, edx ; @29 - mov edx, esi ; @35 - jmp @3 ; @36 + imul rdi, rdx, 0x66666667 ; @16 + shr rdi, 0x20_b ; @42 + mov esi, edi ; @17 + sar esi, 0x2_b ; @41 + shr edi, 0x1f_b ; @40 + add esi, edi ; @19 + imul edi, esi, 0xa ; @20 + sub edx, edi ; @39 + lea edi, qword ptr [rax + rax*1] ; @28 + lea eax, qword ptr [rdi + rax*8] ; @29 + add eax, edx ; @30 + mov edx, esi ; @36 + jmp @3 ; @37 } diff --git a/src/test/data/opt/sdivby11.vir.opt b/src/test/data/opt/sdivby11.vir.opt index 1f780ab0..04364413 100644 --- a/src/test/data/opt/sdivby11.vir.opt +++ b/src/test/data/opt/sdivby11.vir.opt @@ -2,8 +2,9 @@ module foo export function w $foo(w %x) { @2: - %0 = mulh.w %x, 
0x1745d175_w ; @4 - %1 = lsr.w %0, 0x1f_w ; @5 - %2 = add.w %0, %1 ; @6 - ret %2 + %0 = mulh.w %x, 0x2e8ba2e9_w ; @4 + %1 = asr.w %0, 0x1_w ; @5 + %2 = lsr.w %0, 0x1f_w ; @6 + %3 = add.w %1, %2 ; @7 + ret %3 } diff --git a/src/test/data/opt/sdivbyn5.vir.opt b/src/test/data/opt/sdivbyn5.vir.opt index 053eac1b..dabcb3a5 100644 --- a/src/test/data/opt/sdivbyn5.vir.opt +++ b/src/test/data/opt/sdivbyn5.vir.opt @@ -2,8 +2,9 @@ module foo export function w $foo(w %x) { @2: - %0 = mulh.w %x, 0xcccccccc_w ; @4 - %1 = lsr.w %0, 0x1f_w ; @5 - %2 = add.w %0, %1 ; @6 - ret %2 + %0 = mulh.w %x, 0x99999999_w ; @4 + %1 = asr.w %0, 0x1_w ; @5 + %2 = lsr.w %0, 0x1f_w ; @6 + %3 = add.w %1, %2 ; @7 + ret %3 } diff --git a/src/test/data/opt/sdivbyn5.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/sdivbyn5.vir.opt.sysv.amd64.regalloc index 9396cab1..302db54c 100644 --- a/src/test/data/opt/sdivbyn5.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/sdivbyn5.vir.opt.sysv.amd64.regalloc @@ -2,11 +2,12 @@ module foo export function $foo { ; returns: rax @2: - mov eax, 0xcccccccc_w ; @10 - imul rax, rdi ; @11 - shr rax, 0x20_b ; @12 + mov eax, 0x99999999_w ; @12 + imul rax, rdi ; @13 + shr rax, 0x20_b ; @14 mov ecx, eax ; @5 - shr ecx, 0x1f_b ; @9 - add eax, ecx ; @6 - ret ; @8 + sar ecx, 0x1_b ; @11 + shr eax, 0x1f_b ; @10 + add eax, ecx ; @7 + ret ; @9 } diff --git a/src/test/data/opt/sink1.vir.opt b/src/test/data/opt/sink1.vir.opt index 8d310f66..b1ebe026 100644 --- a/src/test/data/opt/sink1.vir.opt +++ b/src/test/data/opt/sink1.vir.opt @@ -2,13 +2,10 @@ module sink1 export function w $foo(w %x) { @2: - %2 = eq.w %x, 0x0_w ; @11 - br %2, @3, @9(0x0_w) -@3: %3 = add.w %x, 0x1_w ; @12 - %4 = eq.w %3, 0x1_w ; @13 - %5 = flag.w %4 ; @14 - jmp @9(%5) -@9(%0): - ret %0 + %4 = eq.w %x, 0x0_w ; @13 + %5 = eq.w %3, 0x1_w ; @14 + %6 = flag.w %5 ; @15 + %7 = sel.w %4, %6, 0x0_w ; @16 + ret %7 } diff --git a/src/test/data/opt/sinking.vir.opt b/src/test/data/opt/sinking.vir.opt index 
efd831a0..0adddeef 100644 --- a/src/test/data/opt/sinking.vir.opt +++ b/src/test/data/opt/sinking.vir.opt @@ -2,13 +2,10 @@ module foo export function w $foo(w %x, w %y) { @2: - %0 = add.w %x, 0x1_w ; @11 - %3 = slt.w %x, 0x0_w ; @14 - br %3, @3, @8(%y) -@3: - %4 = lsl.w %0, 0x1_w ; @15 - %5 = add.w %4, %y ; @16 - jmp @8(%5) -@8(%b.1): - ret %b.1 + %1 = add.w %x, 0x1_w ; @12 + %2 = lsl.w %1, 0x1_w ; @13 + %5 = slt.w %x, 0x0_w ; @16 + %6 = add.w %2, %y ; @17 + %8 = sel.w %5, %6, %y ; @19 + ret %8 } diff --git a/src/test/data/opt/sremby7.vir.opt b/src/test/data/opt/sremby7.vir.opt index f0a40cbd..3a60c5e0 100644 --- a/src/test/data/opt/sremby7.vir.opt +++ b/src/test/data/opt/sremby7.vir.opt @@ -2,11 +2,12 @@ module foo export function w $foo(w %x) { @2: - %0 = mulh.w %x, 0x24924925_w ; @4 - %1 = lsr.w %0, 0x1f_w ; @5 - %2 = add.w %0, %1 ; @6 - %3 = lsl.w %2, 0x3_w ; @7 - %4 = sub.w %x, %3 ; @8 - %5 = add.w %4, %2 ; @9 - ret %5 + %0 = mulh.w %x, 0x92492493_w ; @4 + %1 = add.w %0, %x ; @5 + %2 = asr.w %1, 0x2_w ; @6 + %3 = lsr.w %1, 0x1f_w ; @7 + %4 = add.w %2, %3 ; @8 + %5 = mul.w %4, 0x7_w ; @9 + %6 = sub.w %x, %5 ; @10 + ret %6 } diff --git a/src/test/data/opt/sremby7.vir.opt.sysv.amd64.regalloc b/src/test/data/opt/sremby7.vir.opt.sysv.amd64.regalloc index abac14f4..8e2409f1 100644 --- a/src/test/data/opt/sremby7.vir.opt.sysv.amd64.regalloc +++ b/src/test/data/opt/sremby7.vir.opt.sysv.amd64.regalloc @@ -2,14 +2,16 @@ module foo export function $foo { ; returns: rax @2: - imul rax, rdi, 0x24924925 ; @16 - shr rax, 0x20_b ; @17 - mov ecx, eax ; @5 - shr ecx, 0x1f_b ; @15 - add eax, ecx ; @6 - mov ecx, eax ; @7 - shl ecx, 0x3_b ; @14 - sub edi, ecx ; @13 - add eax, edi ; @9 - ret ; @12 + mov eax, edi ; @4 + mov ecx, 0x92492493_w ; @18 + imul rcx, edi ; @19 + shr rcx, 0x20_b ; @20 + lea edx, qword ptr [rcx + rax*1] ; @5 + mov ecx, edx ; @6 + sar ecx, 0x2_b ; @17 + shr edx, 0x1f_b ; @16 + add ecx, edx ; @8 + imul ecx, ecx, 0x7 ; @9 + sub eax, ecx ; @15 + ret ; @14 } diff 
--git a/src/test/data/opt/switchsimpl.vir.opt b/src/test/data/opt/switchsimpl.vir.opt index 79defcc1..3ecf1365 100644 --- a/src/test/data/opt/switchsimpl.vir.opt +++ b/src/test/data/opt/switchsimpl.vir.opt @@ -3,10 +3,7 @@ module foo export function w $foo(w %x) { @2: %2 = eq.w %x, 0x1_w ; @9 - br %2, @7(0x2_w), @3 -@3: %3 = sub.w %x, 0x1_w ; @10 - jmp @7(%3) -@7(%0): - ret %0 + %4 = sel.w %2, 0x2_w, %3 ; @11 + ret %4 } diff --git a/src/test/test_opt.ml b/src/test/test_opt.ml index 84da395f..5bc123f0 100644 --- a/src/test/test_opt.ml +++ b/src/test/test_opt.ml @@ -333,6 +333,7 @@ let opt_suite = "Test optimizations" >::: [ "Bad load 1" >:: test "badload1"; "Bad load 2" >:: test "badload2"; "Binary search" >:: test "bsearch"; + "If-conversion 1" >:: test "ifc1"; ] let abi_suite = "Test ABI lowering" >::: [