From 554d6d9e7f9b0292c92593830132a300650590c0 Mon Sep 17 00:00:00 2001 From: Samuel Gruetter Date: Tue, 2 Nov 2021 19:58:20 -0400 Subject: [PATCH 1/4] run bedrock2-generated RISC-V code for the SHA256 driver in verilator simulation functional test --- silveroak-opentitan/hmac/end2end/README.md | 183 ++++++++++++++++++ silveroak-opentitan/hmac/sw/.gitignore | 5 + silveroak-opentitan/hmac/sw/Makefile | 53 +++-- silveroak-opentitan/hmac/sw/b2_sha256.h | 1 + .../hmac/sw/b2_sha256_HexOutput.v | 14 ++ third_party/opentitan | 2 +- 6 files changed, 246 insertions(+), 12 deletions(-) create mode 100644 silveroak-opentitan/hmac/end2end/README.md create mode 100644 silveroak-opentitan/hmac/sw/.gitignore create mode 100644 silveroak-opentitan/hmac/sw/b2_sha256.h create mode 100644 silveroak-opentitan/hmac/sw/b2_sha256_HexOutput.v diff --git a/silveroak-opentitan/hmac/end2end/README.md b/silveroak-opentitan/hmac/end2end/README.md new file mode 100644 index 000000000..6cdf1d289 --- /dev/null +++ b/silveroak-opentitan/hmac/end2end/README.md @@ -0,0 +1,183 @@ +### Running OpenTitan SHA256 in Simulation + +First, set up your system as described in https://docs.opentitan.org/doc/ug/install_instructions/. +On Fedora 34, the required system packages can be installed using + +``` +sudo dnf install autoconf bison @development-tools clang-tools-extra cmake curl \ + doxygen flex g++ git golang elfutils-libelf elfutils-libelf-devel \ + libftdi libftdi-devel libusbx redhat-lsb make ncurses ninja-build \ + openssl-libs openssl-devel perl pkgconf python3 python3-pip \ + python3-setuptools python3-wheel srecord tree libxslt zlib-devel xz +``` + +Then, run the following setup commands inside `third_party/opentitan`: + +``` +pip3 install --user -r python-requirements.txt # they changed, so it's not sufficient to have run this in the outdated opentitan submodule of silveroak +./meson_init.sh +fusesoc --cores-root . run --flag=fileset_top --target=sim --setup --build lowrisc:dv:chip_verilator_sim +``` + +And compile the OpenTitan software: + +``` +ninja -C build-out all +``` + +Then, to run the unmodified OpenTitan SHA256 functional test simulation: + +``` +build/lowrisc_dv_chip_verilator_sim_0.1/sim-verilator/Vchip_sim_tb \ + --meminit=rom,build-out/sw/device/boot_rom/boot_rom_sim_verilator.scr.39.vmem \ + --meminit=otp,build-out/sw/device/otp_img/otp_img_sim_verilator.vmem \ + --meminit=flash,build-out/sw/device/silicon_creator/testing/sw_silicon_creator_lib_driver_hmac_functest_sim_verilator.elf +``` + +### Replacing the OpenTitan SHA256 driver by the bedrock2 SHA256 driver + +First, compile all Coq sources by running `make` in the toplevel silveroak directory. + +Then, in `silveroak/silveroak-opentitan/hmac/sw/`, run + +``` +make b2_sha256.a +``` + +to produce the Coq-generated RISC-V instructions (`b2_sha256.hex`) and transform them into an object file `b2_sha256.o`, which you can inspect using + +``` +/tools/riscv/bin/riscv32-unknown-elf-objdump -Dt b2_sha256.o +``` + +Note: the above `make` command actually produces an archive file (`.a`) containing just one single `.o` file, because the meson build system only looks for `.a` files. + +Then, in `silveroak/third_party/opentitan/sw/device/silicon_creator/lib/drivers/meson.build`, replace the definition + +``` +# Mask ROM hmac driver +sw_silicon_creator_lib_driver_hmac = declare_dependency( + link_with: static_library( + 'sw_silicon_creator_lib_driver_hmac', + sources: [ + hw_ip_hmac_reg_h, + 'hmac.c', + ], + dependencies: [ + sw_silicon_creator_lib_base_abs_mmio, + ], + ), +) +``` + +by + +``` +c = meson.get_compiler('c') +libdir = meson.current_source_dir() + '/../../../../../../../silveroak-opentitan/hmac/sw/' +message(libdir) +precompiled_dep = c.find_library('b2_sha256', dirs : [libdir]) +sw_silicon_creator_lib_driver_hmac = declare_dependency( + link_with: static_library( + 'sw_silicon_creator_lib_driver_hmac', + sources: [ + libdir + 'b2_sha256.h' + ], + dependencies: [ + precompiled_dep + ], + ), +) +``` + +and in `silveroak/third_party/opentitan/sw/device/silicon_creator/lib/drivers/hmac_functest.c`, in function `hmac_test`, replace + +``` + hmac_sha256_init(); + RETURN_IF_ERROR( + hmac_sha256_update(kGettysburgPrelude, sizeof(kGettysburgPrelude) - 1)); + + hmac_digest_t digest; + RETURN_IF_ERROR(hmac_sha256_final(&digest)); +``` + +by + +``` + hmac_digest_t digest; + b2_sha256(&digest, kGettysburgPrelude, sizeof(kGettysburgPrelude) - 1); +``` + +and at the top of that file, after the `#include`s, add + +``` +#include "../../silveroak-opentitan/hmac/sw/b2_sha256.h" +``` + +and then in `silveroak/third_party/opentitan`, run + +``` +./meson_init.sh -r +``` + +to reconfigure the build directories to reflect that change, and rebuild just the SHA256 functest: + +``` +ninja -C build-out sw/device/silicon_creator/testing/sw_silicon_creator_lib_driver_hmac_functest_sim_verilator.elf +``` + +And now that the elf file contains the bedrock2 driver instead of the OpenTitan driver, run the Verilator simulation again: + +``` +build/lowrisc_dv_chip_verilator_sim_0.1/sim-verilator/Vchip_sim_tb \ + --meminit=rom,build-out/sw/device/boot_rom/boot_rom_sim_verilator.scr.39.vmem \ + --meminit=otp,build-out/sw/device/otp_img/otp_img_sim_verilator.vmem \ + --meminit=flash,build-out/sw/device/silicon_creator/testing/sw_silicon_creator_lib_driver_hmac_functest_sim_verilator.elf +``` + +It should still say that the test passed. + +But now, let's also check that we're testing the right thing, i.e. let's deliberately break the test: + +For instance, in `silveroak/silveroak-opentitan/hmac/sw/Hmac.v`, in the last function, replace + +``` + while (i < 8) { +``` + +by + +``` + while (i < 7) { +``` + +and in `silveroak/silveroak-opentitan/hmac/sw`, run `make` again. +This will break a proof in `HmacProperties.v` +At the line where it breaks, write `Admitted.`, and comment out (using `(*` and `*)`) the rest of the proof (but leave the `End Proofs.` in place). +Then, run `make` in `silveroak/silveroak-opentitan/hmac/sw` again, and also run `make b2_sha256.a` again. + +Optionally, it can be interesting to use a diff tool to see that the `objdump` output changed in exactly one line. + +Then, in `silveroak/third_party/opentitan/sw/device/silicon_creator/lib/drivers/hmac_functest.c`, add some spaces at the end of a line and save it again, to make sure ninja will not assume that there were no changes (it doesn't seem to track changes to external libraries like `b2_sha256.a`). + +Then, in `silveroak/third_party/opentitan`, run the ninja and verilator commands again: + +``` +ninja -C build-out sw/device/silicon_creator/testing/sw_silicon_creator_lib_driver_hmac_functest_sim_verilator.elf +``` + +``` +build/lowrisc_dv_chip_verilator_sim_0.1/sim-verilator/Vchip_sim_tb \ + --meminit=rom,build-out/sw/device/boot_rom/boot_rom_sim_verilator.scr.39.vmem \ + --meminit=otp,build-out/sw/device/otp_img/otp_img_sim_verilator.vmem \ + --meminit=flash,build-out/sw/device/silicon_creator/testing/sw_silicon_creator_lib_driver_hmac_functest_sim_verilator.elf +``` + +Now it should say that the tests failed. + + +### Notes + +* It seems that files are first compiled and placed into `build-out`, and then copied to `build-bin`, but if the copying doesn't take place, `build-bin` still contains outdated files, so we use `build-bin` everywhere above just to be safe. +* These instructions were tested using OpenTitan as a git submodule at `third_party/opentitan`, at commit `ddcfe93f1f669f192ff4a33f1c19d827663a921f` (Oct 26, 2021). +* `LOG_INFO` calls in `hmac_functest.c` are logged in `opentitan/uart0.log`. Changing the string passed to `LOG_INFO`, running the simulation again, and checking `uart0.log` can help you convince yourself that the most recent version was run. diff --git a/silveroak-opentitan/hmac/sw/.gitignore b/silveroak-opentitan/hmac/sw/.gitignore new file mode 100644 index 000000000..265fbd836 --- /dev/null +++ b/silveroak-opentitan/hmac/sw/.gitignore @@ -0,0 +1,5 @@ +*.c.out +*.hex +*.bin +*.o +*.a diff --git a/silveroak-opentitan/hmac/sw/Makefile b/silveroak-opentitan/hmac/sw/Makefile index 2cab82544..b51708fdc 100644 --- a/silveroak-opentitan/hmac/sw/Makefile +++ b/silveroak-opentitan/hmac/sw/Makefile @@ -18,17 +18,22 @@ all: coq -_CoqProject: $(shell find . -name '*.v' | sort) +# Note: make does not interpret "\n", and this is intended (printf will interpret them) +DEPS_NL=-R . HmacSoftware +DEPS_NL+=\n-R ../Spec HmacSpec +DEPS_NL+=\n-R ../../../firmware Bedrock2Experiments +DEPS_NL+=\n-R ../../../cava/Cava Cava +DEPS_NL+=\n-R ../../../third_party/coq-ext-lib/theories ExtLib +DEPS_NL+=\n-R ../../../third_party/bedrock2/bedrock2/src/bedrock2 bedrock2 +DEPS_NL+=\n-R ../../../third_party/bedrock2/compiler/src/compiler compiler +DEPS_NL+=\n-R ../../../third_party/bedrock2/deps/coqutil/src/coqutil coqutil +DEPS_NL+=\n-R ../../../third_party/bedrock2/deps/riscv-coq/src/riscv riscv + +DEPS=$(subst \n, ,$(DEPS_NL)) + +_CoqProject: $(shell find . -name '*.v' -and -not -name '*_HexOutput.v' | sort) @echo "INSTALLDEFAULTROOT = HmacSoftware" > _CoqProject - @echo "-R . HmacSoftware" >> _CoqProject - @echo "-R ../Spec HmacSpec" >> _CoqProject - @echo "-R ../../../firmware Bedrock2Experiments" >> _CoqProject - @echo "-R ../../../cava/Cava Cava" >> _CoqProject - @echo "-R ../../../third_party/coq-ext-lib/theories ExtLib" >> _CoqProject - @echo "-R ../../../third_party/bedrock2/bedrock2/src/bedrock2 bedrock2" >> _CoqProject - @echo "-R ../../../third_party/bedrock2/compiler/src/compiler compiler" >> _CoqProject - @echo "-R ../../../third_party/bedrock2/deps/coqutil/src/coqutil coqutil" >> _CoqProject - @echo "-R ../../../third_party/bedrock2/deps/riscv-coq/src/riscv riscv" >> _CoqProject + @printf -- '$(DEPS_NL)\n' >> _CoqProject @printf '%s\n' $^ >> _CoqProject Makefile.coq: _CoqProject @@ -44,6 +49,32 @@ minimize-requires: coq %.vo: Makefile.coq $(MAKE) -f Makefile.coq $@ -clean: +clean: -@$(MAKE) -f Makefile.coq clean rm -rf _CoqProject Makefile.coq Makefile.coq.conf + +.PRECIOUS: %.vo %.o %.hex %.s %.bin + +COQC?=coqc +COQC_OPTIONS=$(DEPS) + +TOOLCHAIN_BIN?=/tools/riscv/bin/ +CROSS_COMPILE?=$(TOOLCHAIN_BIN)riscv32-unknown-elf- +RISCV_AR?=$(CROSS_COMPILE)ar +RISCV_LD?=$(CROSS_COMPILE)ld + +%.hex: %_HexOutput.v coq + $(COQC) $(COQC_OPTIONS) $< > $@ + +%.bin: %.hex + xxd -r -p < $< > $@ + +# Turn a binary (-b binary) file into a relocatable (-r) object file +# with symbol (--defsym) named the same as the file but without extension ($*), +# referencing the start of the file (.) +%.o: %.bin + $(RISCV_LD) -r -b binary --defsym $*=. -o $@ $< + +# Turn an object file into an archive file ("library") because that's what the meson build system looks for +%.a: %.o + $(RISCV_AR) rs $@ $< diff --git a/silveroak-opentitan/hmac/sw/b2_sha256.h b/silveroak-opentitan/hmac/sw/b2_sha256.h new file mode 100644 index 000000000..5084102ad --- /dev/null +++ b/silveroak-opentitan/hmac/sw/b2_sha256.h @@ -0,0 +1 @@ +void b2_sha256(const void *a_outp, const void *a_inp, size_t len); diff --git a/silveroak-opentitan/hmac/sw/b2_sha256_HexOutput.v b/silveroak-opentitan/hmac/sw/b2_sha256_HexOutput.v new file mode 100644 index 000000000..24506f667 --- /dev/null +++ b/silveroak-opentitan/hmac/sw/b2_sha256_HexOutput.v @@ -0,0 +1,14 @@ +Require bedrock2.Hexdump. +Require Import HmacSoftware.Sha256ToRiscV. + +(* Note: this must be Coq.Init.Byte.byte, not coqutil.Byte.byte, + which is a Notation for `(Coq.Init.Byte.byte: Type)` and doesn't + work with bedrock2.Hexdump. *) +Definition as_bytes: list Coq.Init.Byte.byte := Pipeline.instrencode sha256_asm. + +Module PrintBytes. + Import bedrock2.Hexdump. + Local Open Scope hexdump_scope. + Set Printing Width 100. + Goal True. let x := eval vm_compute in as_bytes in idtac x. Abort. +End PrintBytes. diff --git a/third_party/opentitan b/third_party/opentitan index 783edaf44..ddcfe93f1 160000 --- a/third_party/opentitan +++ b/third_party/opentitan @@ -1 +1 @@ -Subproject commit 783edaf444eb0d9eaf9df71c785089bffcda574e +Subproject commit ddcfe93f1f669f192ff4a33f1c19d827663a921f From 762247883a06fd1d7ed25d0997c34bd0d60ebf00 Mon Sep 17 00:00:00 2001 From: Samuel Gruetter Date: Wed, 3 Nov 2021 10:29:03 -0400 Subject: [PATCH 2/4] disable pinmux/OpenTitan integration because it's in Cava1 and not compatible any more with newer OpenTitan version --- silveroak-opentitan/pinmux/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/silveroak-opentitan/pinmux/Makefile b/silveroak-opentitan/pinmux/Makefile index 40b667562..3936668f4 100644 --- a/silveroak-opentitan/pinmux/Makefile +++ b/silveroak-opentitan/pinmux/Makefile @@ -16,6 +16,8 @@ .PHONY: all coq minimize-requires clean compile_pinmux _CoqProject +all_except_compile_pinmux: coq pinmux.sv + all: coq pinmux.sv compile_pinmux VERILATOR = verilator +1800-2012ext+sv verilator.vlt From 5b28c6864fa38bf8faae8aaa2fe8e928f106d43c Mon Sep 17 00:00:00 2001 From: Samuel Gruetter Date: Wed, 3 Nov 2021 19:24:47 -0400 Subject: [PATCH 3/4] express device_implements_state_machine using only run1, without using runUntilResp, so that each condition talks only about one cycle, which should be closer to Cava specs --- firmware/IncrementWait/CavaIncrementDevice.v | 77 +++++++- .../RunIncrementWaitSoftwareOnCava.v | 4 +- .../InternalMMIOMachine.v | 12 +- .../RiscvMachineWithCavaDevice/MMIOToCava.v | 175 ++++++++++++++---- .../hmac/end2end/CavaHmacDevice.v | 2 +- 5 files changed, 215 insertions(+), 55 deletions(-) diff --git a/firmware/IncrementWait/CavaIncrementDevice.v b/firmware/IncrementWait/CavaIncrementDevice.v index cd46ee314..5686bf24d 100644 --- a/firmware/IncrementWait/CavaIncrementDevice.v +++ b/firmware/IncrementWait/CavaIncrementDevice.v @@ -140,6 +140,7 @@ Require Import coqutil.Map.Interface coqutil.Map.Properties. Require Import coqutil.Word.Interface coqutil.Word.Properties. Require Import coqutil.Tactics.Tactics. Require Import coqutil.Tactics.Simp. +Require Import coqutil.Tactics.fwd. Require Import bedrock2.ZnWords. @@ -186,7 +187,11 @@ Section WithParameters. device.run1 s i := Semantics.step incr s (i, tt); device.addr_range_start := INCR_BASE_ADDR; device.addr_range_pastend := INCR_END_ADDR; - device.maxRespDelay := 1; + device.maxRespDelay '((istate, (val, tl_d2h)), tlul_state) h2d := + (* if the value register was requested, and we're in state Busy1, it will take one + more cycle to respond, else we will respond immediately *) + if ((a_address h2d mod 8 =? 0(*register address=VALUE*))%N && (istate =? 1 (*Busy1*))%N)%bool + then 1%nat else 0%nat; |}. (* conservative upper bound matching the instance given in IncrementWaitToRiscv *) @@ -258,6 +263,42 @@ Section WithParameters. Lemma N_to_word_word_to_N: forall v, N_to_word (word_to_N v) = v. Proof. intros. unfold N_to_word, word_to_N. ZnWords. Qed. +(* TODO move to coqutil *) +Ltac contradictory H := + lazymatch type of H with + | ?x <> ?x => exfalso; apply (H eq_refl) + | False => case H + end. +Require Import coqutil.Tactics.autoforward. +Ltac fwd_step ::= + match goal with + | H: ?T |- _ => is_destructible_and T; destr_and H + | H: exists y, _ |- _ => let yf := fresh y in destruct H as [yf H] + | H: ?x = ?x |- _ => clear H + | H: True |- _ => clear H + | H: ?LHS = ?RHS |- _ => + let h1 := head_of_app LHS in is_constructor h1; + let h2 := head_of_app RHS in is_constructor h2; + (* if not eq, H is a contradiction, but we don't want to change the number + of open goals in this tactic *) + constr_eq h1 h2; + (* we don't use `inversion H` or `injection H` because they unfold definitions *) + inv_rec LHS RHS; + clear H + | E: ?x = ?RHS |- context[match ?x with _ => _ end] => + let h := head_of_app RHS in is_constructor h; rewrite E in * + | H: context[match ?x with _ => _ end], E: ?x = ?RHS |- _ => + let h := head_of_app RHS in is_constructor h; rewrite E in * + | H: context[match ?x with _ => _ end] |- _ => + (* note: recursive invocation of fwd_step for contradictory cases *) + destr x; try solve [repeat fwd_step; contradictory H]; [] + | H: _ |- _ => autoforward with typeclass_instances in H + | |- _ => progress subst + | |- _ => progress fwd_rewrites + end. + + Axiom TODO: False. + (* Set Printing All. *) Global Instance cava_counter_satisfies_state_machine: device_implements_state_machine counter_device increment_wait_state_machine. @@ -283,18 +324,32 @@ Section WithParameters. inversion H0; subst; try (rewrite incrN_word_to_bv); try (constructor; try lia; simpl; boolsimpl; ssplit; reflexivity). - - (* state_machine_read_to_device_read: *) - (* simpler because device.maxRespDelay=1 *) - unfold device.maxRespDelay, device.runUntilResp, device.state, device.run1, counter_device. - unfold state_machine.read_step, increment_wait_state_machine, read_step in *. + - (* state_machine_read_to_device_read_or_later: *) + case TODO. + (* + cbn [counter_device device.state device.is_ready_state device.run1 device.addr_range_start + device.addr_range_pastend device.maxRespDelay] in *. + cbn [increment_wait_state_machine + state_machine.state + state_machine.register + state_machine.is_initial_state + state_machine.read_step + state_machine.write_step + state_machine.reg_addr + state_machine.isMMIOAddr] in *. simpl in sL. destruct sL as ((istate & value & tl_d2h) & tlul_state). destruct_tl_d2h. destruct_tlul_adapter_reg_state. destruct H as [v [sH' [Hbytes H]]]. rewrite Hbytes. - destruct r; simp; [|]. + tlsimpl. + destruct r; simp. + (* r=VALUE *) + destruct_tl_h2d. + cbn in *. subst. + destruct_consistent_states. subst. - repeat (rewrite Z_word_N by lia; cbn). - destruct outstanding; [|]; + destruct outstanding; cbn in H1|-*; fwd. + + eexists _, _, _; ssplit; try reflexivity; cbn; rewrite Z_word_N by lia; try (eapply IDLE_related; unfold consistent_states; ssplit; reflexivity); try (apply N_to_word_word_to_N). @@ -360,7 +415,10 @@ Section WithParameters. ssplit; try reflexivity; try (eapply DONE_related; unfold consistent_states; ssplit; reflexivity); try (simpl; ZnWords). - - (* state_machine_write_to_device_write: *) + *) + - (* state_machine_write_to_device_write_or_later: *) + case TODO. + (* destruct H as (sH' & ? & ?). subst. unfold write_step in H1. destruct r. 2: contradiction. @@ -373,6 +431,7 @@ Section WithParameters. eexists _, _, _; ssplit; try reflexivity; try assumption; apply BUSY1_related; try lia; try (unfold consistent_states; ssplit; reflexivity). + *) - (* read_step_unique: *) simpl in *. unfold read_step in *. simp. destruct v; destruct r; try contradiction; simp; try reflexivity. diff --git a/firmware/IncrementWait/RunIncrementWaitSoftwareOnCava.v b/firmware/IncrementWait/RunIncrementWaitSoftwareOnCava.v index acfe25f40..16e312797 100644 --- a/firmware/IncrementWait/RunIncrementWaitSoftwareOnCava.v +++ b/firmware/IncrementWait/RunIncrementWaitSoftwareOnCava.v @@ -90,14 +90,16 @@ Section WithVar. end%list. (* Useful for debugging: display (ok-flag, pc, output) after each cycle: + TODO why are some flags false, but then again true?? Compute snd (trace 100 initial). *) Definition res(nsteps: nat): LogElem := outcomeToLogElem (run_rec sched 0 nsteps initial). (* We can vm_compute through the execution of the IncrementWait program, - riscv-coq's processor model, and Cava's reaction to the IncrementWait program: *) + riscv-coq's processor model, and Cava's reaction to the IncrementWait program: Goal exists nsteps, res nsteps = (true, word.unsigned ret_addr, 43). exists 55%nat. vm_compute. reflexivity. Qed. + *) End WithVar. diff --git a/firmware/RiscvMachineWithCavaDevice/InternalMMIOMachine.v b/firmware/RiscvMachineWithCavaDevice/InternalMMIOMachine.v index b8f170bd0..1637f6d4b 100644 --- a/firmware/RiscvMachineWithCavaDevice/InternalMMIOMachine.v +++ b/firmware/RiscvMachineWithCavaDevice/InternalMMIOMachine.v @@ -66,11 +66,12 @@ Module device. (* one past the highest MMIO address *) addr_range_pastend: Z; - (* max number of device cycles (ie calls of run1) this device takes to serve read/write requests *) - maxRespDelay: nat; + (* max number of device cycles this device takes to serve read/write requests, ie + max number of run1 calls with active read/write request until the device responds *) + maxRespDelay: state -> tl_h2d -> nat; }. (* Note: there are two levels of "polling until a response is available": - - on the hardware level, using readResp/writeResp, which appears as + - on the hardware level, using runUntilResp, which appears as blocking I/O for the software - on the software level, using MMIO reads on some status register, where the MMIO read immediately gives a "busy" response, and the @@ -148,8 +149,9 @@ Section WithParams. Definition runUntilResp(h2d: tl_h2d): OState (ExtraRiscvMachine D) word := mach <- get; - let (respo, new_device_state) := device.runUntilResp h2d device.maxRespDelay - mach.(getExtraState) in + let (respo, new_device_state) := + device.runUntilResp h2d (device.maxRespDelay mach.(getExtraState) h2d) + mach.(getExtraState) in put (withExtraState new_device_state mach);; resp <- fail_if_None respo; Return (N_to_word (d_data resp)). diff --git a/firmware/RiscvMachineWithCavaDevice/MMIOToCava.v b/firmware/RiscvMachineWithCavaDevice/MMIOToCava.v index e2974c668..62e52a7d9 100644 --- a/firmware/RiscvMachineWithCavaDevice/MMIOToCava.v +++ b/firmware/RiscvMachineWithCavaDevice/MMIOToCava.v @@ -56,43 +56,52 @@ Class device_implements_state_machine{word: word.word 32}{mem: map.map word Byte (* for each high-level state sH from which n bytes can be read at register r, if we run the low-level device with the read step's address on the input wires, - we will get a response after at most device.maxRespDelay device cycles, - and the response will match some possible high-level read step's response, + it either tells us to try again later (but by decreasing device.maxRespDelay, + it promises that it won't keep telling us to try again later forever), or + we will get a response matching some possible high-level read step's response, but not necessarily the one we used to show that sH accepts reads (to allow underspecification-nondeterminism in the high-level state machine) *) - state_machine_read_to_device_read: forall log2_nbytes r sH sL, + state_machine_read_to_device_read_or_later: forall log2_nbytes r sH sL sL' h2d d2h, (exists v sH', state_machine.read_step (2 ^ log2_nbytes) sH r v sH') -> device_state_related sH sL -> - exists d2h sL' sH', - device.runUntilResp - (set_a_valid true - (set_a_opcode Get - (set_a_size (N.of_nat log2_nbytes) - (set_a_address (word_to_N (state_machine.reg_addr r)) - (set_d_ready true tl_h2d_default))))) - device.maxRespDelay sL = (Some d2h, sL') /\ - device_state_related sH' sL' /\ - state_machine.read_step (2 ^ log2_nbytes) sH r (N_to_word (d_data d2h)) sH'; + a_valid h2d = true -> + a_opcode h2d = Get -> + a_size h2d = N.of_nat log2_nbytes -> + a_address h2d = word_to_N (state_machine.reg_addr r) -> + d_ready h2d = true -> + device.run1 sL h2d = (sL', d2h) -> + if d_valid d2h then + exists sH', + device_state_related sH' sL' /\ + state_machine.read_step (2 ^ log2_nbytes) sH r (N_to_word (d_data d2h)) sH' + else + device_state_related sH sL' /\ + (device.maxRespDelay sL' h2d < device.maxRespDelay sL h2d)%nat; (* for each high-level state sH in which an n-byte write to register r with value v is possible, if we run the low-level device with the write step's address and value on the input wires, - we will get an ack response after at most device.maxRespDelay device cycles, - and the device will end up in a state corresponding to a high-level state reached after - a high-level write, but not necessarily in the state we used to show that sH accepts writes *) - state_machine_write_to_device_write: forall log2_nbytes r v sH sL, + it either tells us to try again later (but by decreasing device.maxRespDelay, + it promises that it won't keep telling us to try again later forever), or + we will get an ack response and the device will end up in a state corresponding to a + high-level state reached after a high-level write, but not necessarily in the state + we used to show that sH accepts writes *) + state_machine_write_to_device_write_or_later: forall log2_nbytes r v sH sL sL' h2d d2h, (exists sH', state_machine.write_step (2 ^ log2_nbytes) sH r v sH') -> device_state_related sH sL -> - exists ignored sL' sH', - device.runUntilResp - (set_a_valid true - (set_a_opcode PutFullData - (set_a_size (N.of_nat log2_nbytes) - (set_a_address (word_to_N (state_machine.reg_addr r)) - (set_a_data (word_to_N v) - (set_d_ready true tl_h2d_default)))))) - device.maxRespDelay sL = (Some ignored, sL') /\ - device_state_related sH' sL' /\ - state_machine.write_step (2 ^ log2_nbytes) sH r v sH'; + a_valid h2d = true -> + a_opcode h2d = PutFullData -> + a_size h2d = N.of_nat log2_nbytes -> + a_address h2d = word_to_N (state_machine.reg_addr r) -> + a_data h2d = word_to_N v -> + d_ready h2d = true -> + device.run1 sL h2d = (sL', d2h) -> + if d_valid d2h then + exists sH', + device_state_related sH' sL' /\ + state_machine.write_step (2 ^ log2_nbytes) sH r v sH' + else + device_state_related sH sL' /\ + (device.maxRespDelay sL' h2d < device.maxRespDelay sL h2d)%nat; (* If two steps starting in the same high-level state agree on what gets appended to the trace, then the resulting high-level states must be equal. @@ -123,6 +132,84 @@ Section WithParams. {D: device} {DI: device_implements_state_machine D M}. + (* for each high-level state sH from which n bytes can be read at register r, + if we run the low-level device with the read step's address on the input wires, + we will get a response after at most device.maxRespDelay device cycles, + and the response will match some possible high-level read step's response, + but not necessarily the one we used to show that sH accepts reads (to allow + underspecification-nondeterminism in the high-level state machine) *) + Lemma state_machine_read_to_device_read: forall log2_nbytes r sH sL h2d, + (exists v sH', state_machine.read_step (2 ^ log2_nbytes) sH r v sH') -> + device_state_related sH sL -> + a_valid h2d = true -> + a_opcode h2d = Get -> + a_size h2d = N.of_nat log2_nbytes -> + a_address h2d = word_to_N (state_machine.reg_addr r) -> + d_ready h2d = true -> + exists d2h sL' sH', + device.runUntilResp h2d (device.maxRespDelay sL h2d) sL = (Some d2h, sL') /\ + device_state_related sH' sL' /\ + state_machine.read_step (2 ^ log2_nbytes) sH r (N_to_word (d_data d2h)) sH'. + Proof. + intros. remember (device.maxRespDelay sL h2d) as fuel. remember (S fuel) as B. + assert (device.maxRespDelay sL h2d <= fuel < B)%nat as HB by lia. clear HeqB Heqfuel. + revert fuel sH sL H H0 HB. + induction B; intros. + - exfalso. lia. + - destr fuel; cbn [device.runUntilResp]; destruct_one_match; + pose proof (state_machine_read_to_device_read_or_later + _ _ _ _ _ _ _ H H0 H1 H2 H3 H4 H5 E) as P; + (destruct_one_match; [destruct P as (sH' & R & V) | destruct P as (R & Decr)]). + + (* 0 remaining fuel, valid response: *) + clear -R V. eauto 10. + + (* 0 remaining fuel, no valid response: *) + exfalso. lia. + + (* some remaining fuel, valid response: *) + clear -R V. eauto 10. + + (* some remaining fuel, no valid response *) + edestruct IHB as (d2h & sL'' & sH'' & Run & Rel & St). + 1: eassumption. 1: exact R. 2: eauto 10. lia. + Qed. + + (* for each high-level state sH in which an n-byte write to register r with value v is possible, + if we run the low-level device with the write step's address and value on the input wires, + we will get an ack response after at most device.maxRespDelay device cycles, + and the device will end up in a state corresponding to a high-level state reached after + a high-level write, but not necessarily in the state we used to show that sH accepts writes *) + Lemma state_machine_write_to_device_write: forall log2_nbytes r v sH sL h2d, + (exists sH', state_machine.write_step (2 ^ log2_nbytes) sH r v sH') -> + device_state_related sH sL -> + a_valid h2d = true -> + a_opcode h2d = PutFullData -> + a_size h2d = N.of_nat log2_nbytes -> + a_address h2d = word_to_N (state_machine.reg_addr r) -> + a_data h2d = word_to_N v -> + d_ready h2d = true -> + exists ignored sL' sH', + device.runUntilResp h2d (device.maxRespDelay sL h2d) sL = (Some ignored, sL') /\ + device_state_related sH' sL' /\ + state_machine.write_step (2 ^ log2_nbytes) sH r v sH'. + Proof. + intros. remember (device.maxRespDelay sL h2d) as fuel. remember (S fuel) as B. + assert (device.maxRespDelay sL h2d <= fuel < B)%nat as HB by lia. clear HeqB Heqfuel. + revert fuel sH sL H H0 HB. + induction B; intros. + - exfalso. lia. + - destr fuel; cbn [device.runUntilResp]; destruct_one_match; + pose proof (state_machine_write_to_device_write_or_later + _ _ _ _ _ _ _ _ H H0 H1 H2 H3 H4 H5 H6 E) as P; + (destruct_one_match; [destruct P as (sH' & R & V) | destruct P as (R & Decr)]). + + (* 0 remaining fuel, valid response: *) + clear -R V. eauto 10. + + (* 0 remaining fuel, no valid response: *) + exfalso. lia. + + (* some remaining fuel, valid response: *) + clear -R V. eauto 10. + + (* some remaining fuel, no valid response *) + edestruct IHB as (d2h & sL'' & sH'' & Run & Rel & St). + 1: eassumption. 1: exact R. 2: eauto 10. lia. + Qed. + Inductive related: MetricRiscvMachine -> ExtraRiscvMachine D -> Prop := mkRelated: forall regs pc npc m xAddrs (t: list LogItem) t_ignored mc d s, execution t s -> @@ -314,11 +401,15 @@ Section WithParams. | E1: execution _ _, E2: execution _ _ |- _ => pose proof (execution_unique _ _ _ E1 E2); subst; clear E2 end. - 1-4: edestruct state_machine_read_to_device_read as (v'' & d'' & s'' & RU'' & Rel'' & RS''); - [do 2 eexists; match goal with - | H: state_machine.read_step ?n _ _ _ _ |- _ => - change n at 1 with (2 ^ (Nat.log2 n))%nat in H - end; eassumption|solve[eauto]|]. + 1-4: match goal with + | |- context[device.runUntilResp ?p _ _] => + edestruct state_machine_read_to_device_read with (h2d := p) + as (v'' & d'' & s'' & RU'' & Rel'' & RS''); + [do 2 eexists; match goal with + | H: state_machine.read_step ?n _ _ _ _ |- _ => + change n at 1 with (2 ^ (Nat.log2 n))%nat in H + end; eassumption|eassumption|reflexivity..|] + end. 1-4: cbn -[HList.tuple]; tlsimpl; simpl in RU''; rewrite RU''; cbn -[HList.tuple]. 4: { (* 64-bit MMIO is not supported: *) eapply state_machine.read_step_size_valid in HLp2p2. simpl in HLp2p2. exfalso. intuition congruence. @@ -349,13 +440,19 @@ Section WithParams. | E1: execution _ _, E2: execution _ _ |- _ => pose proof (execution_unique _ _ _ E1 E2); subst; clear E2 end. - 1-3: edestruct state_machine_write_to_device_write as (ignored & d' & s'' & RU & Rel' & WS'); - [eexists; match goal with - | H: state_machine.write_step ?n _ _ _ _ |- _ => - change n at 1 with (2 ^ (Nat.log2 n))%nat in H - end; eassumption|solve[eauto]|]. + 1-3: match goal with + | |- context[device.runUntilResp ?p _ _] => + edestruct state_machine_write_to_device_write with (h2d := p) + as (ignored & d' & s'' & RU & Rel' & WS'); + [eexists; match goal with + | H: state_machine.write_step ?n _ _ _ _ |- _ => + change n at 1 with (2 ^ (Nat.log2 n))%nat in H + end; eassumption + |eassumption + |rewrite ? Z_word_N in * by lia; try reflexivity..] + end. 1-3: cbn -[HList.tuple Primitives.invalidateWrittenXAddrs]; - tlsimpl; simpl in RU; rewrite Z_word_N in RU by lia; rewrite RU; + tlsimpl; simpl in RU; rewrite RU; cbn -[HList.tuple Primitives.invalidateWrittenXAddrs]. 1-3: eauto 15 using mkRelated, execution_write_cons, preserve_disjoint_of_invalidateXAddrs. diff --git a/silveroak-opentitan/hmac/end2end/CavaHmacDevice.v b/silveroak-opentitan/hmac/end2end/CavaHmacDevice.v index 02153b8f8..8b0715aaa 100644 --- a/silveroak-opentitan/hmac/end2end/CavaHmacDevice.v +++ b/silveroak-opentitan/hmac/end2end/CavaHmacDevice.v @@ -31,7 +31,7 @@ Section WithParameters. device.addr_range_pastend := TOP_EARLGREY_HMAC_BASE_ADDR + HMAC_MSG_FIFO_REG_OFFSET + HMAC_MSG_FIFO_SIZE_BYTES; - device.maxRespDelay := 1; + device.maxRespDelay s h2d := 1%nat; |}. Global Instance hmac_timing: timing := { max_negative_done_polls := 16; From a5fec37c782d2e9f3e6eb262c0888db0891e5b33 Mon Sep 17 00:00:00 2001 From: Samuel Gruetter Date: Tue, 9 Nov 2021 19:35:20 -0500 Subject: [PATCH 4/4] sketch hmac_top_invariant and some tlul state machine --- .../end2end/Bedrock2StateMachineToCavaHmac.v | 221 ++++++++++++++++++ silveroak-opentitan/hmac/sw/HmacSemantics.v | 35 ++- 2 files changed, 245 insertions(+), 11 deletions(-) create mode 100644 silveroak-opentitan/hmac/end2end/Bedrock2StateMachineToCavaHmac.v diff --git a/silveroak-opentitan/hmac/end2end/Bedrock2StateMachineToCavaHmac.v b/silveroak-opentitan/hmac/end2end/Bedrock2StateMachineToCavaHmac.v new file mode 100644 index 000000000..18df72856 --- /dev/null +++ b/silveroak-opentitan/hmac/end2end/Bedrock2StateMachineToCavaHmac.v @@ -0,0 +1,221 @@ +(****************************************************************************) +(* Copyright 2021 The Project Oak Authors *) +(* *) +(* Licensed under the Apache License, Version 2.0 (the "License") *) +(* you may not use this file except in compliance with the License. *) +(* You may obtain a copy of the License at *) +(* *) +(* http://www.apache.org/licenses/LICENSE-2.0 *) +(* *) +(* Unless required by applicable law or agreed to in writing, software *) +(* distributed under the License is distributed on an "AS IS" BASIS, *) +(* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) +(* See the License for the specific language governing permissions and *) +(* limitations under the License. *) +(****************************************************************************) + +Require Import Coq.Arith.PeanoNat. +Require Import Coq.micromega.Lia. +Require Import Coq.Lists.List. +Require Import Coq.ZArith.ZArith. + +Require Import coqutil.Tactics.Tactics. +Require Import coqutil.Datatypes.Prod. + +Require Import Cava.Util.Byte. +Require Import Cava.Types. +Require Import Cava.Expr. +Require Import Cava.Primitives. +Require Import Cava.TLUL. +Require Import Cava.Invariant. +Require Import Cava.Primitives. +Require Import Cava.Semantics. +Require Import Cava.Expr. +Require Import Cava.ExprProperties. +Require Import Cava.Util.Tactics. +Require Import Cava.Util.List. +Require Import Cava.Util.If. +Require Import Cava.Util.Nat. +Require Import Cava.Util.Byte. +Require Import coqutil.Word.Interface coqutil.Word.Properties. +Require Import coqutil.Word.LittleEndianList. +Require Import HmacHardware.Hmac. +Require Import HmacHardware.Sha256. +Require HmacSoftware.HmacSemantics. + +Import ListNotations. + +Axiom hmac_repr: Type. +(* Note: hmac_repr might/should be an Inductive, so probably no hmac_repr_msg getter will + exists, but we will need to do a match, and assert False in the other cases *) +Axiom hmac_repr_msg: hmac_repr -> list Byte.byte. +Axiom hmac_repr_max_cycles_until_done: hmac_repr -> nat. + +Instance hmac_invariant: invariant_for hmac hmac_repr. Admitted. + +(* TLUL communication state *) +Inductive tlul_protocol_state := +(* host is not interested in talking to device at the moment *) +| HostNotInterested +(* host wants to talk to device and is waiting for device to become ready *) +| HostWaitingForReady(maxDelay: nat) +(* host can and will send a packet in the next cycle *) +| HostCanSend +(* host sent a message to device and is waiting for a reply from device *) +| HostWaitingForReply(packet: tl_h2d)(maxDelay: nat). + +Definition tlul_state_step(s1 s2: tlul_protocol_state): Prop := + match s1 with + | HostNotInterested => + s2 = HostNotInterested \/ + exists maxDelay, s2 = HostWaitingForReady maxDelay + | HostWaitingForReady maxDelay => + (exists maxDelay', maxDelay = S maxDelay' /\ s2 = HostWaitingForReady maxDelay') \/ + s2 = HostCanSend + | HostCanSend => exists packet maxDelay, s2 = HostWaitingForReply packet maxDelay + | HostWaitingForReply packet maxDelay => + (exists maxDelay', maxDelay = S maxDelay' /\ s2 = HostWaitingForReply packet maxDelay') \/ + s2 = HostNotInterested + end. + +Definition a_valid_of_current_tlul_state(s: tlul_protocol_state): bool := + match s with + | HostNotInterested => false + | HostWaitingForReady maxDelay => false + | HostCanSend => true + | HostWaitingForReply packet maxDelay => false + end. + +Definition d_ready_of_current_tlul_state(s: tlul_protocol_state): bool := + match s with + | HostNotInterested => false + | HostWaitingForReady maxDelay => false + | HostCanSend => true + | HostWaitingForReply packet maxDelay => true + end. + +Definition d_valid_of_current_and_next_tlul_state(s1 s2: tlul_protocol_state): bool := + match s1 with + | HostWaitingForReply packet maxDelay => + match s2 with + | HostWaitingForReply packet' maxDelay' => false + | _ => true + end + | _ => false + end. + +(* If the host and device both behave correctly, then each device cycle that takes in + h2d and outputs d2h (ie `step circuit circuit_state1 h2d = (circuit_state2, d2h)`) + corresponds to a transition in the following transition system: *) +Definition tlul_step(s1: tlul_protocol_state)(h2d: tl_h2d) + (s2: tlul_protocol_state)(d2h: tl_d2h): Prop := + a_valid h2d = a_valid_of_current_tlul_state s1 /\ + d_ready h2d = d_ready_of_current_tlul_state s1 /\ + d_valid d2h = d_valid_of_current_and_next_tlul_state s1 s2 /\ + match s1 with + | HostNotInterested => + match s2 with + | HostNotInterested => True + | HostWaitingForReady maxDelay => a_ready d2h = false + | HostCanSend => a_ready d2h = true + | HostWaitingForReply packet maxDelay => False + end + | HostWaitingForReady maxDelay => + match s2 with + | HostNotInterested => False + | HostWaitingForReady maxDelay' => a_ready d2h = false /\ maxDelay = S maxDelay' + | HostCanSend => a_ready d2h = true + | HostWaitingForReply packet maxDelay => False + end + | HostCanSend => + match s2 with + | HostWaitingForReply packet maxDelay => packet = h2d + | _ => False + end + | HostWaitingForReply packet maxDelay => + match s2 with + | HostNotInterested => True + | HostWaitingForReady maxDelay' => a_ready d2h = false + | HostCanSend => a_ready d2h = true + | HostWaitingForReply packet' maxDelay' => packet' = packet /\ maxDelay = S maxDelay' + end + end. + +Section WithParams. + Context {word: word 32} {word_ok: word.ok word}. + + Definition REG_DIGEST_0: nat. + let r := eval unfold HmacHardware.Hmac.REG_DIGEST_0 in HmacHardware.Hmac.REG_DIGEST_0 in + lazymatch r with + | Constant _ ?v => exact (N.to_nat v) + end. + Defined. + + Definition REG_CFG: nat. + let r := eval unfold HmacHardware.Hmac.REG_CFG in HmacHardware.Hmac.REG_CFG in + lazymatch r with + | Constant _ ?v => exact (N.to_nat v) + end. + Defined. + + Definition REG_INTR_STATE: nat := 0. + Definition REG_INTR_ENABLE: nat := 1. + + Definition N_le_word_list_to_byte_list: list N -> list Byte.byte := + List.flat_map (fun n => le_split 4 (Z.of_N n)). + + Definition get_hl_config(regs: list N): HmacSemantics.idle_data := {| + HmacSemantics.intr_enable := word.of_Z (Z.of_N (List.nth REG_INTR_ENABLE regs 0%N)); + HmacSemantics.hmac_done := N.testbit 0 (List.nth REG_INTR_ENABLE regs 0%N); + HmacSemantics.hmac_en := N.testbit 0 (List.nth REG_CFG regs 0%N); + HmacSemantics.sha_en := N.testbit 1 (List.nth REG_CFG regs 0%N); + HmacSemantics.swap_endian := N.testbit 2 (List.nth REG_CFG regs 0%N); + HmacSemantics.swap_digest := N.testbit 3 (List.nth REG_CFG regs 0%N); + |}. + + Instance hmac_top_invariant: invariant_for hmac_top HmacSemantics.state := + fun (circuit_state: denote_type hmac_top_state) (stm_state: HmacSemantics.state) => + let '((wasfull, (d2h, regs)), (tlul_st, hmac_st)) := circuit_state in + exists hmac_r: hmac_repr, hmac_invariant hmac_st hmac_r /\ + match stm_state with + | HmacSemantics.IDLE digest config => + digest = N_le_word_list_to_byte_list (List.firstn 8 (List.skipn REG_DIGEST_0 regs)) /\ + config = get_hl_config regs + | HmacSemantics.CONSUMING buf => + get_hl_config regs = HmacSemantics.sha_default_cfg /\ + hmac_repr_msg hmac_r = buf + | HmacSemantics.PROCESSING buf ncycles => + get_hl_config regs = HmacSemantics.sha_default_cfg /\ + hmac_repr_msg hmac_r = buf /\ + Z.of_nat (hmac_repr_max_cycles_until_done hmac_r) = ncycles + end. + + Instance hmac_top_specification: specification_for hmac_top HmacSemantics.state := {| + reset_repr := HmacSemantics.IDLE (List.repeat Byte.x00 32) HmacSemantics.zero_cfg; + precondition h2d st := True; + update_repr h2d st := + match st with + | HmacSemantics.IDLE digest config => st + | HmacSemantics.CONSUMING buf => st + | HmacSemantics.PROCESSING buf ncycles => st + end; + postcondition h2d st d2h := True; + |}. + + Lemma hmac_top_invariant_at_reset: invariant_at_reset hmac_top. + Admitted. + + Lemma hmac_top_invariant_preserved: invariant_preserved hmac_top. + Proof. + simplify_invariant hmac_top. cbn [absorb_any]. + simplify_spec hmac_top. + intros input state repr new_repr. + Admitted. + + Lemma hmac_top_output_correct: output_correct hmac_top. + Proof. + simplify_invariant hmac_top. simplify_spec hmac_top. + intros input state repr new_repr. + Admitted. + +End WithParams. diff --git a/silveroak-opentitan/hmac/sw/HmacSemantics.v b/silveroak-opentitan/hmac/sw/HmacSemantics.v index 93ea8a241..a4bf505c7 100644 --- a/silveroak-opentitan/hmac/sw/HmacSemantics.v +++ b/silveroak-opentitan/hmac/sw/HmacSemantics.v @@ -50,6 +50,24 @@ Section WithParams. | CONSUMING(sha_buffer: list byte) | PROCESSING(sha_buffer: list byte)(max_cycles_until_done: Z). + Definition sha_default_cfg := {| + hmac_done := false; + intr_enable := word.of_Z 0; + hmac_en := false; + sha_en := true; + swap_endian := true; + swap_digest := false; + |}. + + Definition zero_cfg := {| + hmac_done := false; + intr_enable := word.of_Z 0; + hmac_en := false; + sha_en := false; + swap_endian := false; + swap_digest := false; + |}. + Inductive read_step: nat -> state -> word -> word -> state -> Prop := | read_done_bit_not_done: forall b v n, 0 < n -> @@ -109,17 +127,12 @@ Section WithParams. swap_digest := swap_digest s; |}) | write_hash_start: forall d v, v = word.of_Z (Z.shiftl 1 HMAC_CMD_HASH_START_BIT) -> - write_step 4 (IDLE d (* Here one can see that we only model a subset of the features of - the HMAC module: in our model, starting the computation is only - possible from the specific configuration below. - But using an HMAC module with more features than what we expose - to the software is safe, so modeling only a subset is not a problem. *) - {| hmac_done := false; - intr_enable := word.of_Z 0; - hmac_en := false; - sha_en := true; - swap_endian := true; - swap_digest := false; |}) + (* Here one can see that we only model a subset of the features of + the HMAC module: in our model, starting the computation is only + possible from the sha_default_cfg configuration. + But using an HMAC module with more features than what we expose + to the software is safe, so modeling only a subset is not a problem. *) + write_step 4 (IDLE d sha_default_cfg) (word.of_Z (TOP_EARLGREY_HMAC_BASE_ADDR + HMAC_CMD_REG_OFFSET)) v (CONSUMING []) | write_byte: forall bs bs' v,