diff --git a/rauc-bundle/compat-install-check-script.nix b/rauc-bundle/compat-install-check-script.nix new file mode 100644 index 00000000..9031dbee --- /dev/null +++ b/rauc-bundle/compat-install-check-script.nix @@ -0,0 +1,83 @@ +# This is a template for a RAUC install-check hook that can be used to perform +# compatibility adjustments to the system in "emergency" situations. +# +# The script is expected to exit with a failing status to always abort the +# installation of the dummy RAUC bundle, which itself is without utility. +# +# By creating a PlayOS release that uses this hook, it is possible to perform +# compatibility fixes to the "fleet" as part of the update process, without +# having remote access to the machines. +{ pkgs ? import ../pkgs {} }: +let + # Note: all tools used here must be in PATH (i.e. part of environment.systemPackages) + # in the BOOTED system! Do NOT use references like "${pkgs.gawk}/bin/awk", + # because they would point to non-existent nix packages. + # + # As a last resort, you can attempt to locate binaries in e.g. `/run/current-system/sw/bin` + compatScriptChecked = pkgs.writeShellApplication { + name = "compat-install-script"; + text = '' + # Ensure the script always exits with an exit code >= 10 + # even on unexpected failures. This is needed + # because that indicates to RAUC to abort installation. + trap "exit 101" EXIT + + echo "== Running compat install-check script" + + if ! [[ "''${1:-}" == "install-check" ]]; then + echo "Expected to be run with 'install-check'" + exit 1 + fi + + echo "== Step 1: Figuring out slot names of booted and other" + + # `rauc status` cannot be used during installation and `RAUC_SLOT_*` env + # variables are not provided for install-check hooks, so we determine the + # "booted" and "other" systems by analyzing the mountpoints. + other_system=$(lsblk -o LABEL,MOUNTPOINTS -P | grep 'LABEL="system.' 
| grep 'MOUNTPOINTS=""' | cut -f2 -d'"') || echo "" + booted_system="" + + if [[ "$other_system" == "system.a" ]]; then + booted_system="system.b" + elif [[ "$other_system" == "system.b" ]]; then + booted_system="system.a" + else + echo "Failed to determine other system (other_system='$other_system'), lsblk output:" + lsblk -o LABEL,MOUNTPOINTS || true + exit 101 + fi + + echo "Booted system is: $booted_system" + echo "Other system is: $other_system" + echo "" + + # `export` to avoid unused var error in shellcheck + export other_system_disk=/dev/disk/by-label/$other_system + export booted_system_disk=/dev/disk/by-label/$booted_system + + echo "== Step 2: Performing compat fixes" + + ### + ### DEFINE COMPAT STEPS HERE + ### + ### Use the {booted|other}_system and {booted|other}_system_disk variables + ### as needed. + + echo "== Step N: Make sure the scripts fails" + echo "Applied compatibility settings, waiting for next update" 1>&2 + exit 101 + ''; + }; +in +pkgs.runCommand + "compat-install-script-local.sh" + { + allowedReferences = []; # avoid accidentally referring to any nix package + } + # Replace shebang on first line with #!/bin/sh - this will run using the + # host system's packages, not the packages from the system image! + # Note: /bin/sh is an alias for bash on nixOS + '' + cp "${pkgs.lib.getExe compatScriptChecked}" $out + sed -i '1 s|^.*$|#!/bin/sh|' $out + '' diff --git a/rauc-bundle/default.nix b/rauc-bundle/default.nix index d1c8f941..134b703c 100644 --- a/rauc-bundle/default.nix +++ b/rauc-bundle/default.nix @@ -6,9 +6,6 @@ , systemImage , closureInfo , pkgs -# See comment below for compat details. These are treated as version prefixes, -# so will also match -VALIDATION, -TEST, and etc. -, versionsRequiringCompatScript ? 
[ "2025.3.0" "2025.3.1" "2025.3.2" ] }: let @@ -18,105 +15,7 @@ let systemClosureInfo = closureInfo { rootPaths = [ systemImage ]; }; - # This script works around an incompatibility between the GRUB version used up - # to and including PlayOS 2023.2.0 and the mkfs default configuration used by - # PlayOS 2025.3.{0,1,2}* when installing an update via RAUC. Versions - # 2025.3.{0,1,2}* would produce an incompatible update with any RAUC update - # bundle, even if the system in the bundle has a fixed update routine itself. - # - # We therefore expect to keep this compat script in use for the foreseeable - # future, at least until no traces of incompatible PlayOS versions can be - # found in usage logs. - # - # Note: all tools used here must be part of environment.systemPackages in the - # host system! - compatScriptChecked = pkgs.writeShellApplication { - name = "compat-script"; - text = '' - if ! [[ "''${1:-}" == "slot-post-install" ]]; then - echo "Expected to be run at phase 'slot-post-install'" - exit 1 - fi - - echo "== Checking if host system version requires compatibility fixes" - - echo "RAUC_SLOT_NAME: $RAUC_SLOT_NAME" - - # RAUC_CURRENT_BOOTNAME is not provided in this phase, so determine manually - booted_slot="" - if [[ $RAUC_SLOT_NAME == system.a ]]; then - booted_slot="system.b" - elif [[ $RAUC_SLOT_NAME == system.b ]]; then - booted_slot="system.a" - else - echo "Invalid RAUC_SLOT_NAME: $RAUC_SLOT_NAME" - exit 1 - fi - - booted_slot_version=$(grep -A10 "\[slot.$booted_slot\]" /boot/status.ini | \ - grep -m 1 "bundle.version.*=" | \ - cut -d'=' -f2 | \ - tr -d '[:space:]') - echo "Detected host system version as $booted_slot_version" - - requires_compat=0 - - ${pkgs.lib.strings.toShellVar "compatVersions" versionsRequiringCompatScript} - for ver in "''${compatVersions[@]}"; do - if [[ $booted_slot_version == $ver* ]]; then - requires_compat=1 - break - fi - done - - if [[ requires_compat -eq 0 ]]; then - echo "Host system does not require compatibility 
fixes, exiting." - exit 0 - fi - - echo "== Running post-install system compatibility fixes" - - BAD_EXT4_OPTION=metadata_csum_seed - - echo "RAUC_SLOT_DEVICE: $RAUC_SLOT_DEVICE" - echo "RAUC_SLOT_MOUNT_POINT: $RAUC_SLOT_MOUNT_POINT" - - echo "== Checking for unsupported tune2fs options" - - # Perform the tuning - if tune2fs -l "$RAUC_SLOT_DEVICE" | grep "Filesystem features" | grep "$BAD_EXT4_OPTION"; then - - echo "Detect $BAD_EXT4_OPTION, attempting to fix" - - echo "Unmounting $RAUC_SLOT_MOUNT_POINT" - - umount "$RAUC_SLOT_MOUNT_POINT" - - echo "Removing $BAD_EXT4_OPTION from $RAUC_SLOT_DEVICE" - tune2fs -O ^"$BAD_EXT4_OPTION" "$RAUC_SLOT_DEVICE" - - echo "Re-mounting $RAUC_SLOT_DEVICE at $RAUC_SLOT_MOUNT_POINT" - - mount "$RAUC_SLOT_DEVICE" "$RAUC_SLOT_MOUNT_POINT" - - echo "Done!" - - else - echo "No $BAD_EXT4_OPTION detected" - fi - ''; - }; - - compatScript = pkgs.runCommand - "compat-script-local.sh" - { } - # Replace shebang on first line with #!/bin/sh - this will run using the - # host system's packages, not the packages from the system image! - # Note: /bin/sh is an alias for bash on nixOS - '' - cp "${pkgs.lib.getExe compatScriptChecked}" $out - sed -i '1 s|^.*$|#!/bin/sh|' $out - ''; + compatInstallCheckScript = (import ./compat-install-check-script.nix) { inherit pkgs; }; in stdenv.mkDerivation { name = "bundle-${version}.raucb"; @@ -128,23 +27,18 @@ stdenv.mkDerivation { mkdir -p system cd system - # Copy store content - mkdir -p nix/store - for i in $(< ${systemClosureInfo}/store-paths); do - cp -a "$i" ".$i" - done - - # copy initrd, kernel and init - cp -a "${systemImage}/initrd" initrd - cp -a "${systemImage}/kernel" kernel - cp -a "${systemImage}/init" init + # The install-check script is expected to fail and abort the installation, but + # in order to produce a valid bundle we create a system image with a single + # empty file. 
+ touch empty-system mkdir -p ../rauc-bundle time tar --sort=name --mtime='@1' --owner=0 --group=0 --numeric-owner -c * | pixz > ../rauc-bundle/system.tar.xz cd .. - cp ${compatScript} rauc-bundle/compat-fix.sh + # Add the install-check script to the bundle + cp ${compatInstallCheckScript} rauc-bundle/install-check.sh cat <<EOF > ./rauc-bundle/manifest.raucm [update] @@ -153,10 +47,10 @@ stdenv.mkDerivation { [image.system] filename=system.tar.xz - hooks=post-install [hooks] - filename=compat-fix.sh + filename=install-check.sh + hooks=install-check EOF time rauc \ diff --git a/testing/end-to-end/tests/base/proxy-and-update-legacy.nix b/testing/end-to-end/tests/base/proxy-and-update-legacy.nix deleted file mode 100644 index 73cce20c..00000000 --- a/testing/end-to-end/tests/base/proxy-and-update-legacy.nix +++ /dev/null @@ -1,5 +0,0 @@ -# Runs the same test as proxy-and-update, but in "legacy" mode that assumes -# problematic older PlayOS systems (2025.3.{0,1,2}), where mke2fs has not been -# configured to exclude unsupported ext4 features. -args@{pkgs, ...}: -pkgs.callPackage ./proxy-and-update.nix (args // { legacyMode = true; }) diff --git a/testing/end-to-end/tests/base/proxy-and-update.nix b/testing/end-to-end/tests/base/proxy-and-update.nix index eb3594a8..55bfee64 100644 --- a/testing/end-to-end/tests/base/proxy-and-update.nix +++ b/testing/end-to-end/tests/base/proxy-and-update.nix @@ -1,8 +1,6 @@ { pkgs, qemu, disk, overlayPath, safeProductName, updateUrl, version, - legacyMode ? false, # disable ext4 compatibility configuration, this mode is - # enabled in proxy-and-update-legacy.nix ... 
}: let @@ -53,15 +51,6 @@ let nextVersionBundle = pkgs.callPackage (playosRoot + "/rauc-bundle/default.nix") { version = nextVersion; systemImage = minimalTestSystem; - - versionsRequiringCompatScript = [ - "1.0.0" # a second version just to check array / looping works - - (if legacyMode then - version # mark our own version as legacy - else - "1.0.1") # 1.0.1 is non-existant - ]; }; in pkgs.testers.runNixOSTest { @@ -130,7 +119,6 @@ pkgs.testers.runNixOSTest { '' ${builtins.readFile ../../../helpers/nixos-test-script-helpers.py} ${builtins.readFile ./proxy-and-update-helpers.py} - import json product_name = "${safeProductName}" current_version = "1.1.1-TESTMAGIC" @@ -140,28 +128,10 @@ pkgs.testers.runNixOSTest { proxy_url = "http://${nodes.sidekick.networking.primaryIPAddress}:8888" - is_legacy_mode = bool(${toString legacyMode}) # `toString false` returns "" - bad_ext4_option = "metadata_csum_seed" - create_overlay("${disk}", "${overlayPath}") playos.start(allow_reboot=True) sidekick.start() - with TestCase("Installer produced a disk without incompatible FS features") as t: - playos.wait_for_unit("local-fs.target") - features = playos.succeed('tune2fs -l "/dev/disk/by-label/system.a" | grep "Filesystem features"') - for bad_opt in ["metadata_csum_seed", "orphan_file"]: - t.assertNotIn(bad_opt, features, f"ext4 was formatted with {bad_opt} by install-playos") - - if is_legacy_mode: - with TestPrecondition("Setup legacy mode"): - # enable all disabled features - playos.succeed("cat /etc/mke2fs.conf | tr -d '^' > /tmp/mke2fs.conf") - playos.succeed("mount --bind /tmp/mke2fs.conf /etc/mke2fs.conf") - # RAUC does not see the bind mount unless restarted - playos.wait_for_unit("rauc.service") - playos.systemctl("restart rauc.service") - ### === Stub Update server setup with TestPrecondition("Stub update server is started"): @@ -217,7 +187,7 @@ pkgs.testers.runNixOSTest { unit="playos-controller.service", timeout=61) - with TestCase("Controller installs the new upstream 
version") as t: + with TestCase("controller attempts to install the bundle, but aborts due to install-check") as t: next_version = "${nextVersion}" update_server.add_bundle(next_version, filepath="${nextVersionBundle}") @@ -230,7 +200,7 @@ pkgs.testers.runNixOSTest { expected_states = [ "Downloading", f"Installing.*{update_server.bundle_filename(next_version)}", - "RebootRequired" + "ErrorInstalling" ] for state in expected_states: @@ -241,56 +211,15 @@ pkgs.testers.runNixOSTest { # a 600 MB bundle will take at least 60s timeout=75) - with TestCase("RAUC status confirms the installation") as t: - rauc_status = json.loads(playos.succeed( - "rauc status --detailed --output-format=json" - )) - t.assertEqual( - rauc_status['boot_primary'], - "system.b", - "RAUC installation did not change boot primary other (i.e. system.b) slot" - ) - - b_slot = [s for s in rauc_status['slots'] if "system.b" in s][0]['system.b'] - slot_bundle_version = b_slot['slot_status']['bundle']['version'] - t.assertEqual( - slot_bundle_version, - next_version, - "Installed bundle does not have correct version" - ) - with TestCase("No raucb files left post-install") as t: playos.fail("ls /tmp/*.raucb") - target_disk = "/dev/disk/by-label/system.b" - - with TestCase("RAUC post-install hook ran and performed compatibility fixes") as t: - wait_for_logs(playos, "Checking if host system version requires compatibility fixes", unit="rauc.service") - wait_for_logs(playos, "Detected host system version as ${version}", unit="rauc.service") + with TestCase("compat fixes have run as part of install-check") as t: + wait_for_logs(playos, "== Running compat install-check script", unit="rauc.service") + wait_for_logs(playos, "Booted system is:.*system.a", unit="rauc.service") + wait_for_logs(playos, "Other system is:.*system.b", unit="rauc.service") - if is_legacy_mode: - wait_for_logs(playos, f"Removing {bad_ext4_option} from {target_disk}", unit="rauc.service") - wait_for_logs(playos, f"Re-mounting 
{target_disk}", unit="rauc.service") - else: - wait_for_logs(playos, "Host system does not require compatibility fixes", unit="rauc.service") - - - with TestCase("RAUC install produced a compatible filesystem") as t: - features = playos.succeed( - f'tune2fs -l "{target_disk}" | grep "Filesystem features"') - t.assertNotIn(bad_ext4_option, features, - f"ext4 was formatted with {bad_ext4_option}") - - - with TestCase("System boots into the new bundle") as t: - playos.shutdown() - playos.start() - playos.wait_for_unit('multi-user.target') - out_version = playos.succeed("cat /etc/PLAYOS_VERSION").strip() - t.assertEqual( - out_version, - next_version, - "Did not boot into the installed bundle?" - ) + ## you can define additional assertions for testing the install-check + ## script's side-effects here ''; }