From d2fea56d2d87ea38df99089a918a52a42ef817d9 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Mon, 21 Jul 2025 21:00:18 +0200 Subject: [PATCH 01/47] fix: save persistent --- hil/src/commands/mod.rs | 2 ++ hil/src/flash.rs | 7 ++++++ hil/src/main.rs | 2 ++ scripts/upload-certs.sh | 51 +++++++++++++++++++++++++++++++++++++---- 4 files changed, 57 insertions(+), 5 deletions(-) diff --git a/hil/src/commands/mod.rs b/hil/src/commands/mod.rs index 09692f3c8..a65bab9d4 100644 --- a/hil/src/commands/mod.rs +++ b/hil/src/commands/mod.rs @@ -2,6 +2,7 @@ mod button_ctrl; mod cmd; +mod fetch_persistent; mod flash; mod login; mod mcu; @@ -9,6 +10,7 @@ mod reboot; pub use self::button_ctrl::ButtonCtrl; pub use self::cmd::Cmd; +pub use self::fetch_persistent::FetchPersistent; pub use self::flash::Flash; pub use self::login::Login; pub use self::mcu::Mcu; diff --git a/hil/src/flash.rs b/hil/src/flash.rs index 2d41b929d..457bc22bf 100644 --- a/hil/src/flash.rs +++ b/hil/src/flash.rs @@ -73,8 +73,15 @@ fn flash_cmd(variant: FlashVariant, extracted_dir: &Path) -> Result<()> { ); let cmd_file_name = variant.file_name(); + + // Remove the fetch persistent commands from flash script before executing + let flash_script_path = bootloader_dir.join(cmd_file_name); let result = run_cmd! { cd $bootloader_dir; + info "Removing fetch persistent commands from flash script"; + sed -i "/# This file should be templated to add a '--cmd' option/d" $flash_script_path; + sed -i "/--cmd.*read PERSISTENT.*reboot recovery/d" $flash_script_path; + sed -i "/wld-pre-flash-check.sh/d" $flash_script_path; info running $cmd_file_name; bash $cmd_file_name; info finished flashing!; diff --git a/hil/src/main.rs b/hil/src/main.rs index 6d7c60541..664f22a13 100644 --- a/hil/src/main.rs +++ b/hil/src/main.rs @@ -26,6 +26,7 @@ struct Cli { enum Commands { ButtonCtrl(crate::commands::ButtonCtrl), Cmd(crate::commands::Cmd), + FetchPersistent(crate::commands::FetchPersistent), Flash(crate::commands::Flash), Login(crate::commands::Login), Mcu(crate::commands::Mcu), @@ -62,6 +63,7 @@ async fn main() -> Result<()> { match args.commands { Commands::ButtonCtrl(c) => c.run().await, Commands::Cmd(c) => c.run().await, + Commands::FetchPersistent(c) => c.run().await, Commands::Flash(c) => c.run().await, Commands::Login(c) => c.run().await, Commands::Mcu(c) => c.run().await, diff --git a/scripts/upload-certs.sh b/scripts/upload-certs.sh index 54889c1b4..e8d19e23e 100755 --- a/scripts/upload-certs.sh +++ b/scripts/upload-certs.sh @@ -3,7 +3,7 @@ set -o errexit # abort on nonzero exit status set -o errtrace # pass ERR trap down to functions, substitutions, etc set -o nounset # abort on unbound variable -set -o pipefail # don’t hide errors within pipes +set -o pipefail # don't hide errors within pipes # Function to display usage information usage() { @@ -14,6 +14,7 @@ usage() { -t, --token Bearer token for authentication. -b, --backend (stage|prod) Targets the stage or prod backend. -s, --short Short upload (skip attestation cert). + -n, --dry-run Print key values without making curl requests. Environment variables (overriden by options): FM_CLI_ENV: Must be either 'stage' or 'prod'. @@ -36,6 +37,7 @@ main() { local backend="${FM_CLI_ENV:-""}" local positional_args=() local short=0 + local dry_run=0 local arg while [[ "$#" -gt 0 ]]; do arg="${1}"; shift @@ -48,6 +50,8 @@ main() { backend="${1}"; shift ;; -s|--short) short=1 ;; + -n|--dry-run) + dry_run=1 ;; -*) echo "Unknown option: ${arg}" usage; exit 1 ;; @@ -63,19 +67,19 @@ main() { exit 1 fi - if [[ -z "${bearer}" ]]; then + if [[ -z "${bearer}" ]] && [[ ${dry_run} -eq 0 ]]; then echo "Bearer token not found. Please export FM_CLI_ORB_MANAGER_INTERNAL_TOKEN, or pass it as an argument: -t " exit 1 fi - if [[ -z "${backend}" ]]; then + if [[ -z "${backend}" ]] && [[ ${dry_run} -eq 0 ]]; then echo "Environment not found. Please export FM_CLI_ENV, or pass it as an argument: -b (stage|prod)" exit 1 fi - if [[ "${backend}" != "prod" && "${backend}" != "stage" ]]; then + if [[ "${backend}" != "prod" && "${backend}" != "stage" ]] && [[ ${dry_run} -eq 0 ]]; then echo "Invalid environment: ${backend}. Must be either 'prod' or 'stage'." exit 1 fi @@ -97,6 +101,44 @@ main() { exit 1 fi + if [[ ${dry_run} -eq 1 ]]; then + echo "=== DRY RUN MODE - Key Values ===" + echo "Orb ID: ${orb_id}" + echo "Keypath: ${keypath}" + + # Print signup key values + echo "=== Signup Key ===" + local signup_pubkey + signup_pubkey=$(sed 's/$/\\n/' "${keypath}/sss_70000002_0002_0040.bin" | tr -d \\n) + echo "Key: ${signup_pubkey}" + echo "Signature: $(base64 -w 0 "${keypath}/70000002.signature.raw")" + echo "Extra Data: $(base64 -w 0 "${keypath}/70000002.extra.raw")" + + # Print attestation key values + echo "=== Attestation Key ===" + local attestation_pubkey + attestation_pubkey=$(sed 's/$/\\n/' "${keypath}/sss_70000001_0002_0040.bin" | tr -d \\n) + echo "Key: ${attestation_pubkey}" + echo "Signature: $(base64 -w 0 "${keypath}/70000001.signature.raw")" + echo "Extra Data: $(base64 -w 0 "${keypath}/70000001.extra.raw")" + + # Print chip ID values + echo "=== Chip ID ===" + echo "Key: $(base64 -w 0 "${keypath}/7fff0206.chip_id.raw")" + echo "Signature: $(base64 -w 0 "${keypath}/7fff0206.signature.raw")" + echo "Extra Data: $(base64 -w 0 "${keypath}/7fff0206.extra.raw")" + + # Print certificate if not short mode + if [[ ${short} -eq 0 ]]; then + echo "=== Certificate ===" + local certificate + certificate=$(sed 's/$/\\n/' "${keypath}/f0000013.cert" | tr -d \\n) + echo "Certificate: ${certificate}" + fi + + exit 0 + fi + echo "Getting Cloudflared access token..." local cf_token cf_token="$(get_cloudflared_token "${domain}")" @@ -163,4 +205,3 @@ main() { if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then main "$@" fi - From 9a55d7c4b2e3a71ab7b3116a392962a13f273fe7 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 9 Dec 2025 15:10:47 -0800 Subject: [PATCH 02/47] fix: replace mcu-reboot with button reboot --- hil/src/commands/ota/mod.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hil/src/commands/ota/mod.rs b/hil/src/commands/ota/mod.rs index 5afbd4b50..272b28c18 100644 --- a/hil/src/commands/ota/mod.rs +++ b/hil/src/commands/ota/mod.rs @@ -1,6 +1,8 @@ use std::path::PathBuf; use std::time::Instant; +use crate::boot::{reboot}; +use crate::ssh_wrapper::{AuthMethod, SshConnectArgs, SshWrapper}; use clap::Parser; use color_eyre::{ eyre::{bail, WrapErr}, @@ -103,7 +105,12 @@ impl Ota { })?; info!("Overlays wiped successfully, rebooting device"); - system::reboot_orb(&session).await?; + reboot(false, None).await.wrap_err_with(|| { + format!( + "failed to reboot into", + ) + })?; + info!("Reboot command sent to Orb device"); let new_session = From 7973a7050304ac2f147aaa8430ca8ed11ab0456f Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 9 Dec 2025 15:21:16 -0800 Subject: [PATCH 03/47] fix: remove upload.sh --- scripts/upload-certs.sh | 207 ---------------------------------------- 1 file changed, 207 deletions(-) delete mode 100755 scripts/upload-certs.sh diff --git a/scripts/upload-certs.sh b/scripts/upload-certs.sh deleted file mode 100755 index e8d19e23e..000000000 --- a/scripts/upload-certs.sh +++ /dev/null @@ -1,207 +0,0 @@ -#!/usr/bin/env bash - -set -o errexit # abort on nonzero exit status -set -o errtrace # pass ERR trap down to functions, substitutions, etc -set -o nounset # abort on unbound variable -set -o pipefail # don't hide errors within pipes - -# Function to display usage information -usage() { - echo "Usage: $0 [OPTIONS] - - Options: - -h, --help Display this help message - -t, --token Bearer token for authentication. - -b, --backend (stage|prod) Targets the stage or prod backend. - -s, --short Short upload (skip attestation cert). - -n, --dry-run Print key values without making curl requests. - - Environment variables (overriden by options): - FM_CLI_ENV: Must be either 'stage' or 'prod'. - FM_CLI_ORB_AUTH_INTERNAL_TOKEN: Bearer token for authentication. - - Example: - $0 -t -b stage 349df8b0 /path/to/provisioning_material" -} - -# Function to get Cloudflared access token -get_cloudflared_token() { - local -r domain="${1}" - - cloudflared access login --quiet "${domain}" - cloudflared access token -app="${domain}" -} - -main() { - local bearer="${FM_CLI_ORB_AUTH_INTERNAL_TOKEN:-""}" - local backend="${FM_CLI_ENV:-""}" - local positional_args=() - local short=0 - local dry_run=0 - local arg - while [[ "$#" -gt 0 ]]; do - arg="${1}"; shift - case "${arg}" in - -h|--help) - usage; exit 0 ;; - -t|--bearer-token) - bearer="${1}"; shift ;; - -b|--backend) - backend="${1}"; shift ;; - -s|--short) - short=1 ;; - -n|--dry-run) - dry_run=1 ;; - -*) - echo "Unknown option: ${arg}" - usage; exit 1 ;; - *) - positional_args+=("${arg}") ;; - esac - done - set -- "${positional_args[@]}" - - if [[ $# -ne 2 ]]; then - echo "must pass " - usage - exit 1 - fi - - if [[ -z "${bearer}" ]] && [[ ${dry_run} -eq 0 ]]; then - echo "Bearer token not found. Please export FM_CLI_ORB_MANAGER_INTERNAL_TOKEN, - or pass it as an argument: -t " - exit 1 - fi - - if [[ -z "${backend}" ]] && [[ ${dry_run} -eq 0 ]]; then - echo "Environment not found. Please export FM_CLI_ENV, - or pass it as an argument: -b (stage|prod)" - exit 1 - fi - - if [[ "${backend}" != "prod" && "${backend}" != "stage" ]] && [[ ${dry_run} -eq 0 ]]; then - echo "Invalid environment: ${backend}. Must be either 'prod' or 'stage'." - exit 1 - fi - - local -r orb_id="${1}" - local -r keypath="${2}" - - # Determine the domain based on the environment - local domain - if [[ "${backend}" == "prod" ]]; then - domain="auth.internal.orb.worldcoin.dev" - else - domain="auth.internal.stage.orb.worldcoin.dev" - fi - - # Ensure the keypath exists - if [[ ! -d "$keypath" ]]; then - echo "Error: Keypath directory '$keypath' does not exist." - exit 1 - fi - - if [[ ${dry_run} -eq 1 ]]; then - echo "=== DRY RUN MODE - Key Values ===" - echo "Orb ID: ${orb_id}" - echo "Keypath: ${keypath}" - - # Print signup key values - echo "=== Signup Key ===" - local signup_pubkey - signup_pubkey=$(sed 's/$/\\n/' "${keypath}/sss_70000002_0002_0040.bin" | tr -d \\n) - echo "Key: ${signup_pubkey}" - echo "Signature: $(base64 -w 0 "${keypath}/70000002.signature.raw")" - echo "Extra Data: $(base64 -w 0 "${keypath}/70000002.extra.raw")" - - # Print attestation key values - echo "=== Attestation Key ===" - local attestation_pubkey - attestation_pubkey=$(sed 's/$/\\n/' "${keypath}/sss_70000001_0002_0040.bin" | tr -d \\n) - echo "Key: ${attestation_pubkey}" - echo "Signature: $(base64 -w 0 "${keypath}/70000001.signature.raw")" - echo "Extra Data: $(base64 -w 0 "${keypath}/70000001.extra.raw")" - - # Print chip ID values - echo "=== Chip ID ===" - echo "Key: $(base64 -w 0 "${keypath}/7fff0206.chip_id.raw")" - echo "Signature: $(base64 -w 0 "${keypath}/7fff0206.signature.raw")" - echo "Extra Data: $(base64 -w 0 "${keypath}/7fff0206.extra.raw")" - - # Print certificate if not short mode - if [[ ${short} -eq 0 ]]; then - echo "=== Certificate ===" - local certificate - certificate=$(sed 's/$/\\n/' "${keypath}/f0000013.cert" | tr -d \\n) - echo "Certificate: ${certificate}" - fi - - exit 0 - fi - - echo "Getting Cloudflared access token..." - local cf_token - cf_token="$(get_cloudflared_token "${domain}")" - - # Post attestation certificate - if [[ ${short} -eq 0 ]]; then - local certificate - certificate=$(sed 's/$/\\n/' "${keypath}/f0000013.cert" | tr -d \\n) - curl --fail --location \ - -H "Authorization: Bearer ${bearer}" \ - -H "cf-access-token: ${cf_token}" \ - -X POST "https://${domain}/api/v1/certificate" \ - -d '{ "orbId": "'"${orb_id}"'", "certificate": "'"${certificate}"'" }' - fi - - # Post signup key - local signup_pubkey - signup_pubkey=$(sed 's/$/\\n/' "${keypath}/sss_70000002_0002_0040.bin" | tr -d \\n) - curl --fail --location \ - -H "Authorization: Bearer ${bearer}" \ - -H "cf-access-token: ${cf_token}" \ - -X POST "https://${domain}/api/v1/key" \ - -d '{ - "orbId": "'"${orb_id}"'", - "type": "signup", - "key": "'"${signup_pubkey}"'", - "signature": "'$(base64 -w 0 "${keypath}/70000002.signature.raw")'", - "extraData": "'$(base64 -w 0 "${keypath}/70000002.extra.raw")'", - "active": true - }' - - # Post attestation key - local attestation_pubkey - attestation_pubkey=$(sed 's/$/\\n/' "${keypath}/sss_70000001_0002_0040.bin" | tr -d \\n) - curl --fail --location \ - -H "Authorization: Bearer ${bearer}" \ - -H "cf-access-token: ${cf_token}" \ - -X POST "https://${domain}/api/v1/key" \ - -d '{ - "orbId": "'"${orb_id}"'", - "type": "attestation", - "key": "'"${attestation_pubkey}"'", - "signature": "'$(base64 -w 0 "${keypath}/70000001.signature.raw")'", - "extraData": "'$(base64 -w 0 "${keypath}/70000001.extra.raw")'", - "active": true - }' - - # Post chip ID - curl --fail --location \ - -H "Authorization: Bearer ${bearer}" \ - -H "cf-access-token: ${cf_token}" \ - -X POST "https://${domain}/api/v1/key" \ - -d '{ - "orbId": "'"${orb_id}"'", - "type": "chipid", - "key": "'"$(base64 -w 0 "${keypath}/7fff0206.chip_id.raw")"'", - "signature": "'$(base64 -w 0 "${keypath}/7fff0206.signature.raw")'", - "extraData": "'$(base64 -w 0 "${keypath}/7fff0206.extra.raw")'", - "active": true - }' -} - -# Ensure that main only runs when called as a script -if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then - main "$@" -fi From a8d615b00ddb26e1cd24601371fcf16b4f3e797c Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 9 Dec 2025 15:47:24 -0800 Subject: [PATCH 04/47] fix: fmt --- hil/src/commands/ota/mod.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/hil/src/commands/ota/mod.rs b/hil/src/commands/ota/mod.rs index 272b28c18..da8dc7f95 100644 --- a/hil/src/commands/ota/mod.rs +++ b/hil/src/commands/ota/mod.rs @@ -1,7 +1,7 @@ use std::path::PathBuf; use std::time::Instant; -use crate::boot::{reboot}; +use crate::boot::reboot; use crate::ssh_wrapper::{AuthMethod, SshConnectArgs, SshWrapper}; use clap::Parser; use color_eyre::{ @@ -105,11 +105,9 @@ impl Ota { })?; info!("Overlays wiped successfully, rebooting device"); - reboot(false, None).await.wrap_err_with(|| { - format!( - "failed to reboot into", - ) - })?; + reboot(false, None) + .await + .wrap_err_with(|| format!("failed to reboot into",))?; info!("Reboot command sent to Orb device"); From 9fe311c7a0bd011e39aab320e676f856422c6615 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Thu, 11 Dec 2025 03:13:02 +0100 Subject: [PATCH 05/47] fix: clippy & build --- hil/src/commands/ota/mod.rs | 3 +-- hil/src/commands/ota/system.rs | 15 --------------- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/hil/src/commands/ota/mod.rs b/hil/src/commands/ota/mod.rs index da8dc7f95..18f2a2662 100644 --- a/hil/src/commands/ota/mod.rs +++ b/hil/src/commands/ota/mod.rs @@ -2,7 +2,6 @@ use std::path::PathBuf; use std::time::Instant; use crate::boot::reboot; -use crate::ssh_wrapper::{AuthMethod, SshConnectArgs, SshWrapper}; use clap::Parser; use color_eyre::{ eyre::{bail, WrapErr}, @@ -107,7 +106,7 @@ impl Ota { reboot(false, None) .await - .wrap_err_with(|| format!("failed to reboot into",))?; + .wrap_err("failed to reboot after wiping overlays")?; info!("Reboot command sent to Orb device"); diff --git a/hil/src/commands/ota/system.rs b/hil/src/commands/ota/system.rs index f75859fcf..345093c63 100644 --- a/hil/src/commands/ota/system.rs +++ b/hil/src/commands/ota/system.rs @@ -5,21 +5,6 @@ use color_eyre::{ use orb_hil::SshWrapper; use serde_json::Value; -/// Reboot the Orb device using orb-mcu-util and shutdown -pub async fn reboot_orb(session: &SshWrapper) -> Result<()> { - session - .execute_command("TERM=dumb orb-mcu-util reboot orb") - .await - .wrap_err("Failed to execute orb-mcu-util reboot orb")?; - - session - .execute_command("TERM=dumb sudo shutdown now") - .await - .wrap_err("Failed to execute shutdown now")?; - - Ok(()) -} - /// Wipe overlays on the device (Diamond platform specific) pub async fn wipe_overlays(session: &SshWrapper) -> Result<()> { let result = session From 8a42a30c5fb1b0aecb7c0e640db8df9bcd201494 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Fri, 19 Dec 2025 15:31:07 +0100 Subject: [PATCH 06/47] fix(ota-hil): Fix target version in update_version_json --- hil/src/commands/ota/system.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hil/src/commands/ota/system.rs b/hil/src/commands/ota/system.rs index 345093c63..b50e4e90a 100644 --- a/hil/src/commands/ota/system.rs +++ b/hil/src/commands/ota/system.rs @@ -95,7 +95,7 @@ fn update_versions_json_content( let mut versions_data: Value = serde_json::from_str(json_content).wrap_err("Failed to parse versions.json")?; - let version_with_prefix = format!("to-{target_version}"); + let version_with_prefix = format!("{target_version}"); let releases = versions_data.get_mut("releases").ok_or_else(|| { color_eyre::eyre::eyre!("releases field not found in versions.json") })?; From b50d887cbda145edcd259049bed9abb5dba91643 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Fri, 19 Dec 2025 16:09:18 +0100 Subject: [PATCH 07/47] fix(ota-hil): Fix target version Clippy --- hil/src/commands/ota/system.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hil/src/commands/ota/system.rs b/hil/src/commands/ota/system.rs index b50e4e90a..88c151756 100644 --- a/hil/src/commands/ota/system.rs +++ b/hil/src/commands/ota/system.rs @@ -95,7 +95,6 @@ fn update_versions_json_content( let mut versions_data: Value = serde_json::from_str(json_content).wrap_err("Failed to parse versions.json")?; - let version_with_prefix = format!("{target_version}"); let releases = versions_data.get_mut("releases").ok_or_else(|| { color_eyre::eyre::eyre!("releases field not found in versions.json") })?; @@ -104,7 +103,7 @@ fn update_versions_json_content( color_eyre::eyre::eyre!("releases field is not an object in versions.json") })?; - releases_obj.insert(current_slot.to_string(), Value::String(version_with_prefix)); + releases_obj.insert(current_slot.to_string(), Value::String(target_version.to_string())); serde_json::to_string_pretty(&versions_data) .wrap_err("Failed to serialize updated versions.json") From e27f1cc862f88d870afd3265b8059f200d875e0b Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Fri, 19 Dec 2025 16:12:22 +0100 Subject: [PATCH 08/47] fix(ota-hil): Fix Format --- hil/src/commands/ota/system.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hil/src/commands/ota/system.rs b/hil/src/commands/ota/system.rs index 88c151756..a3fcc88d2 100644 --- a/hil/src/commands/ota/system.rs +++ b/hil/src/commands/ota/system.rs @@ -103,7 +103,10 @@ fn update_versions_json_content( color_eyre::eyre::eyre!("releases field is not an object in versions.json") })?; - releases_obj.insert(current_slot.to_string(), Value::String(target_version.to_string())); + releases_obj.insert( + current_slot.to_string(), + Value::String(target_version.to_string()), + ); serde_json::to_string_pretty(&versions_data) .wrap_err("Failed to serialize updated versions.json") From 19fb770c2ad6ea48b6aa64aea252a8d06f16af88 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 23 Dec 2025 14:14:02 +0100 Subject: [PATCH 09/47] feat(ota-hil): Set Non Recovery Mode after Reboot from the MCU --- hil/src/commands/mod.rs | 4 + hil/src/commands/ota/reboot.rs | 25 +++++ hil/src/commands/set_recovery_pin.rs | 154 +++++++++++++++++++++++++++ hil/src/main.rs | 4 + 4 files changed, 187 insertions(+) create mode 100644 hil/src/commands/set_recovery_pin.rs diff --git a/hil/src/commands/mod.rs b/hil/src/commands/mod.rs index b0d6066d2..1fe7bfd0a 100644 --- a/hil/src/commands/mod.rs +++ b/hil/src/commands/mod.rs @@ -9,6 +9,8 @@ mod mcu; mod nfsboot; mod ota; mod reboot; +mod reboot_with_recovery_state; +mod set_recovery_pin; pub use self::button_ctrl::ButtonCtrl; pub use self::cmd::Cmd; @@ -19,3 +21,5 @@ pub use self::mcu::Mcu; pub use self::nfsboot::Nfsboot; pub use self::ota::Ota; pub use self::reboot::Reboot; +pub use self::reboot_with_recovery_state::RebootWithRecoveryState; +pub use self::set_recovery_pin::SetRecoveryPin; diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index d4dd631f6..2993a65e6 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -1,3 +1,5 @@ +use crate::commands::SetRecoveryPin; +use crate::ftdi::OutputState; use crate::serial::{spawn_serial_reader_task, LOGIN_PROMPT_PATTERN}; use color_eyre::{ eyre::{bail, WrapErr}, @@ -18,8 +20,31 @@ impl Ota { pub(super) async fn handle_reboot(&self, log_suffix: &str) -> Result { info!("Waiting for reboot and device to come back online"); + // Set recovery pin HIGH for 5 seconds to prevent entering recovery mode + info!("Setting recovery pin HIGH to prevent recovery mode during reboot"); + let set_recovery = SetRecoveryPin { + state: OutputState::High, + serial_num: None, + desc: None, + hold: false, + duration: Some(5), + }; + + // Run recovery pin setting in background task + let recovery_task = tokio::spawn(async move { + set_recovery + .run() + .await + .wrap_err("failed to set recovery pin") + }); + self.capture_boot_logs(log_suffix).await?; + // Wait for recovery pin task to complete + recovery_task + .await + .wrap_err("recovery pin task panicked")??; + let start_time = Instant::now(); let timeout = Duration::from_secs(900); // 15 minutes let mut attempt_count = 0; diff --git a/hil/src/commands/set_recovery_pin.rs b/hil/src/commands/set_recovery_pin.rs new file mode 100644 index 000000000..059053993 --- /dev/null +++ b/hil/src/commands/set_recovery_pin.rs @@ -0,0 +1,154 @@ +use clap::Parser; +use color_eyre::{eyre::WrapErr as _, Result}; +use std::time::Duration; + +use crate::ftdi::{FtdiGpio, FtdiId, OutputState}; + +/// Set the recovery pin to a specific state without triggering the button +/// +/// This is useful for ensuring the recovery pin has a known state before +/// OS-initiated reboots, preventing the device from entering recovery mode +/// unintentionally. +#[derive(Debug, Parser)] +pub struct SetRecoveryPin { + /// Set the recovery pin state (high = normal boot, low = recovery mode) + #[arg(value_parser = parse_pin_state)] + pub state: OutputState, + /// The serial number of the FTDI device to use + #[arg(long, conflicts_with = "desc")] + pub serial_num: Option, + /// The description of the FTDI device to use + #[arg(long, conflicts_with = "serial_num")] + pub desc: Option, + /// Keep the FTDI connection open to hold the pin state indefinitely + /// (use Ctrl+C to release) + #[arg(long, conflicts_with = "duration")] + pub hold: bool, + /// Hold the pin state for a specific duration in seconds + /// (e.g., --duration 5 holds for 5 seconds) + #[arg(long)] + pub duration: Option, +} + +fn parse_pin_state(s: &str) -> Result { + match s.to_lowercase().as_str() { + "high" | "1" | "normal" => Ok(OutputState::High), + "low" | "0" | "recovery" => Ok(OutputState::Low), + _ => Err(color_eyre::eyre::eyre!( + "invalid state '{}', use 'high' or 'low'", + s + )), + } +} + +impl SetRecoveryPin { + pub async fn run(self) -> Result<()> { + let device = match (self.serial_num, self.desc) { + (Some(serial), None) => Some(FtdiId::SerialNumber(serial)), + (None, Some(desc)) => Some(FtdiId::Description(desc)), + (None, None) => None, + (Some(_), Some(_)) => unreachable!(), + }; + + let state_name = match self.state { + OutputState::High => "HIGH (normal boot mode)", + OutputState::Low => "LOW (recovery mode)", + }; + + if self.hold || self.duration.is_some() { + let hold_duration = self.duration.map(Duration::from_secs); + + if let Some(dur) = hold_duration { + tracing::info!( + "Setting recovery pin to {} and holding for {} seconds...", + state_name, + dur.as_secs() + ); + } else { + tracing::info!( + "Setting recovery pin to {} and holding indefinitely...", + state_name + ); + tracing::info!("Press Ctrl+C to release the pin"); + } + + // Create a channel to signal when to release + let (shutdown_tx, shutdown_rx) = std::sync::mpsc::channel::<()>(); + + // Spawn a blocking task that holds the FTDI connection + let state = self.state; + let hold_task = std::thread::spawn(move || -> Result<()> { + let mut ftdi = Self::make_ftdi(device)?; + ftdi.set_pin(crate::boot::RECOVERY_PIN, state)?; + + tracing::info!("✓ Pin state set and holding (FTDI connection open)"); + + // Block until shutdown signal or timeout + if let Some(duration) = hold_duration { + let _ = shutdown_rx.recv_timeout(duration); + } else { + let _ = shutdown_rx.recv(); + } + + tracing::info!("FTDI connection closing, pin will float"); + Ok(()) + }); + + // If holding indefinitely, wait for Ctrl+C + // If duration specified, wait for either Ctrl+C or timeout + if hold_duration.is_some() { + // Wait for thread to finish (will timeout after duration) + hold_task + .join() + .map_err(|_| color_eyre::eyre::eyre!("hold task panicked"))??; + tracing::info!("Duration elapsed, recovery pin released"); + } else { + // Wait for Ctrl+C + tokio::signal::ctrl_c() + .await + .wrap_err("failed to wait for ctrl+c")?; + + tracing::info!("Ctrl+C received, releasing recovery pin..."); + + // Signal shutdown (dropping sender will close channel) + drop(shutdown_tx); + + // Wait for the thread to finish + hold_task + .join() + .map_err(|_| color_eyre::eyre::eyre!("hold task panicked"))??; + } + } else { + tracing::info!("Setting recovery pin to {}", state_name); + + tokio::task::spawn_blocking(move || -> Result<()> { + let mut ftdi = Self::make_ftdi(device)?; + ftdi.set_pin(crate::boot::RECOVERY_PIN, self.state)?; + + // Note: Pin will float after FTDI is destroyed + tracing::warn!( + "Pin state set, but will float after command exits. \ + Use --hold to maintain state, or add a hardware pull-up resistor." + ); + + Ok(()) + }) + .await + .wrap_err("task panicked")??; + } + + Ok(()) + } + + fn make_ftdi(device: Option) -> Result { + let builder = FtdiGpio::builder(); + let builder = match &device { + Some(FtdiId::Description(desc)) => builder.with_description(desc), + Some(FtdiId::SerialNumber(serial)) => builder.with_serial_number(serial), + None => builder.with_default_device(), + }; + builder + .and_then(|b| b.configure()) + .wrap_err("failed to create ftdi device") + } +} diff --git a/hil/src/main.rs b/hil/src/main.rs index ade5e9fe7..1e8c520f0 100644 --- a/hil/src/main.rs +++ b/hil/src/main.rs @@ -35,6 +35,8 @@ enum Commands { Nfsboot(crate::commands::Nfsboot), Ota(crate::commands::Ota), Reboot(crate::commands::Reboot), + RebootWithRecoveryState(crate::commands::RebootWithRecoveryState), + SetRecoveryPin(crate::commands::SetRecoveryPin), } fn current_dir() -> Utf8PathBuf { @@ -75,6 +77,8 @@ async fn main() -> Result<()> { Commands::Nfsboot(c) => c.run().await, Commands::Ota(c) => c.run().await, Commands::Reboot(c) => c.run().await, + Commands::RebootWithRecoveryState(c) => c.run().await, + Commands::SetRecoveryPin(c) => c.run().await, } }; tokio::select! { From 7e24ebf3770c88bc5285c68062576c5cb0b7c3a3 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 23 Dec 2025 14:19:47 +0100 Subject: [PATCH 10/47] fix(ota-hil): Remove unused command --- hil/src/commands/mod.rs | 4 ---- hil/src/main.rs | 2 -- 2 files changed, 6 deletions(-) diff --git a/hil/src/commands/mod.rs b/hil/src/commands/mod.rs index 1fe7bfd0a..7ae1c2298 100644 --- a/hil/src/commands/mod.rs +++ b/hil/src/commands/mod.rs @@ -1,5 +1,3 @@ -//! The various top-level commands of the cli. - mod button_ctrl; mod cmd; mod fetch_persistent; @@ -9,7 +7,6 @@ mod mcu; mod nfsboot; mod ota; mod reboot; -mod reboot_with_recovery_state; mod set_recovery_pin; pub use self::button_ctrl::ButtonCtrl; @@ -21,5 +18,4 @@ pub use self::mcu::Mcu; pub use self::nfsboot::Nfsboot; pub use self::ota::Ota; pub use self::reboot::Reboot; -pub use self::reboot_with_recovery_state::RebootWithRecoveryState; pub use self::set_recovery_pin::SetRecoveryPin; diff --git a/hil/src/main.rs b/hil/src/main.rs index 1e8c520f0..3b77dae0e 100644 --- a/hil/src/main.rs +++ b/hil/src/main.rs @@ -35,7 +35,6 @@ enum Commands { Nfsboot(crate::commands::Nfsboot), Ota(crate::commands::Ota), Reboot(crate::commands::Reboot), - RebootWithRecoveryState(crate::commands::RebootWithRecoveryState), SetRecoveryPin(crate::commands::SetRecoveryPin), } @@ -77,7 +76,6 @@ async fn main() -> Result<()> { Commands::Nfsboot(c) => c.run().await, Commands::Ota(c) => c.run().await, Commands::Reboot(c) => c.run().await, - Commands::RebootWithRecoveryState(c) => c.run().await, Commands::SetRecoveryPin(c) => c.run().await, } }; From 7a7c7c5d848b4897be65e4fe3b7c12062b3ffe45 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 23 Dec 2025 14:46:19 +0100 Subject: [PATCH 11/47] fix(ota-hil): Set BTN Pin to High to not trigger button reboot --- hil/src/commands/set_recovery_pin.rs | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/hil/src/commands/set_recovery_pin.rs b/hil/src/commands/set_recovery_pin.rs index 059053993..3451f2157 100644 --- a/hil/src/commands/set_recovery_pin.rs +++ b/hil/src/commands/set_recovery_pin.rs @@ -2,6 +2,7 @@ use clap::Parser; use color_eyre::{eyre::WrapErr as _, Result}; use std::time::Duration; +use crate::boot::{BUTTON_PIN, RECOVERY_PIN}; use crate::ftdi::{FtdiGpio, FtdiId, OutputState}; /// Set the recovery pin to a specific state without triggering the button @@ -79,7 +80,13 @@ impl SetRecoveryPin { let state = self.state; let hold_task = std::thread::spawn(move || -> Result<()> { let mut ftdi = Self::make_ftdi(device)?; - ftdi.set_pin(crate::boot::RECOVERY_PIN, state)?; + + // IMPORTANT: Set button pin HIGH first to prevent power down + // When FTDI enters bitbang mode, all pins default to LOW + ftdi.set_pin(BUTTON_PIN, OutputState::High)?; + + // Now set recovery pin to desired state + ftdi.set_pin(RECOVERY_PIN, state)?; tracing::info!("✓ Pin state set and holding (FTDI connection open)"); @@ -123,12 +130,18 @@ impl SetRecoveryPin { tokio::task::spawn_blocking(move || -> Result<()> { let mut ftdi = Self::make_ftdi(device)?; - ftdi.set_pin(crate::boot::RECOVERY_PIN, self.state)?; + + // IMPORTANT: Set button pin HIGH first to prevent power down + // When FTDI enters bitbang mode, all pins default to LOW + ftdi.set_pin(BUTTON_PIN, OutputState::High)?; + + // Now set recovery pin to desired state + ftdi.set_pin(RECOVERY_PIN, self.state)?; // Note: Pin will float after FTDI is destroyed tracing::warn!( "Pin state set, but will float after command exits. \ - Use --hold to maintain state, or add a hardware pull-up resistor." + Use --hold or --duration to maintain state, or add a hardware pull-up resistor." ); Ok(()) From c17337ee6bd3a307b720026e79cc1b7a9240fc9e Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 23 Dec 2025 14:56:36 +0100 Subject: [PATCH 12/47] fix(set_recovery_pin): Simplify Code Logic --- hil/src/commands/ota/reboot.rs | 3 +- hil/src/commands/set_recovery_pin.rs | 119 ++++++--------------------- 2 files changed, 27 insertions(+), 95 deletions(-) diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index 2993a65e6..feb2e4a7d 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -26,8 +26,7 @@ impl Ota { state: OutputState::High, serial_num: None, desc: None, - hold: false, - duration: Some(5), + duration: 5, }; // Run recovery pin setting in background task diff --git a/hil/src/commands/set_recovery_pin.rs b/hil/src/commands/set_recovery_pin.rs index 3451f2157..8642d7718 100644 --- a/hil/src/commands/set_recovery_pin.rs +++ b/hil/src/commands/set_recovery_pin.rs @@ -21,14 +21,11 @@ pub struct SetRecoveryPin { /// The description of the FTDI device to use #[arg(long, conflicts_with = "serial_num")] pub desc: Option, - /// Keep the FTDI connection open to hold the pin state indefinitely - /// (use Ctrl+C to release) - #[arg(long, conflicts_with = "duration")] - pub hold: bool, /// Hold the pin state for a specific duration in seconds - /// (e.g., --duration 5 holds for 5 seconds) - #[arg(long)] - pub duration: Option, + /// (e.g., --duration 10 holds for 10 seconds, then exits) + /// Default is 5 seconds + #[arg(long, default_value = "5")] + pub duration: u64, } fn parse_pin_state(s: &str) -> Result { @@ -56,99 +53,35 @@ impl SetRecoveryPin { OutputState::Low => "LOW (recovery mode)", }; - if self.hold || self.duration.is_some() { - let hold_duration = self.duration.map(Duration::from_secs); + tracing::info!( + "Setting recovery pin to {} and holding for {} seconds...", + state_name, + self.duration + ); - if let Some(dur) = hold_duration { - tracing::info!( - "Setting recovery pin to {} and holding for {} seconds...", - state_name, - dur.as_secs() - ); - } else { - tracing::info!( - "Setting recovery pin to {} and holding indefinitely...", - state_name - ); - tracing::info!("Press Ctrl+C to release the pin"); - } + let hold_duration = Duration::from_secs(self.duration); + let state = self.state; - // Create a channel to signal when to release - let (shutdown_tx, shutdown_rx) = std::sync::mpsc::channel::<()>(); + tokio::task::spawn_blocking(move || -> Result<()> { + let mut ftdi = Self::make_ftdi(device)?; - // Spawn a blocking task that holds the FTDI connection - let state = self.state; - let hold_task = std::thread::spawn(move || -> Result<()> { - let mut ftdi = Self::make_ftdi(device)?; + // IMPORTANT: Set button pin HIGH first to prevent power down + // When FTDI enters bitbang mode, all pins default to LOW + ftdi.set_pin(BUTTON_PIN, OutputState::High)?; - // IMPORTANT: Set button pin HIGH first to prevent power down - // When FTDI enters bitbang mode, all pins default to LOW - ftdi.set_pin(BUTTON_PIN, OutputState::High)?; + // Now set recovery pin to desired state + ftdi.set_pin(RECOVERY_PIN, state)?; - // Now set recovery pin to desired state - ftdi.set_pin(RECOVERY_PIN, state)?; + tracing::info!("✓ Pin state set and holding (FTDI connection open)"); - tracing::info!("✓ Pin state set and holding (FTDI connection open)"); + // Hold for specified duration + std::thread::sleep(hold_duration); - // Block until shutdown signal or timeout - if let Some(duration) = hold_duration { - let _ = shutdown_rx.recv_timeout(duration); - } else { - let _ = shutdown_rx.recv(); - } - - tracing::info!("FTDI connection closing, pin will float"); - Ok(()) - }); - - // If holding indefinitely, wait for Ctrl+C - // If duration specified, wait for either Ctrl+C or timeout - if hold_duration.is_some() { - // Wait for thread to finish (will timeout after duration) - hold_task - .join() - .map_err(|_| color_eyre::eyre::eyre!("hold task panicked"))??; - tracing::info!("Duration elapsed, recovery pin released"); - } else { - // Wait for Ctrl+C - tokio::signal::ctrl_c() - .await - .wrap_err("failed to wait for ctrl+c")?; - - tracing::info!("Ctrl+C received, releasing recovery pin..."); - - // Signal shutdown (dropping sender will close channel) - drop(shutdown_tx); - - // Wait for the thread to finish - hold_task - .join() - .map_err(|_| color_eyre::eyre::eyre!("hold task panicked"))??; - } - } else { - tracing::info!("Setting recovery pin to {}", state_name); - - tokio::task::spawn_blocking(move || -> Result<()> { - let mut ftdi = Self::make_ftdi(device)?; - - // IMPORTANT: Set button pin HIGH first to prevent power down - // When FTDI enters bitbang mode, all pins default to LOW - ftdi.set_pin(BUTTON_PIN, OutputState::High)?; - - // Now set recovery pin to desired state - ftdi.set_pin(RECOVERY_PIN, self.state)?; - - // Note: Pin will float after FTDI is destroyed - tracing::warn!( - "Pin state set, but will float after command exits. \ - Use --hold or --duration to maintain state, or add a hardware pull-up resistor." - ); - - Ok(()) - }) - .await - .wrap_err("task panicked")??; - } + tracing::info!("Duration elapsed, FTDI connection closing"); + Ok(()) + }) + .await + .wrap_err("task panicked")??; Ok(()) } From 3687bf4fef509cb46df706627b88f82f9df033ae Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 23 Dec 2025 19:50:26 +0100 Subject: [PATCH 13/47] fix(ota-hil): Restore MCU Reboot Command --- hil/src/commands/ota/mod.rs | 6 +----- hil/src/commands/ota/system.rs | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/hil/src/commands/ota/mod.rs b/hil/src/commands/ota/mod.rs index bbd054fbe..deb1de3bd 100644 --- a/hil/src/commands/ota/mod.rs +++ b/hil/src/commands/ota/mod.rs @@ -1,7 +1,6 @@ use std::path::PathBuf; use std::time::Instant; -use crate::boot::reboot; use clap::Parser; use color_eyre::{ eyre::{bail, WrapErr}, @@ -108,10 +107,7 @@ impl Ota { })?; info!("Overlays wiped successfully, rebooting device"); - reboot(false, None) - .await - .wrap_err("failed to reboot after wiping overlays")?; - + system::reboot_orb(&session).await?; info!("Reboot command sent to Orb device"); let new_session = diff --git a/hil/src/commands/ota/system.rs b/hil/src/commands/ota/system.rs index a3fcc88d2..5c7807f3d 100644 --- a/hil/src/commands/ota/system.rs +++ b/hil/src/commands/ota/system.rs @@ -112,6 +112,21 @@ fn update_versions_json_content( .wrap_err("Failed to serialize updated versions.json") } +/// Reboot the Orb device using orb-mcu-util and shutdown +pub async fn reboot_orb(session: &SshWrapper) -> Result<()> { + session + .execute_command("TERM=dumb orb-mcu-util reboot orb") + .await + .wrap_err("Failed to execute orb-mcu-util reboot orb")?; + + session + .execute_command("TERM=dumb sudo shutdown now") + .await + .wrap_err("Failed to execute shutdown now")?; + + Ok(()) +} + /// Restart the update agent service and return the start timestamp pub async fn restart_update_agent(session: &SshWrapper) -> Result { // Get current timestamp (ON THE ORB!) before restarting service From 33497fcc057b09616557edbb92ab9ff936deb8e2 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Wed, 24 Dec 2025 01:00:02 +0100 Subject: [PATCH 14/47] fix: format --- hil/src/commands/ota/system.rs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/hil/src/commands/ota/system.rs b/hil/src/commands/ota/system.rs index 5c7807f3d..b21efecb9 100644 --- a/hil/src/commands/ota/system.rs +++ b/hil/src/commands/ota/system.rs @@ -5,6 +5,21 @@ use color_eyre::{ use orb_hil::SshWrapper; use serde_json::Value; +/// Reboot the Orb device using orb-mcu-util and shutdown +pub async fn reboot_orb(session: &SshWrapper) -> Result<()> { + session + .execute_command("TERM=dumb orb-mcu-util reboot orb") + .await + .wrap_err("Failed to execute orb-mcu-util reboot orb")?; + + session + .execute_command("TERM=dumb sudo shutdown now") + .await + .wrap_err("Failed to execute shutdown now")?; + + Ok(()) +} + /// Wipe overlays on the device (Diamond platform specific) pub async fn wipe_overlays(session: &SshWrapper) -> Result<()> { let result = session @@ -112,21 +127,6 @@ fn update_versions_json_content( .wrap_err("Failed to serialize updated versions.json") } -/// Reboot the Orb device using orb-mcu-util and shutdown -pub async fn reboot_orb(session: &SshWrapper) -> Result<()> { - session - .execute_command("TERM=dumb orb-mcu-util reboot orb") - .await - .wrap_err("Failed to execute orb-mcu-util reboot orb")?; - - session - .execute_command("TERM=dumb sudo shutdown now") - .await - .wrap_err("Failed to execute shutdown now")?; - - Ok(()) -} - /// Restart the update agent service and return the start timestamp pub async fn restart_update_agent(session: &SshWrapper) -> Result { // Get current timestamp (ON THE ORB!) before restarting service From 484fa1b5f2eb51806edce012745532803796c80a Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 30 Dec 2025 21:03:08 +0100 Subject: [PATCH 15/47] feat: Wipe Overlays for Both Diamond and Pearl --- hil/src/commands/ota/mod.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/hil/src/commands/ota/mod.rs b/hil/src/commands/ota/mod.rs index deb1de3bd..36308d89a 100644 --- a/hil/src/commands/ota/mod.rs +++ b/hil/src/commands/ota/mod.rs @@ -100,8 +100,8 @@ impl Ota { })?; let (session, wipe_overlays_status) = match self.platform { - Platform::Diamond => { - info!("Diamond platform detected - wiping overlays before update"); + Platform::Diamond | Platform::Pearl => { + info!("Wiping overlays before update"); system::wipe_overlays(&session).await.inspect_err(|e| { error!("Failed to wipe overlays: {}", e); })?; @@ -119,10 +119,6 @@ impl Ota { })?; (new_session, "succeeded".to_string()) } - Platform::Pearl => { - info!("Pearl platform detected - no special pre-update steps required"); - (session, "not_applicable".to_string()) - } }; let current_slot = From 0ef98f2ae26920b250fa7e470d618661bdaac455 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Wed, 31 Dec 2025 15:31:17 +0100 Subject: [PATCH 16/47] feat: Added Logs when Update-agent Fails --- hil/src/commands/ota/monitor.rs | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/hil/src/commands/ota/monitor.rs b/hil/src/commands/ota/monitor.rs index de885586c..5ee79fd09 100644 --- a/hil/src/commands/ota/monitor.rs +++ b/hil/src/commands/ota/monitor.rs @@ -39,7 +39,29 @@ pub async fn monitor_update_progress( while start_time.elapsed() < timeout { match check_service_failed(session).await { Ok(true) => { - bail!("Update agent service failed - update installation failed"); + // Service failed - fetch remaining logs to show the actual error + if let Ok((error_lines, _)) = + fetch_new_log_lines(session, cursor.as_deref(), start_timestamp).await + { + for line in &error_lines { + println!("{}", line.trim()); + } + all_lines.extend(error_lines); + } + + // Also fetch the service status for more details + let status_result = session + .execute_command( + "TERM=dumb sudo systemctl status worldcoin-update-agent.service --no-pager -l", + ) + .await; + + if let Ok(result) = status_result { + println!("\n=== Service Status ==="); + println!("{}", result.stdout); + } + + bail!("Update agent service failed - update installation failed. Check logs above for details."); } Ok(false) => { // Service is not failed, continue monitoring From 8781765f87f5c3874714f4dd5e212b2dd3e9206f Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Wed, 31 Dec 2025 17:59:46 +0100 Subject: [PATCH 17/47] fix: Restart Update agent after NTP sync --- hil/src/commands/ota/mod.rs | 7 ++++++ hil/src/commands/ota/system.rs | 43 ++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/hil/src/commands/ota/mod.rs b/hil/src/commands/ota/mod.rs index 36308d89a..abd2adfde 100644 --- a/hil/src/commands/ota/mod.rs +++ b/hil/src/commands/ota/mod.rs @@ -143,6 +143,13 @@ impl Ota { })?; info!("versions.json updated successfully"); + info!("Waiting for system time synchronization"); + system::wait_for_time_sync(&session).await.inspect_err(|e| { + println!("OTA_RESULT=FAILED"); + println!("OTA_ERROR=TIME_SYNC_FAILED: {e}"); + })?; + info!("System time synchronized"); + info!("Restarting worldcoin-update-agent.service"); let start_timestamp = system::restart_update_agent(&session) .await diff --git a/hil/src/commands/ota/system.rs b/hil/src/commands/ota/system.rs index b21efecb9..277b00dcd 100644 --- a/hil/src/commands/ota/system.rs +++ b/hil/src/commands/ota/system.rs @@ -127,6 +127,49 @@ fn update_versions_json_content( .wrap_err("Failed to serialize updated versions.json") } +/// Wait for system time to be synchronized via NTP/chrony +pub async fn wait_for_time_sync(session: &SshWrapper) -> Result<()> { + use std::time::Duration; + use tracing::info; + + const MAX_ATTEMPTS: u32 = 60; // 60 attempts = 2 minutes max wait + const SLEEP_DURATION: Duration = Duration::from_secs(2); + + info!("Waiting for system time synchronization..."); + let sync_start = std::time::Instant::now(); + + for attempt in 1..=MAX_ATTEMPTS { + let result = session + .execute_command("TERM=dumb timedatectl status") + .await + .wrap_err("Failed to check time synchronization status")?; + + if result.is_success() { + // Check if "System clock synchronized: yes" appears in output + if result.stdout.contains("System clock synchronized: yes") + || result.stdout.contains("synchronized: yes") + { + let sync_duration = sync_start.elapsed(); + info!("System time synchronized successfully after {:?}", sync_duration); + return Ok(()); + } + } + + if attempt < MAX_ATTEMPTS { + info!( + "Time not yet synchronized (attempt {}/{}), waiting...", + attempt, MAX_ATTEMPTS + ); + tokio::time::sleep(SLEEP_DURATION).await; + } + } + + bail!( + "Timeout waiting for system time synchronization after {} seconds", + MAX_ATTEMPTS * 2 + ); +} + /// Restart the update agent service and return the start timestamp pub async fn restart_update_agent(session: &SshWrapper) -> Result { // Get current timestamp (ON THE ORB!) before restarting service From 1b07e900f015865b9260d67c93da6f6f85f707c8 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Wed, 31 Dec 2025 19:14:33 +0100 Subject: [PATCH 18/47] fix: format --- hil/src/commands/ota/mod.rs | 10 ++++++---- hil/src/commands/ota/monitor.rs | 3 ++- hil/src/commands/ota/system.rs | 5 ++++- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/hil/src/commands/ota/mod.rs b/hil/src/commands/ota/mod.rs index abd2adfde..d7e4974a5 100644 --- a/hil/src/commands/ota/mod.rs +++ b/hil/src/commands/ota/mod.rs @@ -144,10 +144,12 @@ impl Ota { info!("versions.json updated successfully"); info!("Waiting for system time synchronization"); - system::wait_for_time_sync(&session).await.inspect_err(|e| { - println!("OTA_RESULT=FAILED"); - println!("OTA_ERROR=TIME_SYNC_FAILED: {e}"); - })?; + system::wait_for_time_sync(&session) + .await + .inspect_err(|e| { + println!("OTA_RESULT=FAILED"); + println!("OTA_ERROR=TIME_SYNC_FAILED: {e}"); + })?; info!("System time synchronized"); info!("Restarting worldcoin-update-agent.service"); diff --git a/hil/src/commands/ota/monitor.rs b/hil/src/commands/ota/monitor.rs index 5ee79fd09..86dff66a7 100644 --- a/hil/src/commands/ota/monitor.rs +++ b/hil/src/commands/ota/monitor.rs @@ -41,7 +41,8 @@ pub async fn monitor_update_progress( Ok(true) => { // Service failed - fetch remaining logs to show the actual error if let Ok((error_lines, _)) = - fetch_new_log_lines(session, cursor.as_deref(), start_timestamp).await + fetch_new_log_lines(session, cursor.as_deref(), start_timestamp) + .await { for line in &error_lines { println!("{}", line.trim()); diff --git a/hil/src/commands/ota/system.rs b/hil/src/commands/ota/system.rs index 277b00dcd..590f549df 100644 --- a/hil/src/commands/ota/system.rs +++ b/hil/src/commands/ota/system.rs @@ -150,7 +150,10 @@ pub async fn wait_for_time_sync(session: &SshWrapper) -> Result<()> { || result.stdout.contains("synchronized: yes") { let sync_duration = sync_start.elapsed(); - info!("System time synchronized successfully after {:?}", sync_duration); + info!( + "System time synchronized successfully after {:?}", + sync_duration + ); return Ok(()); } } From 0c15f915fc5bca856ad055634367fd0aa47b0909 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Sat, 3 Jan 2026 11:52:15 +0100 Subject: [PATCH 19/47] feat: Caputer Serial Logs during Boot --- hil/src/commands/ota/mod.rs | 41 +++++++++++++++++++++++++++++++++- hil/src/commands/ota/reboot.rs | 5 +++-- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/hil/src/commands/ota/mod.rs b/hil/src/commands/ota/mod.rs index d7e4974a5..1a69b740c 100644 --- a/hil/src/commands/ota/mod.rs +++ b/hil/src/commands/ota/mod.rs @@ -72,7 +72,7 @@ pub struct Ota { } #[derive(Debug, Clone, clap::ValueEnum)] -enum Platform { +pub(super) enum Platform { Diamond, Pearl, } @@ -271,10 +271,49 @@ impl Ota { println!("OTA_SLOT_FINAL={}", current_slot); println!("OTA_WIPE_OVERLAYS_FINAL={}", wipe_overlays_status); + // Print all result files for easy collection/upload + self.print_result_files(); + info!("OTA update completed successfully!"); Ok(()) } + fn print_result_files(&self) { + let platform_name = format!("{:?}", self.platform).to_lowercase(); + let log_dir = self + .log_file + .parent() + .unwrap_or_else(|| std::path::Path::new(".")); + + println!("\n========================================"); + println!("OTA TEST RESULT FILES"); + println!("========================================"); + + let result_files = vec![ + self.log_file.clone(), + log_dir.join(format!("boot_log_{}_wipe_overlays.txt", platform_name)), + log_dir.join(format!("boot_log_{}_update.txt", platform_name)), + ]; + + println!("The following files contain OTA test results:"); + for file in &result_files { + if file.exists() { + println!(" ✓ {}", file.display()); + } else { + println!(" ✗ {} (not found)", file.display()); + } + } + + println!("\nTo upload all files:"); + println!(" # List of files:"); + for file in &result_files { + if file.exists() { + println!(" {}", file.display()); + } + } + println!("========================================\n"); + } + async fn connect_ssh(&self) -> Result { info!( "Connecting to Orb device at {}:{}", diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index feb2e4a7d..d91d86986 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -105,13 +105,14 @@ impl Ota { #[instrument(skip_all)] async fn capture_boot_logs(&self, log_suffix: &str) -> Result<()> { - info!("Starting boot log capture for {})", log_suffix); + let platform_name = format!("{:?}", self.platform).to_lowercase(); + info!("Starting boot log capture for {} ({})", log_suffix, platform_name); let boot_log_path = self .log_file .parent() .unwrap_or_else(|| std::path::Path::new(".")) - .join(format!("boot_log_{log_suffix}.txt")); + .join(format!("boot_log_{platform_name}_{log_suffix}.txt")); let serial_path = match self.get_serial_path() { Ok(path) => path, From ceae6e6ca9403965a4b8a4f2837f864b7691fd21 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Sat, 3 Jan 2026 11:56:51 +0100 Subject: [PATCH 20/47] fix: format --- hil/src/commands/ota/reboot.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index d91d86986..2560e4ca1 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -106,7 +106,10 @@ impl Ota { #[instrument(skip_all)] async fn capture_boot_logs(&self, log_suffix: &str) -> Result<()> { let platform_name = format!("{:?}", self.platform).to_lowercase(); - info!("Starting boot log capture for {} ({})", log_suffix, platform_name); + info!( + "Starting boot log capture for {} ({})", + log_suffix, platform_name + ); let boot_log_path = self .log_file From 4f8b4b2af33856873db4c4fb26b6f77a57522d2f Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Sat, 3 Jan 2026 12:00:09 +0100 Subject: [PATCH 21/47] fix: remove unessary pub vis --- hil/src/commands/ota/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hil/src/commands/ota/mod.rs b/hil/src/commands/ota/mod.rs index 1a69b740c..084ef1fe8 100644 --- a/hil/src/commands/ota/mod.rs +++ b/hil/src/commands/ota/mod.rs @@ -72,7 +72,7 @@ pub struct Ota { } #[derive(Debug, Clone, clap::ValueEnum)] -pub(super) enum Platform { +enum Platform { Diamond, Pearl, } From 5cc74ab3f83c24860fcaf9132403ad43fc1546ba Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 6 Jan 2026 00:07:57 +0100 Subject: [PATCH 22/47] fix: set recovery mode before capture logs --- hil/src/commands/ota/reboot.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index 2560e4ca1..f6423d638 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -37,13 +37,13 @@ impl Ota { .wrap_err("failed to set recovery pin") }); - self.capture_boot_logs(log_suffix).await?; - - // Wait for recovery pin task to complete + // Wait for recovery pin task to complete before capturing boot logs recovery_task .await .wrap_err("recovery pin task panicked")??; + self.capture_boot_logs(log_suffix).await?; + let start_time = Instant::now(); let timeout = Duration::from_secs(900); // 15 minutes let mut attempt_count = 0; From 83a80fd3d664764ecef394565736b5b3111945a4 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 6 Jan 2026 17:43:45 +0100 Subject: [PATCH 23/47] fix: Permission issue of not capturing serial logs --- hil/src/commands/ota/reboot.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index f6423d638..90d80561e 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -37,7 +37,7 @@ impl Ota { .wrap_err("failed to set recovery pin") }); - // Wait for recovery pin task to complete before capturing boot logs + // Wait for recovery pin task to complete recovery_task .await .wrap_err("recovery pin task panicked")??; From c8df636e8070dab3e01d170d17c726204714338b Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 6 Jan 2026 22:07:42 +0100 Subject: [PATCH 24/47] fix: reboot command not setting every time orb to recovery mode --- hil/src/boot.rs | 9 +++-- hil/src/commands/ota/reboot.rs | 63 +++++++++++++++++++++++----------- 2 files changed, 50 insertions(+), 22 deletions(-) diff --git a/hil/src/boot.rs b/hil/src/boot.rs index 9116fd933..2e6a4756b 100644 --- a/hil/src/boot.rs +++ b/hil/src/boot.rs @@ -37,7 +37,12 @@ pub async fn reboot(recovery: bool, device: Option<&FtdiId>) -> Result<()> { info!("Turning off"); let device_clone = device.cloned(); - let ftdi = tokio::task::spawn_blocking(|| -> Result<_, color_eyre::Report> { + let recovery_state = if recovery { + OutputState::Low + } else { + OutputState::High + }; + let ftdi = tokio::task::spawn_blocking(move || -> Result<_, color_eyre::Report> { for d in FtdiGpio::list_devices().wrap_err("failed to list ftdi devices")? { debug!( "ftdi device: desc:{}, serial:{}, vid:{}, pid:{}", @@ -46,7 +51,7 @@ pub async fn reboot(recovery: bool, device: Option<&FtdiId>) -> Result<()> { } let mut ftdi = make_ftdi(device_clone)?; ftdi.set_pin(BUTTON_PIN, OutputState::Low)?; - ftdi.set_pin(RECOVERY_PIN, OutputState::High)?; + ftdi.set_pin(RECOVERY_PIN, recovery_state)?; Ok(ftdi) }) .await diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index 90d80561e..18add8a12 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -151,7 +151,28 @@ impl Ota { spawn_serial_reader_task(serial_reader, serial_output_tx); let boot_log_fut = async { - let mut boot_log_content = Vec::new(); + use tokio::io::AsyncWriteExt; + + // Open file for writing incrementally + let mut log_file = match tokio::fs::OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + .open(&boot_log_path) + .await + { + Ok(f) => Some(f), + Err(e) => { + warn!( + "Failed to open boot log file {}: {}. Will continue without writing to disk.", + boot_log_path.display(), + e + ); + None + } + }; + + let mut total_bytes = 0; let mut serial_stream = BroadcastStream::new(serial_output_rx); // 3-minute timeout for flaky serial connections let timeout = Duration::from_secs(180); @@ -164,7 +185,17 @@ impl Ota { .await { Ok(Some(Ok(bytes))) => { - boot_log_content.extend_from_slice(&bytes); + // Write to file immediately as data arrives + if let Some(ref mut file) = log_file { + if let Err(e) = file.write_all(&bytes).await { + warn!("Failed to write to boot log file: {}. Continuing capture in memory only.", e); + log_file = None; + } else { + // Flush to ensure data is written to disk immediately + let _ = file.flush().await; + total_bytes += bytes.len(); + } + } if let Ok(text) = String::from_utf8(bytes.to_vec()) && text.contains(LOGIN_PROMPT_PATTERN) @@ -197,24 +228,16 @@ impl Ota { ); } - if !boot_log_content.is_empty() { - match tokio::fs::write(&boot_log_path, &boot_log_content).await { - Ok(_) => { - info!( - "Boot log saved to: {} ({} bytes)", - boot_log_path.display(), - boot_log_content.len() - ); - } - Err(e) => { - warn!( - "Failed to write boot log to {}: {}. Continuing anyway.", - boot_log_path.display(), - e - ); - } - } - } else { + // Final flush and close + if let Some(mut file) = log_file { + let _ = file.flush().await; + let _ = file.shutdown().await; + info!( + "Boot log saved to: {} ({} bytes)", + boot_log_path.display(), + total_bytes + ); + } else if total_bytes == 0 { warn!("No boot log content captured from serial"); } From 32b2f1fb67bf01430cebf09cd4a414a567c9e037 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 6 Jan 2026 22:13:39 +0100 Subject: [PATCH 25/47] fix: added NTP sync before reboot, for attestation token -> key retrieval --- hil/src/commands/ota/mod.rs | 10 +++++++++- hil/src/commands/ota/reboot.rs | 16 +++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/hil/src/commands/ota/mod.rs b/hil/src/commands/ota/mod.rs index 084ef1fe8..83bc4f7c9 100644 --- a/hil/src/commands/ota/mod.rs +++ b/hil/src/commands/ota/mod.rs @@ -105,7 +105,15 @@ impl Ota { system::wipe_overlays(&session).await.inspect_err(|e| { error!("Failed to wipe overlays: {}", e); })?; - info!("Overlays wiped successfully, rebooting device"); + info!("Overlays wiped successfully"); + + info!("Waiting for NTP time synchronization before reboot"); + system::wait_for_time_sync(&session) + .await + .inspect_err(|e| { + error!("Failed to sync time before reboot: {}", e); + })?; + info!("NTP time synchronized, rebooting device"); system::reboot_orb(&session).await?; info!("Reboot command sent to Orb device"); diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index 18add8a12..94c55470f 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -63,7 +63,21 @@ impl Ota { Ok(session) => match session.test_connection().await { Ok(_) => { info!("Device is back online and responsive after reboot (attempt {})", attempt_count); - return Ok(session); + + info!("Waiting for NTP time synchronization after reboot"); + match super::system::wait_for_time_sync(&session).await { + Ok(_) => { + info!("NTP time synchronized successfully"); + return Ok(session); + } + Err(e) => { + debug!( + "Time sync failed on attempt {}: {}", + attempt_count, e + ); + last_error = Some(e); + } + } } Err(e) => { debug!( From 16746e9a27493508ab146aaaf77c100826540045 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 6 Jan 2026 22:44:21 +0100 Subject: [PATCH 26/47] fix: set recovery mode HIGH for longer time --- hil/src/boot.rs | 2 +- hil/src/commands/ota/mod.rs | 8 ++++++++ hil/src/commands/ota/reboot.rs | 15 +++++++++++++-- hil/src/commands/ota/system.rs | 11 +++++++---- 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/hil/src/boot.rs b/hil/src/boot.rs index 2e6a4756b..00b11efa1 100644 --- a/hil/src/boot.rs +++ b/hil/src/boot.rs @@ -79,7 +79,7 @@ pub async fn reboot(recovery: bool, device: Option<&FtdiId>) -> Result<()> { }) .await .wrap_err("task panicked")??; - tokio::time::sleep(Duration::from_secs(4)).await; + tokio::time::sleep(Duration::from_secs(10)).await; tokio::task::spawn_blocking(move || ftdi.destroy()) .await diff --git a/hil/src/commands/ota/mod.rs b/hil/src/commands/ota/mod.rs index 83bc4f7c9..8fe856ba9 100644 --- a/hil/src/commands/ota/mod.rs +++ b/hil/src/commands/ota/mod.rs @@ -94,6 +94,14 @@ impl Ota { let _start_time = Instant::now(); info!("Starting OTA update to version: {}", self.target_version); + // Create log directory if it doesn't exist + if let Some(log_dir) = self.log_file.parent() { + tokio::fs::create_dir_all(log_dir) + .await + .wrap_err_with(|| format!("Failed to create log directory: {}", log_dir.display()))?; + info!("Log directory created/verified: {}", log_dir.display()); + } + let session = self.connect_ssh().await.inspect_err(|e| { println!("OTA_RESULT=FAILED"); println!("OTA_ERROR=SSH_CONNECTION_FAILED: {e}"); diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index 94c55470f..0d01b54d1 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -20,13 +20,13 @@ impl Ota { pub(super) async fn handle_reboot(&self, log_suffix: &str) -> Result { info!("Waiting for reboot and device to come back online"); - // Set recovery pin HIGH for 5 seconds to prevent entering recovery mode + // Set recovery pin HIGH for 10 seconds to prevent entering recovery mode info!("Setting recovery pin HIGH to prevent recovery mode during reboot"); let set_recovery = SetRecoveryPin { state: OutputState::High, serial_num: None, desc: None, - duration: 5, + duration: 10, }; // Run recovery pin setting in background task @@ -131,6 +131,17 @@ impl Ota { .unwrap_or_else(|| std::path::Path::new(".")) .join(format!("boot_log_{platform_name}_{log_suffix}.txt")); + // Create parent directory if it doesn't exist + if let Some(parent) = boot_log_path.parent() { + if let Err(e) = tokio::fs::create_dir_all(parent).await { + warn!( + "Failed to create directory {}: {}. Boot log capture may fail.", + parent.display(), + e + ); + } + } + let serial_path = match self.get_serial_path() { Ok(path) => path, Err(e) => { diff --git a/hil/src/commands/ota/system.rs b/hil/src/commands/ota/system.rs index 590f549df..441f8b7cc 100644 --- a/hil/src/commands/ota/system.rs +++ b/hil/src/commands/ota/system.rs @@ -139,15 +139,18 @@ pub async fn wait_for_time_sync(session: &SshWrapper) -> Result<()> { let sync_start = std::time::Instant::now(); for attempt in 1..=MAX_ATTEMPTS { + // Try chronyc tracking first (most common on Orb) let result = session - .execute_command("TERM=dumb timedatectl status") + .execute_command("TERM=dumb chronyc tracking") .await .wrap_err("Failed to check time synchronization status")?; if result.is_success() { - // Check if "System clock synchronized: yes" appears in output - if result.stdout.contains("System clock synchronized: yes") - || result.stdout.contains("synchronized: yes") + // Check if chrony is synchronized + // Leap status should be "Normal" when synchronized + // Reference ID should not be "0.0.0.0" (unsynchronized) + if result.stdout.contains("Leap status : Normal") + && !result.stdout.contains("Reference ID : 0.0.0.0") { let sync_duration = sync_start.elapsed(); info!( From 0299edaaf1da115966d49f4a1645599ea238adca Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 6 Jan 2026 22:46:11 +0100 Subject: [PATCH 27/47] fix: format --- hil/src/commands/ota/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hil/src/commands/ota/mod.rs b/hil/src/commands/ota/mod.rs index 8fe856ba9..7a71054d2 100644 --- a/hil/src/commands/ota/mod.rs +++ b/hil/src/commands/ota/mod.rs @@ -96,9 +96,9 @@ impl Ota { // Create log directory if it doesn't exist if let Some(log_dir) = self.log_file.parent() { - tokio::fs::create_dir_all(log_dir) - .await - .wrap_err_with(|| format!("Failed to create log directory: {}", log_dir.display()))?; + tokio::fs::create_dir_all(log_dir).await.wrap_err_with(|| { + format!("Failed to create log directory: {}", log_dir.display()) + })?; info!("Log directory created/verified: {}", log_dir.display()); } From d972cb226188cb74d73976210a9258f6adec5b3f Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 6 Jan 2026 22:47:30 +0100 Subject: [PATCH 28/47] fix: clippy --- hil/src/commands/ota/reboot.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index 0d01b54d1..4266b418b 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -132,14 +132,14 @@ impl Ota { .join(format!("boot_log_{platform_name}_{log_suffix}.txt")); // Create parent directory if it doesn't exist - if let Some(parent) = boot_log_path.parent() { - if let Err(e) = tokio::fs::create_dir_all(parent).await { - warn!( - "Failed to create directory {}: {}. Boot log capture may fail.", - parent.display(), - e - ); - } + if let Some(parent) = boot_log_path.parent() + && let Err(e) = tokio::fs::create_dir_all(parent).await + { + warn!( + "Failed to create directory {}: {}. Boot log capture may fail.", + parent.display(), + e + ); } let serial_path = match self.get_serial_path() { From b96d50fcf559aa6ed1fa4d74947f8c2a1e37f04c Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Wed, 7 Jan 2026 11:37:05 +0100 Subject: [PATCH 29/47] fix: use either: chronyc or timedatectl (temp change for testing before chrony branches --- hil/src/commands/ota/system.rs | 83 ++++++++++++++++++++++++++-------- 1 file changed, 64 insertions(+), 19 deletions(-) diff --git a/hil/src/commands/ota/system.rs b/hil/src/commands/ota/system.rs index 441f8b7cc..3d91f7bfb 100644 --- a/hil/src/commands/ota/system.rs +++ b/hil/src/commands/ota/system.rs @@ -138,27 +138,72 @@ pub async fn wait_for_time_sync(session: &SshWrapper) -> Result<()> { info!("Waiting for system time synchronization..."); let sync_start = std::time::Instant::now(); + // Detect which time sync tool is available + let use_timedatectl = session + .execute_command("TERM=dumb command -v timedatectl") + .await + .map(|r| r.is_success()) + .unwrap_or(false); + + let use_chronyc = session + .execute_command("TERM=dumb command -v chronyc") + .await + .map(|r| r.is_success()) + .unwrap_or(false); + + if !use_timedatectl && !use_chronyc { + bail!("Neither timedatectl nor chronyc found on the system"); + } + + info!( + "Using {} for time sync check", + if use_timedatectl { + "timedatectl" + } else { + "chronyc" + } + ); + for attempt in 1..=MAX_ATTEMPTS { - // Try chronyc tracking first (most common on Orb) - let result = session - .execute_command("TERM=dumb chronyc tracking") - .await - .wrap_err("Failed to check time synchronization status")?; - - if result.is_success() { - // Check if chrony is synchronized - // Leap status should be "Normal" when synchronized - // Reference ID should not be "0.0.0.0" (unsynchronized) - if result.stdout.contains("Leap status : Normal") - && !result.stdout.contains("Reference ID : 0.0.0.0") - { - let sync_duration = sync_start.elapsed(); - info!( - "System time synchronized successfully after {:?}", - sync_duration - ); - return Ok(()); + let is_synced = if use_timedatectl { + // Try timedatectl status + let result = session + .execute_command("TERM=dumb timedatectl status") + .await + .wrap_err("Failed to check time synchronization status")?; + + if result.is_success() { + // Check if "System clock synchronized: yes" appears in output + result.stdout.contains("System clock synchronized: yes") + || result.stdout.contains("synchronized: yes") + } else { + false + } + } else { + // Try chronyc tracking + let result = session + .execute_command("TERM=dumb chronyc tracking") + .await + .wrap_err("Failed to check time synchronization status")?; + + if result.is_success() { + // Check if chrony is synchronized + // Leap status should be "Normal" when synchronized + // Reference ID should not be "0.0.0.0" (unsynchronized) + result.stdout.contains("Leap status : Normal") + && !result.stdout.contains("Reference ID : 0.0.0.0") + } else { + false } + }; + + if is_synced { + let sync_duration = sync_start.elapsed(); + info!( + "System time synchronized successfully after {:?}", + sync_duration + ); + return Ok(()); } if attempt < MAX_ATTEMPTS { From 6c1ae63ad5c23c1471b2fcf513a6f2cd3b91949d Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Wed, 7 Jan 2026 16:36:16 +0100 Subject: [PATCH 30/47] fix: add delay between recovery mode triggerring and log capturing --- hil/src/commands/ota/reboot.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index 4266b418b..67a226381 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -42,6 +42,11 @@ impl Ota { .await .wrap_err("recovery pin task panicked")??; + // Brief delay to allow USB device to be re-enumerated and udev rules to apply + // after FTDI GPIO is released. The FTDI device detaches/reattaches kernel + // drivers which causes /dev/ttyUSB* to be recreated. + tokio::time::sleep(Duration::from_millis(200)).await; + self.capture_boot_logs(log_suffix).await?; let start_time = Instant::now(); From 8be233d4b352c8481bc0f317c41e29ac9b6d2bb0 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Wed, 7 Jan 2026 23:14:42 +0100 Subject: [PATCH 31/47] fix: on ota-reboot wait until ssh get disconnected to press recovery pin --- hil/src/commands/ota/reboot.rs | 65 ++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index 67a226381..477614402 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -20,13 +20,30 @@ impl Ota { pub(super) async fn handle_reboot(&self, log_suffix: &str) -> Result { info!("Waiting for reboot and device to come back online"); - // Set recovery pin HIGH for 10 seconds to prevent entering recovery mode - info!("Setting recovery pin HIGH to prevent recovery mode during reboot"); + // For update-initiated reboots, wait for SSH to become unreachable before + // holding the recovery pin. This ensures we time it with the actual shutdown, + // not based on assumptions about timing. + let hold_duration = if log_suffix == "update" { + info!("Monitoring SSH connection to detect when shutdown actually begins"); + self.wait_for_ssh_disconnection(Duration::from_secs(30)) + .await?; + info!("SSH disconnected - system is shutting down, holding recovery pin"); + 20 // Hold for 20s to cover systemd shutdown + power cycle + early boot + } else { + // For manual reboots (wipe_overlays), we control the timing directly + 10 + }; + + // Set recovery pin HIGH to prevent entering recovery mode + info!( + "Setting recovery pin HIGH to prevent recovery mode during reboot (hold duration: {}s)", + hold_duration + ); let set_recovery = SetRecoveryPin { state: OutputState::High, serial_num: None, desc: None, - duration: 10, + duration: hold_duration, }; // Run recovery pin setting in background task @@ -288,4 +305,46 @@ impl Ota { Ok(()) } + + /// Wait for SSH connection to become unreachable, indicating shutdown has started + #[instrument(skip_all)] + async fn wait_for_ssh_disconnection(&self, timeout: Duration) -> Result<()> { + let start = Instant::now(); + let mut attempt = 0; + + loop { + if start.elapsed() > timeout { + bail!("SSH did not disconnect within {:?}", timeout); + } + + attempt += 1; + + // Try to establish connection with a lightweight command + match self.connect_ssh().await { + Ok(session) => match session.execute_command("echo").await { + Ok(_) => { + // SSH still alive, system hasn't started shutting down yet + debug!( + "SSH still responsive (attempt {}), waiting for shutdown...", + attempt + ); + tokio::time::sleep(Duration::from_millis(500)).await; + } + Err(_) => { + // Command failed but connection succeeded - might be shutting down + info!("SSH connection degraded, shutdown likely in progress"); + return Ok(()); + } + }, + Err(_) => { + // Can't connect - shutdown has started + info!( + "SSH connection lost after {} attempts, shutdown confirmed", + attempt + ); + return Ok(()); + } + } + } + } } From c377b95b08cae438653ec74e80536b36403ca8e1 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Fri, 9 Jan 2026 16:56:47 +0100 Subject: [PATCH 32/47] fix: NTP check hanging --- hil/src/commands/ota/system.rs | 39 +++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/hil/src/commands/ota/system.rs b/hil/src/commands/ota/system.rs index 3d91f7bfb..3c1951595 100644 --- a/hil/src/commands/ota/system.rs +++ b/hil/src/commands/ota/system.rs @@ -165,12 +165,23 @@ pub async fn wait_for_time_sync(session: &SshWrapper) -> Result<()> { ); for attempt in 1..=MAX_ATTEMPTS { + // Timeout for individual command execution (10 seconds is generous for timedatectl/chronyc) + const COMMAND_TIMEOUT: Duration = Duration::from_secs(10); + let is_synced = if use_timedatectl { - // Try timedatectl status - let result = session - .execute_command("TERM=dumb timedatectl status") - .await - .wrap_err("Failed to check time synchronization status")?; + // Try timedatectl with timeout + let result = tokio::time::timeout( + COMMAND_TIMEOUT, + session.execute_command("TERM=dumb timedatectl"), + ) + .await + .map_err(|_| { + color_eyre::eyre::eyre!( + "timedatectl command timed out after {:?}", + COMMAND_TIMEOUT + ) + })? + .wrap_err("Failed to check time synchronization status")?; if result.is_success() { // Check if "System clock synchronized: yes" appears in output @@ -180,11 +191,19 @@ pub async fn wait_for_time_sync(session: &SshWrapper) -> Result<()> { false } } else { - // Try chronyc tracking - let result = session - .execute_command("TERM=dumb chronyc tracking") - .await - .wrap_err("Failed to check time synchronization status")?; + // Try chronyc tracking with timeout + let result = tokio::time::timeout( + COMMAND_TIMEOUT, + session.execute_command("TERM=dumb chronyc tracking"), + ) + .await + .map_err(|_| { + color_eyre::eyre::eyre!( + "chronyc command timed out after {:?}", + COMMAND_TIMEOUT + ) + })? + .wrap_err("Failed to check time synchronization status")?; if result.is_success() { // Check if chrony is synchronized From 8e02331742a822a43bebfe93a827f82798deb775 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Fri, 9 Jan 2026 18:12:46 +0100 Subject: [PATCH 33/47] feat: wait for SSH disconnection before holding the recovery pin --- hil/src/commands/ota/reboot.rs | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index 477614402..30c8cd2c6 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -20,19 +20,17 @@ impl Ota { pub(super) async fn handle_reboot(&self, log_suffix: &str) -> Result { info!("Waiting for reboot and device to come back online"); - // For update-initiated reboots, wait for SSH to become unreachable before - // holding the recovery pin. This ensures we time it with the actual shutdown, - // not based on assumptions about timing. - let hold_duration = if log_suffix == "update" { - info!("Monitoring SSH connection to detect when shutdown actually begins"); - self.wait_for_ssh_disconnection(Duration::from_secs(30)) - .await?; - info!("SSH disconnected - system is shutting down, holding recovery pin"); - 20 // Hold for 20s to cover systemd shutdown + power cycle + early boot - } else { - // For manual reboots (wipe_overlays), we control the timing directly - 10 - }; + // Always wait for SSH to become unreachable before holding the recovery pin. + // This ensures we time it with the actual shutdown, not based on assumptions. + // - For manual reboots (wipe_overlays): reboot command was just sent, wait for SSH to die + // - For update-initiated reboots: update-agent will reboot, wait for SSH to die + info!("Monitoring SSH connection to detect when shutdown actually begins"); + self.wait_for_ssh_disconnection(Duration::from_secs(30)) + .await?; + info!("SSH disconnected - system is shutting down, holding recovery pin"); + + // Hold for 20s to cover systemd shutdown + power cycle + early boot + let hold_duration = 20; // Set recovery pin HIGH to prevent entering recovery mode info!( From 55f290bf9c69a3f6bd453b159dd5504c1515d81c Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Fri, 9 Jan 2026 19:14:15 +0100 Subject: [PATCH 34/47] fix: removing waiting NTP before reboot --- hil/src/commands/ota/mod.rs | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/hil/src/commands/ota/mod.rs b/hil/src/commands/ota/mod.rs index 7a71054d2..e8ca1af14 100644 --- a/hil/src/commands/ota/mod.rs +++ b/hil/src/commands/ota/mod.rs @@ -69,6 +69,11 @@ pub struct Ota { /// Serial port ID for boot log capture (alternative to --serial-path) #[arg(long, group = "serial")] serial_id: Option, + + /// Skip NTP time synchronization check before the first reboot (after wipe_overlays). + /// Time sync will still be checked after reboot and before starting the update. + #[arg(long, default_value = "false")] + skip_time_sync_before_reboot: bool, } #[derive(Debug, Clone, clap::ValueEnum)] @@ -115,13 +120,17 @@ impl Ota { })?; info!("Overlays wiped successfully"); - info!("Waiting for NTP time synchronization before reboot"); - system::wait_for_time_sync(&session) - .await - .inspect_err(|e| { - error!("Failed to sync time before reboot: {}", e); - })?; - info!("NTP time synchronized, rebooting device"); + if !self.skip_time_sync_before_reboot { + info!("Waiting for NTP time synchronization before reboot"); + system::wait_for_time_sync(&session) + .await + .inspect_err(|e| { + error!("Failed to sync time before reboot: {}", e); + })?; + info!("NTP time synchronized, rebooting device"); + } else { + info!("Skipping NTP time synchronization before reboot (--skip-time-sync-before-reboot flag set)"); + } system::reboot_orb(&session).await?; info!("Reboot command sent to Orb device"); From 3a9c42ee360e94c1903608a846aaf54edb03fe1a Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Sun, 11 Jan 2026 17:35:18 +0100 Subject: [PATCH 35/47] fix: capturing serial async & removing timeout --- hil/src/commands/ota/reboot.rs | 73 ++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 25 deletions(-) diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index 30c8cd2c6..4055e6564 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -21,18 +21,13 @@ impl Ota { info!("Waiting for reboot and device to come back online"); // Always wait for SSH to become unreachable before holding the recovery pin. - // This ensures we time it with the actual shutdown, not based on assumptions. - // - For manual reboots (wipe_overlays): reboot command was just sent, wait for SSH to die - // - For update-initiated reboots: update-agent will reboot, wait for SSH to die info!("Monitoring SSH connection to detect when shutdown actually begins"); self.wait_for_ssh_disconnection(Duration::from_secs(30)) .await?; info!("SSH disconnected - system is shutting down, holding recovery pin"); - // Hold for 20s to cover systemd shutdown + power cycle + early boot let hold_duration = 20; - // Set recovery pin HIGH to prevent entering recovery mode info!( "Setting recovery pin HIGH to prevent recovery mode during reboot (hold duration: {}s)", hold_duration @@ -44,7 +39,6 @@ impl Ota { duration: hold_duration, }; - // Run recovery pin setting in background task let recovery_task = tokio::spawn(async move { set_recovery .run() @@ -52,7 +46,6 @@ impl Ota { .wrap_err("failed to set recovery pin") }); - // Wait for recovery pin task to complete recovery_task .await .wrap_err("recovery pin task panicked")??; @@ -62,7 +55,15 @@ impl Ota { // drivers which causes /dev/ttyUSB* to be recreated. tokio::time::sleep(Duration::from_millis(200)).await; - self.capture_boot_logs(log_suffix).await?; + // Spawn boot log capture as a background task so it runs concurrently + // with SSH reconnection attempts. Extract needed values upfront. + let platform = self.platform.clone(); + let log_file = self.log_file.clone(); + let serial_path = self.get_serial_path().ok(); + let boot_log_suffix = log_suffix.to_string(); + let boot_log_task = tokio::spawn(async move { + Self::capture_boot_logs_static(platform, log_file, serial_path, &boot_log_suffix).await + }); let start_time = Instant::now(); let timeout = Duration::from_secs(900); // 15 minutes @@ -88,6 +89,20 @@ impl Ota { match super::system::wait_for_time_sync(&session).await { Ok(_) => { info!("NTP time synchronized successfully"); + + // Wait for boot log capture to finish + match boot_log_task.await { + Ok(Ok(())) => { + info!("Boot log capture completed successfully"); + } + Ok(Err(e)) => { + warn!("Boot log capture failed: {}", e); + } + Err(e) => { + warn!("Boot log capture task panicked: {}", e); + } + } + return Ok(session); } Err(e) => { @@ -137,16 +152,21 @@ impl Ota { ); } + /// Captures boot logs from serial port in the background #[instrument(skip_all)] - async fn capture_boot_logs(&self, log_suffix: &str) -> Result<()> { - let platform_name = format!("{:?}", self.platform).to_lowercase(); + async fn capture_boot_logs_static( + platform: super::Platform, + log_file: std::path::PathBuf, + serial_path: Option, + log_suffix: &str, + ) -> Result<()> { + let platform_name = format!("{:?}", platform).to_lowercase(); info!( "Starting boot log capture for {} ({})", log_suffix, platform_name ); - let boot_log_path = self - .log_file + let boot_log_path = log_file .parent() .unwrap_or_else(|| std::path::Path::new(".")) .join(format!("boot_log_{platform_name}_{log_suffix}.txt")); @@ -162,13 +182,10 @@ impl Ota { ); } - let serial_path = match self.get_serial_path() { - Ok(path) => path, - Err(e) => { - warn!( - "Failed to get serial path: {}. Skipping boot log capture.", - e - ); + let serial_path = match serial_path { + Some(path) => path, + None => { + warn!("No serial path provided. Skipping boot log capture."); return Ok(()); } }; @@ -219,13 +236,12 @@ impl Ota { let mut total_bytes = 0; let mut serial_stream = BroadcastStream::new(serial_output_rx); - // 3-minute timeout for flaky serial connections - let timeout = Duration::from_secs(180); let start_time = Instant::now(); let mut found_login_prompt = false; - while start_time.elapsed() < timeout { + // Wait indefinitely until login prompt is detected + loop { match tokio::time::timeout(Duration::from_secs(1), serial_stream.next()) .await { @@ -242,6 +258,7 @@ impl Ota { } } + // Stop capturing when login prompt is detected if let Ok(text) = String::from_utf8(bytes.to_vec()) && text.contains(LOGIN_PROMPT_PATTERN) { @@ -261,15 +278,21 @@ impl Ota { break; } Err(_) => { + // Timeout on reading - continue waiting continue; } } } - if start_time.elapsed() >= timeout && !found_login_prompt { + if found_login_prompt { + info!( + "Boot log capture completed successfully after {:?}", + start_time.elapsed() + ); + } else { warn!( - "Boot log capture timed out after {:?} without finding login prompt. Will proceed with SSH reconnection anyway.", - timeout + "Boot log capture ended without detecting login prompt after {:?}", + start_time.elapsed() ); } From 22e027cf5c287b6ac71054f4b18ea906bc33ef26 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Mon, 12 Jan 2026 13:59:34 +0100 Subject: [PATCH 36/47] fix: clean code --- hil/src/boot.rs | 7 +++++-- hil/src/commands/ota/mod.rs | 1 - hil/src/commands/ota/reboot.rs | 13 +++++-------- hil/src/commands/ota/system.rs | 5 ++--- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/hil/src/boot.rs b/hil/src/boot.rs index 00b11efa1..3c2d5a354 100644 --- a/hil/src/boot.rs +++ b/hil/src/boot.rs @@ -23,6 +23,9 @@ pub async fn is_recovery_mode_detected() -> Result { /// If `device` is `None`, will get the first available device. #[tracing::instrument] pub async fn reboot(recovery: bool, device: Option<&FtdiId>) -> Result<()> { + const DEFAULT_HOLDING_DELAY:u64 = 10; + const INBETWEEN_DELAY:u64 = 4; + fn make_ftdi(device: Option) -> Result { let builder = FtdiGpio::builder(); let builder = match &device { @@ -56,13 +59,13 @@ pub async fn reboot(recovery: bool, device: Option<&FtdiId>) -> Result<()> { }) .await .wrap_err("task panicked")??; - tokio::time::sleep(Duration::from_secs(10)).await; + tokio::time::sleep(Duration::from_secs(DEFAULT_HOLDING_DELAY)).await; info!("Resetting FTDI"); tokio::task::spawn_blocking(move || ftdi.destroy()) .await .wrap_err("task panicked")??; - tokio::time::sleep(Duration::from_secs(4)).await; + tokio::time::sleep(Duration::from_secs(INBETWEEN_DELAY)).await; info!("Turning on"); let device_clone = device.cloned(); diff --git a/hil/src/commands/ota/mod.rs b/hil/src/commands/ota/mod.rs index e8ca1af14..18511a740 100644 --- a/hil/src/commands/ota/mod.rs +++ b/hil/src/commands/ota/mod.rs @@ -99,7 +99,6 @@ impl Ota { let _start_time = Instant::now(); info!("Starting OTA update to version: {}", self.target_version); - // Create log directory if it doesn't exist if let Some(log_dir) = self.log_file.parent() { tokio::fs::create_dir_all(log_dir).await.wrap_err_with(|| { format!("Failed to create log directory: {}", log_dir.display()) diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index 4055e6564..f1ecc0d74 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -15,6 +15,8 @@ use tracing::{debug, error, info, instrument, warn}; use super::Ota; +const DELAY_CAPTURE_LOGS:u64 = 200; + impl Ota { #[instrument(skip_all)] pub(super) async fn handle_reboot(&self, log_suffix: &str) -> Result { @@ -53,7 +55,7 @@ impl Ota { // Brief delay to allow USB device to be re-enumerated and udev rules to apply // after FTDI GPIO is released. The FTDI device detaches/reattaches kernel // drivers which causes /dev/ttyUSB* to be recreated. - tokio::time::sleep(Duration::from_millis(200)).await; + tokio::time::sleep(Duration::from_millis(DELAY_CAPTURE_LOGS)).await; // Spawn boot log capture as a background task so it runs concurrently // with SSH reconnection attempts. Extract needed values upfront. @@ -62,7 +64,7 @@ impl Ota { let serial_path = self.get_serial_path().ok(); let boot_log_suffix = log_suffix.to_string(); let boot_log_task = tokio::spawn(async move { - Self::capture_boot_logs_static(platform, log_file, serial_path, &boot_log_suffix).await + Self::capture_boot_logs(platform, log_file, serial_path, &boot_log_suffix).await }); let start_time = Instant::now(); @@ -154,7 +156,7 @@ impl Ota { /// Captures boot logs from serial port in the background #[instrument(skip_all)] - async fn capture_boot_logs_static( + async fn capture_boot_logs( platform: super::Platform, log_file: std::path::PathBuf, serial_path: Option, @@ -278,7 +280,6 @@ impl Ota { break; } Err(_) => { - // Timeout on reading - continue waiting continue; } } @@ -296,7 +297,6 @@ impl Ota { ); } - // Final flush and close if let Some(mut file) = log_file { let _ = file.flush().await; let _ = file.shutdown().await; @@ -344,7 +344,6 @@ impl Ota { match self.connect_ssh().await { Ok(session) => match session.execute_command("echo").await { Ok(_) => { - // SSH still alive, system hasn't started shutting down yet debug!( "SSH still responsive (attempt {}), waiting for shutdown...", attempt @@ -352,13 +351,11 @@ impl Ota { tokio::time::sleep(Duration::from_millis(500)).await; } Err(_) => { - // Command failed but connection succeeded - might be shutting down info!("SSH connection degraded, shutdown likely in progress"); return Ok(()); } }, Err(_) => { - // Can't connect - shutdown has started info!( "SSH connection lost after {} attempts, shutdown confirmed", attempt diff --git a/hil/src/commands/ota/system.rs b/hil/src/commands/ota/system.rs index 3c1951595..f75fb7862 100644 --- a/hil/src/commands/ota/system.rs +++ b/hil/src/commands/ota/system.rs @@ -134,6 +134,8 @@ pub async fn wait_for_time_sync(session: &SshWrapper) -> Result<()> { const MAX_ATTEMPTS: u32 = 60; // 60 attempts = 2 minutes max wait const SLEEP_DURATION: Duration = Duration::from_secs(2); + // Timeout for individual command execution (10 seconds is generous for timedatectl/chronyc) + const COMMAND_TIMEOUT: Duration = Duration::from_secs(10); info!("Waiting for system time synchronization..."); let sync_start = std::time::Instant::now(); @@ -165,8 +167,6 @@ pub async fn wait_for_time_sync(session: &SshWrapper) -> Result<()> { ); for attempt in 1..=MAX_ATTEMPTS { - // Timeout for individual command execution (10 seconds is generous for timedatectl/chronyc) - const COMMAND_TIMEOUT: Duration = Duration::from_secs(10); let is_synced = if use_timedatectl { // Try timedatectl with timeout @@ -208,7 +208,6 @@ pub async fn wait_for_time_sync(session: &SshWrapper) -> Result<()> { if result.is_success() { // Check if chrony is synchronized // Leap status should be "Normal" when synchronized - // Reference ID should not be "0.0.0.0" (unsynchronized) result.stdout.contains("Leap status : Normal") && !result.stdout.contains("Reference ID : 0.0.0.0") } else { From 7a1b966f85381f56afb3f27b39d67731c744bb14 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Mon, 12 Jan 2026 14:00:04 +0100 Subject: [PATCH 37/47] fix: format --- hil/src/boot.rs | 4 ++-- hil/src/commands/ota/reboot.rs | 9 ++++++--- hil/src/commands/ota/system.rs | 1 - 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/hil/src/boot.rs b/hil/src/boot.rs index 3c2d5a354..e4e5aa268 100644 --- a/hil/src/boot.rs +++ b/hil/src/boot.rs @@ -23,8 +23,8 @@ pub async fn is_recovery_mode_detected() -> Result { /// If `device` is `None`, will get the first available device. #[tracing::instrument] pub async fn reboot(recovery: bool, device: Option<&FtdiId>) -> Result<()> { - const DEFAULT_HOLDING_DELAY:u64 = 10; - const INBETWEEN_DELAY:u64 = 4; + const DEFAULT_HOLDING_DELAY: u64 = 10; + const INBETWEEN_DELAY: u64 = 4; fn make_ftdi(device: Option) -> Result { let builder = FtdiGpio::builder(); diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index f1ecc0d74..055a46f14 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -15,7 +15,7 @@ use tracing::{debug, error, info, instrument, warn}; use super::Ota; -const DELAY_CAPTURE_LOGS:u64 = 200; +const DELAY_CAPTURE_LOGS: u64 = 200; impl Ota { #[instrument(skip_all)] @@ -64,7 +64,8 @@ impl Ota { let serial_path = self.get_serial_path().ok(); let boot_log_suffix = log_suffix.to_string(); let boot_log_task = tokio::spawn(async move { - Self::capture_boot_logs(platform, log_file, serial_path, &boot_log_suffix).await + Self::capture_boot_logs(platform, log_file, serial_path, &boot_log_suffix) + .await }); let start_time = Instant::now(); @@ -95,7 +96,9 @@ impl Ota { // Wait for boot log capture to finish match boot_log_task.await { Ok(Ok(())) => { - info!("Boot log capture completed successfully"); + info!( + "Boot log capture completed successfully" + ); } Ok(Err(e)) => { warn!("Boot log capture failed: {}", e); diff --git a/hil/src/commands/ota/system.rs b/hil/src/commands/ota/system.rs index f75fb7862..3a5ca0452 100644 --- a/hil/src/commands/ota/system.rs +++ b/hil/src/commands/ota/system.rs @@ -167,7 +167,6 @@ pub async fn wait_for_time_sync(session: &SshWrapper) -> Result<()> { ); for attempt in 1..=MAX_ATTEMPTS { - let is_synced = if use_timedatectl { // Try timedatectl with timeout let result = tokio::time::timeout( From 6f2a845743a57df0770c46080887a66ea126e02d Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Mon, 12 Jan 2026 14:18:00 +0100 Subject: [PATCH 38/47] fix: replace delay value with constant --- hil/src/boot.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hil/src/boot.rs b/hil/src/boot.rs index e4e5aa268..64d32efdc 100644 --- a/hil/src/boot.rs +++ b/hil/src/boot.rs @@ -82,7 +82,7 @@ pub async fn reboot(recovery: bool, device: Option<&FtdiId>) -> Result<()> { }) .await .wrap_err("task panicked")??; - tokio::time::sleep(Duration::from_secs(10)).await; + tokio::time::sleep(Duration::from_secs(DEFAULT_HOLDING_DELAY)).await; tokio::task::spawn_blocking(move || ftdi.destroy()) .await From 022ae7bffed0dcc9b505babead501e0c95fe3fb3 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 13 Jan 2026 01:47:04 +0100 Subject: [PATCH 39/47] fix: try chrony first & fallback to timedatectl --- hil/src/commands/ota/system.rs | 54 ++++++++++++++++++---------------- nix/machines/hil-common.nix | 9 +++++- 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/hil/src/commands/ota/system.rs b/hil/src/commands/ota/system.rs index 3a5ca0452..941f17201 100644 --- a/hil/src/commands/ota/system.rs +++ b/hil/src/commands/ota/system.rs @@ -140,75 +140,79 @@ pub async fn wait_for_time_sync(session: &SshWrapper) -> Result<()> { info!("Waiting for system time synchronization..."); let sync_start = std::time::Instant::now(); - // Detect which time sync tool is available - let use_timedatectl = session - .execute_command("TERM=dumb command -v timedatectl") - .await - .map(|r| r.is_success()) - .unwrap_or(false); - + // Detect which time sync tool is available (prefer chronyc over timedatectl) let use_chronyc = session .execute_command("TERM=dumb command -v chronyc") .await .map(|r| r.is_success()) .unwrap_or(false); + let use_timedatectl = if !use_chronyc { + session + .execute_command("TERM=dumb command -v timedatectl") + .await + .map(|r| r.is_success()) + .unwrap_or(false) + } else { + false + }; + if !use_timedatectl && !use_chronyc { - bail!("Neither timedatectl nor chronyc found on the system"); + bail!("Neither chronyc nor timedatectl found on the system"); } info!( "Using {} for time sync check", - if use_timedatectl { - "timedatectl" - } else { + if use_chronyc { "chronyc" + } else { + "timedatectl" } ); for attempt in 1..=MAX_ATTEMPTS { - let is_synced = if use_timedatectl { - // Try timedatectl with timeout + let is_synced = if use_chronyc { + // Try chronyc tracking with timeout let result = tokio::time::timeout( COMMAND_TIMEOUT, - session.execute_command("TERM=dumb timedatectl"), + session.execute_command("TERM=dumb chronyc tracking"), ) .await .map_err(|_| { color_eyre::eyre::eyre!( - "timedatectl command timed out after {:?}", + "chronyc command timed out after {:?}", COMMAND_TIMEOUT ) })? .wrap_err("Failed to check time synchronization status")?; if result.is_success() { - // Check if "System clock synchronized: yes" appears in output - result.stdout.contains("System clock synchronized: yes") - || result.stdout.contains("synchronized: yes") + // Check if chrony is synchronized + // Leap status should be "Normal" when synchronized + result.stdout.contains("Leap status : Normal") + && !result.stdout.contains("Reference ID : 0.0.0.0") } else { false } } else { - // Try chronyc tracking with timeout + // Try timedatectl with timeout let result = tokio::time::timeout( COMMAND_TIMEOUT, - session.execute_command("TERM=dumb chronyc tracking"), + session.execute_command("TERM=dumb timedatectl"), ) .await .map_err(|_| { color_eyre::eyre::eyre!( - "chronyc command timed out after {:?}", + "timedatectl command timed out after {:?}", COMMAND_TIMEOUT ) })? .wrap_err("Failed to check time synchronization status")?; if result.is_success() { - // Check if chrony is synchronized - // Leap status should be "Normal" when synchronized - result.stdout.contains("Leap status : Normal") - && !result.stdout.contains("Reference ID : 0.0.0.0") + // Check if "System clock synchronized: yes" appears in output + result.stdout.contains("System clock synchronized: yes") + || result.stdout.contains("synchronized: yes") } else { false } diff --git a/nix/machines/hil-common.nix b/nix/machines/hil-common.nix index a0da2aff5..4a9405e54 100644 --- a/nix/machines/hil-common.nix +++ b/nix/machines/hil-common.nix @@ -204,7 +204,14 @@ in "nixos" "flashing-hil" "${hostname}" - ]; + ] ++ ( + # Add platform-specific labels based on hostname + if builtins.elem hostname ["worldcoin-hil-munich-0" "worldcoin-hil-munich-5"] + then ["worldcoin-hil-pearl"] + else if builtins.elem hostname ["worldcoin-hil-munich-2" "worldcoin-hil-munich-3"] + then ["worldcoin-hil-diamond"] + else [] + ); replace = true; user = ghRunnerUser; From 269826cce1ac94db676e2559bb5bda45a1d409c7 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 13 Jan 2026 12:53:27 +0100 Subject: [PATCH 40/47] fix: time sync bug, unhandled error --- hil/src/commands/ota/system.rs | 76 ++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 31 deletions(-) diff --git a/hil/src/commands/ota/system.rs b/hil/src/commands/ota/system.rs index 941f17201..5341d7d5b 100644 --- a/hil/src/commands/ota/system.rs +++ b/hil/src/commands/ota/system.rs @@ -173,48 +173,62 @@ pub async fn wait_for_time_sync(session: &SshWrapper) -> Result<()> { for attempt in 1..=MAX_ATTEMPTS { let is_synced = if use_chronyc { // Try chronyc tracking with timeout - let result = tokio::time::timeout( + match tokio::time::timeout( COMMAND_TIMEOUT, session.execute_command("TERM=dumb chronyc tracking"), ) .await - .map_err(|_| { - color_eyre::eyre::eyre!( - "chronyc command timed out after {:?}", - COMMAND_TIMEOUT - ) - })? - .wrap_err("Failed to check time synchronization status")?; - - if result.is_success() { - // Check if chrony is synchronized - // Leap status should be "Normal" when synchronized - result.stdout.contains("Leap status : Normal") - && !result.stdout.contains("Reference ID : 0.0.0.0") - } else { - false + { + Ok(Ok(result)) if result.is_success() => { + // Check if chrony is synchronized + // Leap status should be "Normal" when synchronized + result.stdout.contains("Leap status : Normal") + && !result.stdout.contains("Reference ID : 0.0.0.0") + } + Ok(Ok(_)) => false, + Ok(Err(e)) => { + info!( + "Failed to check chronyc status (attempt {}/{}): {}", + attempt, MAX_ATTEMPTS, e + ); + false + } + Err(_) => { + info!( + "chronyc command timed out after {:?} (attempt {}/{})", + COMMAND_TIMEOUT, attempt, MAX_ATTEMPTS + ); + false + } } } else { // Try timedatectl with timeout - let result = tokio::time::timeout( + match tokio::time::timeout( COMMAND_TIMEOUT, session.execute_command("TERM=dumb timedatectl"), ) .await - .map_err(|_| { - color_eyre::eyre::eyre!( - "timedatectl command timed out after {:?}", - COMMAND_TIMEOUT - ) - })? - .wrap_err("Failed to check time synchronization status")?; - - if result.is_success() { - // Check if "System clock synchronized: yes" appears in output - result.stdout.contains("System clock synchronized: yes") - || result.stdout.contains("synchronized: yes") - } else { - false + { + Ok(Ok(result)) if result.is_success() => { + // Check if "System clock synchronized: yes" appears in output + result.stdout.contains("System clock synchronized: yes") + || result.stdout.contains("synchronized: yes") + } + Ok(Ok(_)) => false, + Ok(Err(e)) => { + info!( + "Failed to check timedatectl status (attempt {}/{}): {}", + attempt, MAX_ATTEMPTS, e + ); + false + } + Err(_) => { + info!( + "timedatectl command timed out after {:?} (attempt {}/{})", + COMMAND_TIMEOUT, attempt, MAX_ATTEMPTS + ); + false + } } }; From 3568b0b70c7448047ff2e6c43b0e6d883d619204 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 13 Jan 2026 14:18:30 +0100 Subject: [PATCH 41/47] fix: default hold for 5 secs --- hil/src/boot.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hil/src/boot.rs b/hil/src/boot.rs index 64d32efdc..f984ab3ac 100644 --- a/hil/src/boot.rs +++ b/hil/src/boot.rs @@ -23,7 +23,7 @@ pub async fn is_recovery_mode_detected() -> Result { /// If `device` is `None`, will get the first available device. #[tracing::instrument] pub async fn reboot(recovery: bool, device: Option<&FtdiId>) -> Result<()> { - const DEFAULT_HOLDING_DELAY: u64 = 10; + const DEFAULT_HOLDING_DELAY: u64 = 5; const INBETWEEN_DELAY: u64 = 4; fn make_ftdi(device: Option) -> Result { From 857ec455de5b877827d69b9ae72b3a14c9255867 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 13 Jan 2026 18:34:05 +0100 Subject: [PATCH 42/47] feat: Include More Serial Logs Capture When Orbs is not Reachable --- hil/src/commands/ota/reboot.rs | 165 +++++++++++++++++++ scripts/orb-registration/orb-registration.py | 145 ++++++++++------ 2 files changed, 258 insertions(+), 52 deletions(-) diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index 055a46f14..75a9b3fa8 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -149,6 +149,18 @@ impl Ota { "No specific error captured".to_string() }; + // Try hardware button reboot as fallback + warn!("SSH reconnection failed, attempting hardware button reboot recovery"); + match self.try_hardware_reboot_recovery(log_suffix).await { + Ok(session) => { + info!("Hardware button reboot recovery succeeded!"); + return Ok(session); + } + Err(recovery_err) => { + error!("Hardware button reboot recovery also failed: {}", recovery_err); + } + } + bail!( "Device did not come back online within {:?} (attempted {} times). {}", elapsed, @@ -368,4 +380,157 @@ impl Ota { } } } + + /// Try hardware button reboot as fallback recovery mechanism + /// Performs up to 3 boot attempts with serial log capture + #[instrument(skip_all)] + async fn try_hardware_reboot_recovery(&self, log_suffix: &str) -> Result { + const MAX_BOOT_ATTEMPTS: u32 = 3; + + info!("Starting hardware button reboot recovery (max {} attempts)", MAX_BOOT_ATTEMPTS); + + for boot_attempt in 1..=MAX_BOOT_ATTEMPTS { + info!("Hardware reboot attempt {}/{}", boot_attempt, MAX_BOOT_ATTEMPTS); + + // Perform hardware button reboot + info!("Triggering hardware button reboot (recovery=false)"); + crate::boot::reboot(false, None).await + .wrap_err("Failed to trigger hardware button reboot")?; + + // Brief delay to allow USB device to be re-enumerated + tokio::time::sleep(Duration::from_millis(DELAY_CAPTURE_LOGS)).await; + + // Capture boot logs and wait for login prompt + let serial_path = match self.get_serial_path() { + Ok(path) => path, + Err(e) => { + warn!("Failed to get serial path for boot log capture: {}", e); + // Continue without serial logs + continue; + } + }; + + info!("Opening serial port for boot log capture: {}", serial_path.display()); + let serial = match tokio_serial::new( + &*serial_path.to_string_lossy(), + crate::serial::ORB_BAUD_RATE, + ) + .open_native_async() + { + Ok(s) => s, + Err(e) => { + warn!("Failed to open serial port: {}. Continuing without logs.", e); + continue; + } + }; + + let (serial_reader, _serial_writer) = tokio::io::split(serial); + let (serial_output_tx, serial_output_rx) = broadcast::channel(64); + let (reader_task, kill_tx) = + spawn_serial_reader_task(serial_reader, serial_output_tx); + + // Capture boot logs in background while waiting for login prompt + let platform = self.platform.clone(); + let log_file = self.log_file.clone(); + let serial_path_clone = serial_path.clone(); + let boot_log_suffix = format!("{}_hardware_recovery_{}", log_suffix, boot_attempt); + let boot_log_task = tokio::spawn(async move { + Self::capture_boot_logs( + platform, + log_file, + Some(serial_path_clone), + &boot_log_suffix, + ) + .await + }); + + // Wait for login prompt with timeout + info!("Waiting for login prompt..."); + let wait_result = tokio::time::timeout( + Duration::from_secs(300), // 5 minutes timeout per boot attempt + crate::serial::wait_for_pattern( + LOGIN_PROMPT_PATTERN.to_owned().into_bytes(), + BroadcastStream::new(serial_output_rx), + ), + ) + .await; + + let _ = kill_tx.send(()); + let _ = reader_task.await; + + match wait_result { + Ok(Ok(())) => { + info!("Login prompt detected on boot attempt {}", boot_attempt); + + // Wait a bit for boot to stabilize + tokio::time::sleep(Duration::from_secs(10)).await; + + // Try to SSH connect + info!("Attempting SSH connection after hardware reboot..."); + let mut ssh_attempts = 0; + const MAX_SSH_ATTEMPTS: u32 = 30; + + while ssh_attempts < MAX_SSH_ATTEMPTS { + ssh_attempts += 1; + tokio::time::sleep(Duration::from_secs(10)).await; + + match self.connect_ssh().await { + Ok(session) => match session.test_connection().await { + Ok(_) => { + info!("SSH connection established after hardware reboot!"); + + // Wait for time sync + info!("Waiting for NTP time synchronization"); + match super::system::wait_for_time_sync(&session).await { + Ok(_) => { + info!("Hardware reboot recovery successful!"); + + // Wait for boot log capture to finish + match boot_log_task.await { + Ok(Ok(())) => { + info!("Boot log capture completed for attempt {}", boot_attempt); + } + Ok(Err(e)) => { + warn!("Boot log capture failed for attempt {}: {}", boot_attempt, e); + } + Err(e) => { + warn!("Boot log capture task panicked for attempt {}: {}", boot_attempt, e); + } + } + + return Ok(session); + } + Err(e) => { + warn!("Time sync failed after hardware reboot: {}", e); + } + } + } + Err(e) => { + debug!("SSH connection test failed (attempt {}): {}", ssh_attempts, e); + } + }, + Err(e) => { + debug!("SSH connection failed (attempt {}): {}", ssh_attempts, e); + } + } + } + + warn!("SSH connection failed after {} attempts on boot attempt {}", MAX_SSH_ATTEMPTS, boot_attempt); + + // Clean up boot log task since we're moving to next attempt + boot_log_task.abort(); + } + Ok(Err(e)) => { + warn!("Error waiting for login prompt on boot attempt {}: {}", boot_attempt, e); + boot_log_task.abort(); + } + Err(_) => { + warn!("Timeout waiting for login prompt on boot attempt {}", boot_attempt); + boot_log_task.abort(); + } + } + } + + bail!("Hardware reboot recovery failed after {} boot attempts", MAX_BOOT_ATTEMPTS); + } } diff --git a/scripts/orb-registration/orb-registration.py b/scripts/orb-registration/orb-registration.py index 5a7fc822a..2e8c16b2e 100644 --- a/scripts/orb-registration/orb-registration.py +++ b/scripts/orb-registration/orb-registration.py @@ -109,13 +109,45 @@ def check_orb_id_format(self, orb_id: str) -> str: ) orb_id = orb_id.lower() - if len(orb_id) < 8: - self.logger.warning( - f"Orb ID '{orb_id}' is less than 8 characters, padding with zeros" - ) - orb_id = orb_id.zfill(8) - elif len(orb_id) > 8: - raise ValueError(f"Orb ID '{orb_id}' exceeds 8 characters") + # Pearl EV2+ uses full 64-character hash IDs + # Older Pearl (EV1) and Diamond use 8-character IDs + # Valid hardware versions from API: + # Pearl: PEARL_EV1, PEARL_EV2, PEARL_EV3, PEARL_EV4, PEARL_EV5, PEARL_EV5_S, + # PEARL_EV6, PEARL_EV7, PEARL_EV8 + # Diamond: DIAMOND_B3, DIAMOND_DVT1, DIAMOND_DVT2, DIAMOND_EV1, DIAMOND_EVT, DIAMOND_PVT + # Other: PROTO_0S, VIRTUAL + + hw_version_upper = self.args.hardware_version.upper() + + # EV2 and newer Pearl versions use 64-char hash IDs + uses_long_ids = hw_version_upper in [ + "PEARL_EV2", "PEARL_EV3", "PEARL_EV4", "PEARL_EV5", "PEARL_EV5_S", + "PEARL_EV6", "PEARL_EV7", "PEARL_EV8" + ] + + if uses_long_ids: + # Allow full-length hash IDs (64 characters for SHA256) + if len(orb_id) == 64: + # Validate it's a valid hex string + try: + int(orb_id, 16) + except ValueError: + raise ValueError(f"Orb ID '{orb_id}' is not a valid hexadecimal string") + return orb_id + elif len(orb_id) == 8: + # Also accept short 8-character format for backwards compatibility + return orb_id + else: + raise ValueError(f"Orb ID '{orb_id}' must be either 8 or 64 characters for {self.args.hardware_version}") + else: + # Older hardware uses 8-character IDs + if len(orb_id) < 8: + self.logger.warning( + f"Orb ID '{orb_id}' is less than 8 characters, padding with zeros" + ) + orb_id = orb_id.zfill(8) + elif len(orb_id) > 8: + raise ValueError(f"Orb ID '{orb_id}' exceeds 8 characters") return orb_id @@ -588,7 +620,7 @@ def save_orb_artifacts( def process_pearl_orb(self, cf_token: str, mount_point: Path) -> str: """Process a single Pearl orb (generate ID, register, create artifacts).""" orb_id = self.generate_orb_id() - platform = self.detect_platform(self.args.hardware_version) + platform = self.args.platform orb_name = self.register_orb_mongo(orb_id, cf_token, platform) self.set_orb_channel(orb_id, cf_token) @@ -599,19 +631,19 @@ def process_pearl_orb(self, cf_token: str, mount_point: Path) -> str: return orb_id - def process_diamond_orb_ids(self, orb_ids: List[str], cf_token: str): - """Process Diamond orb IDs (register in MongoDB, then Core-App).""" - platform = self.detect_platform(self.args.hardware_version) + def register_orb_ids(self, orb_ids: List[str], cf_token: str): + """Register orb IDs (in MongoDB first, then Core-App).""" + platform = self.args.platform for orb_id in orb_ids: - self.logger.info(f"Processing Diamond Orb ID: {orb_id}") + self.logger.info(f"Processing Orb ID: {orb_id}") orb_id = self.check_orb_id_format(orb_id) orb_name = self.register_orb_mongo(orb_id, cf_token, platform) self.register_orb_core_app(orb_id, orb_name) - self.logger.info(f"Successfully processed Diamond Orb: {orb_id}") + self.logger.info(f"Successfully processed Orb: {orb_id}") - def process_diamond_orb_pairs(self, orb_pairs: List[Tuple[str, str]]): - """Process Diamond orb ID+name pairs (register directly in Core-App).""" + def register_orb_pairs(self, orb_pairs: List[Tuple[str, str]]): + """Register orb ID+name pairs (directly in Core-App, skipping MongoDB).""" for orb_id, orb_name in orb_pairs: self.logger.info(f"Processing Diamond Orb pair: {orb_id} -> {orb_name}") orb_id = self.check_orb_id_format(orb_id) @@ -643,40 +675,54 @@ def run(self): cf_token = self.get_cloudflared_token() if self.args.platform == "pearl": - # Pearl: Generate artifacts and register - self.build_dir.mkdir(exist_ok=True) - self.artifacts_dir.mkdir(exist_ok=True) + if self.args.orb_ids or self.args.input_file: + # Pearl: Register existing orb IDs (no artifact generation) + if self.args.input_file: + if self.args.input_format == "ids": + orb_ids = self.read_input_file(self.args.input_file) + self.register_orb_ids(orb_ids, cf_token) + elif self.args.input_format == "pairs": + orb_pairs = self.read_input_pairs_file(self.args.input_file) + self.register_orb_pairs(orb_pairs) + elif self.args.orb_ids: + self.register_orb_ids(self.args.orb_ids, cf_token) + + self.logger.info("All Pearl Orb IDs registered successfully.") + else: + # Pearl: Generate artifacts and register + self.build_dir.mkdir(exist_ok=True) + self.artifacts_dir.mkdir(exist_ok=True) + + with tempfile.TemporaryDirectory() as temp_dir: + mount_point = Path(temp_dir) / "loop" + mount_point.mkdir() - with tempfile.TemporaryDirectory() as temp_dir: - mount_point = Path(temp_dir) / "loop" - mount_point.mkdir() + self.create_persistent_images(mount_point) - self.create_persistent_images(mount_point) + for i in range(self.args.count): + self.logger.info( + f"Generating Pearl Orb ID #{i+1} of {self.args.count}..." + ) + orb_id = self.process_pearl_orb(cf_token, mount_point) + self.logger.info(f"Successfully processed Pearl Orb: {orb_id}") + print("", file=sys.stderr) - for i in range(self.args.count): self.logger.info( - f"Generating Pearl Orb ID #{i+1} of {self.args.count}..." + f"All {self.args.count} Pearl Orb IDs generated and registered successfully." ) - orb_id = self.process_pearl_orb(cf_token, mount_point) - self.logger.info(f"Successfully processed Pearl Orb: {orb_id}") - print("", file=sys.stderr) - - self.logger.info( - f"All {self.args.count} Pearl Orb IDs generated and registered successfully." - ) elif self.args.platform == "diamond": if self.args.input_file: # Diamond: Read from file if self.args.input_format == "ids": orb_ids = self.read_input_file(self.args.input_file) - self.process_diamond_orb_ids(orb_ids, cf_token) + self.register_orb_ids(orb_ids, cf_token) elif self.args.input_format == "pairs": orb_pairs = self.read_input_pairs_file(self.args.input_file) - self.process_diamond_orb_pairs(orb_pairs) + self.register_orb_pairs(orb_pairs) elif self.args.orb_ids: # Diamond: Direct arguments - self.process_diamond_orb_ids(self.args.orb_ids, cf_token) + self.register_orb_ids(self.args.orb_ids, cf_token) else: raise ValueError( "Diamond platform requires either --input-file or direct orb IDs" @@ -732,13 +778,13 @@ def main(): "--count", type=int, default=1, - help="Number of Pearl orbs to generate (Pearl only)", + help="Number of Pearl orbs to generate (Pearl generation mode only)", ) - # Diamond-specific arguments + # Common arguments for both platforms parser.add_argument( "--input-file", - help="Input file containing orb IDs or orb ID+name pairs (Diamond only)", + help="Input file containing orb IDs or orb ID+name pairs", ) parser.add_argument( "--input-format", @@ -749,7 +795,7 @@ def main(): parser.add_argument( "orb_ids", nargs="*", - help="Orb IDs to register (Diamond only, alternative to --input-file)", + help="Orb IDs to register (alternative to --input-file)", ) check_cli_dependencies(REQUIRED_TOOLS) @@ -775,26 +821,21 @@ def main(): sys.exit(1) # Platform-specific validation - if args.platform == "pearl": - if args.input_file or args.orb_ids: - print( - "Error: Pearl platform doesn't support input files or direct orb IDs", - file=sys.stderr, - ) - sys.exit(1) - elif args.platform == "diamond": + if args.platform == "diamond": if not args.input_file and not args.orb_ids: print( "Error: Diamond platform requires either --input-file or direct orb IDs", file=sys.stderr, ) sys.exit(1) - if args.input_file and args.orb_ids: - print( - "Error: Cannot use both --input-file and direct orb IDs", - file=sys.stderr, - ) - sys.exit(1) + + # Validate that both input methods aren't used simultaneously + if args.input_file and args.orb_ids: + print( + "Error: Cannot use both --input-file and direct orb IDs", + file=sys.stderr, + ) + sys.exit(1) try: orb_registration = OrbRegistration(args) From 7662565e04f0094d321396c522029cc89a0462f3 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Tue, 13 Jan 2026 18:35:15 +0100 Subject: [PATCH 43/47] fix: format --- hil/src/commands/ota/reboot.rs | 125 ++++++++++++++++++++++----------- 1 file changed, 83 insertions(+), 42 deletions(-) diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index 75a9b3fa8..82a0b457c 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -157,7 +157,10 @@ impl Ota { return Ok(session); } Err(recovery_err) => { - error!("Hardware button reboot recovery also failed: {}", recovery_err); + error!( + "Hardware button reboot recovery also failed: {}", + recovery_err + ); } } @@ -384,17 +387,27 @@ impl Ota { /// Try hardware button reboot as fallback recovery mechanism /// Performs up to 3 boot attempts with serial log capture #[instrument(skip_all)] - async fn try_hardware_reboot_recovery(&self, log_suffix: &str) -> Result { + async fn try_hardware_reboot_recovery( + &self, + log_suffix: &str, + ) -> Result { const MAX_BOOT_ATTEMPTS: u32 = 3; - info!("Starting hardware button reboot recovery (max {} attempts)", MAX_BOOT_ATTEMPTS); + info!( + "Starting hardware button reboot recovery (max {} attempts)", + MAX_BOOT_ATTEMPTS + ); for boot_attempt in 1..=MAX_BOOT_ATTEMPTS { - info!("Hardware reboot attempt {}/{}", boot_attempt, MAX_BOOT_ATTEMPTS); + info!( + "Hardware reboot attempt {}/{}", + boot_attempt, MAX_BOOT_ATTEMPTS + ); // Perform hardware button reboot info!("Triggering hardware button reboot (recovery=false)"); - crate::boot::reboot(false, None).await + crate::boot::reboot(false, None) + .await .wrap_err("Failed to trigger hardware button reboot")?; // Brief delay to allow USB device to be re-enumerated @@ -410,7 +423,10 @@ impl Ota { } }; - info!("Opening serial port for boot log capture: {}", serial_path.display()); + info!( + "Opening serial port for boot log capture: {}", + serial_path.display() + ); let serial = match tokio_serial::new( &*serial_path.to_string_lossy(), crate::serial::ORB_BAUD_RATE, @@ -419,7 +435,10 @@ impl Ota { { Ok(s) => s, Err(e) => { - warn!("Failed to open serial port: {}. Continuing without logs.", e); + warn!( + "Failed to open serial port: {}. Continuing without logs.", + e + ); continue; } }; @@ -433,7 +452,8 @@ impl Ota { let platform = self.platform.clone(); let log_file = self.log_file.clone(); let serial_path_clone = serial_path.clone(); - let boot_log_suffix = format!("{}_hardware_recovery_{}", log_suffix, boot_attempt); + let boot_log_suffix = + format!("{}_hardware_recovery_{}", log_suffix, boot_attempt); let boot_log_task = tokio::spawn(async move { Self::capture_boot_logs( platform, @@ -475,62 +495,83 @@ impl Ota { tokio::time::sleep(Duration::from_secs(10)).await; match self.connect_ssh().await { - Ok(session) => match session.test_connection().await { - Ok(_) => { - info!("SSH connection established after hardware reboot!"); - - // Wait for time sync - info!("Waiting for NTP time synchronization"); - match super::system::wait_for_time_sync(&session).await { - Ok(_) => { - info!("Hardware reboot recovery successful!"); - - // Wait for boot log capture to finish - match boot_log_task.await { - Ok(Ok(())) => { - info!("Boot log capture completed for attempt {}", boot_attempt); - } - Ok(Err(e)) => { - warn!("Boot log capture failed for attempt {}: {}", boot_attempt, e); - } - Err(e) => { - warn!("Boot log capture task panicked for attempt {}: {}", boot_attempt, e); + Ok(session) => { + match session.test_connection().await { + Ok(_) => { + info!("SSH connection established after hardware reboot!"); + + // Wait for time sync + info!("Waiting for NTP time synchronization"); + match super::system::wait_for_time_sync( + &session, + ) + .await + { + Ok(_) => { + info!("Hardware reboot recovery successful!"); + + // Wait for boot log capture to finish + match boot_log_task.await { + Ok(Ok(())) => { + info!("Boot log capture completed for attempt {}", boot_attempt); + } + Ok(Err(e)) => { + warn!("Boot log capture failed for attempt {}: {}", boot_attempt, e); + } + Err(e) => { + warn!("Boot log capture task panicked for attempt {}: {}", boot_attempt, e); + } } - } - return Ok(session); - } - Err(e) => { - warn!("Time sync failed after hardware reboot: {}", e); + return Ok(session); + } + Err(e) => { + warn!("Time sync failed after hardware reboot: {}", e); + } } } + Err(e) => { + debug!("SSH connection test failed (attempt {}): {}", ssh_attempts, e); + } } - Err(e) => { - debug!("SSH connection test failed (attempt {}): {}", ssh_attempts, e); - } - }, + } Err(e) => { - debug!("SSH connection failed (attempt {}): {}", ssh_attempts, e); + debug!( + "SSH connection failed (attempt {}): {}", + ssh_attempts, e + ); } } } - warn!("SSH connection failed after {} attempts on boot attempt {}", MAX_SSH_ATTEMPTS, boot_attempt); + warn!( + "SSH connection failed after {} attempts on boot attempt {}", + MAX_SSH_ATTEMPTS, boot_attempt + ); // Clean up boot log task since we're moving to next attempt boot_log_task.abort(); } Ok(Err(e)) => { - warn!("Error waiting for login prompt on boot attempt {}: {}", boot_attempt, e); + warn!( + "Error waiting for login prompt on boot attempt {}: {}", + boot_attempt, e + ); boot_log_task.abort(); } Err(_) => { - warn!("Timeout waiting for login prompt on boot attempt {}", boot_attempt); + warn!( + "Timeout waiting for login prompt on boot attempt {}", + boot_attempt + ); boot_log_task.abort(); } } } - bail!("Hardware reboot recovery failed after {} boot attempts", MAX_BOOT_ATTEMPTS); + bail!( + "Hardware reboot recovery failed after {} boot attempts", + MAX_BOOT_ATTEMPTS + ); } } From bec8b5dea6c36d89de773bf18730ee4ab909d907 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Wed, 14 Jan 2026 12:40:52 +0100 Subject: [PATCH 44/47] feat: Introduce Ethernet Through Usb for Ota orb --- hil/src/commands/ota/mod.rs | 72 +++++++++++++++++++++++++++++-------- hil/src/lib.rs | 2 ++ 2 files changed, 59 insertions(+), 15 deletions(-) diff --git a/hil/src/commands/ota/mod.rs b/hil/src/commands/ota/mod.rs index 18511a740..13ad0ec8a 100644 --- a/hil/src/commands/ota/mod.rs +++ b/hil/src/commands/ota/mod.rs @@ -30,9 +30,9 @@ pub struct Ota { #[arg(long)] target_version: String, - /// Hostname of the Orb device + /// Hostname of the Orb device (optional - will auto-discover via USB ethernet if not provided) #[arg(long)] - hostname: String, + hostname: Option, /// Username #[arg(long, default_value = "worldcoin")] @@ -74,6 +74,18 @@ pub struct Ota { /// Time sync will still be checked after reboot and before starting the update. #[arg(long, default_value = "false")] skip_time_sync_before_reboot: bool, + + /// IP range start for USB ethernet auto-discovery + #[arg(long, default_value = "2")] + discovery_ip_start: u8, + + /// IP range end for USB ethernet auto-discovery + #[arg(long, default_value = "10")] + discovery_ip_end: u8, + + /// Timeout for discovering Orb via USB ethernet (seconds) + #[arg(long, default_value = "30")] + discovery_timeout_secs: u64, } #[derive(Debug, Clone, clap::ValueEnum)] @@ -339,24 +351,44 @@ impl Ota { } async fn connect_ssh(&self) -> Result { - info!( - "Connecting to Orb device at {}:{}", - self.hostname, self.port - ); - - let auth = match (&self.password, &self.key_path) { - (Some(password), None) => AuthMethod::Password(password.clone()), - (None, Some(key_path)) => AuthMethod::Key { - private_key_path: key_path.clone(), - }, - _ => unreachable!("Clap ensures exactly one auth method is specified"), + let hostname = match &self.hostname { + Some(h) => { + info!("Using provided hostname: {}", h); + h.clone() + } + None => { + info!("No hostname provided, starting USB ethernet auto-discovery"); + let discovery = orb_hil::NetworkDiscovery { + username: self.username.clone(), + auth: self.get_auth_method(), + port: self.port, + ip_range_start: self.discovery_ip_start, + ip_range_end: self.discovery_ip_end, + connection_timeout: std::time::Duration::from_secs( + self.discovery_timeout_secs, + ), + }; + + let discovered = discovery + .discover_orb() + .await + .wrap_err("Failed to discover Orb via USB ethernet")?; + + info!( + "Discovered Orb at {} on interface {}", + discovered.hostname, discovered.interface + ); + discovered.hostname + } }; + info!("Connecting to Orb device at {}:{}", hostname, self.port); + let connect_args = SshConnectArgs { - hostname: self.hostname.clone(), + hostname, port: self.port, username: self.username.clone(), - auth, + auth: self.get_auth_method(), }; let session = SshWrapper::connect(connect_args) @@ -366,4 +398,14 @@ impl Ota { info!("Successfully connected to Orb device"); Ok(session) } + + fn get_auth_method(&self) -> AuthMethod { + match (&self.password, &self.key_path) { + (Some(password), None) => AuthMethod::Password(password.clone()), + (None, Some(key_path)) => AuthMethod::Key { + private_key_path: key_path.clone(), + }, + _ => unreachable!("Clap ensures exactly one auth method is specified"), + } + } } diff --git a/hil/src/lib.rs b/hil/src/lib.rs index a02448d3e..a8535d710 100644 --- a/hil/src/lib.rs +++ b/hil/src/lib.rs @@ -1,5 +1,6 @@ #![forbid(unsafe_code)] +mod network_discovery; mod ssh_wrapper; #[path = "commands/ota/verify.rs"] @@ -8,4 +9,5 @@ pub mod verify; #[path = "commands/ota/mcu_util.rs"] pub mod mcu_util; +pub use network_discovery::{DiscoveredOrb, NetworkDiscovery}; pub use ssh_wrapper::{AuthMethod, CommandResult, SshConnectArgs, SshWrapper}; From 2c89aa4e3629d82f7507fd6b7a14498759d90c9a Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Wed, 14 Jan 2026 12:53:00 +0100 Subject: [PATCH 45/47] feat: Ethernet through USB --- hil/src/network_discovery.rs | 205 +++++++++++++++++++ scripts/orb-registration/orb-registration.py | 145 +++++-------- 2 files changed, 257 insertions(+), 93 deletions(-) create mode 100644 hil/src/network_discovery.rs diff --git a/hil/src/network_discovery.rs b/hil/src/network_discovery.rs new file mode 100644 index 000000000..771922c5a --- /dev/null +++ b/hil/src/network_discovery.rs @@ -0,0 +1,205 @@ +use crate::{AuthMethod, SshConnectArgs, SshWrapper}; +use color_eyre::{eyre::bail, Result}; +use std::time::Duration; +use tokio::time::timeout; +use tracing::{debug, info, warn}; + +/// Configuration for network discovery on USB ethernet interfaces +#[derive(Debug, Clone)] +pub struct NetworkDiscovery { + pub username: String, + pub auth: AuthMethod, + pub port: u16, + pub ip_range_start: u8, + pub ip_range_end: u8, + pub connection_timeout: Duration, +} + +/// Information about a discovered Orb device +#[derive(Debug, Clone)] +pub struct DiscoveredOrb { + pub hostname: String, + pub interface: String, +} + +impl NetworkDiscovery { + /// Discovers an Orb device on USB ethernet interfaces (orbeth0-3) + pub async fn discover_orb(&self) -> Result { + info!("Starting Orb discovery on USB ethernet interfaces"); + + let interfaces = enumerate_orbeth_interfaces().await?; + + if interfaces.is_empty() { + bail!( + "No USB ethernet interfaces (orbeth0-3) found.\n\ + Ensure Orb is connected via USB and udev rules are configured." + ); + } + + info!("Found USB ethernet interfaces: {:?}", interfaces); + + let mut tasks = Vec::new(); + for interface in interfaces.iter() { + let interface = interface.clone(); + let discovery = self.clone(); + let task = + tokio::spawn(async move { discovery.scan_interface(&interface).await }); + tasks.push(task); + } + + let discovery_result = timeout(self.connection_timeout, async { + loop { + for task in &mut tasks { + if task.is_finished() { + match task.await { + Ok(Ok(discovered)) => return Ok(discovered), + Ok(Err(e)) => debug!("Interface scan failed: {}", e), + Err(e) => warn!("Task panicked: {}", e), + } + } + } + tokio::time::sleep(Duration::from_millis(100)).await; + } + }) + .await; + + match discovery_result { + Ok(Ok(discovered)) => Ok(discovered), + Ok(Err(e)) => Err(e), + Err(_) => bail!( + "Failed to discover Orb on USB ethernet after {}s.\n\ + Scanned interfaces: {}\n\ + IP range: 10.42.0.{}-{}\n\ + Suggestion: Verify Orb is powered on and SSH is running.\n\ + Or use --hostname to specify manually.", + self.connection_timeout.as_secs(), + interfaces.join(", "), + self.ip_range_start, + self.ip_range_end + ), + } + } + + /// Scans a specific interface for responsive Orb devices + async fn scan_interface(&self, interface: &str) -> Result { + debug!("Scanning interface {} for Orb devices", interface); + + let mut tasks = Vec::new(); + for ip_suffix in self.ip_range_start..=self.ip_range_end { + let ip = format!("10.42.0.{}", ip_suffix); + let interface = interface.to_string(); + let discovery = self.clone(); + + let task = tokio::spawn(async move { + discovery.test_ssh_connection(&ip, &interface).await + }); + tasks.push(task); + } + + loop { + for task in &mut tasks { + if task.is_finished() { + match task.await { + Ok(Ok(discovered)) => { + info!( + "Successfully connected to Orb at {} on {}", + discovered.hostname, discovered.interface + ); + return Ok(discovered); + } + Ok(Err(e)) => debug!("SSH test failed: {}", e), + Err(e) => warn!("Task panicked: {}", e), + } + } + } + + tokio::time::sleep(Duration::from_millis(100)).await; + + if tasks.iter().all(|t| t.is_finished()) { + break; + } + } + + bail!("No responsive Orb found on interface {}", interface) + } + + /// Tests SSH connection to a specific IP address + async fn test_ssh_connection( + &self, + ip: &str, + interface: &str, + ) -> Result { + debug!("Testing SSH connection to {} on {}", ip, interface); + + let connect_args = SshConnectArgs { + hostname: ip.to_string(), + port: self.port, + username: self.username.clone(), + auth: self.auth.clone(), + }; + + let test_result = timeout(Duration::from_secs(3), async { + SshWrapper::connect(connect_args).await + }) + .await; + + match test_result { + Ok(Ok(_wrapper)) => { + debug!("SSH connection successful to {} on {}", ip, interface); + + Ok(DiscoveredOrb { + hostname: ip.to_string(), + interface: interface.to_string(), + }) + } + Ok(Err(e)) => { + debug!("SSH connection failed to {}: {}", ip, e); + Err(e) + } + Err(_) => { + debug!("SSH connection timed out to {}", ip); + bail!("Connection timeout") + } + } + } +} + +/// Enumerates USB ethernet interfaces (orbeth0-3) that are currently UP +async fn enumerate_orbeth_interfaces() -> Result> { + let sys_net_path = "/sys/class/net"; + let mut interfaces = Vec::new(); + + let mut entries = tokio::fs::read_dir(sys_net_path).await?; + + while let Some(entry) = entries.next_entry().await? { + let interface_name = entry.file_name(); + let interface_str = interface_name.to_string_lossy(); + + if interface_str.starts_with("orbeth") && interface_str.len() == 7 { + let operstate_path = + format!("{}/{}/operstate", sys_net_path, interface_str); + + if let Ok(state) = tokio::fs::read_to_string(&operstate_path).await + && state.trim() == "up" + { + interfaces.push(interface_str.to_string()); + debug!("Found active USB ethernet interface: {}", interface_str); + } + } + } + + interfaces.sort(); + + Ok(interfaces) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_enumerate_interfaces() { + let result = enumerate_orbeth_interfaces().await; + assert!(result.is_ok()); + } +} diff --git a/scripts/orb-registration/orb-registration.py b/scripts/orb-registration/orb-registration.py index 2e8c16b2e..5a7fc822a 100644 --- a/scripts/orb-registration/orb-registration.py +++ b/scripts/orb-registration/orb-registration.py @@ -109,45 +109,13 @@ def check_orb_id_format(self, orb_id: str) -> str: ) orb_id = orb_id.lower() - # Pearl EV2+ uses full 64-character hash IDs - # Older Pearl (EV1) and Diamond use 8-character IDs - # Valid hardware versions from API: - # Pearl: PEARL_EV1, PEARL_EV2, PEARL_EV3, PEARL_EV4, PEARL_EV5, PEARL_EV5_S, - # PEARL_EV6, PEARL_EV7, PEARL_EV8 - # Diamond: DIAMOND_B3, DIAMOND_DVT1, DIAMOND_DVT2, DIAMOND_EV1, DIAMOND_EVT, DIAMOND_PVT - # Other: PROTO_0S, VIRTUAL - - hw_version_upper = self.args.hardware_version.upper() - - # EV2 and newer Pearl versions use 64-char hash IDs - uses_long_ids = hw_version_upper in [ - "PEARL_EV2", "PEARL_EV3", "PEARL_EV4", "PEARL_EV5", "PEARL_EV5_S", - "PEARL_EV6", "PEARL_EV7", "PEARL_EV8" - ] - - if uses_long_ids: - # Allow full-length hash IDs (64 characters for SHA256) - if len(orb_id) == 64: - # Validate it's a valid hex string - try: - int(orb_id, 16) - except ValueError: - raise ValueError(f"Orb ID '{orb_id}' is not a valid hexadecimal string") - return orb_id - elif len(orb_id) == 8: - # Also accept short 8-character format for backwards compatibility - return orb_id - else: - raise ValueError(f"Orb ID '{orb_id}' must be either 8 or 64 characters for {self.args.hardware_version}") - else: - # Older hardware uses 8-character IDs - if len(orb_id) < 8: - self.logger.warning( - f"Orb ID '{orb_id}' is less than 8 characters, padding with zeros" - ) - orb_id = orb_id.zfill(8) - elif len(orb_id) > 8: - raise ValueError(f"Orb ID '{orb_id}' exceeds 8 characters") + if len(orb_id) < 8: + self.logger.warning( + f"Orb ID '{orb_id}' is less than 8 characters, padding with zeros" + ) + orb_id = orb_id.zfill(8) + elif len(orb_id) > 8: + raise ValueError(f"Orb ID '{orb_id}' exceeds 8 characters") return orb_id @@ -620,7 +588,7 @@ def save_orb_artifacts( def process_pearl_orb(self, cf_token: str, mount_point: Path) -> str: """Process a single Pearl orb (generate ID, register, create artifacts).""" orb_id = self.generate_orb_id() - platform = self.args.platform + platform = self.detect_platform(self.args.hardware_version) orb_name = self.register_orb_mongo(orb_id, cf_token, platform) self.set_orb_channel(orb_id, cf_token) @@ -631,19 +599,19 @@ def process_pearl_orb(self, cf_token: str, mount_point: Path) -> str: return orb_id - def register_orb_ids(self, orb_ids: List[str], cf_token: str): - """Register orb IDs (in MongoDB first, then Core-App).""" - platform = self.args.platform + def process_diamond_orb_ids(self, orb_ids: List[str], cf_token: str): + """Process Diamond orb IDs (register in MongoDB, then Core-App).""" + platform = self.detect_platform(self.args.hardware_version) for orb_id in orb_ids: - self.logger.info(f"Processing Orb ID: {orb_id}") + self.logger.info(f"Processing Diamond Orb ID: {orb_id}") orb_id = self.check_orb_id_format(orb_id) orb_name = self.register_orb_mongo(orb_id, cf_token, platform) self.register_orb_core_app(orb_id, orb_name) - self.logger.info(f"Successfully processed Orb: {orb_id}") + self.logger.info(f"Successfully processed Diamond Orb: {orb_id}") - def register_orb_pairs(self, orb_pairs: List[Tuple[str, str]]): - """Register orb ID+name pairs (directly in Core-App, skipping MongoDB).""" + def process_diamond_orb_pairs(self, orb_pairs: List[Tuple[str, str]]): + """Process Diamond orb ID+name pairs (register directly in Core-App).""" for orb_id, orb_name in orb_pairs: self.logger.info(f"Processing Diamond Orb pair: {orb_id} -> {orb_name}") orb_id = self.check_orb_id_format(orb_id) @@ -675,54 +643,40 @@ def run(self): cf_token = self.get_cloudflared_token() if self.args.platform == "pearl": - if self.args.orb_ids or self.args.input_file: - # Pearl: Register existing orb IDs (no artifact generation) - if self.args.input_file: - if self.args.input_format == "ids": - orb_ids = self.read_input_file(self.args.input_file) - self.register_orb_ids(orb_ids, cf_token) - elif self.args.input_format == "pairs": - orb_pairs = self.read_input_pairs_file(self.args.input_file) - self.register_orb_pairs(orb_pairs) - elif self.args.orb_ids: - self.register_orb_ids(self.args.orb_ids, cf_token) - - self.logger.info("All Pearl Orb IDs registered successfully.") - else: - # Pearl: Generate artifacts and register - self.build_dir.mkdir(exist_ok=True) - self.artifacts_dir.mkdir(exist_ok=True) - - with tempfile.TemporaryDirectory() as temp_dir: - mount_point = Path(temp_dir) / "loop" - mount_point.mkdir() + # Pearl: Generate artifacts and register + self.build_dir.mkdir(exist_ok=True) + self.artifacts_dir.mkdir(exist_ok=True) - self.create_persistent_images(mount_point) + with tempfile.TemporaryDirectory() as temp_dir: + mount_point = Path(temp_dir) / "loop" + mount_point.mkdir() - for i in range(self.args.count): - self.logger.info( - f"Generating Pearl Orb ID #{i+1} of {self.args.count}..." - ) - orb_id = self.process_pearl_orb(cf_token, mount_point) - self.logger.info(f"Successfully processed Pearl Orb: {orb_id}") - print("", file=sys.stderr) + self.create_persistent_images(mount_point) + for i in range(self.args.count): self.logger.info( - f"All {self.args.count} Pearl Orb IDs generated and registered successfully." + f"Generating Pearl Orb ID #{i+1} of {self.args.count}..." ) + orb_id = self.process_pearl_orb(cf_token, mount_point) + self.logger.info(f"Successfully processed Pearl Orb: {orb_id}") + print("", file=sys.stderr) + + self.logger.info( + f"All {self.args.count} Pearl Orb IDs generated and registered successfully." + ) elif self.args.platform == "diamond": if self.args.input_file: # Diamond: Read from file if self.args.input_format == "ids": orb_ids = self.read_input_file(self.args.input_file) - self.register_orb_ids(orb_ids, cf_token) + self.process_diamond_orb_ids(orb_ids, cf_token) elif self.args.input_format == "pairs": orb_pairs = self.read_input_pairs_file(self.args.input_file) - self.register_orb_pairs(orb_pairs) + self.process_diamond_orb_pairs(orb_pairs) elif self.args.orb_ids: # Diamond: Direct arguments - self.register_orb_ids(self.args.orb_ids, cf_token) + self.process_diamond_orb_ids(self.args.orb_ids, cf_token) else: raise ValueError( "Diamond platform requires either --input-file or direct orb IDs" @@ -778,13 +732,13 @@ def main(): "--count", type=int, default=1, - help="Number of Pearl orbs to generate (Pearl generation mode only)", + help="Number of Pearl orbs to generate (Pearl only)", ) - # Common arguments for both platforms + # Diamond-specific arguments parser.add_argument( "--input-file", - help="Input file containing orb IDs or orb ID+name pairs", + help="Input file containing orb IDs or orb ID+name pairs (Diamond only)", ) parser.add_argument( "--input-format", @@ -795,7 +749,7 @@ def main(): parser.add_argument( "orb_ids", nargs="*", - help="Orb IDs to register (alternative to --input-file)", + help="Orb IDs to register (Diamond only, alternative to --input-file)", ) check_cli_dependencies(REQUIRED_TOOLS) @@ -821,21 +775,26 @@ def main(): sys.exit(1) # Platform-specific validation - if args.platform == "diamond": + if args.platform == "pearl": + if args.input_file or args.orb_ids: + print( + "Error: Pearl platform doesn't support input files or direct orb IDs", + file=sys.stderr, + ) + sys.exit(1) + elif args.platform == "diamond": if not args.input_file and not args.orb_ids: print( "Error: Diamond platform requires either --input-file or direct orb IDs", file=sys.stderr, ) sys.exit(1) - - # Validate that both input methods aren't used simultaneously - if args.input_file and args.orb_ids: - print( - "Error: Cannot use both --input-file and direct orb IDs", - file=sys.stderr, - ) - sys.exit(1) + if args.input_file and args.orb_ids: + print( + "Error: Cannot use both --input-file and direct orb IDs", + file=sys.stderr, + ) + sys.exit(1) try: orb_registration = OrbRegistration(args) From 65dacd1849d9782abf8436e3a318ee6c7fc1b648 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Wed, 14 Jan 2026 12:57:20 +0100 Subject: [PATCH 46/47] fix: restore from main / nix conf --- nix/machines/hil-common.nix | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/nix/machines/hil-common.nix b/nix/machines/hil-common.nix index 4a9405e54..a0da2aff5 100644 --- a/nix/machines/hil-common.nix +++ b/nix/machines/hil-common.nix @@ -204,14 +204,7 @@ in "nixos" "flashing-hil" "${hostname}" - ] ++ ( - # Add platform-specific labels based on hostname - if builtins.elem hostname ["worldcoin-hil-munich-0" "worldcoin-hil-munich-5"] - then ["worldcoin-hil-pearl"] - else if builtins.elem hostname ["worldcoin-hil-munich-2" "worldcoin-hil-munich-3"] - then ["worldcoin-hil-diamond"] - else [] - ); + ]; replace = true; user = ghRunnerUser; From 3b354646b0c76d64eefb09347bdb9ffdf53be455 Mon Sep 17 00:00:00 2001 From: chrisgalanis Date: Wed, 14 Jan 2026 13:33:37 +0100 Subject: [PATCH 47/47] fix: make test only for linux --- hil/src/network_discovery.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/hil/src/network_discovery.rs b/hil/src/network_discovery.rs index 771922c5a..ceb2af06c 100644 --- a/hil/src/network_discovery.rs +++ b/hil/src/network_discovery.rs @@ -198,6 +198,7 @@ mod tests { use super::*; #[tokio::test] + #[cfg(target_os = "linux")] async fn test_enumerate_interfaces() { let result = enumerate_orbeth_interfaces().await; assert!(result.is_ok());