From 449b38f63b8ece9332e83c29440236ebd2e66f27 Mon Sep 17 00:00:00 2001
From: YingjingLu <yingjinglu2019us@gmail.com>
Date: Sun, 22 Feb 2026 12:21:33 -0800
Subject: [PATCH 1/2] test(e2e): Fix Docker image and add GitHub E2E workflow

---
 .github/workflows/e2e.yml  |  36 +++
 docker/e2e/Dockerfile      |   3 +-
 scripts/test-e2e-docker.sh |   9 +-
 test_results.txt           | 529 +++++++++++++++++++++++++++++++++++++
 4 files changed, 574 insertions(+), 3 deletions(-)
 create mode 100644 .github/workflows/e2e.yml
 create mode 100644 test_results.txt

diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
new file mode 100644
index 0000000..10df456
--- /dev/null
+++ b/.github/workflows/e2e.yml
@@ -0,0 +1,36 @@
+name: E2E Tests
+
+on:
+  push:
+    branches: ["main"]
+  pull_request:
+    branches: ["main"]
+
+# Least privilege: the job only checks out the repository, so read access is enough.
+permissions:
+  contents: read
+
+jobs:
+  e2e:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+
+    steps:
+    - uses: actions/checkout@v4
+
+    # npm is only needed to invoke the wrapper script; the tests themselves run inside Docker.
+    - name: Use Node.js 22.x
+      uses: actions/setup-node@v4
+      with:
+        node-version: 22.x
+
+    # Give /dev/kvm mode 0666 so the Android emulator in the container can use KVM acceleration.
+    - name: Enable KVM group perms
+      run: |
+        echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /etc/udev/rules.d/99-kvm4all.rules
+        sudo udevadm control --reload-rules
+        sudo udevadm trigger --name-match=kvm
+
+    # scripts/test-e2e-docker.sh adds `--device /dev/kvm` to `docker run` when the host exposes it.
+    - name: Run E2E tests (Docker)
+      run: npm run test:e2e:docker
diff --git a/docker/e2e/Dockerfile b/docker/e2e/Dockerfile
index d31fef0..8435ba1 100644
--- a/docker/e2e/Dockerfile
+++ b/docker/e2e/Dockerfile
@@ -1,4 +1,4 @@
-FROM node:20-bookworm
+FROM --platform=linux/amd64 node:20-bookworm
 
 ENV DEBIAN_FRONTEND=noninteractive
 ENV ANDROID_SDK_ROOT=/opt/android-sdk
@@ -55,6 +55,7 @@ RUN npm ci
 
 COPY src ./src
 COPY test ./test
+COPY docs ./docs
 COPY openpocket ./openpocket
 COPY openpocket.config.example.json ./openpocket.config.example.json
 COPY README.md ./README.md
diff --git a/scripts/test-e2e-docker.sh b/scripts/test-e2e-docker.sh
index 44e147b..d6e910d 100755
--- a/scripts/test-e2e-docker.sh
+++ b/scripts/test-e2e-docker.sh
@@ -11,7 +11,12 @@ echo "[e2e] Building Docker image: ${IMAGE_TAG}"
 docker build -f docker/e2e/Dockerfile -t "${IMAGE_TAG}" .
 
 echo "[e2e] Running Docker container"
-docker run --rm \
-  --shm-size="${SHM_SIZE}" \
+DOCKER_RUN_ARGS=(--rm --shm-size="${SHM_SIZE}")
+if [ -e /dev/kvm ]; then
+  echo "[e2e] Hardware virtualization (/dev/kvm) detected. Enabling KVM."
+  DOCKER_RUN_ARGS+=(--device /dev/kvm)
+fi
+
+docker run "${DOCKER_RUN_ARGS[@]}" \
   -e OPENPOCKET_E2E_TASK="${OPENPOCKET_E2E_TASK:-Open Android Settings, then return to the home screen, then finish.}" \
   "${IMAGE_TAG}"
diff --git a/test_results.txt b/test_results.txt
new file mode 100644
index 0000000..466f50d
--- /dev/null
+++ b/test_results.txt
@@ -0,0 +1,529 @@
+
+> openpocket@0.2.2 test
+> npm run build && node --test test/*.test.mjs
+
+
+> openpocket@0.2.2 build
+> tsc -p tsconfig.json
+
+✔ normalizeAction handles invalid payload (1.478ms)
+✔ normalizeAction converts numeric fields for tap/swipe (0.42925ms)
+✔ normalizeAction sets defaults for run_script and finish (0.076625ms)
+✔ normalizeAction supports request_human_auth with defaults (0.932125ms)
+✔ normalizeAction falls back for unknown action (0.071708ms)
+✔ normalizeAction supports memory tools with defaults (0.073708ms)
+✔ AdbRuntime uses clipboard paste for non-ASCII typing (0.86675ms)
+✔ AdbRuntime falls back to clipboard when input text fails (0.240625ms)
+✔ AdbRuntime avoids stale clipboard paste when clipboard cannot be verified (0.12475ms)
+(node:39904) [DEP0040] DeprecationWarning: The `punycode` module is deprecated. Please use a userland alternative instead.
+(Use `node --trace-deprecation ...` to show where the warning was created)
+✖ AgentRuntime injects BOOTSTRAP guidance into system prompt context (19.375167ms)
+✖ AgentRuntime supports none system prompt mode for constrained runs (6.895292ms)
+✔ AgentRuntime context report marks hook usage and head-tail truncation (4.292958ms)
+✖ AgentRuntime returns home after successful task by default (5.555375ms)
+✖ AgentRuntime does not return home when config is disabled (6.390959ms)
+✖ AgentRuntime pauses for request_human_auth and resumes after approval (5.829584ms)
+✖ AgentRuntime fails when request_human_auth is rejected (9.391334ms)
+✖ AgentRuntime auto-approves Android permission dialog app without human auth (8.154542ms)
+✖ AgentRuntime does not call human auth when model asks permission capability (5.477417ms)
+✖ AgentRuntime auto-approves permission dialog even when model asks permission capability (6.581334ms)
+✖ AgentRuntime still requests human auth for camera capability after auto-allowing VM permission dialog (5.444958ms)
+✖ AgentRuntime applies OTP code from manual approval note when no artifact is provided (6.944917ms)
+✖ AgentRuntime applies delegated text artifact after human auth approval (9.037167ms)
+✖ AgentRuntime applies delegated location artifact after human auth approval (8.374958ms)
+✖ AgentRuntime appends gallery template hint after delegated image artifact (6.953959ms)
+✔ ensureAndroidPrerequisites supports skip mode for CI/tests (9.463166ms)
+✔ strict mode uses only Google Play system image candidates (0.257125ms)
+✔ buildAvdManagerOpts pins avdmanager toolsdir to selected SDK root (0.437708ms)
+✔ buildAvdManagerOpts preserves existing flags and avoids duplicate toolsdir (0.104291ms)
+✔ AutoArtifactBuilder returns null when task not successful (0.61625ms)
+✔ AutoArtifactBuilder creates skill and script files (6.396958ms)
+(node:39907) [DEP0040] DeprecationWarning: The `punycode` module is deprecated. Please use a userland alternative instead.
+(Use `node --trace-deprecation ...` to show where the warning was created)
+✔ ChatAssistant decide relies on model routing for greeting text (13.988333ms)
+✔ ChatAssistant decide keeps model task result (5.0915ms)
+✔ ChatAssistant decide reports missing API key without heuristics (5.5ms)
+✔ ChatAssistant reply handles missing API key gracefully (5.106292ms)
+✖ ChatAssistant decide uses Codex CLI credentials fallback (45.425ms)
+✔ ChatAssistant runs profile onboarding when identity and user are empty (6.116166ms)
+✔ ChatAssistant onboarding follows Chinese and supports one-shot multi-field answer (5.082917ms)
+✔ ChatAssistant onboarding accepts persona preset index (8.59575ms)
+✔ ChatAssistant onboarding reads question copy and presets from PROFILE_ONBOARDING.json (9.928916ms)
+✔ ChatAssistant onboarding triggers on default scaffold with blank profile fields (6.939416ms)
+✔ ChatAssistant model-driven onboarding completes and removes bootstrap file (6.620834ms)
+✔ ChatAssistant exposes pending profile update after onboarding completion (6.031292ms)
+✔ ChatAssistant updates profile from regular rename message (5.918291ms)
+✔ ChatAssistant narrateTaskProgress uses model decision output (5.554166ms)
+✔ ChatAssistant narrateTaskProgress falls back when model output is unavailable (8.818666ms)
+✔ ChatAssistant narrateTaskOutcome rewrites final output with model decision (7.991042ms)
+✔ ChatAssistant narrateTaskOutcome falls back and strips boilerplate (6.278459ms)
+✔ installCliShortcut creates launcher and updates shell rc once (3.852334ms)
+✔ init creates config and workspace files (256.375041ms)
+✔ init does not install CLI shortcut implicitly (217.8295ms)
+✔ onboard installs CLI launcher once on first run (541.163459ms)
+✔ legacy snake_case config is migrated to camelCase by init (183.254292ms)
+✔ agent command without API key fails and writes session/memory (385.996458ms)
+✔ help output uses onboard as primary command and lists legacy aliases (175.42375ms)
+✔ telegram setup requires interactive terminal (355.292875ms)
+✔ telegram whoami prints allow policy without requiring token (355.589ms)
+✔ telegram command validates unknown subcommand (181.592542ms)
+✔ gateway start command is accepted (reaches token validation) (361.369875ms)
+✔ gateway defaults to start when subcommand is omitted (356.126458ms)
+✔ dashboard command validates subcommand (175.355792ms)
+✔ test permission-app task prints recommended telegram flow (174.408334ms)
+✔ test permission-app task supports scenario-specific prompt (173.813833ms)
+✔ test permission-app cases prints scenario list (179.362333ms)
+✔ test command validates unknown target (174.713833ms)
+✔ test permission-app task --send requires telegram token (351.90575ms)
+✔ test permission-app task --send requires chat id when allowlist is empty (354.733125ms)
+✔ test permission-app run requires telegram token (370.443792ms)
+✔ loadConfig creates defaults including returnHomeOnTaskEnd (7.992083ms)
+✔ loadConfig migrates legacy snake_case return_home_on_task_end (3.821917ms)
+✔ loadConfig normalizes agent.lang to en (5.9145ms)
+✔ getModelProfile and resolveApiKey follow precedence rules (56.631458ms)
+✔ resolveModelAuth falls back to Codex CLI auth.json for codex models (41.019917ms)
+✔ resolveModelAuth does not use Codex CLI fallback for non-codex models (0.313917ms)
+✔ getModelProfile throws on unknown profile (6.593958ms)
+✔ CronService executes due jobs and persists state (20.548583ms)
+✔ control store provides defaults and persists onboarding/control files (16.146292ms)
+✔ default prompt entries include core prompt files (0.445ms)
+✔ dashboard server exposes health/config and prompt CRUD APIs (49.1885ms)
+✔ dashboard permission scope does not leak sibling paths with shared prefix (16.593208ms)
+✔ HeartbeatRunner writes heartbeat log and warns on stuck task (14.290292ms)
+✔ HumanAuthBridge supports manual approve flow (12.685375ms)
+✔ HumanAuthBridge times out when unresolved (1009.085375ms)
+✔ HumanAuthRelayServer create, resolve, and poll lifecycle (52.333292ms)
+[OpenPocket][human-auth] local relay started at http://127.0.0.1:53955
+✔ LocalHumanAuthStack starts local relay without tunnel (18.8935ms)
+✔ local launcher supports ./openpocket --help (261.216042ms)
+✔ MemoryExecutor memory_search returns ranked snippets with citations (11.972167ms)
+✔ MemoryExecutor memory_get enforces memory path policy (4.547084ms)
+(node:39936) [DEP0040] DeprecationWarning: The `punycode` module is deprecated. Please use a userland alternative instead.
+(Use `node --trace-deprecation ...` to show where the warning was created)
+✖ ModelClient falls back from chat to responses (3.667625ms)
+✖ ModelClient parses chat tool call correctly (0.291375ms)
+✖ ModelClient fails when no tool call is returned (0.626792ms)
+✔ path helpers respect OPENPOCKET_HOME (0.49025ms)
+✔ resolvePath handles ~ and nowForFilename has expected format (1.08575ms)
+✔ isAdbInstallUpdateIncompatible detects signature mismatch error (0.458ms)
+✔ isAdbInstallUpdateIncompatible ignores unrelated install failures (0.113791ms)
+✔ buildSystemPrompt includes planning rules and skills (0.639541ms)
+✔ buildSystemPrompt includes workspace context when provided (0.118208ms)
+✔ buildSystemPrompt supports minimal mode (1.167333ms)
+✔ buildSystemPrompt supports none mode (0.103959ms)
+✖ buildUserPrompt keeps only recent 8 history items (1.569542ms)
+✔ runGatewayLoop restarts on SIGUSR1 then stops on SIGTERM (4.506084ms)
+✔ ScreenshotStore enforces maxCount by deleting oldest files (112.046125ms)
+✔ ScriptExecutor executes allowed commands (13.365833ms)
+✔ ScriptExecutor blocks commands outside allowlist (3.967292ms)
+✔ setup wizard aborts when consent is not accepted (7.139667ms)
+✔ setup wizard configures OpenAI key and records Gmail onboarding state (6.195292ms)
+✔ setup wizard applies provider key to selected provider only (4.25075ms)
+✔ setup wizard configures local human-auth ngrok mode (11.200166ms)
+✔ setup wizard can configure Telegram token and allowlist in config (5.895375ms)
+✔ setup wizard normalizes invalid telegram botTokenEnv name (4.198667ms)
+✖ setup wizard supports codex cli auth option in model selection (30.156416ms)
+✖ setup wizard uses existing codex credential when codex login command fails (28.439791ms)
+✔ SkillLoader loads workspace skills (8.867083ms)
+(node:39954) [DEP0040] DeprecationWarning: The `punycode` module is deprecated. Please use a userland alternative instead.
+(Use `node --trace-deprecation ...` to show where the warning was created)
+✔ telegram command menu includes control commands for bot menu (0.399333ms)
+✔ telegram command menu uses Telegram-compatible command schema (0.1895ms)
+(node:39970) [DEP0040] DeprecationWarning: The `punycode` module is deprecated. Please use a userland alternative instead.
+(Use `node --trace-deprecation ...` to show where the warning was created)
+✔ TelegramGateway keeps typing heartbeat during async operation (801.15ms)
+✔ TelegramGateway typing heartbeat supports nested operations (383.209167ms)
+[OpenPocket][gateway] 2026-02-22T18:51:11.135Z telegram bot display name updated chat=123 name="Jarvis"
+✔ TelegramGateway syncs bot display name after onboarding update (175.041125ms)
+[OpenPocket][gateway] 2026-02-22T18:51:11.298Z telegram bot display name startup-sync name="Jarvis-Startup"
+✔ TelegramGateway startup sync reads assistant name from IDENTITY.md (162.887958ms)
+[OpenPocket][gateway] 2026-02-22T18:51:11.459Z telegram bot display name startup-sync rate-limited retry_after=120s
+[OpenPocket][gateway] 2026-02-22T18:51:11.459Z telegram bot display name startup-sync skipped: rate-limited retry_after=120s
+✔ TelegramGateway startup sync backs off after Telegram rate limit (160.966917ms)
+[OpenPocket][gateway] 2026-02-22T18:51:11.618Z telegram bot display name startup-sync name="Cached-Bot"
+✔ TelegramGateway startup sync skips API call when name already cached locally (317.292667ms)
+[OpenPocket][gateway] 2026-02-22T18:51:11.938Z decision chat=456 mode=chat confidence=1.00 reason=profile_update
+[OpenPocket][gateway] 2026-02-22T18:51:11.939Z telegram bot display name updated chat=456 name="Jarvis-Phone"
+✔ TelegramGateway consumes profile-update payload after chat reply (162.120875ms)
+✔ TelegramGateway resolves pending 2FA request from plain numeric text (152.393625ms)
+✔ TelegramGateway /start triggers onboarding reply when onboarding is pending (208.748917ms)
+✔ TelegramGateway /start replies with stable welcome when onboarding is completed (157.22825ms)
+✔ TelegramGateway /reset sends session reset startup prompt when onboarding is completed (647.592458ms)
+✔ TelegramGateway /reset routes into onboarding when onboarding is pending (398.48ms)
+✔ TelegramGateway /context returns summary report (166.701208ms)
+✔ TelegramGateway /context detail returns file snippet (179.186958ms)
+✔ TelegramGateway /context json returns machine-readable report (177.743458ms)
+[OpenPocket][gateway] 2026-02-22T18:51:14.190Z task accepted source=chat chat=9201 task="打开 Gmail 并进入收件箱" model=gpt-5.2-codex
+[OpenPocket][gateway] 2026-02-22T18:51:14.190Z progress source=chat chat=9201 step=1/5 action=launch_app app=com.google.android.gm
+[OpenPocket][gateway] 2026-02-22T18:51:14.190Z progress source=chat chat=9201 step=2/5 action=wait app=com.google.android.gm
+[OpenPocket][gateway] 2026-02-22T18:51:14.190Z progress source=chat chat=9201 step=3/5 action=wait app=com.google.android.gm
+[OpenPocket][gateway] 2026-02-22T18:51:14.190Z progress source=chat chat=9201 step=4/5 action=tap app=com.google.android.gm
+[OpenPocket][gateway] 2026-02-22T18:51:14.190Z task done source=chat chat=9201 ok=true session=/tmp/session-test.md
+✔ TelegramGateway narrates progress only when model marks meaningful updates (163.277958ms)
+[OpenPocket][gateway] 2026-02-22T18:51:14.355Z task accepted source=chat chat=9301 task="Check Gmail inbox" model=gpt-5.2-codex
+[OpenPocket][gateway] 2026-02-22T18:51:14.355Z progress source=chat chat=9301 step=6/50 action=launch_app app=com.google.android.gm
+[OpenPocket][gateway] 2026-02-22T18:51:14.355Z progress source=chat chat=9301 step=8/50 action=wait app=com.google.android.gm
+[OpenPocket][gateway] 2026-02-22T18:51:14.355Z progress source=chat chat=9301 step=10/50 action=wait app=com.google.android.gm
+[OpenPocket][gateway] 2026-02-22T18:51:14.355Z progress source=chat chat=9301 step=15/50 action=tap app=com.google.android.gm
+[OpenPocket][gateway] 2026-02-22T18:51:14.356Z task done source=chat chat=9301 ok=true session=/tmp/session-test-2.md
+✔ TelegramGateway suppresses low-signal repetitive narration even if model requests notify (165.013459ms)
+✔ workspace templates resolve from OPENPOCKET_TEMPLATE_DIR and strip markdown frontmatter (1.01375ms)
+✔ ensureWorkspaceBootstrap creates required layout (5.988959ms)
+✔ workspace onboarding state marks completion after bootstrap removal (3.890458ms)
+✔ markWorkspaceOnboardingCompleted writes completion marker (3.550084ms)
+✔ WorkspaceStore writes session steps final and daily memory (4.43675ms)
+ℹ tests 136
+ℹ suites 0
+ℹ pass 115
+ℹ fail 21
+ℹ cancelled 0
+ℹ skipped 0
+ℹ todo 0
+ℹ duration_ms 5385.552167
+
+✖ failing tests:
+
+test at test/agent-runtime.test.mjs:49:1
+✖ AgentRuntime injects BOOTSTRAP guidance into system prompt context (19.375167ms)
+  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
+  
+  false !== true
+  
+      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:78:12)
+      at async Test.run (node:internal/test_runner/test:1110:7)
+      at async startSubtestAfterBootstrap (node:internal/test_runner/harness:358:3) {
+    generatedMessage: true,
+    code: 'ERR_ASSERTION',
+    actual: false,
+    expected: true,
+    operator: 'strictEqual',
+    diff: 'simple'
+  }
+
+test at test/agent-runtime.test.mjs:90:1
+✖ AgentRuntime supports none system prompt mode for constrained runs (6.895292ms)
+  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
+  
+  false !== true
+  
+      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:113:12)
+      at async Test.run (node:internal/test_runner/test:1110:7)
+      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
+    generatedMessage: true,
+    code: 'ERR_ASSERTION',
+    actual: false,
+    expected: true,
+    operator: 'strictEqual',
+    diff: 'simple'
+  }
+
+test at test/agent-runtime.test.mjs:145:1
+✖ AgentRuntime returns home after successful task by default (5.555375ms)
+  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
+  
+  false !== true
+  
+      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:169:12)
+      at async Test.run (node:internal/test_runner/test:1110:7)
+      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
+    generatedMessage: true,
+    code: 'ERR_ASSERTION',
+    actual: false,
+    expected: true,
+    operator: 'strictEqual',
+    diff: 'simple'
+  }
+
+test at test/agent-runtime.test.mjs:179:1
+✖ AgentRuntime does not return home when config is disabled (6.390959ms)
+  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
+  
+  false !== true
+  
+      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:203:12)
+      at async Test.run (node:internal/test_runner/test:1110:7)
+      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
+    generatedMessage: true,
+    code: 'ERR_ASSERTION',
+    actual: false,
+    expected: true,
+    operator: 'strictEqual',
+    diff: 'simple'
+  }
+
+test at test/agent-runtime.test.mjs:213:1
+✖ AgentRuntime pauses for request_human_auth and resumes after approval (5.829584ms)
+  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
+  
+  false !== true
+  
+      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:269:12)
+      at async Test.run (node:internal/test_runner/test:1110:7)
+      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
+    generatedMessage: true,
+    code: 'ERR_ASSERTION',
+    actual: false,
+    expected: true,
+    operator: 'strictEqual',
+    diff: 'simple'
+  }
+
+test at test/agent-runtime.test.mjs:278:1
+✖ AgentRuntime fails when request_human_auth is rejected (9.391334ms)
+  AssertionError [ERR_ASSERTION]: The input did not match the regular expression /Human authorization rejected/. Input:
+  
+  'Agent execution failed: this.adb.queryLaunchablePackages is not a function'
+  
+      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:315:12)
+      at async Test.run (node:internal/test_runner/test:1110:7)
+      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
+    generatedMessage: true,
+    code: 'ERR_ASSERTION',
+    actual: 'Agent execution failed: this.adb.queryLaunchablePackages is not a function',
+    expected: /Human authorization rejected/,
+    operator: 'match',
+    diff: 'simple'
+  }
+
+test at test/agent-runtime.test.mjs:321:1
+✖ AgentRuntime auto-approves Android permission dialog app without human auth (8.154542ms)
+  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
+  
+  false !== true
+  
+      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:390:12)
+      at async Test.run (node:internal/test_runner/test:1110:7)
+      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
+    generatedMessage: true,
+    code: 'ERR_ASSERTION',
+    actual: false,
+    expected: true,
+    operator: 'strictEqual',
+    diff: 'simple'
+  }
+
+test at test/agent-runtime.test.mjs:408:1
+✖ AgentRuntime does not call human auth when model asks permission capability (5.477417ms)
+  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
+  
+  false !== true
+  
+      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:464:12)
+      at async Test.run (node:internal/test_runner/test:1110:7)
+      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
+    generatedMessage: true,
+    code: 'ERR_ASSERTION',
+    actual: false,
+    expected: true,
+    operator: 'strictEqual',
+    diff: 'simple'
+  }
+
+test at test/agent-runtime.test.mjs:473:1
+✖ AgentRuntime auto-approves permission dialog even when model asks permission capability (6.581334ms)
+  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
+  
+  false !== true
+  
+      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:547:12)
+      at async Test.run (node:internal/test_runner/test:1110:7)
+      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
+    generatedMessage: true,
+    code: 'ERR_ASSERTION',
+    actual: false,
+    expected: true,
+    operator: 'strictEqual',
+    diff: 'simple'
+  }
+
+test at test/agent-runtime.test.mjs:564:1
+✖ AgentRuntime still requests human auth for camera capability after auto-allowing VM permission dialog (5.444958ms)
+  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
+  
+  false !== true
+  
+      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:638:12)
+      at async Test.run (node:internal/test_runner/test:1110:7)
+      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
+    generatedMessage: true,
+    code: 'ERR_ASSERTION',
+    actual: false,
+    expected: true,
+    operator: 'strictEqual',
+    diff: 'simple'
+  }
+
+test at test/agent-runtime.test.mjs:657:1
+✖ AgentRuntime applies OTP code from manual approval note when no artifact is provided (6.944917ms)
+  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
+  
+  false !== true
+  
+      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:710:12)
+      at async Test.run (node:internal/test_runner/test:1110:7)
+      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
+    generatedMessage: true,
+    code: 'ERR_ASSERTION',
+    actual: false,
+    expected: true,
+    operator: 'strictEqual',
+    diff: 'simple'
+  }
+
+test at test/agent-runtime.test.mjs:720:1
+✖ AgentRuntime applies delegated text artifact after human auth approval (9.037167ms)
+  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
+  
+  false !== true
+  
+      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:783:12)
+      at async Test.run (node:internal/test_runner/test:1110:7)
+      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
+    generatedMessage: true,
+    code: 'ERR_ASSERTION',
+    actual: false,
+    expected: true,
+    operator: 'strictEqual',
+    diff: 'simple'
+  }
+
+test at test/agent-runtime.test.mjs:794:1
+✖ AgentRuntime applies delegated location artifact after human auth approval (8.374958ms)
+  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
+  
+  false !== true
+  
+      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:865:12)
+      at async Test.run (node:internal/test_runner/test:1110:7)
+      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
+    generatedMessage: true,
+    code: 'ERR_ASSERTION',
+    actual: false,
+    expected: true,
+    operator: 'strictEqual',
+    diff: 'simple'
+  }
+
+test at test/agent-runtime.test.mjs:888:1
+✖ AgentRuntime appends gallery template hint after delegated image artifact (6.953959ms)
+  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
+  
+  false !== true
+  
+      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:948:12)
+      at async Test.run (node:internal/test_runner/test:1110:7)
+      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
+    generatedMessage: true,
+    code: 'ERR_ASSERTION',
+    actual: false,
+    expected: true,
+    operator: 'strictEqual',
+    diff: 'simple'
+  }
+
+test at test/chat-assistant.test.mjs:139:1
+✖ ChatAssistant decide uses Codex CLI credentials fallback (45.425ms)
+  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
+  + actual - expected
+  
+  + 'no_api_key'
+  - 'model_classify'
+  
+      at file:///Users/yingjinglu/Documents/Projects/openpocket/test/chat-assistant.test.mjs:163:12
+      at async TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/chat-assistant.test.mjs:140:3)
+      at async Test.run (node:internal/test_runner/test:1110:7)
+      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
+    generatedMessage: true,
+    code: 'ERR_ASSERTION',
+    actual: 'no_api_key',
+    expected: 'model_classify',
+    operator: 'strictEqual',
+    diff: 'simple'
+  }
+
+test at test/model-client.test.mjs:35:1
+✖ ModelClient falls back from chat to responses (3.667625ms)
+  TypeError: Cannot read properties of undefined (reading 'length')
+      at buildUserPrompt (/Users/yingjinglu/Documents/Projects/openpocket/dist/agent/prompts.js:248:50)
+      at ModelClient.nextStep (/Users/yingjinglu/Documents/Projects/openpocket/dist/agent/model-client.js:173:56)
+      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/model-client.test.mjs:58:28)
+      at Test.runInAsyncScope (node:async_hooks:214:14)
+      at Test.run (node:internal/test_runner/test:1103:25)
+      at Test.start (node:internal/test_runner/test:1000:17)
+      at startSubtestAfterBootstrap (node:internal/test_runner/harness:358:17)
+
+test at test/model-client.test.mjs:72:1
+✖ ModelClient parses chat tool call correctly (0.291375ms)
+  TypeError: Cannot read properties of undefined (reading 'length')
+      at buildUserPrompt (/Users/yingjinglu/Documents/Projects/openpocket/dist/agent/prompts.js:248:50)
+      at ModelClient.nextStep (/Users/yingjinglu/Documents/Projects/openpocket/dist/agent/model-client.js:173:56)
+      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/model-client.test.mjs:102:28)
+      at Test.runInAsyncScope (node:async_hooks:214:14)
+      at Test.run (node:internal/test_runner/test:1103:25)
+      at Test.processPendingSubtests (node:internal/test_runner/test:785:18)
+      at Test.postRun (node:internal/test_runner/test:1232:19)
+      at Test.run (node:internal/test_runner/test:1160:12)
+      at async startSubtestAfterBootstrap (node:internal/test_runner/harness:358:3)
+
+test at test/model-client.test.mjs:116:1
+✖ ModelClient fails when no tool call is returned (0.626792ms)
+  AssertionError [ERR_ASSERTION]: The input did not match the regular expression /All model endpoints failed/. Input:
+  
+  "TypeError: Cannot read properties of undefined (reading 'length')"
+  
+      at async TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/model-client.test.mjs:133:3)
+      at async Test.run (node:internal/test_runner/test:1110:7)
+      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
+    generatedMessage: true,
+    code: 'ERR_ASSERTION',
+    actual: TypeError: Cannot read properties of undefined (reading 'length')
+        at buildUserPrompt (/Users/yingjinglu/Documents/Projects/openpocket/dist/agent/prompts.js:248:50)
+        at ModelClient.nextStep (/Users/yingjinglu/Documents/Projects/openpocket/dist/agent/model-client.js:173:56)
+        at file:///Users/yingjinglu/Documents/Projects/openpocket/test/model-client.test.mjs:135:14
+        at waitForActual (node:assert:632:21)
+        at strict.rejects (node:assert:769:31)
+        at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/model-client.test.mjs:133:16)
+        at Test.runInAsyncScope (node:async_hooks:214:14)
+        at Test.run (node:internal/test_runner/test:1103:25)
+        at Test.processPendingSubtests (node:internal/test_runner/test:785:18)
+        at Test.postRun (node:internal/test_runner/test:1232:19),
+    expected: /All model endpoints failed/,
+    operator: 'rejects',
+    diff: 'simple'
+  }
+
+test at test/prompts.test.mjs:51:1
+✖ buildUserPrompt keeps only recent 8 history items (1.569542ms)
+  TypeError: Cannot read properties of undefined (reading 'length')
+      at buildUserPrompt (/Users/yingjinglu/Documents/Projects/openpocket/dist/agent/prompts.js:248:50)
+      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/prompts.test.mjs:53:18)
+      at Test.runInAsyncScope (node:async_hooks:214:14)
+      at Test.run (node:internal/test_runner/test:1103:25)
+      at Test.processPendingSubtests (node:internal/test_runner/test:785:18)
+      at Test.postRun (node:internal/test_runner/test:1232:19)
+      at Test.run (node:internal/test_runner/test:1160:12)
+      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7)
+
+test at test/setup-wizard.test.mjs:263:1
+✖ setup wizard supports codex cli auth option in model selection (30.156416ms)
+  Error: No scripted confirm value.
+      at FakePrompter.confirm (file:///Users/yingjinglu/Documents/Projects/openpocket/test/setup-wizard.test.mjs:36:13)
+      at runApiKeyStep (/Users/yingjinglu/Documents/Projects/openpocket/dist/onboarding/setup-wizard.js:584:44)
+      at async runSetupWizard (/Users/yingjinglu/Documents/Projects/openpocket/dist/onboarding/setup-wizard.js:902:9)
+      at async file:///Users/yingjinglu/Documents/Projects/openpocket/test/setup-wizard.test.mjs:276:7
+      at async file:///Users/yingjinglu/Documents/Projects/openpocket/test/setup-wizard.test.mjs:265:5
+      at async TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/setup-wizard.test.mjs:264:3)
+      at async Test.run (node:internal/test_runner/test:1110:7)
+      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7)
+
+test at test/setup-wizard.test.mjs:312:1
+✖ setup wizard uses existing codex credential when codex login command fails (28.439791ms)
+  Error: No scripted confirm value.
+      at FakePrompter.confirm (file:///Users/yingjinglu/Documents/Projects/openpocket/test/setup-wizard.test.mjs:36:13)
+      at runApiKeyStep (/Users/yingjinglu/Documents/Projects/openpocket/dist/onboarding/setup-wizard.js:584:44)
+      at async runSetupWizard (/Users/yingjinglu/Documents/Projects/openpocket/dist/onboarding/setup-wizard.js:902:9)
+      at async file:///Users/yingjinglu/Documents/Projects/openpocket/test/setup-wizard.test.mjs:336:7
+      at async file:///Users/yingjinglu/Documents/Projects/openpocket/test/setup-wizard.test.mjs:314:5
+      at async TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/setup-wizard.test.mjs:313:3)
+      at async Test.run (node:internal/test_runner/test:1110:7)
+      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7)

From 3df684bdd0ee70bb7e1a56a5a84218c4701b5f19 Mon Sep 17 00:00:00 2001
From: YingjingLu <yingjinglu2019us@gmail.com>
Date: Sun, 22 Feb 2026 12:22:54 -0800
Subject: [PATCH 2/2] chore: remove test_results.txt added in the previous commit

---
 test_results.txt | 529 -----------------------------------------------
 1 file changed, 529 deletions(-)
 delete mode 100644 test_results.txt

diff --git a/test_results.txt b/test_results.txt
deleted file mode 100644
index 466f50d..0000000
--- a/test_results.txt
+++ /dev/null
@@ -1,529 +0,0 @@
-
-> openpocket@0.2.2 test
-> npm run build && node --test test/*.test.mjs
-
-
-> openpocket@0.2.2 build
-> tsc -p tsconfig.json
-
-✔ normalizeAction handles invalid payload (1.478ms)
-✔ normalizeAction converts numeric fields for tap/swipe (0.42925ms)
-✔ normalizeAction sets defaults for run_script and finish (0.076625ms)
-✔ normalizeAction supports request_human_auth with defaults (0.932125ms)
-✔ normalizeAction falls back for unknown action (0.071708ms)
-✔ normalizeAction supports memory tools with defaults (0.073708ms)
-✔ AdbRuntime uses clipboard paste for non-ASCII typing (0.86675ms)
-✔ AdbRuntime falls back to clipboard when input text fails (0.240625ms)
-✔ AdbRuntime avoids stale clipboard paste when clipboard cannot be verified (0.12475ms)
-(node:39904) [DEP0040] DeprecationWarning: The `punycode` module is deprecated. Please use a userland alternative instead.
-(Use `node --trace-deprecation ...` to show where the warning was created)
-✖ AgentRuntime injects BOOTSTRAP guidance into system prompt context (19.375167ms)
-✖ AgentRuntime supports none system prompt mode for constrained runs (6.895292ms)
-✔ AgentRuntime context report marks hook usage and head-tail truncation (4.292958ms)
-✖ AgentRuntime returns home after successful task by default (5.555375ms)
-✖ AgentRuntime does not return home when config is disabled (6.390959ms)
-✖ AgentRuntime pauses for request_human_auth and resumes after approval (5.829584ms)
-✖ AgentRuntime fails when request_human_auth is rejected (9.391334ms)
-✖ AgentRuntime auto-approves Android permission dialog app without human auth (8.154542ms)
-✖ AgentRuntime does not call human auth when model asks permission capability (5.477417ms)
-✖ AgentRuntime auto-approves permission dialog even when model asks permission capability (6.581334ms)
-✖ AgentRuntime still requests human auth for camera capability after auto-allowing VM permission dialog (5.444958ms)
-✖ AgentRuntime applies OTP code from manual approval note when no artifact is provided (6.944917ms)
-✖ AgentRuntime applies delegated text artifact after human auth approval (9.037167ms)
-✖ AgentRuntime applies delegated location artifact after human auth approval (8.374958ms)
-✖ AgentRuntime appends gallery template hint after delegated image artifact (6.953959ms)
-✔ ensureAndroidPrerequisites supports skip mode for CI/tests (9.463166ms)
-✔ strict mode uses only Google Play system image candidates (0.257125ms)
-✔ buildAvdManagerOpts pins avdmanager toolsdir to selected SDK root (0.437708ms)
-✔ buildAvdManagerOpts preserves existing flags and avoids duplicate toolsdir (0.104291ms)
-✔ AutoArtifactBuilder returns null when task not successful (0.61625ms)
-✔ AutoArtifactBuilder creates skill and script files (6.396958ms)
-(node:39907) [DEP0040] DeprecationWarning: The `punycode` module is deprecated. Please use a userland alternative instead.
-(Use `node --trace-deprecation ...` to show where the warning was created)
-✔ ChatAssistant decide relies on model routing for greeting text (13.988333ms)
-✔ ChatAssistant decide keeps model task result (5.0915ms)
-✔ ChatAssistant decide reports missing API key without heuristics (5.5ms)
-✔ ChatAssistant reply handles missing API key gracefully (5.106292ms)
-✖ ChatAssistant decide uses Codex CLI credentials fallback (45.425ms)
-✔ ChatAssistant runs profile onboarding when identity and user are empty (6.116166ms)
-✔ ChatAssistant onboarding follows Chinese and supports one-shot multi-field answer (5.082917ms)
-✔ ChatAssistant onboarding accepts persona preset index (8.59575ms)
-✔ ChatAssistant onboarding reads question copy and presets from PROFILE_ONBOARDING.json (9.928916ms)
-✔ ChatAssistant onboarding triggers on default scaffold with blank profile fields (6.939416ms)
-✔ ChatAssistant model-driven onboarding completes and removes bootstrap file (6.620834ms)
-✔ ChatAssistant exposes pending profile update after onboarding completion (6.031292ms)
-✔ ChatAssistant updates profile from regular rename message (5.918291ms)
-✔ ChatAssistant narrateTaskProgress uses model decision output (5.554166ms)
-✔ ChatAssistant narrateTaskProgress falls back when model output is unavailable (8.818666ms)
-✔ ChatAssistant narrateTaskOutcome rewrites final output with model decision (7.991042ms)
-✔ ChatAssistant narrateTaskOutcome falls back and strips boilerplate (6.278459ms)
-✔ installCliShortcut creates launcher and updates shell rc once (3.852334ms)
-✔ init creates config and workspace files (256.375041ms)
-✔ init does not install CLI shortcut implicitly (217.8295ms)
-✔ onboard installs CLI launcher once on first run (541.163459ms)
-✔ legacy snake_case config is migrated to camelCase by init (183.254292ms)
-✔ agent command without API key fails and writes session/memory (385.996458ms)
-✔ help output uses onboard as primary command and lists legacy aliases (175.42375ms)
-✔ telegram setup requires interactive terminal (355.292875ms)
-✔ telegram whoami prints allow policy without requiring token (355.589ms)
-✔ telegram command validates unknown subcommand (181.592542ms)
-✔ gateway start command is accepted (reaches token validation) (361.369875ms)
-✔ gateway defaults to start when subcommand is omitted (356.126458ms)
-✔ dashboard command validates subcommand (175.355792ms)
-✔ test permission-app task prints recommended telegram flow (174.408334ms)
-✔ test permission-app task supports scenario-specific prompt (173.813833ms)
-✔ test permission-app cases prints scenario list (179.362333ms)
-✔ test command validates unknown target (174.713833ms)
-✔ test permission-app task --send requires telegram token (351.90575ms)
-✔ test permission-app task --send requires chat id when allowlist is empty (354.733125ms)
-✔ test permission-app run requires telegram token (370.443792ms)
-✔ loadConfig creates defaults including returnHomeOnTaskEnd (7.992083ms)
-✔ loadConfig migrates legacy snake_case return_home_on_task_end (3.821917ms)
-✔ loadConfig normalizes agent.lang to en (5.9145ms)
-✔ getModelProfile and resolveApiKey follow precedence rules (56.631458ms)
-✔ resolveModelAuth falls back to Codex CLI auth.json for codex models (41.019917ms)
-✔ resolveModelAuth does not use Codex CLI fallback for non-codex models (0.313917ms)
-✔ getModelProfile throws on unknown profile (6.593958ms)
-✔ CronService executes due jobs and persists state (20.548583ms)
-✔ control store provides defaults and persists onboarding/control files (16.146292ms)
-✔ default prompt entries include core prompt files (0.445ms)
-✔ dashboard server exposes health/config and prompt CRUD APIs (49.1885ms)
-✔ dashboard permission scope does not leak sibling paths with shared prefix (16.593208ms)
-✔ HeartbeatRunner writes heartbeat log and warns on stuck task (14.290292ms)
-✔ HumanAuthBridge supports manual approve flow (12.685375ms)
-✔ HumanAuthBridge times out when unresolved (1009.085375ms)
-✔ HumanAuthRelayServer create, resolve, and poll lifecycle (52.333292ms)
-[OpenPocket][human-auth] local relay started at http://127.0.0.1:53955
-✔ LocalHumanAuthStack starts local relay without tunnel (18.8935ms)
-✔ local launcher supports ./openpocket --help (261.216042ms)
-✔ MemoryExecutor memory_search returns ranked snippets with citations (11.972167ms)
-✔ MemoryExecutor memory_get enforces memory path policy (4.547084ms)
-(node:39936) [DEP0040] DeprecationWarning: The `punycode` module is deprecated. Please use a userland alternative instead.
-(Use `node --trace-deprecation ...` to show where the warning was created)
-✖ ModelClient falls back from chat to responses (3.667625ms)
-✖ ModelClient parses chat tool call correctly (0.291375ms)
-✖ ModelClient fails when no tool call is returned (0.626792ms)
-✔ path helpers respect OPENPOCKET_HOME (0.49025ms)
-✔ resolvePath handles ~ and nowForFilename has expected format (1.08575ms)
-✔ isAdbInstallUpdateIncompatible detects signature mismatch error (0.458ms)
-✔ isAdbInstallUpdateIncompatible ignores unrelated install failures (0.113791ms)
-✔ buildSystemPrompt includes planning rules and skills (0.639541ms)
-✔ buildSystemPrompt includes workspace context when provided (0.118208ms)
-✔ buildSystemPrompt supports minimal mode (1.167333ms)
-✔ buildSystemPrompt supports none mode (0.103959ms)
-✖ buildUserPrompt keeps only recent 8 history items (1.569542ms)
-✔ runGatewayLoop restarts on SIGUSR1 then stops on SIGTERM (4.506084ms)
-✔ ScreenshotStore enforces maxCount by deleting oldest files (112.046125ms)
-✔ ScriptExecutor executes allowed commands (13.365833ms)
-✔ ScriptExecutor blocks commands outside allowlist (3.967292ms)
-✔ setup wizard aborts when consent is not accepted (7.139667ms)
-✔ setup wizard configures OpenAI key and records Gmail onboarding state (6.195292ms)
-✔ setup wizard applies provider key to selected provider only (4.25075ms)
-✔ setup wizard configures local human-auth ngrok mode (11.200166ms)
-✔ setup wizard can configure Telegram token and allowlist in config (5.895375ms)
-✔ setup wizard normalizes invalid telegram botTokenEnv name (4.198667ms)
-✖ setup wizard supports codex cli auth option in model selection (30.156416ms)
-✖ setup wizard uses existing codex credential when codex login command fails (28.439791ms)
-✔ SkillLoader loads workspace skills (8.867083ms)
-(node:39954) [DEP0040] DeprecationWarning: The `punycode` module is deprecated. Please use a userland alternative instead.
-(Use `node --trace-deprecation ...` to show where the warning was created)
-✔ telegram command menu includes control commands for bot menu (0.399333ms)
-✔ telegram command menu uses Telegram-compatible command schema (0.1895ms)
-(node:39970) [DEP0040] DeprecationWarning: The `punycode` module is deprecated. Please use a userland alternative instead.
-(Use `node --trace-deprecation ...` to show where the warning was created)
-✔ TelegramGateway keeps typing heartbeat during async operation (801.15ms)
-✔ TelegramGateway typing heartbeat supports nested operations (383.209167ms)
-[OpenPocket][gateway] 2026-02-22T18:51:11.135Z telegram bot display name updated chat=123 name="Jarvis"
-✔ TelegramGateway syncs bot display name after onboarding update (175.041125ms)
-[OpenPocket][gateway] 2026-02-22T18:51:11.298Z telegram bot display name startup-sync name="Jarvis-Startup"
-✔ TelegramGateway startup sync reads assistant name from IDENTITY.md (162.887958ms)
-[OpenPocket][gateway] 2026-02-22T18:51:11.459Z telegram bot display name startup-sync rate-limited retry_after=120s
-[OpenPocket][gateway] 2026-02-22T18:51:11.459Z telegram bot display name startup-sync skipped: rate-limited retry_after=120s
-✔ TelegramGateway startup sync backs off after Telegram rate limit (160.966917ms)
-[OpenPocket][gateway] 2026-02-22T18:51:11.618Z telegram bot display name startup-sync name="Cached-Bot"
-✔ TelegramGateway startup sync skips API call when name already cached locally (317.292667ms)
-[OpenPocket][gateway] 2026-02-22T18:51:11.938Z decision chat=456 mode=chat confidence=1.00 reason=profile_update
-[OpenPocket][gateway] 2026-02-22T18:51:11.939Z telegram bot display name updated chat=456 name="Jarvis-Phone"
-✔ TelegramGateway consumes profile-update payload after chat reply (162.120875ms)
-✔ TelegramGateway resolves pending 2FA request from plain numeric text (152.393625ms)
-✔ TelegramGateway /start triggers onboarding reply when onboarding is pending (208.748917ms)
-✔ TelegramGateway /start replies with stable welcome when onboarding is completed (157.22825ms)
-✔ TelegramGateway /reset sends session reset startup prompt when onboarding is completed (647.592458ms)
-✔ TelegramGateway /reset routes into onboarding when onboarding is pending (398.48ms)
-✔ TelegramGateway /context returns summary report (166.701208ms)
-✔ TelegramGateway /context detail returns file snippet (179.186958ms)
-✔ TelegramGateway /context json returns machine-readable report (177.743458ms)
-[OpenPocket][gateway] 2026-02-22T18:51:14.190Z task accepted source=chat chat=9201 task="打开 Gmail 并进入收件箱" model=gpt-5.2-codex
-[OpenPocket][gateway] 2026-02-22T18:51:14.190Z progress source=chat chat=9201 step=1/5 action=launch_app app=com.google.android.gm
-[OpenPocket][gateway] 2026-02-22T18:51:14.190Z progress source=chat chat=9201 step=2/5 action=wait app=com.google.android.gm
-[OpenPocket][gateway] 2026-02-22T18:51:14.190Z progress source=chat chat=9201 step=3/5 action=wait app=com.google.android.gm
-[OpenPocket][gateway] 2026-02-22T18:51:14.190Z progress source=chat chat=9201 step=4/5 action=tap app=com.google.android.gm
-[OpenPocket][gateway] 2026-02-22T18:51:14.190Z task done source=chat chat=9201 ok=true session=/tmp/session-test.md
-✔ TelegramGateway narrates progress only when model marks meaningful updates (163.277958ms)
-[OpenPocket][gateway] 2026-02-22T18:51:14.355Z task accepted source=chat chat=9301 task="Check Gmail inbox" model=gpt-5.2-codex
-[OpenPocket][gateway] 2026-02-22T18:51:14.355Z progress source=chat chat=9301 step=6/50 action=launch_app app=com.google.android.gm
-[OpenPocket][gateway] 2026-02-22T18:51:14.355Z progress source=chat chat=9301 step=8/50 action=wait app=com.google.android.gm
-[OpenPocket][gateway] 2026-02-22T18:51:14.355Z progress source=chat chat=9301 step=10/50 action=wait app=com.google.android.gm
-[OpenPocket][gateway] 2026-02-22T18:51:14.355Z progress source=chat chat=9301 step=15/50 action=tap app=com.google.android.gm
-[OpenPocket][gateway] 2026-02-22T18:51:14.356Z task done source=chat chat=9301 ok=true session=/tmp/session-test-2.md
-✔ TelegramGateway suppresses low-signal repetitive narration even if model requests notify (165.013459ms)
-✔ workspace templates resolve from OPENPOCKET_TEMPLATE_DIR and strip markdown frontmatter (1.01375ms)
-✔ ensureWorkspaceBootstrap creates required layout (5.988959ms)
-✔ workspace onboarding state marks completion after bootstrap removal (3.890458ms)
-✔ markWorkspaceOnboardingCompleted writes completion marker (3.550084ms)
-✔ WorkspaceStore writes session steps final and daily memory (4.43675ms)
-ℹ tests 136
-ℹ suites 0
-ℹ pass 115
-ℹ fail 21
-ℹ cancelled 0
-ℹ skipped 0
-ℹ todo 0
-ℹ duration_ms 5385.552167
-
-✖ failing tests:
-
-test at test/agent-runtime.test.mjs:49:1
-✖ AgentRuntime injects BOOTSTRAP guidance into system prompt context (19.375167ms)
-  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
-  
-  false !== true
-  
-      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:78:12)
-      at async Test.run (node:internal/test_runner/test:1110:7)
-      at async startSubtestAfterBootstrap (node:internal/test_runner/harness:358:3) {
-    generatedMessage: true,
-    code: 'ERR_ASSERTION',
-    actual: false,
-    expected: true,
-    operator: 'strictEqual',
-    diff: 'simple'
-  }
-
-test at test/agent-runtime.test.mjs:90:1
-✖ AgentRuntime supports none system prompt mode for constrained runs (6.895292ms)
-  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
-  
-  false !== true
-  
-      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:113:12)
-      at async Test.run (node:internal/test_runner/test:1110:7)
-      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
-    generatedMessage: true,
-    code: 'ERR_ASSERTION',
-    actual: false,
-    expected: true,
-    operator: 'strictEqual',
-    diff: 'simple'
-  }
-
-test at test/agent-runtime.test.mjs:145:1
-✖ AgentRuntime returns home after successful task by default (5.555375ms)
-  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
-  
-  false !== true
-  
-      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:169:12)
-      at async Test.run (node:internal/test_runner/test:1110:7)
-      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
-    generatedMessage: true,
-    code: 'ERR_ASSERTION',
-    actual: false,
-    expected: true,
-    operator: 'strictEqual',
-    diff: 'simple'
-  }
-
-test at test/agent-runtime.test.mjs:179:1
-✖ AgentRuntime does not return home when config is disabled (6.390959ms)
-  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
-  
-  false !== true
-  
-      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:203:12)
-      at async Test.run (node:internal/test_runner/test:1110:7)
-      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
-    generatedMessage: true,
-    code: 'ERR_ASSERTION',
-    actual: false,
-    expected: true,
-    operator: 'strictEqual',
-    diff: 'simple'
-  }
-
-test at test/agent-runtime.test.mjs:213:1
-✖ AgentRuntime pauses for request_human_auth and resumes after approval (5.829584ms)
-  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
-  
-  false !== true
-  
-      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:269:12)
-      at async Test.run (node:internal/test_runner/test:1110:7)
-      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
-    generatedMessage: true,
-    code: 'ERR_ASSERTION',
-    actual: false,
-    expected: true,
-    operator: 'strictEqual',
-    diff: 'simple'
-  }
-
-test at test/agent-runtime.test.mjs:278:1
-✖ AgentRuntime fails when request_human_auth is rejected (9.391334ms)
-  AssertionError [ERR_ASSERTION]: The input did not match the regular expression /Human authorization rejected/. Input:
-  
-  'Agent execution failed: this.adb.queryLaunchablePackages is not a function'
-  
-      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:315:12)
-      at async Test.run (node:internal/test_runner/test:1110:7)
-      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
-    generatedMessage: true,
-    code: 'ERR_ASSERTION',
-    actual: 'Agent execution failed: this.adb.queryLaunchablePackages is not a function',
-    expected: /Human authorization rejected/,
-    operator: 'match',
-    diff: 'simple'
-  }
-
-test at test/agent-runtime.test.mjs:321:1
-✖ AgentRuntime auto-approves Android permission dialog app without human auth (8.154542ms)
-  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
-  
-  false !== true
-  
-      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:390:12)
-      at async Test.run (node:internal/test_runner/test:1110:7)
-      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
-    generatedMessage: true,
-    code: 'ERR_ASSERTION',
-    actual: false,
-    expected: true,
-    operator: 'strictEqual',
-    diff: 'simple'
-  }
-
-test at test/agent-runtime.test.mjs:408:1
-✖ AgentRuntime does not call human auth when model asks permission capability (5.477417ms)
-  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
-  
-  false !== true
-  
-      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:464:12)
-      at async Test.run (node:internal/test_runner/test:1110:7)
-      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
-    generatedMessage: true,
-    code: 'ERR_ASSERTION',
-    actual: false,
-    expected: true,
-    operator: 'strictEqual',
-    diff: 'simple'
-  }
-
-test at test/agent-runtime.test.mjs:473:1
-✖ AgentRuntime auto-approves permission dialog even when model asks permission capability (6.581334ms)
-  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
-  
-  false !== true
-  
-      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:547:12)
-      at async Test.run (node:internal/test_runner/test:1110:7)
-      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
-    generatedMessage: true,
-    code: 'ERR_ASSERTION',
-    actual: false,
-    expected: true,
-    operator: 'strictEqual',
-    diff: 'simple'
-  }
-
-test at test/agent-runtime.test.mjs:564:1
-✖ AgentRuntime still requests human auth for camera capability after auto-allowing VM permission dialog (5.444958ms)
-  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
-  
-  false !== true
-  
-      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:638:12)
-      at async Test.run (node:internal/test_runner/test:1110:7)
-      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
-    generatedMessage: true,
-    code: 'ERR_ASSERTION',
-    actual: false,
-    expected: true,
-    operator: 'strictEqual',
-    diff: 'simple'
-  }
-
-test at test/agent-runtime.test.mjs:657:1
-✖ AgentRuntime applies OTP code from manual approval note when no artifact is provided (6.944917ms)
-  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
-  
-  false !== true
-  
-      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:710:12)
-      at async Test.run (node:internal/test_runner/test:1110:7)
-      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
-    generatedMessage: true,
-    code: 'ERR_ASSERTION',
-    actual: false,
-    expected: true,
-    operator: 'strictEqual',
-    diff: 'simple'
-  }
-
-test at test/agent-runtime.test.mjs:720:1
-✖ AgentRuntime applies delegated text artifact after human auth approval (9.037167ms)
-  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
-  
-  false !== true
-  
-      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:783:12)
-      at async Test.run (node:internal/test_runner/test:1110:7)
-      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
-    generatedMessage: true,
-    code: 'ERR_ASSERTION',
-    actual: false,
-    expected: true,
-    operator: 'strictEqual',
-    diff: 'simple'
-  }
-
-test at test/agent-runtime.test.mjs:794:1
-✖ AgentRuntime applies delegated location artifact after human auth approval (8.374958ms)
-  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
-  
-  false !== true
-  
-      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:865:12)
-      at async Test.run (node:internal/test_runner/test:1110:7)
-      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
-    generatedMessage: true,
-    code: 'ERR_ASSERTION',
-    actual: false,
-    expected: true,
-    operator: 'strictEqual',
-    diff: 'simple'
-  }
-
-test at test/agent-runtime.test.mjs:888:1
-✖ AgentRuntime appends gallery template hint after delegated image artifact (6.953959ms)
-  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
-  
-  false !== true
-  
-      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/agent-runtime.test.mjs:948:12)
-      at async Test.run (node:internal/test_runner/test:1110:7)
-      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
-    generatedMessage: true,
-    code: 'ERR_ASSERTION',
-    actual: false,
-    expected: true,
-    operator: 'strictEqual',
-    diff: 'simple'
-  }
-
-test at test/chat-assistant.test.mjs:139:1
-✖ ChatAssistant decide uses Codex CLI credentials fallback (45.425ms)
-  AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
-  + actual - expected
-  
-  + 'no_api_key'
-  - 'model_classify'
-  
-      at file:///Users/yingjinglu/Documents/Projects/openpocket/test/chat-assistant.test.mjs:163:12
-      at async TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/chat-assistant.test.mjs:140:3)
-      at async Test.run (node:internal/test_runner/test:1110:7)
-      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
-    generatedMessage: true,
-    code: 'ERR_ASSERTION',
-    actual: 'no_api_key',
-    expected: 'model_classify',
-    operator: 'strictEqual',
-    diff: 'simple'
-  }
-
-test at test/model-client.test.mjs:35:1
-✖ ModelClient falls back from chat to responses (3.667625ms)
-  TypeError: Cannot read properties of undefined (reading 'length')
-      at buildUserPrompt (/Users/yingjinglu/Documents/Projects/openpocket/dist/agent/prompts.js:248:50)
-      at ModelClient.nextStep (/Users/yingjinglu/Documents/Projects/openpocket/dist/agent/model-client.js:173:56)
-      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/model-client.test.mjs:58:28)
-      at Test.runInAsyncScope (node:async_hooks:214:14)
-      at Test.run (node:internal/test_runner/test:1103:25)
-      at Test.start (node:internal/test_runner/test:1000:17)
-      at startSubtestAfterBootstrap (node:internal/test_runner/harness:358:17)
-
-test at test/model-client.test.mjs:72:1
-✖ ModelClient parses chat tool call correctly (0.291375ms)
-  TypeError: Cannot read properties of undefined (reading 'length')
-      at buildUserPrompt (/Users/yingjinglu/Documents/Projects/openpocket/dist/agent/prompts.js:248:50)
-      at ModelClient.nextStep (/Users/yingjinglu/Documents/Projects/openpocket/dist/agent/model-client.js:173:56)
-      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/model-client.test.mjs:102:28)
-      at Test.runInAsyncScope (node:async_hooks:214:14)
-      at Test.run (node:internal/test_runner/test:1103:25)
-      at Test.processPendingSubtests (node:internal/test_runner/test:785:18)
-      at Test.postRun (node:internal/test_runner/test:1232:19)
-      at Test.run (node:internal/test_runner/test:1160:12)
-      at async startSubtestAfterBootstrap (node:internal/test_runner/harness:358:3)
-
-test at test/model-client.test.mjs:116:1
-✖ ModelClient fails when no tool call is returned (0.626792ms)
-  AssertionError [ERR_ASSERTION]: The input did not match the regular expression /All model endpoints failed/. Input:
-  
-  "TypeError: Cannot read properties of undefined (reading 'length')"
-  
-      at async TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/model-client.test.mjs:133:3)
-      at async Test.run (node:internal/test_runner/test:1110:7)
-      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7) {
-    generatedMessage: true,
-    code: 'ERR_ASSERTION',
-    actual: TypeError: Cannot read properties of undefined (reading 'length')
-        at buildUserPrompt (/Users/yingjinglu/Documents/Projects/openpocket/dist/agent/prompts.js:248:50)
-        at ModelClient.nextStep (/Users/yingjinglu/Documents/Projects/openpocket/dist/agent/model-client.js:173:56)
-        at file:///Users/yingjinglu/Documents/Projects/openpocket/test/model-client.test.mjs:135:14
-        at waitForActual (node:assert:632:21)
-        at strict.rejects (node:assert:769:31)
-        at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/model-client.test.mjs:133:16)
-        at Test.runInAsyncScope (node:async_hooks:214:14)
-        at Test.run (node:internal/test_runner/test:1103:25)
-        at Test.processPendingSubtests (node:internal/test_runner/test:785:18)
-        at Test.postRun (node:internal/test_runner/test:1232:19),
-    expected: /All model endpoints failed/,
-    operator: 'rejects',
-    diff: 'simple'
-  }
-
-test at test/prompts.test.mjs:51:1
-✖ buildUserPrompt keeps only recent 8 history items (1.569542ms)
-  TypeError: Cannot read properties of undefined (reading 'length')
-      at buildUserPrompt (/Users/yingjinglu/Documents/Projects/openpocket/dist/agent/prompts.js:248:50)
-      at TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/prompts.test.mjs:53:18)
-      at Test.runInAsyncScope (node:async_hooks:214:14)
-      at Test.run (node:internal/test_runner/test:1103:25)
-      at Test.processPendingSubtests (node:internal/test_runner/test:785:18)
-      at Test.postRun (node:internal/test_runner/test:1232:19)
-      at Test.run (node:internal/test_runner/test:1160:12)
-      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7)
-
-test at test/setup-wizard.test.mjs:263:1
-✖ setup wizard supports codex cli auth option in model selection (30.156416ms)
-  Error: No scripted confirm value.
-      at FakePrompter.confirm (file:///Users/yingjinglu/Documents/Projects/openpocket/test/setup-wizard.test.mjs:36:13)
-      at runApiKeyStep (/Users/yingjinglu/Documents/Projects/openpocket/dist/onboarding/setup-wizard.js:584:44)
-      at async runSetupWizard (/Users/yingjinglu/Documents/Projects/openpocket/dist/onboarding/setup-wizard.js:902:9)
-      at async file:///Users/yingjinglu/Documents/Projects/openpocket/test/setup-wizard.test.mjs:276:7
-      at async file:///Users/yingjinglu/Documents/Projects/openpocket/test/setup-wizard.test.mjs:265:5
-      at async TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/setup-wizard.test.mjs:264:3)
-      at async Test.run (node:internal/test_runner/test:1110:7)
-      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7)
-
-test at test/setup-wizard.test.mjs:312:1
-✖ setup wizard uses existing codex credential when codex login command fails (28.439791ms)
-  Error: No scripted confirm value.
-      at FakePrompter.confirm (file:///Users/yingjinglu/Documents/Projects/openpocket/test/setup-wizard.test.mjs:36:13)
-      at runApiKeyStep (/Users/yingjinglu/Documents/Projects/openpocket/dist/onboarding/setup-wizard.js:584:44)
-      at async runSetupWizard (/Users/yingjinglu/Documents/Projects/openpocket/dist/onboarding/setup-wizard.js:902:9)
-      at async file:///Users/yingjinglu/Documents/Projects/openpocket/test/setup-wizard.test.mjs:336:7
-      at async file:///Users/yingjinglu/Documents/Projects/openpocket/test/setup-wizard.test.mjs:314:5
-      at async TestContext.<anonymous> (file:///Users/yingjinglu/Documents/Projects/openpocket/test/setup-wizard.test.mjs:313:3)
-      at async Test.run (node:internal/test_runner/test:1110:7)
-      at async Test.processPendingSubtests (node:internal/test_runner/test:785:7)