From 93c5e7b5b972007242626a57ce03fb76fd5ddad0 Mon Sep 17 00:00:00 2001
From: Claude Code <noreply@github.com>
Date: Mon, 30 Mar 2026 02:31:59 +0000
Subject: [PATCH 1/2] refactor: migrate test infrastructure from shell scripts
 to Rust-based skill-bench

- Remove shell script-based test infrastructure (agents/skill-bench/)
- Add TOML-based test cases in tests/ directory
- Convert test checks from shell scripts to skill-bench format
- Update mise.toml with test task for skill-bench
- Add logs/ directory with .gitkeep for test execution logs
- Update .gitignore to exclude log files but keep .gitkeep

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .gitignore                                    |  7 ++
 logs/.gitkeep                                 |  0
 mise.toml                                     |  6 +-
 .../functional-no-spec.toml                   | 59 ++++++++++++
 .../functional-with-spec.toml                 | 53 +++++++++++
 tests/concept-interviewing/triggering.toml    | 25 +++++
 tests/constitution-reminding/functional.toml  | 29 ++++++
 tests/constitution-reminding/triggering.toml  | 21 ++++
 .../functional-get-patent-id.toml             | 55 +++++++++++
 .../functional-get-statistics.toml            | 95 +++++++++++++++++++
 .../functional-import-csv.toml                | 29 ++++++
 .../functional-import-multiple-csvs.toml      | 43 +++++++++
 .../functional-init-db.toml                   | 21 ++++
 .../functional-integration.toml               | 51 ++++++++++
 .../functional-record-screening.toml          | 67 +++++++++++++
 tests/investigating-database/triggering.toml  | 21 ++++
 .../functional-file-review.toml               | 49 ++++++++++
 tests/legal-checking/functional.toml          | 31 ++++++
 tests/legal-checking/triggering.toml          | 21 ++++
 tests/targeting/functional-no-spec.toml       | 52 ++++++++++
 tests/targeting/functional-with-data.toml     | 66 +++++++++++++
 tests/targeting/functional-with-spec.toml     | 78 +++++++++++++++
 tests/targeting/triggering.toml               | 17 ++++
 23 files changed, 895 insertions(+), 1 deletion(-)
 create mode 100644 logs/.gitkeep
 create mode 100644 tests/concept-interviewing/functional-no-spec.toml
 create mode 100644 tests/concept-interviewing/functional-with-spec.toml
 create mode 100644 tests/concept-interviewing/triggering.toml
 create mode 100644 tests/constitution-reminding/functional.toml
 create mode 100644 tests/constitution-reminding/triggering.toml
 create mode 100644 tests/investigating-database/functional-get-patent-id.toml
 create mode 100644 tests/investigating-database/functional-get-statistics.toml
 create mode 100644 tests/investigating-database/functional-import-csv.toml
 create mode 100644 tests/investigating-database/functional-import-multiple-csvs.toml
 create mode 100644 tests/investigating-database/functional-init-db.toml
 create mode 100644 tests/investigating-database/functional-integration.toml
 create mode 100644 tests/investigating-database/functional-record-screening.toml
 create mode 100644 tests/investigating-database/triggering.toml
 create mode 100644 tests/legal-checking/functional-file-review.toml
 create mode 100644 tests/legal-checking/functional.toml
 create mode 100644 tests/legal-checking/triggering.toml
 create mode 100644 tests/targeting/functional-no-spec.toml
 create mode 100644 tests/targeting/functional-with-data.toml
 create mode 100644 tests/targeting/functional-with-spec.toml
 create mode 100644 tests/targeting/triggering.toml

diff --git a/.gitignore b/.gitignore
index bd07edd..722fe4f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,10 @@ Cargo.lock
 .claude/worktrees/
 .claude/settings.local.json
 node_modules
+
+# Test logs
+/logs/*
+!/logs/.gitkeep
+
+# skill-bench test history
+.skill-bench/
diff --git a/logs/.gitkeep b/logs/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/mise.toml b/mise.toml
index 8c7aa8a..b5b5210 100644
--- a/mise.toml
+++ b/mise.toml
@@ -1,8 +1,12 @@
 [tools]
-node = "20"
 
 [tasks]
 fmt = { description = "Format files with prettier", run = "npx prettier --write ." }
 pre-commit = { description = "Pre-commit hook to format files", depends = [
   "fmt",
 ] }
+
+[tasks.test] # NOTE(review): depends on fmt, which runs npx, while this patch removes node from [tools] — confirm node is still provisioned
+description = "Run skill-bench tests"
+run = "skill-bench run 'tests' --plugin-dir ./plugin --threads 4 --log ./logs"
+depends = ["fmt"]
diff --git a/tests/concept-interviewing/functional-no-spec.toml b/tests/concept-interviewing/functional-no-spec.toml
new file mode 100644
index 0000000..1ce96ba
--- /dev/null
+++ b/tests/concept-interviewing/functional-no-spec.toml
@@ -0,0 +1,59 @@
+# Test Case: Concept Interview Functional (no existing specification)
+
+name = "functional-no-spec"
+description = "Verify concept-interview uses question-responder when information is missing"
+timeout = 180 # seconds
+
+test_prompt = """
+I want to start a patent search for a new voice recognition system for smart home devices with real-time transcription and noise-resistant recognition.
+
+Before asking me any questions, please use the question-responder skill to check if the required information is already available. Then proceed with the concept interview and assignee verification.
+"""
+
+[answers]
+"competitors" = ["Google", "Amazon"]
+"target country" = "US"
+"country" = "US"
+"release date" = "2025-06-01"
+"date" = "2025-06-01"
+"target release date" = "2025-06-01"
+
+[[checks]]
+name = "mcp_server_loaded"
+command = { command = "mcp-loaded", server = "google-patent-cli" }
+
+[[checks]]
+name = "skill_loaded"
+command = { command = "skill-loaded", skill = "concept-interviewing" }
+
+[[checks]]
+name = "question_responder_invoked"
+command = { command = "skill-invoked", skill = "skill-bench-harness:question-responder" }
+
+[[checks]]
+name = "concept_interview_invoked"
+command = { command = "skill-invoked", skill = "concept-interviewing" }
+
+[[checks]]
+name = "patent_assignee_check_invoked"
+command = { command = "skill-invoked", skill = "patent-assignee-check" }
+
+[[checks]]
+name = "constitution_loaded"
+command = { command = "skill-invoked", skill = "constitution-reminding" }
+
+[[checks]]
+name = "references_instructions_read"
+command = { command = "tool-use", tool = "Read", param = "file_path", value = "concept-interview.*references/instructions.md" }
+
+[[checks]]
+name = "specification_template_read"
+command = { command = "tool-use", tool = "Read", param = "file_path", value = "specification-template.md" }
+
+[[checks]]
+name = "specification_md_created"
+command = { command = "workspace-file", path = "0-specifications/specification.md" }
+
+[[checks]]
+name = "google_patent_mcp_succeeded"
+command = { command = "mcp-success", tool = "search_patents" }
diff --git a/tests/concept-interviewing/functional-with-spec.toml b/tests/concept-interviewing/functional-with-spec.toml
new file mode 100644
index 0000000..60d7504
--- /dev/null
+++ b/tests/concept-interviewing/functional-with-spec.toml
@@ -0,0 +1,53 @@
+# Test Case: Concept Interview Functional (with existing specification)
+
+name = "functional-with-spec"
+description = "Verify concept-interview verifies existing specification without re-interviewing"
+timeout = 120 # seconds
+
+test_prompt = """
+Use concept-interview to verify our existing product specification is complete and ready for the targeting phase. Do not perform any additional assignee checks - just verify the existing specification.
+"""
+
+[[setup]]
+path = "0-specifications/specification.md"
+content = """
+# Product Specification
+
+## 1. Product Concept
+
+Voice recognition system for smart home devices
+
+## 2. Target Market
+
+- **Country**: US
+- **Release Date**: 2025-06-01
+- **Cutoff Date**: 2005-06-01
+
+## 3. Competitors
+
+- **Google LLC**
+- **Amazon.com Inc.**
+
+## 4. Verified Assignee Names (Canonicalized)
+
+| Original Name | Verified Assignee Names                    | Status   | Notes                    |
+| ------------- | ------------------------------------------ | -------- | ------------------------ |
+| Google        | Google LLC, Google Inc., GOOGLE LLC        | Verified | Multiple name variations |
+| Amazon        | Amazon.com Inc., Amazon Technologies, Inc. | Verified | Multiple name variations |
+"""
+
+[[checks]]
+name = "mcp_server_loaded"
+command = { command = "mcp-loaded", server = "google-patent-cli" }
+
+[[checks]]
+name = "skill_loaded"
+command = { command = "skill-loaded", skill = "concept-interviewing" }
+
+[[checks]]
+name = "concept_interview_invoked"
+command = { command = "skill-invoked", skill = "concept-interviewing" }
+
+[[checks]]
+name = "patent_assignee_check_not_invoked"
+command = { command = "skill-invoked", skill = "patent-assignee-check", deny = true }
diff --git a/tests/concept-interviewing/triggering.toml b/tests/concept-interviewing/triggering.toml
new file mode 100644
index 0000000..8b42650
--- /dev/null
+++ b/tests/concept-interviewing/triggering.toml
@@ -0,0 +1,25 @@
+# Test Case: Concept Interview - Triggering
+
+name = "triggering"
+description = "Verify concept-interviewing skill is triggered when discussing patent search"
+timeout = 60 # seconds
+
+test_prompt = """
+I want to start a patent search for a new voice recognition system in the US, releasing in 2025. Competitors are Google and Amazon. The system is for smart home devices with real-time transcription and noise-resistant recognition. Please proceed with the assignee verification and create the specification file.
+"""
+
+[[checks]]
+name = "mcp_server_loaded"
+command = { command = "mcp-loaded", server = "google-patent-cli" }
+
+[[checks]]
+name = "skill_loaded"
+command = { command = "skill-loaded", skill = "concept-interviewing" }
+
+[[checks]]
+name = "concept_interviewing_invoked"
+command = { command = "skill-invoked", skill = "concept-interviewing" }
+
+[[checks]]
+name = "patent_assignee_check_invoked"
+command = { command = "skill-invoked", skill = "patent-assignee-check" }
diff --git a/tests/constitution-reminding/functional.toml b/tests/constitution-reminding/functional.toml
new file mode 100644
index 0000000..050013f
--- /dev/null
+++ b/tests/constitution-reminding/functional.toml
@@ -0,0 +1,29 @@
+# Test Case: Constitution Reminding Functional
+
+name = "functional"
+description = "Verify constitution-reminding loads and displays core principles"
+timeout = 60 # seconds
+
+test_prompt = """
+Load the constitution skill to understand the core principles.
+"""
+
+[[checks]]
+name = "mcp_server_loaded"
+command = { command = "mcp-loaded", server = "google-patent-cli" }
+
+[[checks]]
+name = "skill_loaded"
+command = { command = "skill-loaded", skill = "constitution-reminding" }
+
+[[checks]]
+name = "constitution_reminding_invoked"
+command = { command = "skill-invoked", skill = "constitution-reminding" }
+
+[[checks]]
+name = "references_instructions_read"
+command = { command = "tool-use", tool = "Read", param = "file_path", value = "constitution-reminding.*references/instructions.md" }
+
+[[checks]] # NOTE(review): "I." is a two-character needle that many unrelated messages would contain — consider a more distinctive phrase
+name = "constitution_text_displayed"
+command = { command = "message-contains", text = "I." }
diff --git a/tests/constitution-reminding/triggering.toml b/tests/constitution-reminding/triggering.toml
new file mode 100644
index 0000000..fd2a607
--- /dev/null
+++ b/tests/constitution-reminding/triggering.toml
@@ -0,0 +1,21 @@
+# Test Case: Constitution Reminding - Triggering
+
+name = "triggering"
+description = "Verify constitution-reminding skill is triggered when asked about core principles"
+timeout = 60 # seconds
+
+test_prompt = """
+Load the constitution skill to understand the core principles.
+"""
+
+[[checks]]
+name = "mcp_server_loaded"
+command = { command = "mcp-loaded", server = "google-patent-cli" }
+
+[[checks]]
+name = "skill_loaded"
+command = { command = "skill-loaded", skill = "constitution-reminding" }
+
+[[checks]]
+name = "constitution_reminding_invoked"
+command = { command = "skill-invoked", skill = "constitution-reminding" }
diff --git a/tests/investigating-database/functional-get-patent-id.toml b/tests/investigating-database/functional-get-patent-id.toml
new file mode 100644
index 0000000..32a45c4
--- /dev/null
+++ b/tests/investigating-database/functional-get-patent-id.toml
@@ -0,0 +1,55 @@
+# Test Case: Investigating Database - Get Patent ID
+
+name = "functional-get-patent-id"
+description = "Verify investigating-database can retrieve patent ID by row number"
+timeout = 120 # seconds
+
+test_prompt = """
+Get the patent ID at row 2 from the database.
+"""
+
+[[setup]] # NOTE(review): content is a bash script that creates patents.db via sqlite3; confirm skill-bench executes it — written verbatim to `path`, patents.db would hold script text, not a database
+path = "patents.db"
+content = """
+#!/usr/bin/env bash
+# Setup script to create test database
+sqlite3 patents.db <<EOF
+PRAGMA foreign_keys = ON;
+CREATE TABLE target_patents (
+    id TEXT PRIMARY KEY NOT NULL,
+    family_id TEXT,
+    title TEXT,
+    abstract_text TEXT,
+    publication_date TEXT,
+    country TEXT,
+    assignee TEXT,
+    filing_date TEXT,
+    grant_date TEXT,
+    citation_count INTEGER,
+    claim_count INTEGER,
+    extra_fields TEXT,
+    created_at TEXT DEFAULT (datetime('now')),
+    updated_at TEXT DEFAULT (datetime('now'))
+);
+INSERT INTO target_patents (id, family_id, title, abstract_text, publication_date, country, assignee) VALUES
+('US1234567A', 'US1234567', 'First Patent', 'Abstract 1', '2023-01-15', 'US', 'Assignee1'),
+('US7654321A', 'US7654321', 'Second Patent', 'Abstract 2', '2023-03-20', 'US', 'Assignee2'),
+('US9999999A', 'US9999999', 'Third Patent', 'Abstract 3', '2023-06-10', 'US', 'Assignee3');
+EOF
+"""
+
+[[checks]]
+name = "skill_loaded"
+command = { command = "skill-loaded", skill = "investigating-database" }
+
+[[checks]]
+name = "investigating_database_invoked"
+command = { command = "skill-invoked", skill = "investigating-database" }
+
+[[checks]]
+name = "get_patent_id_script_executed"
+command = { command = "tool-use", tool = "Bash", param = "command", value = "get-patent-id.sh" }
+
+[[checks]]
+name = "correct_patent_id_retrieved"
+command = { command = "message-contains", text = "US7654321A" }
diff --git a/tests/investigating-database/functional-get-statistics.toml b/tests/investigating-database/functional-get-statistics.toml
new file mode 100644
index 0000000..0ef370a
--- /dev/null
+++ b/tests/investigating-database/functional-get-statistics.toml
@@ -0,0 +1,95 @@
+# Test Case: Investigating Database - Get Statistics
+
+name = "functional-get-statistics"
+description = "Verify investigating-database can retrieve screening progress statistics"
+timeout = 120 # seconds
+
+test_prompt = """
+Get the screening progress statistics from the database.
+"""
+
+[[setup]] # NOTE(review): content is a bash script that creates patents.db via sqlite3; confirm skill-bench executes it — written verbatim to `path`, patents.db would hold script text, not a database
+path = "patents.db"
+content = """
+#!/usr/bin/env bash
+# Setup script to create test database with sample data
+sqlite3 patents.db <<EOF
+PRAGMA foreign_keys = ON;
+CREATE TABLE target_patents (
+    id TEXT PRIMARY KEY NOT NULL,
+    family_id TEXT,
+    title TEXT,
+    abstract_text TEXT,
+    publication_date TEXT,
+    country TEXT,
+    assignee TEXT,
+    filing_date TEXT,
+    grant_date TEXT,
+    citation_count INTEGER,
+    claim_count INTEGER,
+    extra_fields TEXT,
+    created_at TEXT DEFAULT (datetime('now')),
+    updated_at TEXT DEFAULT (datetime('now'))
+);
+CREATE TABLE screened_patents (
+    id TEXT PRIMARY KEY NOT NULL,
+    title TEXT,
+    legal_status TEXT,
+    judgment TEXT NOT NULL,
+    reason TEXT,
+    abstract_text TEXT,
+    screened_at TEXT DEFAULT (datetime('now')),
+    updated_at TEXT DEFAULT (datetime('now'))
+);
+CREATE VIEW v_screening_progress AS
+SELECT
+    (SELECT COUNT(*) FROM target_patents) as total_targets,
+    (SELECT COUNT(*) FROM screened_patents) as total_screened,
+    (SELECT COUNT(*) FROM screened_patents WHERE judgment = 'relevant') as relevant,
+    (SELECT COUNT(*) FROM screened_patents WHERE judgment = 'irrelevant') as irrelevant,
+    (SELECT COUNT(*) FROM screened_patents WHERE judgment = 'expired') as expired;
+INSERT INTO target_patents (id, family_id, title, abstract_text, publication_date, country, assignee) VALUES
+('US1234567A', 'US1234567', 'Patent 1', 'Abstract 1', '2023-01-15', 'US', 'Assignee1'),
+('US7654321A', 'US7654321', 'Patent 2', 'Abstract 2', '2023-03-20', 'US', 'Assignee2'),
+('US9999999A', 'US9999999', 'Patent 3', 'Abstract 3', '2023-06-10', 'US', 'Assignee3'),
+('US1111111A', 'US1111111', 'Patent 4', 'Abstract 4', '2023-08-20', 'US', 'Assignee4'),
+('US2222222A', 'US2222222', 'Patent 5', 'Abstract 5', '2023-10-10', 'US', 'Assignee5');
+INSERT INTO screened_patents (id, title, judgment, reason) VALUES
+('US1234567A', 'Patent 1', 'relevant', 'Core technology'),
+('US7654321A', 'Patent 2', 'relevant', 'Related technology'),
+('US9999999A', 'Patent 3', 'irrelevant', 'Different domain'),
+('US1111111A', 'Patent 4', 'expired', 'Status expired');
+EOF
+"""
+
+[[checks]]
+name = "skill_loaded"
+command = { command = "skill-loaded", skill = "investigating-database" }
+
+[[checks]]
+name = "investigating_database_invoked"
+command = { command = "skill-invoked", skill = "investigating-database" }
+
+[[checks]]
+name = "get_statistics_script_executed"
+command = { command = "tool-use", tool = "Bash", param = "command", value = "get-statistics.sh" }
+
+[[checks]]
+name = "statistics_contain_total_targets"
+command = { command = "message-contains", text = "total_targets" }
+
+[[checks]] # 5 = total_targets: the setup script inserts five rows into target_patents
+name = "statistics_contain_count_5"
+command = { command = "message-contains", text = "5" }
+
+[[checks]] # 4 = total_screened: the setup script inserts four rows into screened_patents
+name = "statistics_contain_count_4"
+command = { command = "message-contains", text = "4" }
+
+[[checks]] # 2 = relevant: two screened rows carry judgment 'relevant'
+name = "statistics_contain_count_2"
+command = { command = "message-contains", text = "2" }
+
+[[checks]] # 1 = irrelevant and expired (one row each); NOTE(review): single-digit needles also occur in ids and dates, so these four checks are weak
+name = "statistics_contain_count_1"
+command = { command = "message-contains", text = "1" }
diff --git a/tests/investigating-database/functional-import-csv.toml b/tests/investigating-database/functional-import-csv.toml
new file mode 100644
index 0000000..de44358
--- /dev/null
+++ b/tests/investigating-database/functional-import-csv.toml
@@ -0,0 +1,29 @@
+# Test Case: Investigating Database - Import CSV
+
+name = "functional-import-csv"
+description = "Verify investigating-database can import CSV data into target_patents"
+timeout = 120 # seconds
+
+test_prompt = "Import test-patents.csv"
+
+[[setup]]
+path = "test-patents.csv"
+content = """
+search URL:,https://patents.google.com/?q=llm
+id,title,assignee,inventor/author,priority date,filing/creation date,publication date,grant date,result link,representative figure link
+KR-102637029-B1,Device for Generating Multi-turn Chat Bot Data Using LLM,주식회사 마인즈앤컴퍼니,"고석태, 백영상",2023-10-11,2023-10-11,2024-02-15,2024-02-15,https://patents.google.com/patent/KR102637029B1/en,
+US-2024292070-A1,Iterative ai prompt optimization,"Loop Now Technologies, Inc.","Wu-Hsi Li, Edwin Chiu",2023-02-24,2024-04-10,2024-08-29,,https://patents.google.com/patent/US20240292070A1/en,https://example.com/figure.png
+US-2025200489-A1,Automatic quality assurance,"Forethought Technologies, Inc.","Sami Ghoche, Deon Nicholas",2022-02-28,2024-10-31,,,https://patents.google.com/patent/US20250200489A1/en,
+"""
+
+[[checks]]
+name = "skill_loaded"
+command = { command = "skill-loaded", skill = "investigating-database" }
+
+[[checks]]
+name = "investigating_database_invoked"
+command = { command = "skill-invoked", skill = "investigating-database" }
+
+[[checks]] # test-patents.csv has 3 data rows after its "search URL:" line and column-header line
+name = "csv_imported"
+command = { command = "db-query", db = "patents.db", query = "SELECT COUNT(*) FROM target_patents;", expected = "3" }
diff --git a/tests/investigating-database/functional-import-multiple-csvs.toml b/tests/investigating-database/functional-import-multiple-csvs.toml
new file mode 100644
index 0000000..c283a93
--- /dev/null
+++ b/tests/investigating-database/functional-import-multiple-csvs.toml
@@ -0,0 +1,43 @@
+# Test Case: Investigating Database - Import Multiple CSV Files
+
+name = "functional-import-multiple-csvs"
+description = "Verify investigating-database can import multiple CSV files with different formats"
+timeout = 180 # seconds
+
+test_prompt = """
+Import all CSV files from the current directory into the patent database.
+"""
+
+[[setup]]
+path = "patents-simple.csv"
+content = """
+id,family_id,title,abstract_text,publication_date,country
+US-1234567-A,US-1234567,Example Patent 1,Example abstract text for patent 1,2023-01-15,US
+US-7654321-A,US-7654321,Example Patent 2,Example abstract text for patent 2,2023-03-20,US
+US-9999999-A,US-9999999,Example Patent 3,Example abstract text for patent 3,2023-06-10,US
+"""
+
+[[setup]]
+path = "patents-google-format.csv"
+content = """
+search URL:,https://patents.google.com/?q=rag+systems
+id,title,assignee,inventor/author,priority date,filing/creation date,publication date,grant date,result link,representative figure link
+KR-102030405-B1,RAG System for Document Analysis,"삼성전자 주식회사","홍길동",2022-05-10,2022-05-10,2023-08-20,2023-08-20,https://patents.google.com/patent/KR102030405B1/en,
+US-20240101234-A1,Information Retrieval Using Neural Networks,"Tech Corp Inc.","Jane Doe, John Smith",2022-03-15,2023-08-01,2024-01-15,,https://patents.google.com/patent/US20240101234A1/en,
+"""
+
+[[checks]]
+name = "skill_loaded"
+command = { command = "skill-loaded", skill = "investigating-database" }
+
+[[checks]]
+name = "investigating_database_invoked"
+command = { command = "skill-invoked", skill = "investigating-database" }
+
+[[checks]]
+name = "database_created"
+command = { command = "workspace-file", path = "patents.db" }
+
+[[checks]] # patents-simple.csv holds 3 data rows and patents-google-format.csv holds 2, all with distinct ids
+name = "all_csvs_imported"
+command = { command = "db-query", db = "patents.db", query = "SELECT COUNT(*) FROM target_patents;", expected = "5" }
diff --git a/tests/investigating-database/functional-init-db.toml b/tests/investigating-database/functional-init-db.toml
new file mode 100644
index 0000000..12ccf47
--- /dev/null
+++ b/tests/investigating-database/functional-init-db.toml
@@ -0,0 +1,21 @@
+# Test Case: Investigating Database - Initialize Database
+
+name = "functional-init-db"
+description = "Verify investigating-database can initialize the patent database"
+timeout = 60 # seconds
+
+test_prompt = """
+Initialize the patent investigation database.
+"""
+
+[[checks]]
+name = "skill_loaded"
+command = { command = "skill-loaded", skill = "investigating-database" }
+
+[[checks]]
+name = "investigating_database_invoked"
+command = { command = "skill-invoked", skill = "investigating-database" }
+
+[[checks]]
+name = "database_created"
+command = { command = "workspace-file", path = "patents.db" }
diff --git a/tests/investigating-database/functional-integration.toml b/tests/investigating-database/functional-integration.toml
new file mode 100644
index 0000000..65b6d08
--- /dev/null
+++ b/tests/investigating-database/functional-integration.toml
@@ -0,0 +1,51 @@
+# Test Case: Investigating Database - Integration Workflow
+
+name = "functional-integration"
+description = "Verify complete workflow: init, import, retrieve, record, and statistics"
+timeout = 300 # seconds
+
+test_prompt = """
+I have patent data in CSV format. Initialize the database, import the CSV, get a patent ID by row number, record a screening result, and show me the statistics.
+"""
+
+[[setup]]
+path = "1-targeting/csv/patents.csv"
+content = """
+id,family_id,title,abstract_text,publication_date,country,assignee,filing_date,grant_date,citation_count,claim_count
+US1234567A,US1234567,LLM-based Chatbot System,A multi-turn chatbot system using large language models.,2023-01-15,US,TechCorp Inc.,2022-06-01,2024-01-15,5,20
+US7654321A,US7654321,Vector Database Integration,Method for integrating vector databases with retrieval systems.,2023-03-20,US,DataFlow LLC,2022-09-15,,3,15
+US9999999A,US9999999,Conversation Context Management,System for managing context in multi-turn conversations.,2023-06-10,US,ChatAI Solutions,2022-12-01,,8,25
+US1111111A,US1111111,Machine Learning Model Training,Training method for neural network models.,2023-08-20,US,MLTech Corp.,2023-01-15,,12,18
+"""
+
+[[checks]]
+name = "skill_loaded"
+command = { command = "skill-loaded", skill = "investigating-database" }
+
+[[checks]]
+name = "investigating_database_invoked"
+command = { command = "skill-invoked", skill = "investigating-database" }
+
+[[checks]]
+name = "database_initialized"
+command = { command = "db-query", db = "patents.db", query = "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name IN ('target_patents', 'screened_patents');", expected = "2" }
+
+[[checks]]
+name = "csv_imported"
+command = { command = "db-query", db = "patents.db", query = "SELECT COUNT(*) FROM target_patents;", expected = "4" }
+
+[[checks]]
+name = "patent_id_retrieved"
+command = { command = "tool-use", tool = "Bash", param = "command", value = "get-patent-id.sh" }
+
+[[checks]]
+name = "screening_recorded"
+command = { command = "tool-use", tool = "Bash", param = "command", value = "record-screening.sh" }
+
+[[checks]]
+name = "statistics_retrieved"
+command = { command = "tool-use", tool = "Bash", param = "command", value = "get-statistics.sh" }
+
+[[checks]]
+name = "workflow_completed_successfully"
+command = { command = "db-query", db = "patents.db", query = "SELECT COUNT(*) FROM screened_patents;", expected = "1" }
diff --git a/tests/investigating-database/functional-record-screening.toml b/tests/investigating-database/functional-record-screening.toml
new file mode 100644
index 0000000..3a58cce
--- /dev/null
+++ b/tests/investigating-database/functional-record-screening.toml
@@ -0,0 +1,67 @@
+# Test Case: Investigating Database - Record Screening
+
+name = "functional-record-screening"
+description = "Verify investigating-database can record screening results"
+timeout = 120 # seconds
+
+test_prompt = """
+Record a screening result for patent US1234567A with judgment 'relevant' and reason 'Core technology for LLM systems'.
+"""
+
+[[setup]] # NOTE(review): content is a bash script that creates patents.db via sqlite3; confirm skill-bench executes it — written verbatim to `path`, patents.db would hold script text, not a database
+path = "patents.db"
+content = """
+#!/usr/bin/env bash
+# Setup script to create test database
+sqlite3 patents.db <<EOF
+PRAGMA foreign_keys = ON;
+CREATE TABLE target_patents (
+    id TEXT PRIMARY KEY NOT NULL,
+    family_id TEXT,
+    title TEXT,
+    abstract_text TEXT,
+    publication_date TEXT,
+    country TEXT,
+    assignee TEXT,
+    filing_date TEXT,
+    grant_date TEXT,
+    citation_count INTEGER,
+    claim_count INTEGER,
+    extra_fields TEXT,
+    created_at TEXT DEFAULT (datetime('now')),
+    updated_at TEXT DEFAULT (datetime('now'))
+);
+CREATE TABLE screened_patents (
+    id TEXT PRIMARY KEY NOT NULL,
+    title TEXT,
+    legal_status TEXT,
+    judgment TEXT NOT NULL,
+    reason TEXT,
+    abstract_text TEXT,
+    screened_at TEXT DEFAULT (datetime('now')),
+    updated_at TEXT DEFAULT (datetime('now'))
+);
+INSERT INTO target_patents (id, family_id, title, abstract_text, publication_date, country, assignee) VALUES
+('US1234567A', 'US1234567', 'LLM-based Chatbot System', 'A multi-turn chatbot system using LLM.', '2023-01-15', 'US', 'TechCorp');
+EOF
+"""
+
+[[checks]]
+name = "skill_loaded"
+command = { command = "skill-loaded", skill = "investigating-database" }
+
+[[checks]]
+name = "investigating_database_invoked"
+command = { command = "skill-invoked", skill = "investigating-database" }
+
+[[checks]]
+name = "record_screening_script_executed"
+command = { command = "tool-use", tool = "Bash", param = "command", value = "record-screening.sh" }
+
+[[checks]]
+name = "screening_result_recorded"
+command = { command = "db-query", db = "patents.db", query = "SELECT judgment FROM screened_patents WHERE id = 'US1234567A';", expected = "relevant" }
+
+[[checks]]
+name = "reason_recorded"
+command = { command = "db-query", db = "patents.db", query = "SELECT reason FROM screened_patents WHERE id = 'US1234567A';", expected = "Core" }
diff --git a/tests/investigating-database/triggering.toml b/tests/investigating-database/triggering.toml
new file mode 100644
index 0000000..0cf2057
--- /dev/null
+++ b/tests/investigating-database/triggering.toml
@@ -0,0 +1,21 @@
+# Test Case: Investigating Database - Triggering
+
+name = "triggering"
+description = "Verify investigating-database skill is triggered when appropriate"
+timeout = 60 # seconds
+
+test_prompt = """
+I need to check the screening progress statistics.
+"""
+
+[[checks]]
+name = "skill_loaded"
+command = { command = "skill-loaded", skill = "investigating-database" }
+
+[[checks]]
+name = "investigating_database_invoked"
+command = { command = "skill-invoked", skill = "investigating-database" }
+
+[[checks]]
+name = "statistics_requested"
+command = { command = "message-contains", text = "statistics" }
diff --git a/tests/legal-checking/functional-file-review.toml b/tests/legal-checking/functional-file-review.toml
new file mode 100644
index 0000000..6891c69
--- /dev/null
+++ b/tests/legal-checking/functional-file-review.toml
@@ -0,0 +1,49 @@
+# Test Case: Legal Checking - File Review
+
+name = "functional-file-review"
+description = "Verify legal-checking reviews a file and identifies violations"
+timeout = 90 # seconds
+
+test_prompt = """
+Review the following file for legal compliance violations:
+
+test-claim-analysis.md
+"""
+
+[[setup]]
+path = "test-claim-analysis.md"
+content = """
+# Claim Analysis: US9876543B2
+
+## Element A: Wireless Communication Module
+
+The reference **clearly discloses** a wireless communication module in Column 3. This element is **satisfied** by the reference.
+
+## Element B: Neural Network Layers
+
+The reference **does not satisfy** this requirement because it only has 2 layers. Therefore, Claim 1 **is not anticipated** by the reference.
+
+## Element C: Data Transmission
+
+The alternative implementation using optical fibers **is equivalent** to the copper wires in the reference and **would be obvious** to one skilled in the art.
+
+## Conclusion
+
+The product **does not infringe** Claim 1 because it uses a different algorithm. There is **no risk** of infringement.
+"""
+
+[[checks]]
+name = "mcp_server_loaded"
+command = { command = "mcp-loaded", server = "google-patent-cli" }
+
+[[checks]]
+name = "skill_loaded"
+command = { command = "skill-loaded", skill = "legal-checking" }
+
+[[checks]]
+name = "legal_checking_invoked"
+command = { command = "skill-invoked", skill = "legal-checking" }
+
+[[checks]]
+name = "test_file_read"
+command = { command = "tool-use", tool = "Read", param = "file_path", value = "test-claim-analysis.md" }
diff --git a/tests/legal-checking/functional.toml b/tests/legal-checking/functional.toml
new file mode 100644
index 0000000..2aec0fa
--- /dev/null
+++ b/tests/legal-checking/functional.toml
@@ -0,0 +1,31 @@
+# Test Case: Legal Checking Functional
+
+name = "functional"
+description = "Verify legal-checking automatically triggers on legal compliance keywords"
+timeout = 60 # seconds
+
+test_prompt = """
+Review this patent analysis for legal compliance violations:
+
+The claim **does not infringe** the reference because it **clearly discloses** all elements.
+"""
+
+[[checks]]
+name = "mcp_server_loaded"
+command = { command = "mcp-loaded", server = "google-patent-cli" }
+
+[[checks]]
+name = "skill_loaded"
+command = { command = "skill-loaded", skill = "legal-checking" }
+
+[[checks]]
+name = "legal_checking_invoked"
+command = { command = "skill-invoked", skill = "legal-checking" }
+
+[[checks]]
+name = "violations_detected_1"
+command = { command = "message-contains", text = "does not infringe" }
+
+[[checks]]
+name = "violations_detected_2"
+command = { command = "message-contains", text = "clearly discloses" }
diff --git a/tests/legal-checking/triggering.toml b/tests/legal-checking/triggering.toml
new file mode 100644
index 0000000..2b2c823
--- /dev/null
+++ b/tests/legal-checking/triggering.toml
@@ -0,0 +1,21 @@
+# Test Case: Legal Checking - Triggering
+
+name = "triggering"
+description = "Verify legal-checking skill is triggered when asked about legal compliance"
+timeout = 60 # seconds
+
+test_prompt = """
+Load the legal-checking skill to understand the legal compliance guidelines.
+"""
+
+[[checks]]
+name = "mcp_server_loaded"
+command = { command = "mcp-loaded", server = "google-patent-cli" }
+
+[[checks]]
+name = "skill_loaded"
+command = { command = "skill-loaded", skill = "legal-checking" }
+
+[[checks]]
+name = "legal_checking_invoked"
+command = { command = "skill-invoked", skill = "legal-checking" }
diff --git a/tests/targeting/functional-no-spec.toml b/tests/targeting/functional-no-spec.toml
new file mode 100644
index 0000000..060e05c
--- /dev/null
+++ b/tests/targeting/functional-no-spec.toml
@@ -0,0 +1,52 @@
+# Test Case: Targeting Functional (no existing specification)
+
+name = "functional-no-spec"
+description = "Verify targeting calls concept-interviewing when specification is missing"
+timeout = 600 # seconds
+
+test_prompt = """
+I want to search for patents related to a "folding dual-screen smartphone" for release in the US in Q1 2025. The main competitor is Samsung.
+
+Please conduct the concept interview and targeting steps.
+
+Before asking me any questions, please use the question-responder skill to check if the required information is already available. Then proceed with assignee verification and create the specification file automatically.
+"""
+
+[answers] # canned replies keyed by topic phrase — NOTE(review): confirm how skill-bench matches these keys to questions
+"folding mechanism" = "Foldable device with single flexible display (like Galaxy Z Fold)"
+"display configuration" = "Same size screens, front-folding (inward)"
+"additional features" = "Hinge mechanism, multi-window functionality"
+"competitors" = "Only Samsung is needed"
+"clarifications" = "Foldable device with single flexible display (like Galaxy Z Fold), Same size screens, front-folding (inward), Hinge mechanism, multi-window functionality, Only Samsung is needed"
+
+[[checks]]
+name = "skill_loaded"
+command = { command = "skill-loaded", skill = "targeting" }
+
+[[checks]]
+name = "targeting_invoked"
+command = { command = "skill-invoked", skill = "targeting" }
+
+[[checks]] # the skill id is "concept-interviewing", the same name the description uses
+name = "concept_interview_invoked"
+command = { command = "skill-invoked", skill = "concept-interviewing" }
+
+[[checks]] # NOTE(review): value is a bare file name — presumably matched against part of file_path; confirm
+name = "specification_template_read"
+command = { command = "tool-use", tool = "Read", param = "file_path", value = "specification-template.md" }
+
+[[checks]]
+name = "targeting_template_read"
+command = { command = "tool-use", tool = "Read", param = "file_path", value = "targeting-template.md" }
+
+[[checks]]
+name = "keywords_template_read"
+command = { command = "tool-use", tool = "Read", param = "file_path", value = "keywords-template.md" }
+
+[[checks]] # this case has no [[setup]] fixture, so the file is not pre-seeded by the test itself
+name = "specification_md_created"
+command = { command = "workspace-file", path = "0-specifications/specification.md" }
+
+[[checks]]
+name = "google_patent_search_invoked"
+command = { command = "skill-invoked", skill = "google-patent-cli:patent-search" }
diff --git a/tests/targeting/functional-with-data.toml b/tests/targeting/functional-with-data.toml
new file mode 100644
index 0000000..81b67d4
--- /dev/null
+++ b/tests/targeting/functional-with-data.toml
@@ -0,0 +1,66 @@
+# Test Case: Targeting Functional (with CSV data)
+
+name = "functional-with-data"
+description = "Verify targeting process with pre-downloaded CSV data"
+timeout = 600 # seconds
+
+test_prompt = """
+I have placed downloaded CSV files in `1-targeting/csv/`.
+""" # the prompt only states where the CSV files are; it gives no explicit instruction
+
+[[setup]] # NOTE(review): presumably written into the workspace before test_prompt is sent — confirm against skill-bench
+path = "0-specifications/specification.md"
+content = """
+# Product Specification
+
+**Product/Technology**:
+LLM-based multi-turn chatbot system with RAG (Retrieval-Augmented Generation) capabilities.
+
+**Background**:
+Current chatbots struggle with context awareness and factual accuracy in multi-turn conversations. This system combines LLM with vector database retrieval to provide accurate, context-aware responses.
+
+**Key Technical Features**:
+
+1. LLM-driven multi-turn conversation management
+2. Vector database integration for retrieval-augmented generation
+3. Automatic quality assurance for information retrieval and intent detection
+4. Iterative AI prompt optimization for various applications (video generation, etc.)
+
+**Target Release Date**: 2025-12-31
+
+**Priority Date Cutoff**: 2020-01-01
+
+**Competitors**:
+
+- Google
+- Microsoft
+- OpenAI
+
+**Target Market**:
+US and Korea markets, focusing on enterprise customer service and conversational AI applications.
+"""
+
+[[setup]] # CSV fixture placed under the directory named in test_prompt: one header row followed by three data rows
+path = "1-targeting/csv/patents.csv"
+content = """
+id,family_id,title,abstract_text,publication_date,country
+US-1234567-A,US-1234567,Example Patent 1,Example abstract text for patent 1,2023-01-15,US
+US-7654321-A,US-7654321,Example Patent 2,Example abstract text for patent 2,2023-03-20,US
+US-9999999-A,US-9999999,Example Patent 3,Example abstract text for patent 3,2023-06-10,US
+"""
+
+[[checks]]
+name = "skill_loaded"
+command = { command = "skill-loaded", skill = "targeting" }
+
+[[checks]]
+name = "targeting_invoked"
+command = { command = "skill-invoked", skill = "targeting" }
+
+[[checks]] # the db-query check below reads this same database file
+name = "database_created"
+command = { command = "workspace-file", path = "patents.db" }
+
+[[checks]] # the patents.csv fixture in this test case has 3 data rows; all of them must land in target_patents
+name = "csv_imported"
+command = { command = "db-query", db = "patents.db", expected = "3", query = "SELECT COUNT(*) FROM target_patents;" }
diff --git a/tests/targeting/functional-with-spec.toml b/tests/targeting/functional-with-spec.toml
new file mode 100644
index 0000000..0845749
--- /dev/null
+++ b/tests/targeting/functional-with-spec.toml
@@ -0,0 +1,78 @@
+# Test Case: Targeting Functional (with existing specification)
+
+name = "functional-with-spec"
+description = "Verify targeting process with existing specification"
+timeout = 600 # seconds
+
+test_prompt = """
+I have placed an invention specification in `0-specifications/specification.md`. Please create a target population and run the patent search for a 2025 product release.
+
+Before asking me any questions, please use the question-responder skill to check if the required information is already available. Then proceed with the targeting process.
+"""
+
+[answers] # canned replies keyed by topic phrase — NOTE(review): confirm how skill-bench matches these keys to questions
+"modifying keywords" = "Looks good, proceed to search."
+"synonyms" = "Looks good, proceed to search."
+"adjust query" = "Looks good, proceed to search."
+"hit counts" = "The count is acceptable, proceed to merge."
+"acceptable" = "The count is acceptable, proceed to merge."
+"~1000 hits" = "The count is acceptable, proceed to merge."
+
+[[setup]] # fixture at the path named in test_prompt — NOTE(review): presumably written before the prompt is sent; confirm
+path = "0-specifications/specification.md"
+content = """
+# Specification Dummy
+
+**Product/Technology**:
+Solar-powered auto-cleaning cat litter box with IoT notifications.
+
+**Background**:
+Current cat litter boxes require manual scooping and frequent bag changes, which leads to odor and hygiene issues.
+
+**Key Technical Features**:
+
+1. A solar panel integrated into the top hood that charges an internal battery.
+2. A rotating internal drum that separates solid waste into a sealed compartment.
+3. An IoT module (Wi-Fi) that sends push notifications to a smartphone when the waste compartment is full.
+
+**Target Release Date**: 2025-12-31
+
+**Priority Date Cutoff**: 2015-01-01
+
+**Competitors**:
+
+- Litter-Robot (AutoPets, LLC)
+- CatGenie (PetNovations, Ltd.)
+"""
+
+[[checks]]
+name = "skill_loaded"
+command = { command = "skill-loaded", skill = "targeting" }
+
+[[checks]]
+name = "targeting_invoked"
+command = { command = "skill-invoked", skill = "targeting" }
+
+[[checks]] # NOTE(review): named "*_loaded" but uses the skill-invoked command — confirm which of the two is intended
+name = "constitution_loaded"
+command = { command = "skill-invoked", skill = "constitution-reminding" }
+
+[[checks]] # value is the same path the [[setup]] fixture is written to
+name = "specification_read"
+command = { command = "tool-use", tool = "Read", param = "file_path", value = "0-specifications/specification.md" }
+
+[[checks]] # NOTE(review): value is a bare file name — presumably matched against part of file_path; confirm
+name = "targeting_template_read"
+command = { command = "tool-use", tool = "Read", param = "file_path", value = "targeting-template.md" }
+
+[[checks]]
+name = "keywords_template_read"
+command = { command = "tool-use", tool = "Read", param = "file_path", value = "keywords-template.md" }
+
+[[checks]] # neither 1-targeting file is seeded by [[setup]] in this test case
+name = "targeting_md_created"
+command = { command = "workspace-file", path = "1-targeting/targeting.md" }
+
+[[checks]]
+name = "keywords_md_created"
+command = { command = "workspace-file", path = "1-targeting/keywords.md" }
diff --git a/tests/targeting/triggering.toml b/tests/targeting/triggering.toml
new file mode 100644
index 0000000..b295ad4
--- /dev/null
+++ b/tests/targeting/triggering.toml
@@ -0,0 +1,17 @@
+# Test Case: Targeting - Triggering (the prompt describes the task without naming the skill)
+
+name = "triggering"
+description = "Verify targeting skill is triggered when asked to execute targeting"
+timeout = 60 # seconds
+
+test_prompt = """
+I have a product concept. Now I need to create a target population for patent searching.
+"""
+
+[[checks]]
+name = "skill_loaded"
+command = { command = "skill-loaded", skill = "targeting" }
+
+[[checks]]
+name = "targeting_invoked"
+command = { command = "skill-invoked", skill = "targeting" }

From c20033fbdf1ee963e3e75685a52098fbd4e506db Mon Sep 17 00:00:00 2001
From: Claude Code <noreply@github.com>
Date: Mon, 30 Mar 2026 02:33:55 +0000
Subject: [PATCH 2/2] chore: update model versions in devcontainer config

- Update ANTHROPIC_DEFAULT_OPUS_MODEL to glm-5.1
- Update ANTHROPIC_DEFAULT_SONNET_MODEL to glm-5-turbo

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .devcontainer/post-create.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.devcontainer/post-create.sh b/.devcontainer/post-create.sh
index 5ccae81..5cfd78a 100755
--- a/.devcontainer/post-create.sh
+++ b/.devcontainer/post-create.sh
@@ -80,8 +80,8 @@ EOF
         "ANTHROPIC_BASE_URL": "https://api.z.ai/api/anthropic",
         "API_TIMEOUT_MS": "3000000",
         "CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1",
-        "ANTHROPIC_DEFAULT_OPUS_MODEL": "glm-5",
-        "ANTHROPIC_DEFAULT_SONNET_MODEL": "glm-4.7",
+        "ANTHROPIC_DEFAULT_OPUS_MODEL": "glm-5.1",
+        "ANTHROPIC_DEFAULT_SONNET_MODEL": "glm-5-turbo",
         "ANTHROPIC_DEFAULT_HAIKU_MODEL": "glm-4.5-air"
     }
 }