From 6762a45f3fc53119b71ebebc1c2f2b316a850ed6 Mon Sep 17 00:00:00 2001
From: Claude Code <noreply@github.com>
Date: Mon, 30 Mar 2026 02:49:19 +0000
Subject: [PATCH 1/2] chore: remove obsolete test infrastructure and unused
 dependencies

- Remove agents/skill-bench/ directory (shell script-based tests)
- Remove tests/constitution-reminding/ (skill no longer exists)
- Remove tests/investigating-database/ (split into multiple skills)
- Remove yq installation from devcontainer config (not needed)
- Remove tmux configuration (~/.tmux.conf setup) from devcontainer
  post-create script

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .devcontainer/post-create.sh                  |  19 --
 agents/skill-bench/.gitignore                 |   1 -
 .../functional-analyze-patent.toml            |  94 ------
 .../functional-parallel-analysis.toml         |  98 ------
 .../cases/claim-analyzing/triggering.toml     |  20 --
 .../functional-no-spec.toml                   |  19 --
 .../functional-with-spec.toml                 |  57 ----
 .../concept-interviewing/triggering.toml      |  29 --
 .../evaluating/functional-analyze-patent.toml |  67 ----
 .../functional-parallel-analysis.toml         |  68 -----
 .../cases/evaluating/triggering.toml          |  19 --
 .../functional-import-csv.toml                |  32 --
 .../functional-import-multiple-csvs.toml      |  47 ---
 .../functional-init-db.toml                   |  24 --
 .../functional-various-patent-id.toml         |  88 ------
 .../investigation-preparing/triggering.toml   |  24 --
 .../functional-record-claims.toml             |  62 ----
 .../functional-record-elements.toml           |  66 ----
 .../functional-record-screening.toml          |  44 ---
 .../investigation-recording/triggering.toml   |  51 ----
 .../functional-complete-workflow.toml         | 286 ------------------
 .../functional-generate-report.toml           | 165 ----------
 .../investigation-reporting/triggering.toml   |  19 --
 .../functional-file-review.toml               |  53 ----
 .../cases/legal-checking/functional.toml      |  31 --
 .../cases/legal-checking/triggering.toml      |  24 --
 .../functional-parallel-search.toml           | 103 -------
 .../functional-search-prior-art.toml          |  84 -----
 .../prior-art-researching/triggering.toml     |  20 --
 .../functional-parallel-screening.toml        |  84 -----
 .../functional-resume-screening.toml          | 112 -------
 .../cases/screening/triggering.toml           |  19 --
 .../cases/targeting/functional-with-data.toml |  71 -----
 .../cases/targeting/functional-with-spec.toml |  86 ------
 .../cases/targeting/triggering.toml           |  19 --
 .../harness-plugin/.claude-plugin/plugin.json |   9 -
 .../harness-plugin/cases/basic.toml           |  37 ---
 .../skills/question-responder/SKILL.md        |  91 ------
 .../skills/question-responder/assets/.gitkeep |   1 -
 agents/skill-bench/runner.sh                  | 220 --------------
 agents/skill-bench/tools/check-db-query.sh    |  34 ---
 .../skill-bench/tools/check-file-content.sh   |  30 --
 .../tools/check-file-not-contains.sh          |  30 --
 .../skill-bench/tools/check-log-contains.sh   |  17 --
 agents/skill-bench/tools/check-mcp-loaded.sh  |  38 ---
 agents/skill-bench/tools/check-mcp-success.sh |  69 -----
 .../tools/check-mcp-tool-invoked.sh           |  23 --
 .../tools/check-output-contains.sh            |  20 --
 agents/skill-bench/tools/check-output-file.sh |  15 -
 agents/skill-bench/tools/check-param.sh       |  22 --
 .../skill-bench/tools/check-skill-invoked.sh  |  36 ---
 .../skill-bench/tools/check-skill-loaded.sh   |  30 --
 .../tools/check-skill-not-invoked.sh          |  22 --
 .../skill-bench/tools/check-text-contains.sh  |  31 --
 agents/skill-bench/tools/check-tool-use.sh    |  30 --
 .../skill-bench/tools/check-workspace-dir.sh  |  25 --
 .../skill-bench/tools/check-workspace-file.sh |  17 --
 agents/skill-bench/tools/setup-db.sh          |  71 -----
 tests/constitution-reminding/functional.toml  |  29 --
 tests/constitution-reminding/triggering.toml  |  21 --
 .../functional-get-patent-id.toml             |  55 ----
 .../functional-get-statistics.toml            |  95 ------
 .../functional-import-csv.toml                |  29 --
 .../functional-import-multiple-csvs.toml      |  43 ---
 .../functional-init-db.toml                   |  21 --
 .../functional-integration.toml               |  51 ----
 .../functional-record-screening.toml          |  67 ----
 tests/investigating-database/triggering.toml  |  21 --
 68 files changed, 3455 deletions(-)
 delete mode 100644 agents/skill-bench/.gitignore
 delete mode 100644 agents/skill-bench/cases/claim-analyzing/functional-analyze-patent.toml
 delete mode 100644 agents/skill-bench/cases/claim-analyzing/functional-parallel-analysis.toml
 delete mode 100644 agents/skill-bench/cases/claim-analyzing/triggering.toml
 delete mode 100644 agents/skill-bench/cases/concept-interviewing/functional-no-spec.toml
 delete mode 100644 agents/skill-bench/cases/concept-interviewing/functional-with-spec.toml
 delete mode 100644 agents/skill-bench/cases/concept-interviewing/triggering.toml
 delete mode 100644 agents/skill-bench/cases/evaluating/functional-analyze-patent.toml
 delete mode 100644 agents/skill-bench/cases/evaluating/functional-parallel-analysis.toml
 delete mode 100644 agents/skill-bench/cases/evaluating/triggering.toml
 delete mode 100644 agents/skill-bench/cases/investigation-preparing/functional-import-csv.toml
 delete mode 100644 agents/skill-bench/cases/investigation-preparing/functional-import-multiple-csvs.toml
 delete mode 100644 agents/skill-bench/cases/investigation-preparing/functional-init-db.toml
 delete mode 100644 agents/skill-bench/cases/investigation-preparing/functional-various-patent-id.toml
 delete mode 100644 agents/skill-bench/cases/investigation-preparing/triggering.toml
 delete mode 100644 agents/skill-bench/cases/investigation-recording/functional-record-claims.toml
 delete mode 100644 agents/skill-bench/cases/investigation-recording/functional-record-elements.toml
 delete mode 100644 agents/skill-bench/cases/investigation-recording/functional-record-screening.toml
 delete mode 100644 agents/skill-bench/cases/investigation-recording/triggering.toml
 delete mode 100644 agents/skill-bench/cases/investigation-reporting/functional-complete-workflow.toml
 delete mode 100644 agents/skill-bench/cases/investigation-reporting/functional-generate-report.toml
 delete mode 100644 agents/skill-bench/cases/investigation-reporting/triggering.toml
 delete mode 100644 agents/skill-bench/cases/legal-checking/functional-file-review.toml
 delete mode 100644 agents/skill-bench/cases/legal-checking/functional.toml
 delete mode 100644 agents/skill-bench/cases/legal-checking/triggering.toml
 delete mode 100644 agents/skill-bench/cases/prior-art-researching/functional-parallel-search.toml
 delete mode 100644 agents/skill-bench/cases/prior-art-researching/functional-search-prior-art.toml
 delete mode 100644 agents/skill-bench/cases/prior-art-researching/triggering.toml
 delete mode 100644 agents/skill-bench/cases/screening/functional-parallel-screening.toml
 delete mode 100644 agents/skill-bench/cases/screening/functional-resume-screening.toml
 delete mode 100644 agents/skill-bench/cases/screening/triggering.toml
 delete mode 100644 agents/skill-bench/cases/targeting/functional-with-data.toml
 delete mode 100644 agents/skill-bench/cases/targeting/functional-with-spec.toml
 delete mode 100644 agents/skill-bench/cases/targeting/triggering.toml
 delete mode 100644 agents/skill-bench/harness-plugin/.claude-plugin/plugin.json
 delete mode 100644 agents/skill-bench/harness-plugin/cases/basic.toml
 delete mode 100644 agents/skill-bench/harness-plugin/skills/question-responder/SKILL.md
 delete mode 100644 agents/skill-bench/harness-plugin/skills/question-responder/assets/.gitkeep
 delete mode 100755 agents/skill-bench/runner.sh
 delete mode 100755 agents/skill-bench/tools/check-db-query.sh
 delete mode 100755 agents/skill-bench/tools/check-file-content.sh
 delete mode 100755 agents/skill-bench/tools/check-file-not-contains.sh
 delete mode 100755 agents/skill-bench/tools/check-log-contains.sh
 delete mode 100755 agents/skill-bench/tools/check-mcp-loaded.sh
 delete mode 100755 agents/skill-bench/tools/check-mcp-success.sh
 delete mode 100755 agents/skill-bench/tools/check-mcp-tool-invoked.sh
 delete mode 100755 agents/skill-bench/tools/check-output-contains.sh
 delete mode 100755 agents/skill-bench/tools/check-output-file.sh
 delete mode 100755 agents/skill-bench/tools/check-param.sh
 delete mode 100755 agents/skill-bench/tools/check-skill-invoked.sh
 delete mode 100755 agents/skill-bench/tools/check-skill-loaded.sh
 delete mode 100755 agents/skill-bench/tools/check-skill-not-invoked.sh
 delete mode 100755 agents/skill-bench/tools/check-text-contains.sh
 delete mode 100755 agents/skill-bench/tools/check-tool-use.sh
 delete mode 100755 agents/skill-bench/tools/check-workspace-dir.sh
 delete mode 100755 agents/skill-bench/tools/check-workspace-file.sh
 delete mode 100755 agents/skill-bench/tools/setup-db.sh
 delete mode 100644 tests/constitution-reminding/functional.toml
 delete mode 100644 tests/constitution-reminding/triggering.toml
 delete mode 100644 tests/investigating-database/functional-get-patent-id.toml
 delete mode 100644 tests/investigating-database/functional-get-statistics.toml
 delete mode 100644 tests/investigating-database/functional-import-csv.toml
 delete mode 100644 tests/investigating-database/functional-import-multiple-csvs.toml
 delete mode 100644 tests/investigating-database/functional-init-db.toml
 delete mode 100644 tests/investigating-database/functional-integration.toml
 delete mode 100644 tests/investigating-database/functional-record-screening.toml
 delete mode 100644 tests/investigating-database/triggering.toml

diff --git a/.devcontainer/post-create.sh b/.devcontainer/post-create.sh
index 5cfd78a..5e79aa3 100755
--- a/.devcontainer/post-create.sh
+++ b/.devcontainer/post-create.sh
@@ -17,25 +17,6 @@ if [ -z "$CI" ] && [ -z "$GITHUB_ACTIONS" ]; then
         echo "[Devcontainer Setup] Claude CLI already installed: $(claude --version)"
     fi
 
-    # Install yq (YAML/TOML processor) for skill-bench
-    if ! command -v yq >/dev/null 2>&1; then
-        echo "[Devcontainer Setup] Installing yq..."
-        YQ_VERSION=v4.52.4
-        wget https://github.com/mikefarah/yq/releases/download/${YQ_VERSION}/yq_linux_amd64 -O /tmp/yq
-        chmod +x /tmp/yq
-        sudo mv /tmp/yq /usr/local/bin/yq
-        echo "[Devcontainer Setup] yq installed: $(yq --version)"
-    else
-        echo "[Devcontainer Setup] yq already installed: $(yq --version)"
-    fi
-
-    echo "[Devcontainer Setup] Configuring tmux..."
-    cat > $HOME/.tmux.conf << 'EOF'
-# Display pane number
-bind-key p display-panes
-set display-panes-time 10000
-EOF
-
     echo "[Devcontainer Setup] Configuring claude alias..."
     echo 'alias claude="claude --allow-dangerously-skip-permissions"' >> $HOME/.bashrc
     echo 'alias claude="claude --allow-dangerously-skip-permissions"' >> $HOME/.zshrc
diff --git a/agents/skill-bench/.gitignore b/agents/skill-bench/.gitignore
deleted file mode 100644
index 333c1e9..0000000
--- a/agents/skill-bench/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-logs/
diff --git a/agents/skill-bench/cases/claim-analyzing/functional-analyze-patent.toml b/agents/skill-bench/cases/claim-analyzing/functional-analyze-patent.toml
deleted file mode 100644
index 4a112f7..0000000
--- a/agents/skill-bench/cases/claim-analyzing/functional-analyze-patent.toml
+++ /dev/null
@@ -1,94 +0,0 @@
-# Test Case: Claim Analyzing - Analyze Patent with User Questions
-
-name = "functional-analyze-patent"
-description = "Verify claim-analyzing skill uses Agent tool to analyze a patent, asks user about missing features, and stores similarities in database"
-timeout = 120
-
-test_prompt = """
-Perform claim analysis for patent US20240292070A1.
-- Compare product features against patent elements
-- Store similarity results in the database
-"""
-
-[answers]
-"API Gateway" = "yes"
-"Secure Key Management" = "no"
-
-[[setup]]
-path = "features.sql"
-content = """
--- Initialize database
-PRAGMA foreign_keys = ON;
-
--- Insert product features (only some features, not all)
-INSERT OR IGNORE INTO features (feature_name, description, category, presence, created_at, updated_at)
-VALUES
-    ('User Authentication', 'System authenticates users via email/password', 'Security', 'present', datetime('now'), datetime('now')),
-    ('Data Encryption', 'Data is encrypted at rest using AES-256', 'Security', 'present', datetime('now'), datetime('now'));
--- Note: 'API Gateway' and 'Secure Key Management' are missing intentionally
-"""
-
-[[setup]]
-path = "patent-data.sql"
-content = """
--- Insert target patents
-INSERT OR IGNORE INTO target_patents (patent_id, title, assignee, country, publication_date, filing_date, grant_date, extra_fields)
-VALUES
-    ('US20240292070A1', 'Method for Secure Data Processing', 'TechCorp Inc', 'US', '2024-09-26', '2024-03-15', NULL, '{"source": "test"}');
-
--- Insert screened patents (marked as relevant)
-INSERT OR IGNORE INTO screened_patents (patent_id, judgment, reason, abstract_text, screened_at, updated_at)
-VALUES
-    ('US20240292070A1', 'relevant', 'Relates to secure data processing', 'A method for secure data processing comprising authentication, encryption, API gateway, and secure key management', datetime('now'), datetime('now'));
-
--- Insert claims
-INSERT OR IGNORE INTO claims (patent_id, claim_number, claim_type, claim_text, created_at, updated_at)
-VALUES
-    ('US20240292070A1', 1, 'independent', '1. A method for secure data processing, comprising: authenticating a user; encrypting data; routing through API gateway; and managing secure keys.', datetime('now'), datetime('now'));
-
--- Insert elements (including ones with missing features)
-INSERT OR IGNORE INTO elements (patent_id, claim_number, element_label, element_description, created_at, updated_at)
-VALUES
-    ('US20240292070A1', 1, 'A', 'authenticating a user', datetime('now'), datetime('now')),
-    ('US20240292070A1', 1, 'B', 'routing through API gateway', datetime('now'), datetime('now'));
-"""
-
-[[setup]]
-name = "init_db"
-type = "script"
-command = "setup-db.sh init"
-
-[[setup]]
-name = "load_features"
-type = "script"
-command = "setup-db.sh execute features.sql"
-
-[[setup]]
-name = "load_patent_data"
-type = "script"
-command = "setup-db.sh execute patent-data.sql"
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh claim-analyzing"
-
-[[checks]]
-name = "claim-analyzing_invoked"
-type = "script"
-command = "check-skill-invoked.sh claim-analyzing"
-
-[[checks]]
-name = "agent_used"
-type = "script"
-command = "check-tool-use.sh Agent"
-
-[[checks]]
-name = "similarities_stored"
-type = "script"
-command = "check-db-query.sh {} {} '>0' 'SELECT COUNT(*) FROM similarities;'"
-
-[[checks]]
-name = "new_feature_recorded"
-type = "script"
-command = "check-db-query.sh {} {} '>0' \"SELECT COUNT(*) FROM features WHERE feature_name LIKE 'API Gate%';\""
diff --git a/agents/skill-bench/cases/claim-analyzing/functional-parallel-analysis.toml b/agents/skill-bench/cases/claim-analyzing/functional-parallel-analysis.toml
deleted file mode 100644
index bfbccc9..0000000
--- a/agents/skill-bench/cases/claim-analyzing/functional-parallel-analysis.toml
+++ /dev/null
@@ -1,98 +0,0 @@
-# Test Case: Claim Analyzing - Parallel Analysis
-
-name = "functional-parallel-analysis"
-description = "Verify claim-analyzing skill can analyze multiple patents in parallel using teams"
-timeout = 600
-
-test_prompt = """
-I have 2 screened patents with claims and elements analyzed in the database.
-
-Please analyze claim similarities for both patents:
-1. Compare product features against patent elements for each patent
-2. Analyze each element for similarity level
-3. Store similarity results in the database
-4. Report the analysis results including patent IDs, similarity counts, and overall assessments
-"""
-
-[[setup]]
-path = "patents-with-elements.sql"
-content = """
--- Initialize database
-PRAGMA foreign_keys = ON;
-
--- Insert product features
-INSERT OR REPLACE INTO features (feature_name, description, category, presence, created_at, updated_at)
-VALUES
-    ('AI prompt generation', 'System generates initial prompts for AI models', 'Core', 'present', datetime('now'), datetime('now')),
-    ('AI output processing', 'System processes and analyzes AI model output', 'Core', 'present', datetime('now'), datetime('now')),
-    ('Quality metrics analysis', 'System analyzes quality metrics of AI output', 'Core', 'present', datetime('now'), datetime('now')),
-    ('Iterative refinement', 'System iteratively refines prompts based on feedback', 'Core', 'present', datetime('now'), datetime('now')),
-    ('Conversation context manager', 'Maintains context across multiple conversation turns', 'Core', 'present', datetime('now'), datetime('now')),
-    ('Response generator', 'Generates responses based on conversation context', 'Core', 'present', datetime('now'), datetime('now'));
-
--- Insert target patents
-INSERT OR IGNORE INTO target_patents (patent_id, title, assignee, country, publication_date, filing_date, grant_date, extra_fields)
-VALUES
-    ('US20240292070A1', 'Iterative AI prompt optimization', 'Loop Now Technologies, Inc.', 'US', '2024-08-29', '2024-04-10', NULL, '{"source": "test"}'),
-    ('US20240320708A1', 'Multi-turn conversation system', 'Tech Corp', 'US', '2024-09-12', '2024-03-15', NULL, '{"source": "test"}');
-
--- Insert screened patents (marked as relevant)
-INSERT OR IGNORE INTO screened_patents (patent_id, judgment, reason, abstract_text, screened_at, updated_at)
-VALUES
-    ('US20240292070A1', 'relevant', 'Relates to AI prompt optimization', 'Systems and methods for optimizing AI prompts through iterative refinement', datetime('now'), datetime('now')),
-    ('US20240320708A1', 'relevant', 'Relates to multi-turn conversations', 'A system for managing multi-turn conversations with context tracking', datetime('now'), datetime('now'));
-
--- Insert claims for patent 1
-INSERT OR REPLACE INTO claims (patent_id, claim_number, claim_type, claim_text, created_at, updated_at)
-VALUES
-    ('US20240292070A1', 1, 'independent', 'A method for optimizing AI prompts, comprising: generating an initial prompt; receiving output from an AI model; analyzing the output for quality metrics; and iteratively refining the prompt based on the quality metrics.', datetime('now'), datetime('now'));
-
--- Insert elements for patent 1
-INSERT OR REPLACE INTO elements (patent_id, claim_number, element_label, element_description, created_at, updated_at)
-VALUES
-    ('US20240292070A1', 1, 'A', 'generating an initial prompt', datetime('now'), datetime('now')),
-    ('US20240292070A1', 1, 'B', 'receiving output from an AI model', datetime('now'), datetime('now')),
-    ('US20240292070A1', 1, 'C', 'analyzing the output for quality metrics', datetime('now'), datetime('now')),
-    ('US20240292070A1', 1, 'D', 'iteratively refining the prompt based on the quality metrics', datetime('now'), datetime('now'));
-
--- Insert claims for patent 2
-INSERT OR REPLACE INTO claims (patent_id, claim_number, claim_type, claim_text, created_at, updated_at)
-VALUES
-    ('US20240320708A1', 1, 'independent', 'A system for multi-turn conversations, comprising: a conversation manager configured to maintain context across multiple turns; and a response generator configured to generate responses based on the maintained context.', datetime('now'), datetime('now'));
-
--- Insert elements for patent 2
-INSERT OR REPLACE INTO elements (patent_id, claim_number, element_label, element_description, created_at, updated_at)
-VALUES
-    ('US20240320708A1', 1, 'A', 'a conversation manager configured to maintain context across multiple turns', datetime('now'), datetime('now')),
-    ('US20240320708A1', 1, 'B', 'a response generator configured to generate responses based on the maintained context', datetime('now'), datetime('now'));
-"""
-
-[[setup]]
-name = "init_db"
-type = "script"
-command = "setup-db.sh init"
-
-[[setup]]
-name = "load_patent_data"
-type = "script"
-command = "setup-db.sh execute patents-with-elements.sql"
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh claim-analyzing"
-
-[[checks]]
-name = "claim_analyzing_invoked"
-type = "script"
-command = "check-skill-invoked.sh claim-analyzing"
-
-[[checks]]
-name = "agent_used"
-type = "script"
-command = "check-tool-use.sh Agent"
-
-[[checks]]
-name = "similarities_stored"
-type = "script"
-command = "check-db-query.sh {} {} '>=2' 'SELECT COUNT(DISTINCT patent_id) FROM similarities;'"
diff --git a/agents/skill-bench/cases/claim-analyzing/triggering.toml b/agents/skill-bench/cases/claim-analyzing/triggering.toml
deleted file mode 100644
index ecdfa47..0000000
--- a/agents/skill-bench/cases/claim-analyzing/triggering.toml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Test Case: Claim Analyzing - Triggering
-
-name = "triggering"
-description = "Verify claim-analyzing skill is triggered when asked to perform claim analysis"
-timeout = 60
-
-test_prompt = """
-I have a database of patents with claims and elements analyzed.
-Now I need to analyze claim similarities by comparing product features against patent elements.
-"""
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh claim-analyzing"
-
-[[checks]]
-name = "claim_analyzing_invoked"
-type = "script"
-command = "check-skill-invoked.sh claim-analyzing"
diff --git a/agents/skill-bench/cases/concept-interviewing/functional-no-spec.toml b/agents/skill-bench/cases/concept-interviewing/functional-no-spec.toml
deleted file mode 100644
index 13f034b..0000000
--- a/agents/skill-bench/cases/concept-interviewing/functional-no-spec.toml
+++ /dev/null
@@ -1,19 +0,0 @@
-# Test Case: Concept Interview Functional (no existing specification)
-
-name = "functional-no-spec"
-description = "Verify concept-interview uses question-responder when information is missing"
-timeout = 180                                                                                # seconds
-
-test_prompt = """
-I want to start a patent search for a new voice recognition system for smart home devices with real-time transcription and noise-resistant recognition.
-
-Before asking me any questions, please use the question-responder skill to check if the required information is already available. Then proceed with the concept interview and assignee verification.
-"""
-
-[answers]
-"competitors" = ["Google", "Amazon"]
-"target country" = "US"
-"country" = "US"
-"release date" = "2025-06-01"
-"date" = "2025-06-01"
-"target release date" = "2025-06-01"
diff --git a/agents/skill-bench/cases/concept-interviewing/functional-with-spec.toml b/agents/skill-bench/cases/concept-interviewing/functional-with-spec.toml
deleted file mode 100644
index 7af4684..0000000
--- a/agents/skill-bench/cases/concept-interviewing/functional-with-spec.toml
+++ /dev/null
@@ -1,57 +0,0 @@
-# Test Case: Concept Interview Functional (with existing specification)
-
-name = "functional-with-spec"
-description = "Verify concept-interview verifies existing specification without re-interviewing"
-timeout = 120                                                                                    # seconds
-
-test_prompt = """
-Use concept-interview to verify our existing product specification is complete and ready for the targeting phase. Do not perform any additional assignee checks - just verify the existing specification.
-"""
-
-[[setup]]
-path = "specification.md"
-content = """
-# Product Specification
-
-## 1. Product Concept
-
-Voice recognition system for smart home devices
-
-## 2. Target Market
-
-- **Country**: US
-- **Release Date**: 2025-06-01
-- **Cutoff Date**: 2005-06-01
-
-## 3. Competitors
-
-- **Google LLC**
-- **Amazon.com Inc.**
-
-## 4. Verified Assignee Names (Canonicalized)
-
-| Original Name | Verified Assignee Names                    | Status   | Notes                    |
-| ------------- | ------------------------------------------ | -------- | ------------------------ |
-| Google        | Google LLC, Google Inc., GOOGLE LLC        | Verified | Multiple name variations |
-| Amazon        | Amazon.com Inc., Amazon Technologies, Inc. | Verified | Multiple name variations |
-"""
-
-[[checks]]
-name = "mcp_server_loaded"
-type = "script"
-command = "check-mcp-loaded.sh google-patent-cli"
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh concept-interviewing"
-
-[[checks]]
-name = "concept_interview_invoked"
-type = "script"
-command = "check-skill-invoked.sh concept-interviewing"
-
-[[checks]]
-name = "patent_assignee_check_not_invoked"
-type = "script"
-command = "check-skill-invoked.sh patent-assignee-check --not"
diff --git a/agents/skill-bench/cases/concept-interviewing/triggering.toml b/agents/skill-bench/cases/concept-interviewing/triggering.toml
deleted file mode 100644
index 23d66d5..0000000
--- a/agents/skill-bench/cases/concept-interviewing/triggering.toml
+++ /dev/null
@@ -1,29 +0,0 @@
-# Test Case: Concept Interview - Triggering
-
-name = "triggering"
-description = "Verify concept-interviewing skill is triggered when discussing patent search"
-timeout = 60
-
-test_prompt = """
-I want to start a patent search for a new voice recognition system in the US, releasing in 2025. Competitors are Google and Amazon. The system is for smart home devices with real-time transcription and noise-resistant recognition. Please proceed with the assignee verification and create the specification file.
-"""
-
-[[checks]]
-name = "mcp_server_loaded"
-type = "script"
-command = "check-mcp-loaded.sh google-patent-cli"
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh concept-interviewing"
-
-[[checks]]
-name = "concept_interviewing_invoked"
-type = "script"
-command = "check-skill-invoked.sh concept-interviewing"
-
-[[checks]]
-name = "patent_assignee_check_invoked"
-type = "script"
-command = "check-skill-invoked.sh patent-assignee-check"
diff --git a/agents/skill-bench/cases/evaluating/functional-analyze-patent.toml b/agents/skill-bench/cases/evaluating/functional-analyze-patent.toml
deleted file mode 100644
index 92af628..0000000
--- a/agents/skill-bench/cases/evaluating/functional-analyze-patent.toml
+++ /dev/null
@@ -1,67 +0,0 @@
-# Test Case: Evaluating - Analyze Patent (Single)
-
-name = "functional-analyze-patent"
-description = "Verify evaluating skill uses Agent tool to analyze a patent and store claims/elements in database"
-timeout = 300                                                                                                     # seconds
-
-test_prompt = """
-I have a screened patent marked as relevant in the database.
-
-Please evaluate the patent:
-1. Get the next relevant patent that has not been evaluated
-2. Analyze the patent by decomposing claims into constituent elements
-3. Store claims and elements in the database
-4. Report the analysis results including patent ID, claim count, and element count
-"""
-
-[[setup]]
-path = "screened-patents.sql"
-content = """
--- Initialize database
-PRAGMA foreign_keys = ON;
-
--- Insert target patents
-INSERT OR IGNORE INTO target_patents (patent_id, title, assignee, country, publication_date, filing_date, grant_date, extra_fields)
-VALUES
-    ('US20240292070A1', 'Iterative AI prompt optimization', 'Loop Now Technologies, Inc.', 'US', '2024-08-29', '2024-04-10', NULL, '{"source": "test"}');
-
--- Insert screened patents (marked as relevant)
-INSERT OR IGNORE INTO screened_patents (patent_id, judgment, reason, abstract_text, screened_at, updated_at)
-VALUES
-    ('US20240292070A1', 'relevant', 'Relates to AI prompt optimization', 'Systems and methods for optimizing AI prompts through iterative refinement', datetime('now'), datetime('now'));
-"""
-
-[[setup]]
-name = "init_db"
-type = "script"
-command = "setup-db.sh init"
-
-[[setup]]
-name = "load_screened_data"
-type = "script"
-command = "setup-db.sh execute screened-patents.sql"
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh evaluating"
-
-[[checks]]
-name = "evaluating_invoked"
-type = "script"
-command = "check-skill-invoked.sh evaluating"
-
-[[checks]]
-name = "agent_used"
-type = "script"
-command = "check-tool-use.sh Agent"
-
-[[checks]]
-name = "claims_stored"
-type = "script"
-command = "check-db-query.sh {} {} '>0' 'SELECT COUNT(*) FROM claims;'"
-
-[[checks]]
-name = "elements_stored"
-type = "script"
-command = "check-db-query.sh {} {} '>0' 'SELECT COUNT(*) FROM elements;'"
diff --git a/agents/skill-bench/cases/evaluating/functional-parallel-analysis.toml b/agents/skill-bench/cases/evaluating/functional-parallel-analysis.toml
deleted file mode 100644
index 840809c..0000000
--- a/agents/skill-bench/cases/evaluating/functional-parallel-analysis.toml
+++ /dev/null
@@ -1,68 +0,0 @@
-# Test Case: Evaluating - Parallel Analysis
-
-name = "functional-parallel-analysis"
-description = "Verify evaluating skill can analyze multiple patents in parallel using teams"
-timeout = 600                                                                                # seconds (10 minutes for parallel processing)
-
-test_prompt = """
-I have 2 screened patents marked as relevant in the database.
-
-Please evaluate both patents:
-1. Analyze both patents by decomposing claims into constituent elements
-2. Store claims and elements in the database
-3. Report the analysis results including patent IDs, claim counts, and element counts
-"""
-
-[[setup]]
-path = "screened-patents.sql"
-content = """
--- Initialize database
-PRAGMA foreign_keys = ON;
-
--- Insert target patents
-INSERT OR IGNORE INTO target_patents (patent_id, title, assignee, country, publication_date, filing_date, grant_date, extra_fields)
-VALUES
-    ('US20240292070A1', 'Iterative AI prompt optimization', 'Loop Now Technologies, Inc.', 'US', '2024-08-29', '2024-04-10', NULL, '{"source": "test"}'),
-    ('US20240346271A1', 'Conversational AI System', 'Tech Corp', 'US', '2024-10-03', '2024-05-01', NULL, '{"source": "test"}');
-
--- Insert screened patents (marked as relevant)
-INSERT OR IGNORE INTO screened_patents (patent_id, judgment, reason, abstract_text, screened_at, updated_at)
-VALUES
-    ('US20240292070A1', 'relevant', 'Relates to AI prompt optimization', 'Systems and methods for optimizing AI prompts through iterative refinement', datetime('now'), datetime('now')),
-    ('US20240346271A1', 'relevant', 'Relates to conversational AI', 'A conversational AI system with multi-turn capabilities', datetime('now'), datetime('now'));
-"""
-
-[[setup]]
-name = "init_db"
-type = "script"
-command = "setup-db.sh init"
-
-[[setup]]
-name = "load_screened_data"
-type = "script"
-command = "setup-db.sh execute screened-patents.sql"
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh evaluating"
-
-[[checks]]
-name = "evaluating_invoked"
-type = "script"
-command = "check-skill-invoked.sh evaluating"
-
-[[checks]]
-name = "agent_used"
-type = "script"
-command = "check-tool-use.sh Agent"
-
-[[checks]]
-name = "claims_stored"
-type = "script"
-command = "check-db-query.sh {} {} '>0' 'SELECT COUNT(*) FROM claims;'"
-
-[[checks]]
-name = "elements_stored"
-type = "script"
-command = "check-db-query.sh {} {} '>0' 'SELECT COUNT(*) FROM elements;'"
diff --git a/agents/skill-bench/cases/evaluating/triggering.toml b/agents/skill-bench/cases/evaluating/triggering.toml
deleted file mode 100644
index fdee671..0000000
--- a/agents/skill-bench/cases/evaluating/triggering.toml
+++ /dev/null
@@ -1,19 +0,0 @@
-# Test Case: Evaluating - Triggering
-
-name = "triggering"
-description = "Verify evaluating skill is triggered when asked to evaluate patents"
-timeout = 60
-
-test_prompt = """
-I have a database of screened patents marked as relevant. Now I need to evaluate them by analyzing claims and elements.
-"""
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh evaluating"
-
-[[checks]]
-name = "evaluating_invoked"
-type = "script"
-command = "check-skill-invoked.sh evaluating"
diff --git a/agents/skill-bench/cases/investigation-preparing/functional-import-csv.toml b/agents/skill-bench/cases/investigation-preparing/functional-import-csv.toml
deleted file mode 100644
index adac3f6..0000000
--- a/agents/skill-bench/cases/investigation-preparing/functional-import-csv.toml
+++ /dev/null
@@ -1,32 +0,0 @@
-# Test Case: Investigation Preparing - Import CSV
-
-name = "functional-import-csv"
-description = "Verify investigation-preparing can import CSV data into target_patents"
-timeout = 120                                                                          # seconds
-
-test_prompt = "Import test-patents.csv"
-
-[[setup]]
-path = "test-patents.csv"
-content = """
-search URL:,https://patents.google.com/?q=llm
-id,title,assignee,inventor/author,priority date,filing/creation date,publication date,grant date,result link,representative figure link
-KR-102637029-B1,Device for Generating Multi-turn Chat Bot Data Using LLM,주식회사 마인즈앤컴퍼니,"고석태, 백영상",2023-10-11,2023-10-11,2024-02-15,2024-02-15,https://patents.google.com/patent/KR102637029B1/en,
-US-2024292070-A1,Iterative ai prompt optimization,"Loop Now Technologies, Inc.","Wu-Hsi Li, Edwin Chiu",2023-02-24,2024-04-10,2024-08-29,,https://patents.google.com/patent/US20240292070A1/en,https://example.com/figure.png
-US-2025200489-A1,Automatic quality assurance,"Forethought Technologies, Inc.","Sami Ghoche, Deon Nicholas",2022-02-28,2024-10-31,,,https://patents.google.com/patent/US20250200489A1/en,
-"""
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh investigation-preparing"
-
-[[checks]]
-name = "investigating_database_invoked"
-type = "script"
-command = "check-skill-invoked.sh investigation-preparing"
-
-[[checks]]
-name = "csv_imported"
-type = "script"
-command = "check-db-query.sh {} {} '3' 'SELECT COUNT(*) FROM target_patents;'"
diff --git a/agents/skill-bench/cases/investigation-preparing/functional-import-multiple-csvs.toml b/agents/skill-bench/cases/investigation-preparing/functional-import-multiple-csvs.toml
deleted file mode 100644
index a5e059d..0000000
--- a/agents/skill-bench/cases/investigation-preparing/functional-import-multiple-csvs.toml
+++ /dev/null
@@ -1,47 +0,0 @@
-# Test Case: Investigation Preparing - Import Multiple CSV Files
-
-name = "functional-import-multiple-csvs"
-description = "Verify investigation-preparing can import multiple CSV files with different formats"
-timeout = 180                                                                                       # seconds
-
-test_prompt = """
-Import all CSV files from the current directory into the patent database.
-"""
-
-[[setup]]
-path = "patents-simple.csv"
-content = """
-id,family_id,title,abstract_text,publication_date,country
-US-1234567-A,US-1234567,Example Patent 1,Example abstract text for patent 1,2023-01-15,US
-US-7654321-A,US-7654321,Example Patent 2,Example abstract text for patent 2,2023-03-20,US
-US-9999999-A,US-9999999,Example Patent 3,Example abstract text for patent 3,2023-06-10,US
-"""
-
-[[setup]]
-path = "patents-google-format.csv"
-content = """
-search URL:,https://patents.google.com/?q=rag+systems
-id,title,assignee,inventor/author,priority date,filing/creation date,publication date,grant date,result link,representative figure link
-KR-102030405-B1,RAG System for Document Analysis,"삼성전자 주식회사","홍길동",2022-05-10,2022-05-10,2023-08-20,2023-08-20,https://patents.google.com/patent/KR102030405B1/en,
-US-20240101234-A1,Information Retrieval Using Neural Networks,"Tech Corp Inc.","Jane Doe, John Smith",2022-03-15,2023-08-01,2024-01-15,,https://patents.google.com/patent/US20240101234A1/en,
-"""
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh investigation-preparing"
-
-[[checks]]
-name = "investigating_database_invoked"
-type = "script"
-command = "check-skill-invoked.sh investigation-preparing"
-
-[[checks]]
-name = "database_created"
-type = "script"
-command = "check-workspace-file.sh patents.db"
-
-[[checks]]
-name = "all_csvs_imported"
-type = "script"
-command = "check-db-query.sh {} {} '5' 'SELECT COUNT(*) FROM target_patents;'"
diff --git a/agents/skill-bench/cases/investigation-preparing/functional-init-db.toml b/agents/skill-bench/cases/investigation-preparing/functional-init-db.toml
deleted file mode 100644
index 7d7a2f7..0000000
--- a/agents/skill-bench/cases/investigation-preparing/functional-init-db.toml
+++ /dev/null
@@ -1,24 +0,0 @@
-# Test Case: Investigation Preparing - Initialize Database
-
-name = "functional-init-db"
-description = "Verify investigation-preparing can initialize the patent database"
-timeout = 60                                                                      # seconds
-
-test_prompt = """
-Initialize the patent investigation database.
-"""
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh investigation-preparing"
-
-[[checks]]
-name = "investigating_database_invoked"
-type = "script"
-command = "check-skill-invoked.sh investigation-preparing"
-
-[[checks]]
-name = "database_created"
-type = "script"
-command = "check-workspace-file.sh patents.db"
diff --git a/agents/skill-bench/cases/investigation-preparing/functional-various-patent-id.toml b/agents/skill-bench/cases/investigation-preparing/functional-various-patent-id.toml
deleted file mode 100644
index ef663e6..0000000
--- a/agents/skill-bench/cases/investigation-preparing/functional-various-patent-id.toml
+++ /dev/null
@@ -1,88 +0,0 @@
-# Test Case: Investigation Preparing - Various Patent ID Formats
-
-name = "functional-various-patent-id"
-description = "Verify ETL correctly transforms various patent ID formats from real Google Patents data"
-timeout = 60
-
-test_prompt = """
-I have collected patents with various ID formats from Google Patents.
-Please initialize the patent database and import test-patents.csv.
-Verify that all patent IDs are correctly transformed following Google Patents format.
-"""
-
-[[setup]]
-path = "test-patents.csv"
-content = """
-search URL:,https://patents.google.com/?q=test
-id,title,assignee,inventor/author,priority date,filing/creation date,publication date,grant date,result link,representative figure link
-US-2024292070-A1,Test Patent 1 (US 9 digits with omitted zero),"Test Corp","Inventor A",2023-02-24,2024-04-10,2024-08-29,,https://patents.google.com/patent/US20240292070A1/en,
-US-2025200489-A1,Test Patent 2 (US 9 digits with omitted zero),"Test Corp","Inventor B",2022-02-28,2024-10-31,2025-06-19,,https://patents.google.com/patent/US20250200489A1/en,
-KR-102637029-B1,Test Patent 3 (KR 9 digits),"Test Corp","Inventor C",2023-10-11,2023-10-11,2024-02-15,2024-02-15,https://patents.google.com/patent/KR102637029B1/en,
-KR-102681147-B1,Test Patent 4 (KR 9 digits),"Test Corp","Inventor D",2023-10-31,2023-10-31,2024-07-04,2024-07-04,https://patents.google.com/patent/KR102681147B1/en,
-WO-2025073197-A1,Test Patent 5 (WO 10 digits),"Test Corp","Inventor E",2023-10-04,2024-07-22,2025-04-10,,https://patents.google.com/patent/WO2025073197A1/en,
-CA-3234744-A1,Test Patent 6 (CA 8 digits),"Test Corp","Inventor F",2023-04-10,2024-04-10,2025-05-01,,https://patents.google.com/patent/CA3234744A1/en,
-JP-7753310-B2,Test Patent 7 (JP 7 digits),"Test Corp","Inventor G",2023-09-28,2023-09-28,2025-10-14,2025-10-14,https://patents.google.com/patent/JP7753310B2/en,
-US-2025307897-A1,Test Patent 8 (US 9 digits with omitted zero),"Test Corp","Inventor H",2024-03-28,2024-03-28,2025-10-02,,https://patents.google.com/patent/US20250307897A1/en,
-HK-40120585-A,Test Patent 9 (HK 7 digits, 1 char kind),"Test Corp","Inventor I",2023-10-04,2025-07-10,2025-08-22,,https://patents.google.com/patent/HK40120585A/en,
-US20240292070A1,Test Patent 10 (US no hyphens correct),"Test Corp","Inventor J",2023-02-24,2024-04-10,2024-08-29,,https://patents.google.com/patent/US20240292070A1/en,
-"""
-
-[[checks]]
-name = "database_created"
-type = "script"
-command = "check-workspace-file.sh patents.db"
-
-[[checks]]
-name = "patents_imported"
-type = "script"
-command = "check-db-query.sh {} {} '9' 'SELECT COUNT(*) FROM target_patents;'"
-
-[[checks]]
-name = "patent_1_correct"
-type = "script"
-command = "check-db-query.sh {} {} 'US20240292070A1' 'SELECT patent_id FROM target_patents WHERE title LIKE \"Test Patent 1%\";'"
-
-[[checks]]
-name = "patent_2_correct"
-type = "script"
-command = "check-db-query.sh {} {} 'US20250200489A1' 'SELECT patent_id FROM target_patents WHERE title LIKE \"Test Patent 2%\";'"
-
-[[checks]]
-name = "patent_3_correct"
-type = "script"
-command = "check-db-query.sh {} {} 'KR102637029B1' 'SELECT patent_id FROM target_patents WHERE title LIKE \"Test Patent 3%\";'"
-
-[[checks]]
-name = "patent_4_correct"
-type = "script"
-command = "check-db-query.sh {} {} 'KR102681147B1' 'SELECT patent_id FROM target_patents WHERE title LIKE \"Test Patent 4%\";'"
-
-[[checks]]
-name = "patent_5_correct"
-type = "script"
-command = "check-db-query.sh {} {} 'WO2025073197A1' 'SELECT patent_id FROM target_patents WHERE title LIKE \"Test Patent 5%\";'"
-
-[[checks]]
-name = "patent_6_correct"
-type = "script"
-command = "check-db-query.sh {} {} 'CA3234744A1' 'SELECT patent_id FROM target_patents WHERE title LIKE \"Test Patent 6%\";'"
-
-[[checks]]
-name = "patent_7_correct"
-type = "script"
-command = "check-db-query.sh {} {} 'JP7753310B2' 'SELECT patent_id FROM target_patents WHERE title LIKE \"Test Patent 7%\";'"
-
-[[checks]]
-name = "patent_8_correct"
-type = "script"
-command = "check-db-query.sh {} {} 'US20250307897A1' 'SELECT patent_id FROM target_patents WHERE title LIKE \"Test Patent 8%\";'"
-
-[[checks]]
-name = "patent_9_correct"
-type = "script"
-command = "check-db-query.sh {} {} 'HK40120585A' 'SELECT patent_id FROM target_patents WHERE title LIKE \"Test Patent 9%\";'"
-
-[[checks]]
-name = "all_ids_valid_length"
-type = "script"
-command = "check-db-query.sh {} {} '0' 'SELECT COUNT(*) FROM target_patents WHERE length(patent_id) < 7 OR length(patent_id) > 15;'"
diff --git a/agents/skill-bench/cases/investigation-preparing/triggering.toml b/agents/skill-bench/cases/investigation-preparing/triggering.toml
deleted file mode 100644
index 15b7dc7..0000000
--- a/agents/skill-bench/cases/investigation-preparing/triggering.toml
+++ /dev/null
@@ -1,24 +0,0 @@
-# Test Case: Investigation Preparing - Triggering
-
-name = "triggering"
-description = "Verify investigation-preparing skill is triggered when appropriate"
-timeout = 60                                                                       # seconds
-
-test_prompt = """
-I need to check the screening progress statistics.
-"""
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh investigation-preparing"
-
-[[checks]]
-name = "investigating_database_invoked"
-type = "script"
-command = "check-skill-invoked.sh investigation-preparing"
-
-[[checks]]
-name = "statistics_requested"
-type = "script"
-command = "check-text-contains.sh \"statistics\" \"progress\""
diff --git a/agents/skill-bench/cases/investigation-recording/functional-record-claims.toml b/agents/skill-bench/cases/investigation-recording/functional-record-claims.toml
deleted file mode 100644
index 6644c8e..0000000
--- a/agents/skill-bench/cases/investigation-recording/functional-record-claims.toml
+++ /dev/null
@@ -1,62 +0,0 @@
-# Test Case: Investigation Recording - Record Claims
-
-name = "functional-record-claims"
-description = "Verify investigation-recording can record patent claims"
-timeout = 120                                                           # seconds
-
-test_prompt = """
-Record claims for patent US1234567A:
-- Claim 1 (independent): A method for processing natural language input using a transformer-based neural network.
-- Claim 2 (dependent): The method of claim 1, wherein the transformer network comprises a BERT architecture.
-- Claim 3 (dependent): The method of claim 1, further comprising a pre-processing step for token normalization.
-"""
-
-[[setup]]
-name = "init_db"
-type = "script"
-command = "setup-db.sh init"
-
-[[setup]]
-name = "load_test_data"
-type = "script"
-command = """
-sqlite3 patents.db <<EOF
-INSERT OR IGNORE INTO screened_patents (patent_id, judgment, reason, abstract_text, screened_at, updated_at)
-VALUES ('US1234567A', 'relevant', 'Core technology for LLM systems', 'A transformer-based NLP processing method.', datetime('now'), datetime('now'));
-EOF
-"""
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh investigation-recording"
-
-[[checks]]
-name = "investigation_recording_invoked"
-type = "script"
-command = "check-skill-invoked.sh investigation-recording"
-
-[[checks]]
-name = "claims_recorded"
-type = "script"
-command = """check-db-query.sh {} {} '3' "SELECT COUNT(*) FROM claims WHERE patent_id = 'US1234567A';" """
-
-[[checks]]
-name = "independent_claim_recorded"
-type = "script"
-command = """check-db-query.sh {} {} '1' "SELECT COUNT(*) FROM claims WHERE patent_id = 'US1234567A' AND claim_type = 'independent';" """
-
-[[checks]]
-name = "dependent_claims_recorded"
-type = "script"
-command = """check-db-query.sh {} {} '2' "SELECT COUNT(*) FROM claims WHERE patent_id = 'US1234567A' AND claim_type = 'dependent';" """
-
-[[checks]]
-name = "claim_text_contains_transformer"
-type = "script"
-command = """check-db-query.sh {} {} '1' "SELECT CASE WHEN COUNT(*) > 0 THEN '1' ELSE '0' END FROM claims WHERE patent_id = 'US1234567A' AND claim_text LIKE '%transformer%';" """
-
-[[checks]]
-name = "batch_insert_used"
-type = "script"
-command = """check-log-contains.sh {} "INSERT INTO claims.*VALUES" """
diff --git a/agents/skill-bench/cases/investigation-recording/functional-record-elements.toml b/agents/skill-bench/cases/investigation-recording/functional-record-elements.toml
deleted file mode 100644
index 0d73cac..0000000
--- a/agents/skill-bench/cases/investigation-recording/functional-record-elements.toml
+++ /dev/null
@@ -1,66 +0,0 @@
-# Test Case: Investigation Recording - Record Elements
-
-name = "functional-record-elements"
-description = "Verify investigation-recording can record claim elements"
-timeout = 120                                                            # seconds
-
-test_prompt = """
-Record elements for Claim 1 of patent US1234567A:
-- Element A: receiving natural language input from a user interface
-- Element B: processing the input using a transformer-based neural network
-- Element C: generating a contextualized representation of the input
-- Element D: extracting semantic features from the contextualized representation
-"""
-
-[[setup]]
-name = "init_db"
-type = "script"
-command = "setup-db.sh init"
-
-[[setup]]
-name = "load_test_data"
-type = "script"
-command = """
-sqlite3 patents.db <<EOF
-INSERT OR IGNORE INTO screened_patents (patent_id, judgment, reason, abstract_text, screened_at, updated_at)
-VALUES ('US1234567A', 'relevant', 'Core technology for LLM systems', 'A transformer-based NLP processing method.', datetime('now'), datetime('now'));
-
-INSERT OR IGNORE INTO claims (patent_id, claim_number, claim_type, claim_text)
-VALUES ('US1234567A', 1, 'independent', 'A method for processing natural language input using a transformer-based neural network.');
-EOF
-"""
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh investigation-recording"
-
-[[checks]]
-name = "investigation_recording_invoked"
-type = "script"
-command = "check-skill-invoked.sh investigation-recording"
-
-[[checks]]
-name = "elements_recorded"
-type = "script"
-command = """check-db-query.sh {} {} '4' "SELECT COUNT(*) FROM elements WHERE patent_id = 'US1234567A';" """
-
-[[checks]]
-name = "elements_for_claim_1"
-type = "script"
-command = """check-db-query.sh {} {} '4' "SELECT COUNT(*) FROM elements WHERE patent_id = 'US1234567A' AND claim_number = 1;" """
-
-[[checks]]
-name = "element_labels_correct"
-type = "script"
-command = """check-db-query.sh {} {} '1' "SELECT CASE WHEN COUNT(DISTINCT element_label) = 4 THEN '1' ELSE '0' END FROM elements WHERE patent_id = 'US1234567A';" """
-
-[[checks]]
-name = "element_a_recorded"
-type = "script"
-command = """check-db-query.sh {} {} '1' "SELECT CASE WHEN COUNT(*) > 0 THEN '1' ELSE '0' END FROM elements WHERE patent_id = 'US1234567A' AND element_label = 'A';" """
-
-[[checks]]
-name = "batch_insert_used"
-type = "script"
-command = """check-log-contains.sh {} "INSERT INTO elements.*VALUES" """
diff --git a/agents/skill-bench/cases/investigation-recording/functional-record-screening.toml b/agents/skill-bench/cases/investigation-recording/functional-record-screening.toml
deleted file mode 100644
index 2a0fbb1..0000000
--- a/agents/skill-bench/cases/investigation-recording/functional-record-screening.toml
+++ /dev/null
@@ -1,44 +0,0 @@
-# Test Case: Investigation Recording - Record Screening
-
-name = "functional-record-screening"
-description = "Verify investigation-recording can record screening results"
-timeout = 120                                                               # seconds
-
-test_prompt = """
-Record screening result: patent US1234567A is relevant because it's core technology for LLM systems.
-"""
-
-[[setup]]
-name = "init_db"
-type = "script"
-command = "setup-db.sh init"
-
-[[setup]]
-name = "load_test_data"
-type = "script"
-command = """
-sqlite3 patents.db <<EOF
-INSERT OR IGNORE INTO target_patents (patent_id, title, assignee, country, publication_date, filing_date, extra_fields)
-VALUES ('US1234567A', 'LLM-based Chatbot System', 'TechCorp', 'US', '2023-01-15', '2022-06-01', '{"source": "test"}');
-EOF
-"""
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh investigation-recording"
-
-[[checks]]
-name = "investigation_recording_invoked"
-type = "script"
-command = "check-skill-invoked.sh investigation-recording"
-
-[[checks]]
-name = "screening_result_recorded"
-type = "script"
-command = """check-db-query.sh {} {} 'relevant' "SELECT judgment FROM screened_patents WHERE patent_id = 'US1234567A';" """
-
-[[checks]]
-name = "reason_recorded"
-type = "script"
-command = """check-db-query.sh {} {} '1' "SELECT CASE WHEN reason LIKE '%LLM systems%' THEN '1' ELSE '0' END FROM screened_patents WHERE patent_id = 'US1234567A';" """
diff --git a/agents/skill-bench/cases/investigation-recording/triggering.toml b/agents/skill-bench/cases/investigation-recording/triggering.toml
deleted file mode 100644
index 1f4f884..0000000
--- a/agents/skill-bench/cases/investigation-recording/triggering.toml
+++ /dev/null
@@ -1,51 +0,0 @@
-# Test Case: Investigation Recording - Triggering
-
-name = "triggering"
-description = "Verify investigation-recording skill is triggered when appropriate"
-timeout = 60                                                                       # seconds
-
-test_prompt = """
-I have evaluated a patent and need to record the screening result.
-
-The patent ID is US20240292070A1 and it's relevant to my research because it relates to AI prompt optimization.
-"""
-
-[[setup]]
-name = "init_db"
-type = "script"
-command = "setup-db.sh init"
-
-[[setup]]
-name = "load_test_data"
-type = "script"
-command = """
-sqlite3 patents.db <<EOF
-INSERT OR IGNORE INTO target_patents (patent_id, title, assignee, country, publication_date, filing_date, extra_fields)
-VALUES ('US20240292070A1', 'Iterative AI prompt optimization', 'Loop Now Technologies, Inc.', 'US', '2024-08-29', '2024-04-10', '{"source": "test"}');
-EOF
-"""
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh investigation-recording"
-
-[[checks]]
-name = "investigation_recording_invoked"
-type = "script"
-command = "check-skill-invoked.sh investigation-recording"
-
-[[checks]]
-name = "screening_result_recorded"
-type = "script"
-command = """check-db-query.sh {} {} '1' "SELECT CASE WHEN COUNT(*) >= 1 THEN '1' ELSE '0' END FROM screened_patents WHERE patent_id = 'US20240292070A1';" """
-
-[[checks]]
-name = "correct_judgment"
-type = "script"
-command = """check-db-query.sh {} {} 'relevant' "SELECT judgment FROM screened_patents WHERE patent_id = 'US20240292070A1';" """
-
-[[checks]]
-name = "reason_provided"
-type = "script"
-command = """check-db-query.sh {} {} '1' "SELECT CASE WHEN reason IS NOT NULL AND reason != '' THEN '1' ELSE '0' END FROM screened_patents WHERE patent_id = 'US20240292070A1';" """
diff --git a/agents/skill-bench/cases/investigation-reporting/functional-complete-workflow.toml b/agents/skill-bench/cases/investigation-reporting/functional-complete-workflow.toml
deleted file mode 100644
index e701858..0000000
--- a/agents/skill-bench/cases/investigation-reporting/functional-complete-workflow.toml
+++ /dev/null
@@ -1,286 +0,0 @@
-# Test Case: Investigation Reporting - Complete Workflow
-
-name = "functional-complete-workflow"
-description = "Verify investigation-reporting skill handles complete workflow with claim analysis and prior art"
-timeout = 120
-
-test_prompt = """
-What is the current progress of the patent investigation?
-"""
-
-[[setup]]
-name = "init_db"
-type = "script"
-command = "setup-db.sh init"
-
-[[setup]]
-path = "specification.md"
-content = """
-# Product Specification
-
-**Product/Technology**:
-Solar-powered auto-cleaning cat litter box with IoT notifications.
-
-**Key Technical Features**:
-
-1. A solar panel integrated into the top hood that charges an internal battery.
-2. A rotating internal drum that separates solid waste into a sealed compartment.
-3. An IoT module (Wi-Fi) that sends push notifications to a smartphone when the waste compartment is full.
-
-**Target Release Date**: 2025-12-31
-
-**Priority Date Cutoff**: 2015-01-01
-
-**Competitors**:
-
-- Litter-Robot (AutoPets, LLC)
-- CatGenie (PetNovations, Ltd.)
-"""
-
-[[setup]]
-path = "targeting.md"
-content = """
-# Targeting Summary
-
-## Competitor Patent Research
-
-| Query | Hit Count | Keywords |
-|-------|-----------|----------|
-| assignee: ["AutoPets, LLC"] country: "US" | 156 | Litter Robot search |
-| assignee: ["PetNovations, Ltd."] country: "US" | 89 | CatGenie search |
-
-## Market Patent Research
-
-| Query | Hit Count | Keywords |
-|-------|-----------|----------|
-| query: "\"cat litter\" AND \"automatic\" AND \"cleaning\"" | 234 | Initial search |
-| query: "\"cat litter\" AND \"automatic\" AND \"cleaning\" AND \"solar\"" | 12 | Added solar keyword |
-
-## Final Search Commands
-
-Total patents collected: 3
-"""
-
-[[setup]]
-path = "keywords.md"
-content = """
-# Golden Keywords Registry
-
-## Product Concept Keywords
-
-| Keyword | Source | Verified |
-|---------|--------|----------|
-| "cat litter" | Manual | Yes |
-| "automatic" | Manual | Yes |
-| "cleaning" | Manual | Yes |
-| "solar" | Manual | Yes |
-| "rotating drum" | Manual | Yes |
-| "IoT" | Manual | Yes |
-"""
-
-[[setup]]
-path = "populate-complete-data.sql"
-content = """
--- Insert target patents
-INSERT OR IGNORE INTO target_patents (patent_id, title, assignee, country, publication_date, filing_date, grant_date, extra_fields)
-VALUES
-    ('US9876543B2', 'Automatic Pet Litter Box with Rotating Drum', 'AutoPets, LLC', 'US', '2018-05-15', '2016-11-10', '2018-05-15', '{"source": "test"}'),
-    ('US20230123456A1', 'Solar-Powered Pet Waste Disposal System', 'EcoPet Solutions', 'US', '2023-08-20', '2022-02-14', NULL, '{"source": "test"}'),
-    ('US20240056789A1', 'IoT-Connected Animal Hygiene Device', 'SmartPet Inc.', 'US', '2024-03-01', '2023-09-01', NULL, '{"source": "test"}');
-
--- Insert screened patents (all 3)
-INSERT OR IGNORE INTO screened_patents (patent_id, judgment, reason, abstract_text)
-VALUES
-    ('US9876543B2', 'relevant', 'Rotating drum mechanism with IoT notifications present', 'Automatic pet litter box with rotating drum for waste separation and Wi-Fi notifications.'),
-    ('US20230123456A1', 'relevant', 'Solar charging and IoT notifications present', 'Solar-powered pet waste disposal system with mobile app notifications.'),
-    ('US20240056789A1', 'relevant', 'IoT connectivity present but other key features missing', 'Animal hygiene device with IoT connectivity for remote monitoring.');
-"""
-
-[[setup]]
-name = "import_complete_data"
-type = "script"
-command = "setup-db.sh execute populate-complete-data.sql"
-
-[[setup]]
-path = "investigation/US9876543B2/evaluation.md"
-content = """
-# Evaluation: US9876543B2
-
-## Similarity Assessment
-
-**Overall Similarity**: Significant
-
-## Elements
-
-| Element | Disclosure | Verdict |
-|---------|------------|---------|
-| A. Solar panel charging | Not disclosed | Absent |
-| B. Rotating drum separation | Claims 1-5 disclose rotating drum with waste separation | Present |
-| C. IoT notifications | Claims 6-8 describe Wi-Fi notifications | Present |
-
-## Conclusion
-
-Significant overlap on key features (B, C). Missing solar (A).
-"""
-
-[[setup]]
-path = "investigation/US9876543B2/claim-analysis.md"
-content = """
-# Claim Analysis: US9876543B2
-
-## Claim 1 (Independent)
-
-**Elements**:
-1. A pet litter box apparatus
-2. A rotating drum mechanism
-3. A waste collection compartment
-4. A motor configured to rotate the drum
-
-**Analysis**:
-- Element 1: Present - Apparatus for pet waste management
-- Element 2: Present - Rotating drum with sifting slots
-- Element 3: Present - Sealed waste compartment below drum
-- Element 4: Present - Electric motor with timer control
-
-**Verdict**: All elements present. High risk of anticipation.
-"""
-
-[[setup]]
-path = "investigation/US9876543B2/prior-art.md"
-content = """
-# Prior Art Research: US9876543B2
-
-## Query Strategy
-
-Search terms: "rotating drum" AND "cat litter" AND "automatic"
-
-## Results
-
-| Patent ID | Title | Relevance | Notes |
-|-----------|-------|-----------|-------|
-| US20150012345A1 | Self-cleaning litter box | Alternative | Different drum design |
-| EP2345678A1 | Pet waste collection device | Aligned | Similar mechanism |
-
-## Conclusion
-
-Prior art found confirms drum separation mechanism is well-known. Consider design-around options.
-"""
-
-[[setup]]
-path = "investigation/US20230123456A1/evaluation.md"
-content = """
-# Evaluation: US20230123456A1
-
-## Similarity Assessment
-
-**Overall Similarity**: Significant
-
-## Elements
-
-| Element | Disclosure | Verdict |
-|---------|------------|---------|
-| A. Solar panel charging | Claims 1-3 disclose solar hood with battery | Present |
-| B. Rotating drum separation | Not disclosed | Absent |
-| C. IoT notifications | Claims 4-5 describe mobile app notifications | Present |
-
-## Conclusion
-
-Significant overlap on solar (A) and IoT (C). Different waste handling mechanism.
-"""
-
-[[setup]]
-path = "investigation/US20230123456A1/claim-analysis.md"
-content = """
-# Claim Analysis: US20230123456A1
-
-## Claim 1 (Independent)
-
-**Elements**:
-1. A solar-powered pet waste disposal system
-2. A solar panel mounted on a housing
-3. A rechargeable battery connected to the solar panel
-4. A waste collection mechanism
-5. A wireless communication module
-
-**Analysis**:
-- Element 1: Present - Solar-powered system for pet waste
-- Element 2: Present - Top-mounted solar panel hood
-- Element 3: Present - Li-ion battery with charge controller
-- Element 4: Present - Raking mechanism (not rotating drum)
-- Element 5: Present - Wi-Fi module for smartphone connection
-
-**Verdict**: Solar and IoT elements present. Different waste collection mechanism.
-"""
-
-[[setup]]
-path = "investigation/US20230123456A1/prior-art.md"
-content = """
-# Prior Art Research: US20230123456A1
-
-## Query Strategy
-
-Search terms: "solar" AND "pet litter" AND "automatic"
-
-## Results
-
-| Patent ID | Title | Relevance | Notes |
-|-----------|-------|-----------|-------|
-| US20190054321A1 | Solar animal waste device | Relevant | Earlier filing date |
-| CN108765432A | Pet box with power generation | Alternative | Different power source |
-
-## Conclusion
-
-Some prior art on solar-powered pet devices found. No exact combination found.
-"""
-
-[[setup]]
-path = "investigation/US20240056789A1/evaluation.md"
-content = """
-# Evaluation: US20240056789A1
-
-## Similarity Assessment
-
-**Overall Similarity**: Limited
-
-## Elements
-
-| Element | Disclosure | Verdict |
-|---------|------------|---------|
-| A. Solar panel charging | Not disclosed | Absent |
-| B. Rotating drum separation | Not disclosed | Absent |
-| C. IoT notifications | Claims 1-3 disclose IoT connectivity | Present |
-
-## Conclusion
-
-Limited similarity. Only IoT notification feature overlaps. Low risk.
-"""
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh investigation-reporting"
-
-[[checks]]
-name = "investigation_reporting_invoked"
-type = "script"
-command = "check-skill-invoked.sh investigation-reporting"
-
-[[checks]]
-name = "investigation_preparing_invoked"
-type = "script"
-command = "check-skill-invoked.sh investigation-preparing"
-
-[[checks]]
-name = "progress_md_created"
-type = "script"
-command = "check-workspace-file.sh PROGRESS.md"
-
-[[checks]]
-name = "report_contains_investigation_table"
-type = "script"
-command = "check-file-content.sh PROGRESS.md Investigation Progress"
-
-[[checks]]
-name = "report_excludes_limited_similarity"
-type = "script"
-command = "check-file-not-contains.sh PROGRESS.md US20240056789A1"
diff --git a/agents/skill-bench/cases/investigation-reporting/functional-generate-report.toml b/agents/skill-bench/cases/investigation-reporting/functional-generate-report.toml
deleted file mode 100644
index e8f53f9..0000000
--- a/agents/skill-bench/cases/investigation-reporting/functional-generate-report.toml
+++ /dev/null
@@ -1,165 +0,0 @@
-# Test Case: Investigation Reporting - Generate Progress Report
-
-name = "functional-generate-report"
-description = "Verify investigation-reporting skill generates PROGRESS.md with correct statistics"
-timeout = 120
-
-test_prompt = """
-Give me a summary of the current patent investigation progress.
-"""
-
-[[setup]]
-name = "init_db"
-type = "script"
-command = "setup-db.sh init"
-
-[[setup]]
-path = "specification.md"
-content = """
-# Product Specification
-
-**Product/Technology**:
-LLM-based multi-turn chatbot system with RAG (Retrieval-Augmented Generation) capabilities.
-
-**Key Technical Features**:
-
-1. LLM-driven multi-turn conversation management
-2. Vector database integration for retrieval-augmented generation
-3. Automatic quality assurance for information retrieval and intent detection
-
-**Target Release Date**: 2025-12-31
-
-**Priority Date Cutoff**: 2020-01-01
-"""
-
-[[setup]]
-path = "targeting.md"
-content = """
-# Targeting Summary
-
-## Competitor Patent Research
-
-| Query | Hit Count | Keywords |
-|-------|-----------|----------|
-| assignee: ["Google LLC"] country: "US" | 2450 | Broad search |
-| assignee: ["Google LLC"] country: "US" query: "\"chatbot\" AND \"RAG\"" | 45 | Added keywords |
-
-## Market Patent Research
-
-| Query | Hit Count | Keywords |
-|-------|-----------|----------|
-| query: "\"chatbot\" AND \"RAG\" AND \"LLM\"" | 128 | Initial search |
-
-## Final Search Commands
-
-Total patents collected: 5
-"""
-
-[[setup]]
-path = "keywords.md"
-content = """
-# Golden Keywords Registry
-
-## Product Concept Keywords
-
-| Keyword | Source | Verified |
-|---------|--------|----------|
-| "chatbot" | Manual | Yes |
-| "RAG" | Manual | Yes |
-| "LLM" | Manual | Yes |
-| "multi-turn" | Manual | Yes |
-"""
-
-[[setup]]
-path = "populate-test-data.sql"
-content = """
--- Insert target patents
-INSERT OR IGNORE INTO target_patents (patent_id, title, assignee, country, publication_date, filing_date, grant_date, extra_fields)
-VALUES
-    ('US20240292070A1', 'Iterative AI prompt optimization', 'Loop Now Technologies, Inc.', 'US', '2024-08-29', '2024-04-10', NULL, '{"source": "test"}'),
-    ('KR102637029B1', 'Device for Generating Multi-turn Chat Bot Data Using LLM', '주식회사 마인즈앤컴퍼니', 'KR', '2024-02-15', '2023-10-11', '2024-02-15', '{"source": "test"}'),
-    ('US20240123456A1', 'RAG-based Question Answering System', 'Tech Corp Inc.', 'US', '2024-05-20', '2023-11-15', NULL, '{"source": "test"}'),
-    ('JP2024000555A', 'Expired Patent about Old Chatbot', 'Old Company', 'JP', '2020-01-01', '2019-01-01', NULL, '{"source": "test"}'),
-    ('CN123456789A', 'Unrelated Patent About Refrigerator', 'Appliance Co', 'CN', '2024-03-10', '2023-09-05', NULL, '{"source": "test"}');
-
--- Insert screened patents (3 out of 5)
-INSERT OR IGNORE INTO screened_patents (patent_id, judgment, reason, abstract_text)
-VALUES
-    ('US20240292070A1', 'relevant', 'Multi-turn LLM chatbot system with iterative prompt optimization', 'Iterative AI prompt optimization system for multi-turn conversations using large language models.'),
-    ('KR102637029B1', 'relevant', 'Multi-turn chatbot data generation using LLM', 'Device for generating training data for multi-turn chatbot systems using large language models.'),
-    ('JP2024000555A', 'expired', 'Priority date before cutoff (2019-01-01 < 2020-01-01)', 'Expired patent about old chatbot technology with priority date before cutoff.');
-"""
-
-[[setup]]
-name = "import_test_data"
-type = "script"
-command = "setup-db.sh execute populate-test-data.sql"
-
-[[setup]]
-path = "investigation/US20240292070A1/evaluation.md"
-content = """
-# Evaluation: US20240292070A1
-
-## Similarity Assessment
-
-**Overall Similarity**: Significant
-
-## Elements
-
-| Element | Disclosure | Verdict |
-|---------|------------|---------|
-| A. LLM-driven conversation | Claims 1-3 disclose multi-turn LLM processing | Present |
-| B. Vector database integration | Claims 4-5 describe retrieval system | Present |
-| C. Quality assurance | Claim 6 describes automatic validation | Present |
-
-## Conclusion
-
-Significant overlap with target invention. Recommend claim analysis.
-"""
-
-[[setup]]
-path = "investigation/KR102637029B1/evaluation.md"
-content = """
-# Evaluation: KR102637029B1
-
-## Similarity Assessment
-
-**Overall Similarity**: Moderate
-
-## Elements
-
-| Element | Disclosure | Verdict |
-|---------|------------|---------|
-| A. LLM-driven conversation | Description mentions chatbot but limited detail | Partial |
-| B. Vector database integration | Not disclosed | Absent |
-| C. Quality assurance | Claims mention filtering | Partial |
-
-## Conclusion
-
-Moderate similarity. Missing key elements.
-"""
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh investigation-reporting"
-
-[[checks]]
-name = "investigation_reporting_invoked"
-type = "script"
-command = "check-skill-invoked.sh investigation-reporting"
-
-[[checks]]
-name = "investigation_preparing_invoked"
-type = "script"
-command = "check-skill-invoked.sh investigation-preparing"
-
-[[checks]]
-name = "progress_md_created"
-type = "script"
-command = "check-workspace-file.sh PROGRESS.md"
-
-[[checks]]
-name = "screening_stats_retrieved"
-type = "script"
-command = "check-db-query.sh {} {} '>=3' 'SELECT COUNT(*) FROM screened_patents;'"
diff --git a/agents/skill-bench/cases/investigation-reporting/triggering.toml b/agents/skill-bench/cases/investigation-reporting/triggering.toml
deleted file mode 100644
index 9ed4391..0000000
--- a/agents/skill-bench/cases/investigation-reporting/triggering.toml
+++ /dev/null
@@ -1,19 +0,0 @@
-# Test Case: Investigation Reporting - Triggering
-
-name = "triggering"
-description = "Verify investigation-reporting skill is triggered when asked for progress"
-timeout = 60
-
-test_prompt = """
-What is the current progress of the patent investigation?
-"""
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh investigation-reporting"
-
-[[checks]]
-name = "investigation_reporting_invoked"
-type = "script"
-command = "check-skill-invoked.sh investigation-reporting"
diff --git a/agents/skill-bench/cases/legal-checking/functional-file-review.toml b/agents/skill-bench/cases/legal-checking/functional-file-review.toml
deleted file mode 100644
index f0b4936..0000000
--- a/agents/skill-bench/cases/legal-checking/functional-file-review.toml
+++ /dev/null
@@ -1,53 +0,0 @@
-# Test Case: Legal Checking - File Review
-
-name = "functional-file-review"
-description = "Verify legal-checking reviews a file and identifies violations"
-timeout = 90                                                                   # seconds
-
-test_prompt = """
-Review the following file for legal compliance violations:
-
-test-claim-analysis.md
-"""
-
-[[setup]]
-path = "test-claim-analysis.md"
-content = """
-# Claim Analysis: US9876543B2
-
-## Element A: Wireless Communication Module
-
-The reference **clearly discloses** a wireless communication module in Column 3. This element is **satisfied** by the reference.
-
-## Element B: Neural Network Layers
-
-The reference **does not satisfy** this requirement because it only has 2 layers. Therefore, Claim 1 **is not anticipated** by the reference.
-
-## Element C: Data Transmission
-
-The alternative implementation using optical fibers **is equivalent** to the copper wires in the reference and **would be obvious** to one skilled in the art.
-
-## Conclusion
-
-The product **does not infringe** Claim 1 because it uses a different algorithm. There is **no risk** of infringement.
-"""
-
-[[checks]]
-name = "mcp_server_loaded"
-type = "script"
-command = "check-mcp-loaded.sh google-patent-cli"
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh legal-checking"
-
-[[checks]]
-name = "legal_checking_invoked"
-type = "script"
-command = "check-skill-invoked.sh legal-checking"
-
-[[checks]]
-name = "test_file_read"
-type = "script"
-command = "check-tool-use.sh Read file_path \"test-claim-analysis.md\""
diff --git a/agents/skill-bench/cases/legal-checking/functional.toml b/agents/skill-bench/cases/legal-checking/functional.toml
deleted file mode 100644
index aa4ec3d..0000000
--- a/agents/skill-bench/cases/legal-checking/functional.toml
+++ /dev/null
@@ -1,31 +0,0 @@
-# Test Case: Legal Checking Functional
-
-name = "functional"
-description = "Verify legal-checking automatically triggers on legal compliance keywords"
-timeout = 60                                                                              # seconds
-
-test_prompt = """
-Review this patent analysis for legal compliance violations:
-
-The claim **does not infringe** the reference because it **clearly discloses** all elements.
-"""
-
-[[checks]]
-name = "mcp_server_loaded"
-type = "script"
-command = "check-mcp-loaded.sh google-patent-cli"
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh legal-checking"
-
-[[checks]]
-name = "legal_checking_invoked"
-type = "script"
-command = "check-skill-invoked.sh legal-checking"
-
-[[checks]]
-name = "violations_detected"
-type = "script"
-command = "check-text-contains.sh \"does not infringe\" \"clearly discloses\""
diff --git a/agents/skill-bench/cases/legal-checking/triggering.toml b/agents/skill-bench/cases/legal-checking/triggering.toml
deleted file mode 100644
index 0d05540..0000000
--- a/agents/skill-bench/cases/legal-checking/triggering.toml
+++ /dev/null
@@ -1,24 +0,0 @@
-# Test Case: Legal Checking - Triggering
-
-name = "triggering"
-description = "Verify legal-checking skill is triggered when asked about legal compliance"
-timeout = 60
-
-test_prompt = """
-Load the legal-checking skill to understand the legal compliance guidelines.
-"""
-
-[[checks]]
-name = "mcp_server_loaded"
-type = "script"
-command = "check-mcp-loaded.sh google-patent-cli"
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh legal-checking"
-
-[[checks]]
-name = "legal_checking_invoked"
-type = "script"
-command = "check-skill-invoked.sh legal-checking"
diff --git a/agents/skill-bench/cases/prior-art-researching/functional-parallel-search.toml b/agents/skill-bench/cases/prior-art-researching/functional-parallel-search.toml
deleted file mode 100644
index 0ffa048..0000000
--- a/agents/skill-bench/cases/prior-art-researching/functional-parallel-search.toml
+++ /dev/null
@@ -1,103 +0,0 @@
-# Test Case: Prior Art Researching - Parallel Search
-
-name = "functional-parallel-search"
-description = "Verify prior-art-researching skill can process multiple patents in parallel using teams"
-timeout = 600
-
-test_prompt = """
-I have 2 patents with similarities analyzed in the database.
-
-Please search for prior art:
-1. Get the patents with Moderate/Significant similarities but no prior art results
-2. Search for prior art references (both patent and non-patent literature) for each patent
-3. Analyze relevance of each reference
-4. Store prior art results in the database
-5. Report the search results including patent IDs, reference counts, and relevance levels
-"""
-
-[[setup]]
-path = "patents-with-similarities.sql"
-content = """
--- Initialize database
-PRAGMA foreign_keys = ON;
-
--- Insert target patents
-INSERT OR IGNORE INTO target_patents (patent_id, title, assignee, country, publication_date, filing_date, grant_date, extra_fields)
-VALUES
-    ('US20240292070A1', 'Iterative AI prompt optimization', 'Loop Now Technologies, Inc.', 'US', '2024-08-29', '2024-04-10', NULL, '{"source": "test"}'),
-    ('US20240320708A1', 'Multi-turn conversation system', 'Tech Corp', 'US', '2024-09-12', '2024-03-15', NULL, '{"source": "test"}');
-
--- Insert screened patents (marked as relevant)
-INSERT OR IGNORE INTO screened_patents (patent_id, judgment, reason, abstract_text, screened_at, updated_at)
-VALUES
-    ('US20240292070A1', 'relevant', 'Relates to AI prompt optimization', 'Systems and methods for optimizing AI prompts through iterative refinement', datetime('now'), datetime('now')),
-    ('US20240320708A1', 'relevant', 'Relates to multi-turn conversations', 'A system for managing multi-turn conversations with context tracking', datetime('now'), datetime('now'));
-
--- Insert claims for patent 1
-INSERT OR REPLACE INTO claims (patent_id, claim_number, claim_type, claim_text, created_at, updated_at)
-VALUES
-    ('US20240292070A1', 1, 'independent', 'A method for optimizing AI prompts, comprising: generating an initial prompt; receiving output from an AI model; analyzing the output for quality metrics; and iteratively refining the prompt based on the quality metrics.', datetime('now'), datetime('now'));
-
--- Insert elements for patent 1
-INSERT OR REPLACE INTO elements (patent_id, claim_number, element_label, element_description, created_at, updated_at)
-VALUES
-    ('US20240292070A1', 1, 'A', 'generating an initial prompt', datetime('now'), datetime('now')),
-    ('US20240292070A1', 1, 'B', 'receiving output from an AI model', datetime('now'), datetime('now')),
-    ('US20240292070A1', 1, 'C', 'analyzing the output for quality metrics', datetime('now'), datetime('now')),
-    ('US20240292070A1', 1, 'D', 'iteratively refining the prompt based on the quality metrics', datetime('now'), datetime('now'));
-
--- Insert similarities for patent 1 (all Moderate/Significant)
-INSERT OR REPLACE INTO similarities (patent_id, claim_number, element_label, similarity_level, analysis_notes, analyzed_at, updated_at)
-VALUES
-    ('US20240292070A1', 1, 'A', 'Significant', 'Product has similar prompt generation feature', datetime('now'), datetime('now')),
-    ('US20240292070A1', 1, 'B', 'Moderate', 'Product has AI output processing but different implementation', datetime('now'), datetime('now')),
-    ('US20240292070A1', 1, 'C', 'Significant', 'Product has quality metrics analysis', datetime('now'), datetime('now')),
-    ('US20240292070A1', 1, 'D', 'Moderate', 'Product has iterative refinement but different approach', datetime('now'), datetime('now'));
-
--- Insert claims for patent 2
-INSERT OR REPLACE INTO claims (patent_id, claim_number, claim_type, claim_text, created_at, updated_at)
-VALUES
-    ('US20240320708A1', 1, 'independent', 'A system for multi-turn conversations, comprising: a conversation manager configured to maintain context across multiple turns; and a response generator configured to generate responses based on the maintained context.', datetime('now'), datetime('now'));
-
--- Insert elements for patent 2
-INSERT OR REPLACE INTO elements (patent_id, claim_number, element_label, element_description, created_at, updated_at)
-VALUES
-    ('US20240320708A1', 1, 'A', 'a conversation manager configured to maintain context across multiple turns', datetime('now'), datetime('now')),
-    ('US20240320708A1', 1, 'B', 'a response generator configured to generate responses based on the maintained context', datetime('now'), datetime('now'));
-
--- Insert similarities for patent 2 (all Moderate/Significant)
-INSERT OR REPLACE INTO similarities (patent_id, claim_number, element_label, similarity_level, analysis_notes, analyzed_at, updated_at)
-VALUES
-    ('US20240320708A1', 1, 'A', 'Significant', 'Product has similar conversation manager', datetime('now'), datetime('now')),
-    ('US20240320708A1', 1, 'B', 'Significant', 'Product has response generator with context tracking', datetime('now'), datetime('now'));
-"""
-
-[[setup]]
-name = "init_db"
-type = "script"
-command = "setup-db.sh init"
-
-[[setup]]
-name = "load_patent_data"
-type = "script"
-command = "setup-db.sh execute patents-with-similarities.sql"
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh prior-art-researching"
-
-[[checks]]
-name = "prior_art_researching_invoked"
-type = "script"
-command = "check-skill-invoked.sh prior-art-researching"
-
-[[checks]]
-name = "agent_used"
-type = "script"
-command = "check-tool-use.sh Agent"
-
-[[checks]]
-name = "prior_arts_stored"
-type = "script"
-command = "check-db-query.sh {} {} '>=2' 'SELECT COUNT(DISTINCT patent_id) FROM prior_art_elements;'"
diff --git a/agents/skill-bench/cases/prior-art-researching/functional-search-prior-art.toml b/agents/skill-bench/cases/prior-art-researching/functional-search-prior-art.toml
deleted file mode 100644
index 26c3c60..0000000
--- a/agents/skill-bench/cases/prior-art-researching/functional-search-prior-art.toml
+++ /dev/null
@@ -1,84 +0,0 @@
-# Test Case: Prior Art Researching - Search Prior Art (Single)
-
-name = "functional-search-prior-art"
-description = "Verify prior-art-researching skill uses Agent tool to search prior art and store results in database"
-timeout = 600
-
-test_prompt = """
-I have a patent with similarities analyzed in the database.
-
-Please search for prior art:
-1. Get the next patent that has Moderate/Significant similarities but no prior art results
-2. Search for prior art references (both patent and non-patent literature)
-3. Analyze relevance of each reference
-4. Store prior art results in the database
-5. Report the search results including patent ID, reference count, and relevance levels
-"""
-
-[[setup]]
-path = "patent-with-similarities.sql"
-content = """
--- Initialize database
-PRAGMA foreign_keys = ON;
-
--- Insert target patents
-INSERT OR IGNORE INTO target_patents (patent_id, title, assignee, country, publication_date, filing_date, grant_date, extra_fields)
-VALUES
-    ('US20240292070A1', 'Iterative AI prompt optimization', 'Loop Now Technologies, Inc.', 'US', '2024-08-29', '2024-04-10', NULL, '{"source": "test"}');
-
--- Insert screened patents (marked as relevant)
-INSERT OR IGNORE INTO screened_patents (patent_id, judgment, reason, abstract_text, screened_at, updated_at)
-VALUES
-    ('US20240292070A1', 'relevant', 'Relates to AI prompt optimization', 'Systems and methods for optimizing AI prompts through iterative refinement', datetime('now'), datetime('now'));
-
--- Insert claims
-INSERT OR REPLACE INTO claims (patent_id, claim_number, claim_type, claim_text, created_at, updated_at)
-VALUES
-    ('US20240292070A1', 1, 'independent', 'A method for optimizing AI prompts, comprising: generating an initial prompt; receiving output from an AI model; analyzing the output for quality metrics; and iteratively refining the prompt based on the quality metrics.', datetime('now'), datetime('now'));
-
--- Insert elements
-INSERT OR REPLACE INTO elements (patent_id, claim_number, element_label, element_description, created_at, updated_at)
-VALUES
-    ('US20240292070A1', 1, 'A', 'generating an initial prompt', datetime('now'), datetime('now')),
-    ('US20240292070A1', 1, 'B', 'receiving output from an AI model', datetime('now'), datetime('now')),
-    ('US20240292070A1', 1, 'C', 'analyzing the output for quality metrics', datetime('now'), datetime('now')),
-    ('US20240292070A1', 1, 'D', 'iteratively refining the prompt based on the quality metrics', datetime('now'), datetime('now'));
-
--- Insert similarities (all Moderate/Significant - no Limited)
-INSERT OR REPLACE INTO similarities (patent_id, claim_number, element_label, similarity_level, analysis_notes, analyzed_at, updated_at)
-VALUES
-    ('US20240292070A1', 1, 'A', 'Significant', 'Product has similar prompt generation feature', datetime('now'), datetime('now')),
-    ('US20240292070A1', 1, 'B', 'Moderate', 'Product has AI output processing but different implementation', datetime('now'), datetime('now')),
-    ('US20240292070A1', 1, 'C', 'Significant', 'Product has quality metrics analysis', datetime('now'), datetime('now')),
-    ('US20240292070A1', 1, 'D', 'Moderate', 'Product has iterative refinement but different approach', datetime('now'), datetime('now'));
-"""
-
-[[setup]]
-name = "init_db"
-type = "script"
-command = "setup-db.sh init"
-
-[[setup]]
-name = "load_patent_data"
-type = "script"
-command = "setup-db.sh execute patent-with-similarities.sql"
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh prior-art-researching"
-
-[[checks]]
-name = "prior_art_researching_invoked"
-type = "script"
-command = "check-skill-invoked.sh prior-art-researching"
-
-[[checks]]
-name = "agent_used"
-type = "script"
-command = "check-tool-use.sh Agent"
-
-[[checks]]
-name = "prior_arts_stored"
-type = "script"
-command = "check-db-query.sh {} {} '>0' 'SELECT COUNT(*) FROM prior_arts;'"
diff --git a/agents/skill-bench/cases/prior-art-researching/triggering.toml b/agents/skill-bench/cases/prior-art-researching/triggering.toml
deleted file mode 100644
index c1996a8..0000000
--- a/agents/skill-bench/cases/prior-art-researching/triggering.toml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Test Case: Prior Art Researching - Triggering
-
-name = "triggering"
-description = "Verify prior-art-researching skill is triggered when asked to perform prior art search"
-timeout = 60
-
-test_prompt = """
-I have collected patents with similarities analyzed in the database.
-Now I need to search for prior art for patents with Moderate/Significant similarities.
-"""
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh prior-art-researching"
-
-[[checks]]
-name = "prior_art_researching_invoked"
-type = "script"
-command = "check-skill-invoked.sh prior-art-researching"
diff --git a/agents/skill-bench/cases/screening/functional-parallel-screening.toml b/agents/skill-bench/cases/screening/functional-parallel-screening.toml
deleted file mode 100644
index fabb765..0000000
--- a/agents/skill-bench/cases/screening/functional-parallel-screening.toml
+++ /dev/null
@@ -1,84 +0,0 @@
-# Test Case: Screening - Parallel Processing
-
-name = "functional-parallel-screening"
-description = "Verify screening skill can process patents in parallel using teams"
-timeout = 300                                                                      # seconds
-
-test_prompt = """
-I have collected 2 patents from Google Patents and saved them to test-patents.csv.
-
-The patent database has already been initialized and the CSV has been imported.
-
-Please:
-1. Screen all patents in the database to determine relevance for LLM-based multi-turn chatbot systems with RAG capabilities
-2. Report the screening results including patent IDs, titles, and relevance judgments
-
-The product specification is in specification.md.
-"""
-
-[[setup]]
-path = "import-csv.sql"
-content = """
-INSERT OR IGNORE INTO target_patents (patent_id, title, assignee, country, publication_date, filing_date, grant_date, extra_fields)
-VALUES
-    ('KR102637029B1', 'Device for Generating Multi-turn Chat Bot Data Using LLM', '주식회사 마인즈앤컴퍼니', 'KR', '2024-02-15', '2023-10-11', '2024-02-15', '{"source": "test"}'),
-    ('US20240292070A1', 'Iterative AI prompt optimization', 'Loop Now Technologies, Inc.', 'US', '2024-08-29', '2024-04-10', NULL, '{"source": "test"}');
-"""
-
-[[setup]]
-name = "init_db"
-type = "script"
-command = "setup-db.sh init"
-
-[[setup]]
-name = "import_csv_data"
-type = "script"
-command = "setup-db.sh execute import-csv.sql"
-
-[[setup]]
-path = "specification.md"
-content = """
-# Product Specification
-
-**Product/Technology**:
-LLM-based multi-turn chatbot system with RAG (Retrieval-Augmented Generation) capabilities.
-
-**Key Technical Features**:
-
-1. LLM-driven multi-turn conversation management
-2. Vector database integration for retrieval-augmented generation
-3. Automatic quality assurance for information retrieval and intent detection
-
-**Target Release Date**: 2025-12-31
-
-**Priority Date Cutoff**: 2020-01-01
-"""
-
-[[setup]]
-path = "test-patents.csv"
-content = """
-search URL:,https://patents.google.com/?q=llm
-id,title,assignee,inventor/author,priority date,filing/creation date,publication date,grant date,result link,representative figure link
-KR-102637029-B1,Device for Generating Multi-turn Chat Bot Data Using LLM,주식회사 마인즈앤컴퍼니,"고석태, 백영상",2023-10-11,2023-10-11,2024-02-15,2024-02-15,https://patents.google.com/patent/KR102637029B1/en,
-US-2024292070-A1,Iterative ai prompt optimization,"Loop Now Technologies, Inc.","Wu-Hsi Li, Edwin Chiu",2023-02-24,2024-04-10,2024-08-29,,https://patents.google.com/patent/US20240292070A1/en,
-"""
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh screening"
-
-[[checks]]
-name = "screening_invoked"
-type = "script"
-command = "check-skill-invoked.sh screening"
-
-[[checks]]
-name = "agent_used"
-type = "script"
-command = "check-tool-use.sh Agent"
-
-[[checks]]
-name = "screened_stored"
-type = "script"
-command = "check-db-query.sh {} {} '>=2' 'SELECT COUNT(*) FROM screened_patents;'"
diff --git a/agents/skill-bench/cases/screening/functional-resume-screening.toml b/agents/skill-bench/cases/screening/functional-resume-screening.toml
deleted file mode 100644
index a7dd3b9..0000000
--- a/agents/skill-bench/cases/screening/functional-resume-screening.toml
+++ /dev/null
@@ -1,112 +0,0 @@
-# Test Case: Screening - Resume Capability
-
-name = "functional-resume-screening"
-description = "Verify screening skill can resume from partial screening without duplicates"
-timeout = 300                                                                               # seconds
-
-test_prompt = """
-I have already imported 3 patents into the database and partially screened them.
-
-Patent KR-102637029-B1 has already been screened and marked as 'relevant'.
-
-Please:
-1. Resume screening for the remaining unscreened patents
-2. Use parallel processing to speed up the work
-3. Ensure no duplicate screening entries are created
-4. Report the final screening status
-
-The product specification is in specification.md.
-Target domain: LLM-based multi-turn chatbot systems with RAG capabilities.
-"""
-
-[[setup]]
-path = "import-csv.sql"
-content = """
-INSERT OR IGNORE INTO target_patents (patent_id, title, assignee, country, publication_date, filing_date, grant_date, extra_fields)
-VALUES
-    ('KR102637029B1', 'Device for Generating Multi-turn Chat Bot Data Using LLM', '주식회사 마인즈앤컴퍼니', 'KR', '2024-02-15', '2023-10-11', '2024-02-15', '{"source": "test"}'),
-    ('US20240292070A1', 'Iterative AI prompt optimization', 'Loop Now Technologies, Inc.', 'US', '2024-08-29', '2024-04-10', NULL, '{"source": "test"}'),
-    ('CN123456789A1', 'Medical Image Analysis System', 'Beijing Medical Tech', 'CN', '2024-01-20', '2023-05-10', NULL, '{"source": "test"}');
-"""
-
-[[setup]]
-path = "pre-screened-data.sql"
-content = """
-INSERT INTO screened_patents (patent_id, judgment, reason, abstract_text, screened_at)
-VALUES (
-    'KR102637029B1',
-    'relevant',
-    'Directly relates to LLM-based multi-turn chatbot data generation',
-    'Device for Generating Multi-turn Chat Bot Data Using LLM',
-    datetime('now')
-);
-"""
-
-[[setup]]
-name = "init_db"
-type = "script"
-command = "setup-db.sh init"
-
-[[setup]]
-name = "import_csv_data"
-type = "script"
-command = "setup-db.sh execute import-csv.sql"
-
-[[setup]]
-name = "load_pre_screened_data"
-type = "script"
-command = "setup-db.sh execute pre-screened-data.sql"
-
-[[setup]]
-path = "specification.md"
-content = """
-# Product Specification
-
-**Product/Technology**:
-LLM-based multi-turn chatbot system with RAG (Retrieval-Augmented Generation) capabilities.
-
-**Key Technical Features**:
-
-1. LLM-driven multi-turn conversation management
-2. Vector database integration for retrieval-augmented generation
-3. Automatic quality assurance for information retrieval and intent detection
-
-**Target Release Date**: 2025-12-31
-
-**Priority Date Cutoff**: 2020-01-01
-"""
-
-[[setup]]
-path = "test-patents.csv"
-content = """
-search URL:,https://patents.google.com/?q=llm
-id,title,assignee,inventor/author,priority date,filing/creation date,publication date,grant date,result link,representative figure link
-KR-102637029-B1,Device for Generating Multi-turn Chat Bot Data Using LLM,주식회사 마인즈앤컴퍼니,"고석태, 백영상",2023-10-11,2023-10-11,2024-02-15,2024-02-15,https://patents.google.com/patent/KR102637029B1/en,
-US-2024292070-A1,Iterative ai prompt optimization,"Loop Now Technologies, Inc.","Wu-Hsi Li, Edwin Chiu",2023-02-24,2024-04-10,2024-08-29,,https://patents.google.com/patent/US20240292070A1/en,
-CN-123456789-A1,Medical Image Analysis System,"Beijing Medical Tech","Zhang San, Li Si",2023-05-10,2023-05-10,2024-01-20,,https://patents.google.com/patent/CN123456789A1/en,
-"""
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh screening"
-
-[[checks]]
-name = "screening_invoked"
-type = "script"
-command = "check-skill-invoked.sh screening"
-
-[[checks]]
-name = "agent_used"
-type = "script"
-command = "check-tool-use.sh Agent"
-
-[[checks]]
-name = "screened_stored"
-type = "script"
-command = "check-db-query.sh {} {} '>=3' 'SELECT COUNT(*) FROM screened_patents;'"
-
-[[checks]]
-name = "no_duplicates"
-type = "script"
-command = "check-db-query.sh {} {} '=1' 'SELECT COUNT(*) FROM screened_patents WHERE patent_id = \"KR102637029B1\";'"
diff --git a/agents/skill-bench/cases/screening/triggering.toml b/agents/skill-bench/cases/screening/triggering.toml
deleted file mode 100644
index 553f74f..0000000
--- a/agents/skill-bench/cases/screening/triggering.toml
+++ /dev/null
@@ -1,19 +0,0 @@
-# Test Case: Screening - Triggering
-
-name = "triggering"
-description = "Verify screening skill is triggered when asked to screen patents"
-timeout = 60
-
-test_prompt = """
-I have a database of collected patents. Now I need to screen them to remove noise and filter by relevance.
-"""
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh screening"
-
-[[checks]]
-name = "screening_invoked"
-type = "script"
-command = "check-skill-invoked.sh screening"
diff --git a/agents/skill-bench/cases/targeting/functional-with-data.toml b/agents/skill-bench/cases/targeting/functional-with-data.toml
deleted file mode 100644
index ffd1fce..0000000
--- a/agents/skill-bench/cases/targeting/functional-with-data.toml
+++ /dev/null
@@ -1,71 +0,0 @@
-# Test Case: Targeting Functional (with CSV data)
-
-name = "functional-with-data"
-description = "Verify targeting process with pre-downloaded CSV data"
-timeout = 600                                                         # seconds
-
-test_prompt = """
-I have placed downloaded CSV files in `patents.csv`.
-Please create a target population based on the specification.md.
-"""
-
-[[setup]]
-path = "specification.md"
-content = """
-# Product Specification
-
-**Product/Technology**:
-LLM-based multi-turn chatbot system with RAG (Retrieval-Augmented Generation) capabilities.
-
-**Background**:
-Current chatbots struggle with context awareness and factual accuracy in multi-turn conversations. This system combines LLM with vector database retrieval to provide accurate, context-aware responses.
-
-**Key Technical Features**:
-
-1. LLM-driven multi-turn conversation management
-2. Vector database integration for retrieval-augmented generation
-3. Automatic quality assurance for information retrieval and intent detection
-4. Iterative AI prompt optimization for various applications (video generation, etc.)
-
-**Target Release Date**: 2025-12-31
-
-**Priority Date Cutoff**: 2020-01-01
-
-**Competitors**:
-
-- Google
-- Microsoft
-- OpenAI
-
-**Target Market**:
-US and Korea markets, focusing on enterprise customer service and conversational AI applications.
-"""
-
-[[setup]]
-path = "patents.csv"
-content = """
-id,family_id,title,abstract_text,publication_date,country
-US-1234567-A,US-1234567,Example Patent 1,Example abstract text for patent 1,2023-01-15,US
-US-7654321-A,US-7654321,Example Patent 2,Example abstract text for patent 2,2023-03-20,US
-US-9999999-A,US-9999999,Example Patent 3,Example abstract text for patent 3,2023-06-10,US
-"""
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh targeting"
-
-[[checks]]
-name = "targeting_invoked"
-type = "script"
-command = "check-skill-invoked.sh targeting"
-
-[[checks]]
-name = "database_created"
-type = "script"
-command = "check-workspace-file.sh patents.db"
-
-[[checks]]
-name = "csv_imported"
-type = "script"
-command = "check-db-query.sh {} {} '3' 'SELECT COUNT(*) FROM target_patents;'"
diff --git a/agents/skill-bench/cases/targeting/functional-with-spec.toml b/agents/skill-bench/cases/targeting/functional-with-spec.toml
deleted file mode 100644
index 60ffa9b..0000000
--- a/agents/skill-bench/cases/targeting/functional-with-spec.toml
+++ /dev/null
@@ -1,86 +0,0 @@
-# Test Case: Targeting Functional (with existing specification)
-
-name = "functional-with-spec"
-description = "Verify targeting process with existing specification"
-timeout = 600                                                        # seconds
-
-test_prompt = """
-I have placed an invention specification in `specification.md`. Please create a target population and run the patent search for a 2025 product release.
-
-Before asking me any questions, please use the question-responder skill to check if the required information is already available. Then proceed with the targeting process.
-"""
-
-[answers]
-"modifying keywords" = "Looks good, proceed to search."
-"synonyms" = "Looks good, proceed to search."
-"adjust query" = "Looks good, proceed to search."
-"hit counts" = "The count is acceptable, proceed to merge."
-"acceptable" = "The count is acceptable, proceed to merge."
-"~1000 hits" = "The count is acceptable, proceed to merge."
-
-[[setup]]
-path = "specification.md"
-content = """
-# Specification Dummy
-
-**Product/Technology**:
-Solar-powered auto-cleaning cat litter box with IoT notifications.
-
-**Background**:
-Current cat litter boxes require manual scooping and frequent bag changes, which leads to odor and hygiene issues.
-
-**Key Technical Features**:
-
-1. A solar panel integrated into the top hood that charges an internal battery.
-2. A rotating internal drum that separates solid waste into a sealed compartment.
-3. An IoT module (Wi-Fi) that sends push notifications to a smartphone when the waste compartment is full.
-
-**Target Release Date**: 2025-12-31
-
-**Priority Date Cutoff**: 2015-01-01
-
-**Competitors**:
-
-- Litter-Robot (AutoPets, LLC)
-- CatGenie (PetNovations, Ltd.)
-"""
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh targeting"
-
-[[checks]]
-name = "targeting_invoked"
-type = "script"
-command = "check-skill-invoked.sh targeting"
-
-[[checks]]
-name = "concept_interview_not_invoked"
-type = "script"
-command = "check-skill-not-invoked.sh concept-interviewing"
-
-[[checks]]
-name = "targeting_template_read"
-type = "script"
-command = "check-tool-use.sh Read file_path \"targeting-template.md\""
-
-[[checks]]
-name = "keywords_template_read"
-type = "script"
-command = "check-tool-use.sh Read file_path \"keywords-template.md\""
-
-[[checks]]
-name = "targeting_md_created"
-type = "script"
-command = "check-workspace-file.sh targeting.md"
-
-[[checks]]
-name = "keywords_md_created"
-type = "script"
-command = "check-workspace-file.sh keywords.md"
-
-[[checks]]
-name = "google_patent_search_invoked"
-type = "script"
-command = "check-skill-invoked.sh google-patent-cli:patent-search"
diff --git a/agents/skill-bench/cases/targeting/triggering.toml b/agents/skill-bench/cases/targeting/triggering.toml
deleted file mode 100644
index 57cf46d..0000000
--- a/agents/skill-bench/cases/targeting/triggering.toml
+++ /dev/null
@@ -1,19 +0,0 @@
-# Test Case: Targeting - Triggering
-
-name = "triggering"
-description = "Verify targeting skill is triggered when asked to execute targeting"
-timeout = 60
-
-test_prompt = """
-I have a product concept. Now I need to create a target population for patent searching.
-"""
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh targeting"
-
-[[checks]]
-name = "targeting_invoked"
-type = "script"
-command = "check-skill-invoked.sh targeting"
diff --git a/agents/skill-bench/harness-plugin/.claude-plugin/plugin.json b/agents/skill-bench/harness-plugin/.claude-plugin/plugin.json
deleted file mode 100644
index 97b0c7b..0000000
--- a/agents/skill-bench/harness-plugin/.claude-plugin/plugin.json
+++ /dev/null
@@ -1,9 +0,0 @@
-{
-  "name": "skill-bench-harness",
-  "version": "0.1.0",
-  "description": "Test harness utilities for skill-bench testing framework",
-  "author": {
-    "name": "sonesuke"
-  },
-  "mcpServers": {}
-}
diff --git a/agents/skill-bench/harness-plugin/cases/basic.toml b/agents/skill-bench/harness-plugin/cases/basic.toml
deleted file mode 100644
index 571c480..0000000
--- a/agents/skill-bench/harness-plugin/cases/basic.toml
+++ /dev/null
@@ -1,37 +0,0 @@
-# Test Case: Question Responder - Basic Functionality
-
-name = "basic"
-description = "Verify question-responder finds answers from test case"
-timeout = 60
-
-test_prompt = """
-I need to ask: What are the product concept, target country, release date, and competitors for the patent search? Please use the question-responder skill to find the answer.
-"""
-
-[answers]
-"product concept" = "Voice recognition system for smart home devices with real-time transcription and noise-resistant recognition"
-"target country" = "US"
-"country" = "US"
-"release date" = "2025-06-01"
-"date" = "2025-06-01"
-"competitors" = ["Google", "Amazon"]
-
-[[checks]]
-name = "skill_loaded"
-type = "script"
-command = "check-skill-loaded.sh skill-bench-harness:question-responder"
-
-[[checks]]
-name = "question_responder_invoked"
-type = "script"
-command = "check-skill-invoked.sh question-responder"
-
-[[checks]]
-name = "answer_contains_us"
-type = "script"
-command = "check-output-contains.sh US"
-
-[[checks]]
-name = "answer_contains_google"
-type = "script"
-command = "check-output-contains.sh Google"
diff --git a/agents/skill-bench/harness-plugin/skills/question-responder/SKILL.md b/agents/skill-bench/harness-plugin/skills/question-responder/SKILL.md
deleted file mode 100644
index 13ee4e0..0000000
--- a/agents/skill-bench/harness-plugin/skills/question-responder/SKILL.md
+++ /dev/null
@@ -1,91 +0,0 @@
----
-name: question-responder
-description: "Find answers to common questions from project context before asking the user. Always use this skill when you need information like competitors, target country, or release date."
-context: fork
-metadata:
-  author: sonesuke
-  version: 0.1.0
----
-
-# Question Responder
-
-## Purpose
-
-Before asking the user questions, check if answers can be found in project context.
-
-## When to Use
-
-Use this skill whenever you need to ask the user a question. First check if the answer exists in:
-
-- Project documentation
-- Test data files
-- Configuration files
-
-## Process
-
-1. **Get Test Case Path**: Read the `SKILL_BENCH_TEST_CASE` environment variable to get the current test case file path
-
-2. **Read Test Case**: Use the Read tool to read the test case TOML file from the path
-
-3. **Find Answers**: Look for `[answers]` section in the test case file
-   - This section contains keyword-answer pairs
-
-4. **Match Question**: Check if the input question contains keywords from the `[answers]` keys
-   - For example, if the question is "Who are the competitors?", look for keys like "competitors"
-   - Use substring or keyword matching
-
-5. **Return Result**:
-   - **If answer found**: Return the answer value from the matched key
-   - **If not found**: Return "ANSWER_NOT_FOUND" to indicate user input is needed
-
-## Implementation Notes
-
-- The test case path is provided via `SKILL_BENCH_TEST_CASE` environment variable
-- Access environment variables using the Bash tool: `echo $SKILL_BENCH_TEST_CASE`
-- Match questions using flexible keyword matching (e.g., "competitors" matches "Who are the competitors?")
-
-## Test Case Format
-
-Answers are embedded directly in the test case TOML file using the `[answers]` section:
-
-```toml
-# Test Case: Concept Interview - Uses Question Responder
-name = "uses-question-responder"
-description = "Verify concept-interview uses question-responder when information is missing"
-timeout = 180
-
-test_prompt = """
-I want to start a patent search for a new voice recognition system...
-"""
-
-[answers]
-"competitors" = ["Google", "Amazon"]
-"target country" = "US"
-"release date" = "2025-06-01"
-"country" = "US"
-"date" = "2025-06-01"
-```
-
-The `[answers]` section contains keyword-value pairs that match common questions.
-
-## Context Isolation
-
-This skill uses `context: fork` to run in an isolated sub-agent context. This ensures:
-
-- The main AI agent doesn't see the answer files
-- Test integrity is maintained
-- Answers are only revealed when explicitly requested
-
-## Usage Example
-
-```yaml
-# Instead of:
-AskUserQuestion:
-  questions:
-    - question: "What is the target country?"
-
-# Use:
-Skill:
-  skill: question-responder
-  args: "What is the target country for patent search?"
-```
diff --git a/agents/skill-bench/harness-plugin/skills/question-responder/assets/.gitkeep b/agents/skill-bench/harness-plugin/skills/question-responder/assets/.gitkeep
deleted file mode 100644
index 557f538..0000000
--- a/agents/skill-bench/harness-plugin/skills/question-responder/assets/.gitkeep
+++ /dev/null
@@ -1 +0,0 @@
-# Assets directory for question-responder skill
diff --git a/agents/skill-bench/runner.sh b/agents/skill-bench/runner.sh
deleted file mode 100755
index ad874d5..0000000
--- a/agents/skill-bench/runner.sh
+++ /dev/null
@@ -1,220 +0,0 @@
-#!/bin/bash
-# agents/skill-bench/runner.sh
-# Skill test runner for patent-kit.
-# All execution happens inside the container.
-#
-# Usage: ./runner.sh [pattern]
-#   pattern:  Glob pattern to match test files (default: "cases/*/*.toml")
-
-set -o pipefail
-
-# Determine workspace root
-WORKSPACE_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
-# Determine skill-bench root
-SKILL_BENCH_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-# Determine tools directory
-TOOLS_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/tools"
-# Resolve pattern relative to skill-bench root
-PATTERN="${1:-cases/*/*.toml}"
-# Convert to absolute path
-if [[ "$PATTERN" != /* ]]; then
-    TARGET_PATTERN="$SKILL_BENCH_ROOT/$PATTERN"
-else
-    TARGET_PATTERN="$PATTERN"
-fi
-
-echo "=================================================="
-echo "[SkillBench] Starting Skill Test Runner"
-echo "[SkillBench] Workspace: $WORKSPACE_ROOT"
-echo "[SkillBench] Pattern: $TARGET_PATTERN"
-echo "=================================================="
-
-TOTAL_CASES=0
-TOTAL_PASS=0
-TOTAL_FAIL=0
-
-# --- Collect test files matching pattern ---
-TEST_FILES=()
-TEST_SKILLS=()
-TEST_NAMES=()
-
-for TEST_FILE in $TARGET_PATTERN; do
-    [ -f "$TEST_FILE" ] || continue
-
-    TEST_FILE_REL="${TEST_FILE#$WORKSPACE_ROOT/}"
-    SKILL_NAME=$(basename "$(dirname "$TEST_FILE_REL")")
-    TEST_NAME=$(basename "$TEST_FILE" .toml)
-
-    TEST_FILES+=("$TEST_FILE")
-    TEST_SKILLS+=("$SKILL_NAME")
-    TEST_NAMES+=("$TEST_NAME")
-done
-
-if [ ${#TEST_FILES[@]} -eq 0 ]; then
-    echo "[SkillBench] No test files found matching pattern: $TARGET_PATTERN"
-    exit 1
-fi
-
-# --- Process each test file ---
-for IDX in "${!TEST_FILES[@]}"; do
-    TEST_FILE="${TEST_FILES[$IDX]}"
-    SKILL_NAME="${TEST_SKILLS[$IDX]}"
-    TEST_NAME="${TEST_NAMES[$IDX]}"
-    TEST_CASE_NAME="${SKILL_NAME}/${TEST_NAME}"
-    TOTAL_CASES=$((TOTAL_CASES + 1))
-
-    # Read test configuration
-    TEST_PROMPT=$(yq eval '.test_prompt' "$TEST_FILE")
-    TEST_TIMEOUT=$(yq eval '.timeout // 300' "$TEST_FILE")
-
-    echo ""
-    echo "──────────────────────────────────────────────────"
-    echo "[SkillBench] Test Case: $TEST_CASE_NAME"
-    echo "──────────────────────────────────────────────────"
-
-    # --- Phase 1: Setup and Execute trial ---
-    # Create skill-specific log directory
-    LOG_DIR="$SKILL_BENCH_ROOT/logs/${SKILL_NAME}"
-    mkdir -p "$LOG_DIR"
-
-    TIMESTAMP=$(date +%Y%m%d_%H%M%S)
-    LOG_FILE="$LOG_DIR/${TIMESTAMP}_${TEST_NAME}.log"
-    WORK_DIR="/tmp/skill-bench-${TIMESTAMP}_${SKILL_NAME}-${TEST_NAME}"
-
-    # Setup workspace
-    echo "[SkillBench]   📦 Setting up workspace: $WORK_DIR"
-    rm -rf "${WORK_DIR}"
-    mkdir -p "${WORK_DIR}"
-
-    # Copy plugin directory as claude-plugin (required for skill testing)
-    # patent-kit uses 'plugin/' while google-patent-cli uses 'claude-plugin/'
-    cp -r "$WORKSPACE_ROOT/plugin" "$WORK_DIR/claude-plugin" 2>/dev/null || true
-
-    # Read setup files from test.toml [[setup]] array
-    NUM_SETUP=$(yq eval '.setup | length // 0' "$TEST_FILE")
-    if [ "$NUM_SETUP" -gt 0 ]; then
-        for SETUP_IDX in $(seq 0 $((NUM_SETUP - 1))); do
-            SETUP_TYPE=$(yq eval ".setup[$SETUP_IDX].type // \"\"" "$TEST_FILE")
-            SETUP_NAME=$(yq eval ".setup[$SETUP_IDX].name // \"\"" "$TEST_FILE")
-
-            if [ "$SETUP_TYPE" = "script" ]; then
-                # Execute script setup
-                SETUP_COMMAND=$(yq eval ".setup[$SETUP_IDX].command" "$TEST_FILE")
-                if [ -n "$SETUP_NAME" ]; then
-                    echo "[SkillBench]   → Setup: $SETUP_NAME"
-                else
-                    echo "[SkillBench]   → Setup: $SETUP_COMMAND"
-                fi
-
-                # Execute in WORK_DIR with tools in PATH
-                (cd "$WORK_DIR" && PATH="$TOOLS_DIR:$PATH" bash -c "$SETUP_COMMAND")
-            else
-                # File content setup (default behavior)
-                SETUP_PATH=$(yq eval ".setup[$SETUP_IDX].path" "$TEST_FILE")
-                if [ -z "$SETUP_PATH" ]; then
-                    echo "[SkillBench]   ⚠️  Skipping setup with no path (index $SETUP_IDX)"
-                    continue
-                fi
-
-                SETUP_DIR=$(dirname "$WORK_DIR/$SETUP_PATH")
-                mkdir -p "$SETUP_DIR"
-                yq eval ".setup[$SETUP_IDX].content" "$TEST_FILE" > "$WORK_DIR/${SETUP_PATH}"
-
-                if [ -n "$SETUP_NAME" ]; then
-                    echo "[SkillBench]   → Setup: $SETUP_NAME ($SETUP_PATH)"
-                fi
-            fi
-        done
-    fi
-
-    # Execute trial
-    echo "[SkillBench]   Running trial → $LOG_FILE"
-    START_TIME=$(date +%s)
-
-    # Unset CLAUDECODE to avoid nested session error
-    (cd "$WORK_DIR" && unset CLAUDECODE && SKILL_BENCH_TEST_CASE="$TEST_FILE" claude -p \
-        --dangerously-skip-permissions \
-        --verbose \
-        --output-format stream-json \
-        --plugin-dir ./claude-plugin \
-        --plugin-dir "$WORKSPACE_ROOT/agents/skill-bench/harness-plugin" \
-        -- "$TEST_PROMPT" < /dev/null | jq -c '(. + {timestamp: now})') > "$LOG_FILE" 2>&1
-
-    EXIT_CODE=$?
-    END_TIME=$(date +%s)
-    DURATION=$(( END_TIME - START_TIME ))
-
-    if [ $EXIT_CODE -eq 0 ]; then
-        echo "[SkillBench]   ✅ Trial finished (took ${DURATION}s)"
-    else
-        echo "[SkillBench]   ⚠️  Trial exited with code $EXIT_CODE (took ${DURATION}s)"
-    fi
-
-    # --- Phase 2: Evaluate trial ---
-    echo "[SkillBench]   Running evaluation..."
-
-    CASE_PASS=true
-
-    # Run checks from test.toml
-    NUM_CHECKS=$(yq eval '.checks | length' "$TEST_FILE")
-    for CHECK_IDX in $(seq 0 $((NUM_CHECKS - 1))); do
-        CHECK_NAME=$(yq eval ".checks[$CHECK_IDX].name" "$TEST_FILE")
-        CHECK_CMD=$(yq eval ".checks[$CHECK_IDX].command" "$TEST_FILE")
-
-        # Parse check command into script and args
-        # Use eval to properly handle quoted arguments
-        CHECK_SCRIPT=$(echo "$CHECK_CMD" | awk '{print $1}')
-        CHECK_ARGS=$(echo "$CHECK_CMD" | cut -d' ' -f2-)
-
-        if [ -n "$CHECK_ARGS" ]; then
-            # Command has arguments: script.sh arg1 arg2...
-            # Use eval to properly expand quoted arguments
-            # Replace {} placeholders with LOG_FILE and WORK_DIR (if present)
-            if echo "$CHECK_ARGS" | grep -q '{}'; then
-                CHECK_ARGS=$(echo "$CHECK_ARGS" | sed 's|{}|"'$LOG_FILE'"|1' | sed 's|{}|"'$WORK_DIR'"|1')
-                # {} placeholders were replaced, don't pass LOG_FILE and WORK_DIR again
-                if eval "\"$TOOLS_DIR/$CHECK_SCRIPT\" $CHECK_ARGS" >/dev/null 2>&1; then
-                    echo "[SkillBench]     ✅ $CHECK_NAME"
-                else
-                    echo "[SkillBench]     ❌ $CHECK_NAME"
-                    CASE_PASS=false
-                fi
-            else
-                # No {} placeholders, pass LOG_FILE and WORK_DIR as first two args
-                if eval "\"$TOOLS_DIR/$CHECK_SCRIPT\" \"$LOG_FILE\" \"$WORK_DIR\" $CHECK_ARGS" >/dev/null 2>&1; then
-                    echo "[SkillBench]     ✅ $CHECK_NAME"
-                else
-                    echo "[SkillBench]     ❌ $CHECK_NAME"
-                    CASE_PASS=false
-                fi
-            fi
-        else
-            # Command has no arguments: script.sh
-            # Still pass LOG_FILE and WORK_DIR
-            if $TOOLS_DIR/$CHECK_SCRIPT "$LOG_FILE" "$WORK_DIR" >/dev/null 2>&1; then
-                echo "[SkillBench]     ✅ $CHECK_NAME"
-            else
-                echo "[SkillBench]     ❌ $CHECK_NAME"
-                CASE_PASS=false
-            fi
-        fi
-    done
-
-    # Display case result
-    if [ "$CASE_PASS" = true ]; then
-        echo "[SkillBench]   ✅ $TEST_CASE_NAME: PASS"
-        TOTAL_PASS=$((TOTAL_PASS + 1))
-    else
-        echo "[SkillBench]   ❌ $TEST_CASE_NAME: FAIL"
-        TOTAL_FAIL=$((TOTAL_FAIL + 1))
-    fi
-done
-
-# --- Summary ---
-echo ""
-echo "=================================================="
-echo "[SkillBench] Test Summary"
-echo "[SkillBench] Total: $TOTAL_CASES | Pass: $TOTAL_PASS | Fail: $TOTAL_FAIL"
-echo "=================================================="
-
-exit "$TOTAL_FAIL"
diff --git a/agents/skill-bench/tools/check-db-query.sh b/agents/skill-bench/tools/check-db-query.sh
deleted file mode 100755
index dde6114..0000000
--- a/agents/skill-bench/tools/check-db-query.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env bash
-# Check database query result
-# Usage: check-db-query.sh <log_file> <work_dir> <expected_result> <query>
-# Note: query comes last to avoid issues with special characters
-
-LOG_FILE="$1"
-WORK_DIR="$2"
-EXPECTED="$3"
-QUERY="$4"
-
-cd "$WORK_DIR" || exit 1
-
-if [ -f "patents.db" ]; then
-    RESULT=$(sqlite3 patents.db "$QUERY" 2>/dev/null | tr -d '\n')
-    # Handle numeric comparisons like '>0', '<5', '=10'
-    if [[ "$EXPECTED" =~ ^([<>]=?|=)([0-9]+)$ ]]; then
-        OP="${BASH_REMATCH[1]}"
-        NUM="${BASH_REMATCH[2]}"
-        if [ "$OP" = ">" ] && [ "$RESULT" -gt "$NUM" ]; then
-            exit 0
-        elif [ "$OP" = ">=" ] && [ "$RESULT" -ge "$NUM" ]; then
-            exit 0
-        elif [ "$OP" = "<" ] && [ "$RESULT" -lt "$NUM" ]; then
-            exit 0
-        elif [ "$OP" = "<=" ] && [ "$RESULT" -le "$NUM" ]; then
-            exit 0
-        elif [ "$OP" = "=" ] && [ "$RESULT" -eq "$NUM" ]; then
-            exit 0
-        fi
-    elif [ "$RESULT" = "$EXPECTED" ]; then
-        exit 0
-    fi
-fi
-exit 1
diff --git a/agents/skill-bench/tools/check-file-content.sh b/agents/skill-bench/tools/check-file-content.sh
deleted file mode 100755
index cf862d0..0000000
--- a/agents/skill-bench/tools/check-file-content.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-# check-file-content.sh - Check if a workspace file contains specific content
-# Usage: check-file-content.sh <log_file> <work_dir> <filename> <search_string>
-
-LOG_FILE="${1:-}"
-WORK_DIR="${2:-}"
-FILENAME="${3:-}"
-SEARCH_STRING="${4:-}"
-
-if [ -z "$LOG_FILE" ] || [ -z "$WORK_DIR" ] || [ -z "$FILENAME" ] || [ -z "$SEARCH_STRING" ]; then
-    echo "[Error] Usage: $0 <log_file> <work_dir> <filename> <search_string>" >&2
-    exit 1
-fi
-
-# Check if the file exists in the workspace
-FILE_PATH="$WORK_DIR/$FILENAME"
-
-if [ ! -f "$FILE_PATH" ]; then
-    # File doesn't exist - failure
-    exit 1
-fi
-
-# Check if the file contains the search string
-if grep -q "$SEARCH_STRING" "$FILE_PATH"; then
-    # File contains the string - success
-    exit 0
-else
-    # File doesn't contain the string - failure
-    exit 1
-fi
diff --git a/agents/skill-bench/tools/check-file-not-contains.sh b/agents/skill-bench/tools/check-file-not-contains.sh
deleted file mode 100755
index 67ac7a3..0000000
--- a/agents/skill-bench/tools/check-file-not-contains.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-# check-file-not-contains.sh - Check if a workspace file does NOT contain specific content
-# Usage: check-file-not-contains.sh <log_file> <work_dir> <filename> <search_string>
-
-LOG_FILE="${1:-}"
-WORK_DIR="${2:-}"
-FILENAME="${3:-}"
-SEARCH_STRING="${4:-}"
-
-if [ -z "$LOG_FILE" ] || [ -z "$WORK_DIR" ] || [ -z "$FILENAME" ] || [ -z "$SEARCH_STRING" ]; then
-    echo "[Error] Usage: $0 <log_file> <work_dir> <filename> <search_string>" >&2
-    exit 1
-fi
-
-# Check if the file exists in the workspace
-FILE_PATH="$WORK_DIR/$FILENAME"
-
-if [ ! -f "$FILE_PATH" ]; then
-    # File doesn't exist - treat as not containing (success for this check)
-    exit 0
-fi
-
-# Check if the file does NOT contain the search string
-if grep -q "$SEARCH_STRING" "$FILE_PATH"; then
-    # File contains the string - failure (we expect it NOT to)
-    exit 1
-else
-    # File doesn't contain the string - success
-    exit 0
-fi
diff --git a/agents/skill-bench/tools/check-log-contains.sh b/agents/skill-bench/tools/check-log-contains.sh
deleted file mode 100755
index bbc58a3..0000000
--- a/agents/skill-bench/tools/check-log-contains.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-# Check if log file contains a specific pattern
-# Usage: check-log-contains.sh <log_file> <pattern>
-
-LOG_FILE="$1"
-PATTERN="$2"
-
-if [ ! -f "$LOG_FILE" ]; then
-    echo "Error: Log file not found: $LOG_FILE"
-    exit 1
-fi
-
-if grep -q "$PATTERN" "$LOG_FILE"; then
-    exit 0
-else
-    exit 1
-fi
diff --git a/agents/skill-bench/tools/check-mcp-loaded.sh b/agents/skill-bench/tools/check-mcp-loaded.sh
deleted file mode 100755
index 31f6257..0000000
--- a/agents/skill-bench/tools/check-mcp-loaded.sh
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/bin/bash
-# Check if MCP server loaded successfully in a log file
-# Usage: check-mcp-loaded.sh <log_file> <work_dir> <mcp_server_name>
-# Returns: 0 if MCP server loaded successfully, 1 if failed or not found
-
-LOG_FILE="$1"
-WORK_DIR="$2"
-MCP_SERVER_NAME="$3"
-
-if [[ -z "$LOG_FILE" ]] || [[ -z "$MCP_SERVER_NAME" ]]; then
-  echo "Usage: $0 <log_file> <work_dir> <mcp_server_name>" >&2
-  exit 2
-fi
-
-if [[ ! -f "$LOG_FILE" ]]; then
-  echo "Log file not found: $LOG_FILE" >&2
-  exit 2
-fi
-
-# Check MCP server status in init message (first line is init message)
-# The mcp_servers array contains objects with name and status fields
-STATUS=$(head -1 "$LOG_FILE" | jq -r '
-  .mcp_servers? // []
-  | .[] | select(.name? | test("'"$MCP_SERVER_NAME"'"))
-  | .status // "not_found"
-')
-
-if [[ "$STATUS" == "not_found" ]]; then
-    echo "MCP server $MCP_SERVER_NAME not found in log" >&2
-    exit 1
-fi
-
-if [[ "$STATUS" == "failed" ]]; then
-    echo "MCP server $MCP_SERVER_NAME failed to load (status: failed)" >&2
-    exit 1
-fi
-
-exit 0
diff --git a/agents/skill-bench/tools/check-mcp-success.sh b/agents/skill-bench/tools/check-mcp-success.sh
deleted file mode 100755
index f6b9db4..0000000
--- a/agents/skill-bench/tools/check-mcp-success.sh
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/bin/bash
-# Check if MCP tool calls succeeded in a log file
-# Usage: check-mcp-success.sh <log_file> <work_dir> <mcp_tool_name> [--optional]
-#   --optional: If no MCP calls are made, return success (default: fail)
-# Returns: 0 if all MCP calls succeeded (or none made with --optional), 1 if any failed
-
-LOG_FILE="$1"
-WORK_DIR="$2"
-MCP_TOOL_NAME="$3"
-OPTIONAL_FLAG="${4:-}"
-
-if [[ -z "$LOG_FILE" ]] || [[ -z "$MCP_TOOL_NAME" ]]; then
-  echo "Usage: $0 <log_file> <work_dir> <mcp_tool_name> [--optional]" >&2
-  exit 2
-fi
-
-if [[ ! -f "$LOG_FILE" ]]; then
-  echo "Log file not found: $LOG_FILE" >&2
-  exit 2
-fi
-
-# Extract tool_use IDs for the specified MCP tool from assistant messages
-TOOL_USE_IDS=$(jq -r '
-  .[]
-  | select(.type? == "assistant")
-  | (.message.content? // [])
-  | select(type == "array")
-  | .[]
-  | select(type == "object" and .type? == "tool_use" and (.name? // "") | test("'"$MCP_TOOL_NAME"'"))
-  | .id
-' "$LOG_FILE")
-
-# Count how many tool_use IDs we found
-ID_COUNT=$(echo "$TOOL_USE_IDS" | grep -c '^\w*$' || true)
-
-# If no MCP calls were made
-if [[ $ID_COUNT -eq 0 ]]; then
-  if [[ "$OPTIONAL_FLAG" == "--optional" ]]; then
-    # Optional check: return success if no calls were made
-    exit 0
-  else
-    # Required check: return failure if no calls were made
-    echo "No $MCP_TOOL_NAME tool calls found in log" >&2
-    exit 1
-  fi
-fi
-
-# Check if any of the corresponding tool_results have is_error: true
-while IFS= read -r tool_id; do
-  if [[ -n "$tool_id" ]]; then
-    ERROR_CHECK=$(jq -r "
-      .[]
-      | select(.type? == \"user\")
-      | (.message.content? // [])
-      | select(type == \"array\")
-      | .[]
-      | select(type == \"object\" and .type? == \"tool_result\" and .tool_use_id? == \"$tool_id\")
-      | .is_error // false
-    " "$LOG_FILE")
-
-    if [[ "$ERROR_CHECK" == "true" ]]; then
-      echo "MCP tool $MCP_TOOL_NAME (tool_use_id: $tool_id) returned an error" >&2
-      exit 1
-    fi
-  fi
-done <<< "$TOOL_USE_IDS"
-
-# All MCP calls succeeded
-exit 0
diff --git a/agents/skill-bench/tools/check-mcp-tool-invoked.sh b/agents/skill-bench/tools/check-mcp-tool-invoked.sh
deleted file mode 100755
index 700b37b..0000000
--- a/agents/skill-bench/tools/check-mcp-tool-invoked.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/bin/bash
-# check-mcp-tool-invoked.sh - Check if a specific MCP tool was invoked
-# Usage: check-mcp-tool-invoked.sh <log_file> <work_dir> <mcp_tool_name>
-#   mcp_tool_name: e.g., "search_patents", "fetch_patent", etc.
-
-LOG_FILE="${1:-}"
-WORK_DIR="${2:-}"
-TOOL_NAME="${3:-}"
-
-if [ -z "$LOG_FILE" ] || [ -z "$TOOL_NAME" ]; then
-    echo "[Error] Usage: $0 <log_file> <work_dir> <mcp_tool_name>" >&2
-    exit 1
-fi
-
-# Check if the MCP tool was invoked in the log
-# MCP tools appear as: "name":"mcp__plugin_xxx__tool_name"
-if grep -q '"name":"mcp__'" "$LOG_FILE" && grep -q '"name":"mcp__.*__'"$TOOL_NAME"'"' "$LOG_FILE"; then
-    # Tool was invoked - success
-    exit 0
-else
-    # Tool was not invoked - failure
-    exit 1
-fi
diff --git a/agents/skill-bench/tools/check-output-contains.sh b/agents/skill-bench/tools/check-output-contains.sh
deleted file mode 100755
index 5b26e3d..0000000
--- a/agents/skill-bench/tools/check-output-contains.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-# check-output-contains.sh - Check if the agent output contains a specific string
-# Usage: check-output-contains.sh <log_file> <work_dir> <search_string>
-
-LOG_FILE="${1:-}"
-WORK_DIR="${2:-}"
-SEARCH_STRING="${3:-}"
-
-if [ -z "$LOG_FILE" ] || [ -z "$SEARCH_STRING" ]; then
-    echo "[Error] Usage: $0 <log_file> <work_dir> <search_string>" >&2
-    exit 1
-fi
-
-# Check if the output contains the search string
-# We look for assistant messages in the log
-if grep -q '"type":"assistant"' "$LOG_FILE" && grep -q "content" "$LOG_FILE" && grep -qi "$SEARCH_STRING" "$LOG_FILE"; then
-    exit 0
-else
-    exit 1
-fi
diff --git a/agents/skill-bench/tools/check-output-file.sh b/agents/skill-bench/tools/check-output-file.sh
deleted file mode 100755
index d0a9ebd..0000000
--- a/agents/skill-bench/tools/check-output-file.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/bash
-# Check if output_file was created in log
-# Usage: check-output-file.sh <log_file> <work_dir>
-
-LOG_FILE="$1"
-WORK_DIR="$2"
-
-if [ -z "$LOG_FILE" ]; then
-    echo "[Error] Usage: $0 <log_file> <work_dir>" >&2
-    exit 1
-fi
-
-# Check if output_file exists in tool_result content
-# Use try/catch to handle invalid JSON in content field
-jq -s '[.[] | select(.type == "user") | .message.content[]? | select(type == "object" and .type == "tool_result" and .tool_use_id? and .content? != null and (.content | type) == "string") | .content | try fromjson catch null | select(. != null) | .output_file] | length > 0' "$LOG_FILE"
diff --git a/agents/skill-bench/tools/check-param.sh b/agents/skill-bench/tools/check-param.sh
deleted file mode 100755
index 27ff5b1..0000000
--- a/agents/skill-bench/tools/check-param.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-# Check if parameter was used in tool call
-# Usage: check-param.sh <log_file> <work_dir> <tool_name> <param_name> <expected_value>
-
-LOG_FILE="$1"
-WORK_DIR="$2"
-TOOL_NAME="$3"
-PARAM_NAME="$4"
-EXPECTED_VALUE="$5"
-
-if [ -z "$LOG_FILE" ] || [ -z "$TOOL_NAME" ] || [ -z "$PARAM_NAME" ]; then
-    echo "[Error] Usage: $0 <log_file> <work_dir> <tool_name> <param_name> [expected_value]" >&2
-    exit 1
-fi
-
-if [ -n "$EXPECTED_VALUE" ]; then
-    # Check if parameter equals expected value (handle both string and array)
-    jq -s "[.[] | select(.type == \"assistant\") | .message.content[]? | select(type == \"object\" and .type == \"tool_use\") | select(.name | test(\"$TOOL_NAME\"; \"i\")) | .input.$PARAM_NAME | (if type == \"array\" then .[] == \"$EXPECTED_VALUE\" else . == \"$EXPECTED_VALUE\" end)] | any" "$LOG_FILE"
-else
-    # Check if parameter exists
-    jq -s "[.[] | select(.type == \"assistant\") | .message.content[]? | select(type == \"object\" and .type == \"tool_use\") | select(.name | test(\"$TOOL_NAME\"; \"i\")) | .input.$PARAM_NAME] | length > 0" "$LOG_FILE"
-fi
diff --git a/agents/skill-bench/tools/check-skill-invoked.sh b/agents/skill-bench/tools/check-skill-invoked.sh
deleted file mode 100755
index 304ad3a..0000000
--- a/agents/skill-bench/tools/check-skill-invoked.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-# check-skill-invoked.sh - Check if a specific skill was invoked
-# Usage: check-skill-invoked.sh <log_file> <work_dir> <skill_name> [--not]
-#   --not: Invert the check (verify skill was NOT invoked)
-
-LOG_FILE="${1:-}"
-WORK_DIR="${2:-}"
-SKILL_NAME="${3:-}"
-INVERT="${4:-}"
-
-if [ -z "$LOG_FILE" ] || [ -z "$SKILL_NAME" ]; then
-    echo "[Error] Usage: $0 <log_file> <work_dir> <skill_name> [--not]" >&2
-    exit 1
-fi
-
-# Check if the skill was invoked in the log
-# Note: Log is JSONL format with "name":"Skill" and "skill":"patent-kit:<skill-name>"
-if grep -q '"Skill"' "$LOG_FILE" && grep -q '"skill":".*'"$SKILL_NAME" "$LOG_FILE"; then
-    # Skill was invoked
-    if [ "$INVERT" = "--not" ]; then
-        # We expected it NOT to be invoked, but it was - failure
-        exit 1
-    else
-        # We expected it to be invoked, and it was - success
-        exit 0
-    fi
-else
-    # Skill was not invoked
-    if [ "$INVERT" = "--not" ]; then
-        # We expected it NOT to be invoked, and it wasn't - success
-        exit 0
-    else
-        # We expected it to be invoked, but it wasn't - failure
-        exit 1
-    fi
-fi
diff --git a/agents/skill-bench/tools/check-skill-loaded.sh b/agents/skill-bench/tools/check-skill-loaded.sh
deleted file mode 100755
index 43f37b5..0000000
--- a/agents/skill-bench/tools/check-skill-loaded.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-# Check if a skill was loaded successfully in a log file
-# Usage: check-skill-loaded.sh <log_file> <work_dir> <skill_name>
-# Returns: 0 if skill found in init skills array, 1 if not found
-
-LOG_FILE="$1"
-WORK_DIR="$2"
-SKILL_NAME="$3"
-
-if [[ -z "$LOG_FILE" ]] || [[ -z "$SKILL_NAME" ]]; then
-    echo "Usage: $0 <log_file> <work_dir> <skill_name>" >&2
-    exit 2
-fi
-
-if [[ ! -f "$LOG_FILE" ]]; then
-    echo "Log file not found: $LOG_FILE" >&2
-    exit 2
-fi
-
-# Check if skill is in the init skills array (first line is init message)
-FOUND=$(head -1 "$LOG_FILE" | jq -c '
-  .skills | any(.[]; contains("'$SKILL_NAME'"))
-')
-
-if [[ "$FOUND" != "true" ]]; then
-    echo "Skill $SKILL_NAME not found in init skills array" >&2
-    exit 1
-fi
-
-exit 0
diff --git a/agents/skill-bench/tools/check-skill-not-invoked.sh b/agents/skill-bench/tools/check-skill-not-invoked.sh
deleted file mode 100755
index 597bd38..0000000
--- a/agents/skill-bench/tools/check-skill-not-invoked.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-# check-skill-not-invoked.sh - Check if a specific skill was NOT invoked
-# Usage: check-skill-not-invoked.sh <log_file> <work_dir> <skill_name>
-
-LOG_FILE="${1:-}"
-WORK_DIR="${2:-}"
-SKILL_NAME="${3:-}"
-
-if [ -z "$LOG_FILE" ] || [ -z "$SKILL_NAME" ]; then
-    echo "[Error] Usage: $0 <log_file> <work_dir> <skill_name>" >&2
-    exit 1
-fi
-
-# Check if the skill was NOT invoked in the log
-# Note: Log is JSONL format with "name":"Skill" and "skill":"patent-kit:<skill-name>"
-if grep -q '"Skill"' "$LOG_FILE" && grep -q '"skill":".*'"$SKILL_NAME" "$LOG_FILE"; then
-    # Skill was invoked - we expected it NOT to be - failure
-    exit 1
-else
-    # Skill was not invoked - success
-    exit 0
-fi
diff --git a/agents/skill-bench/tools/check-text-contains.sh b/agents/skill-bench/tools/check-text-contains.sh
deleted file mode 100755
index f059f92..0000000
--- a/agents/skill-bench/tools/check-text-contains.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-# Check if text content contains specific patterns
-# Usage: check-text-contains.sh <log_file> <work_dir> <pattern1> [pattern2] ...
-#   log_file: Path to the log file
-#   work_dir: Path to the workspace directory
-#   pattern: Text pattern to search for (can specify multiple)
-
-LOG_FILE="$1"
-WORK_DIR="$2"
-shift 2
-PATTERNS=("$@")
-
-if [ -z "$LOG_FILE" ] || [ ${#PATTERNS[@]} -eq 0 ]; then
-    echo "[Error] Usage: $0 <log_file> <work_dir> <pattern1> [pattern2] ..." >&2
-    exit 1
-fi
-
-# Check if any pattern is found in assistant text content
-FOUND=false
-for PATTERN in "${PATTERNS[@]}"; do
-    if grep -q "\"text\":\"[^\"]*$PATTERN[^\"]*\"" "$LOG_FILE" 2>/dev/null; then
-        FOUND=true
-        break
-    fi
-done
-
-if [ "$FOUND" = "true" ]; then
-    exit 0
-else
-    exit 1
-fi
diff --git a/agents/skill-bench/tools/check-tool-use.sh b/agents/skill-bench/tools/check-tool-use.sh
deleted file mode 100755
index 1b07873..0000000
--- a/agents/skill-bench/tools/check-tool-use.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-# Check if a specific tool was called with specific parameters
-# Usage: check-tool-use.sh <log_file> <work_dir> <tool_name> <param_name> <param_pattern>
-#   log_file: Path to the log file
-#   work_dir: Path to the workspace directory
-#   tool_name: Name of the tool to check (e.g., "Read", "Write")
-#   param_name: Name of the parameter to check (e.g., "file_path")
-#   param_pattern: Pattern to match in the parameter value (regex)
-
-LOG_FILE="$1"
-WORK_DIR="$2"
-TOOL_NAME="$3"
-PARAM_NAME="${4:-}"
-PARAM_PATTERN="${5:-}"
-
-if [ -z "$LOG_FILE" ] || [ -z "$TOOL_NAME" ]; then
-    echo "[Error] Usage: $0 <log_file> <work_dir> <tool_name> [param_name] [param_pattern]" >&2
-    exit 1
-fi
-
-if [ -n "$PARAM_NAME" ] && [ -n "$PARAM_PATTERN" ]; then
-    # Check if tool was called with specific parameter matching pattern
-    jq -s "[.[] | select(.type == \"assistant\") | .message.content[]? | select(type == \"object\" and .type == \"tool_use\" and .name == \"$TOOL_NAME\") | select(.input.$PARAM_NAME | test(\"$PARAM_PATTERN\"))] | length > 0" "$LOG_FILE"
-elif [ -n "$PARAM_NAME" ]; then
-    # Check if tool was called with specific parameter (any value)
-    jq -s "[.[] | select(.type == \"assistant\") | .message.content[]? | select(type == \"object\" and .type == \"tool_use\" and .name == \"$TOOL_NAME\") | .input.$PARAM_NAME] | length > 0" "$LOG_FILE"
-else
-    # Check if tool was called (any parameters)
-    jq -s "[.[] | select(.type == \"assistant\") | .message.content[]? | select(type == \"object\" and .type == \"tool_use\" and .name == \"$TOOL_NAME\")] | length > 0" "$LOG_FILE"
-fi
diff --git a/agents/skill-bench/tools/check-workspace-dir.sh b/agents/skill-bench/tools/check-workspace-dir.sh
deleted file mode 100755
index b5ac0d3..0000000
--- a/agents/skill-bench/tools/check-workspace-dir.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-# Check if directories exist in the workspace
-# Usage: check-workspace-dir.sh <log_file> <work_dir> <dir1> [dir2] ...
-#   log_file: Path to the log file
-#   work_dir: Path to the workspace directory
-#   dir: Directory path to check (can specify multiple, all must exist)
-
-LOG_FILE="$1"
-WORK_DIR="$2"
-shift 2
-DIRS=("$@")
-
-if [ -z "$WORK_DIR" ] || [ ${#DIRS[@]} -eq 0 ]; then
-    echo "[Error] Usage: $0 <log_file> <work_dir> <dir1> [dir2] ..." >&2
-    exit 1
-fi
-
-# Check if all directories exist
-for DIR in "${DIRS[@]}"; do
-    if [ ! -d "$WORK_DIR/$DIR" ]; then
-        exit 1
-    fi
-done
-
-exit 0
diff --git a/agents/skill-bench/tools/check-workspace-file.sh b/agents/skill-bench/tools/check-workspace-file.sh
deleted file mode 100755
index 674e745..0000000
--- a/agents/skill-bench/tools/check-workspace-file.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-# Check if a file exists in the workspace
-# Usage: check-workspace-file.sh <log_file> <work_dir> <file_path>
-#   log_file: Path to the log file
-#   work_dir: Path to the workspace directory
-#   file_path: Relative path to the file to check
-
-LOG_FILE="$1"
-WORK_DIR="$2"
-FILE_PATH="$3"
-
-if [ -z "$WORK_DIR" ] || [ -z "$FILE_PATH" ]; then
-    echo "[Error] Usage: $0 <log_file> <work_dir> <file_path>" >&2
-    exit 1
-fi
-
-[ -f "$WORK_DIR/$FILE_PATH" ]
diff --git a/agents/skill-bench/tools/setup-db.sh b/agents/skill-bench/tools/setup-db.sh
deleted file mode 100755
index 5750d98..0000000
--- a/agents/skill-bench/tools/setup-db.sh
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/bin/bash
-# Generic database setup script
-# Usage:
-#   setup-db.sh init                    - Initialize database
-#   setup-db.sh execute <file.sql>       - Execute SQL file
-
-set -e
-
-COMMAND="${1:-}"
-shift || true
-
-case "$COMMAND" in
-    init)
-        # Check if database already exists
-        if [ -f patents.db ]; then
-            echo "Database already exists, skipping init"
-            exit 0
-        fi
-
-        # Find initialize-database.sql
-        # Try multiple possible locations since we might be run from different directories
-        SQL_FILE=""
-
-        # Check if we're in workspace (has claude-plugin directory)
-        if [ -d "./claude-plugin" ]; then
-            SQL_FILE="./claude-plugin/skills/investigation-preparing/references/sql/initialize-database.sql"
-        fi
-
-        # Fallback to script-relative path
-        if [ -z "$SQL_FILE" ] || [ ! -f "$SQL_FILE" ]; then
-            SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
-            SQL_FILE="$SCRIPT_DIR/../../plugin/skills/investigation-preparing/references/sql/initialize-database.sql"
-        fi
-
-        if [ -z "$SQL_FILE" ] || [ ! -f "$SQL_FILE" ]; then
-            echo "Error: initialize-database.sql not found"
-            exit 1
-        fi
-
-        # Initialize database
-        sqlite3 patents.db < "$SQL_FILE"
-        echo "Database initialized"
-        ;;
-
-    execute)
-        SQL_FILE="$1"
-
-        if [ -z "$SQL_FILE" ]; then
-            echo "Error: SQL file not specified"
-            echo "Usage: setup-db.sh execute <file.sql>"
-            exit 1
-        fi
-
-        if [ ! -f "$SQL_FILE" ]; then
-            echo "Error: SQL file not found: $SQL_FILE"
-            exit 1
-        fi
-
-        # Execute SQL file
-        sqlite3 patents.db < "$SQL_FILE"
-        echo "SQL file executed: $SQL_FILE"
-        ;;
-
-    *)
-        echo "Error: Unknown command '$COMMAND'"
-        echo "Usage:"
-        echo "  setup-db.sh init                    - Initialize database"
-        echo "  setup-db.sh execute <file.sql>       - Execute SQL file"
-        exit 1
-        ;;
-esac
diff --git a/tests/constitution-reminding/functional.toml b/tests/constitution-reminding/functional.toml
deleted file mode 100644
index 050013f..0000000
--- a/tests/constitution-reminding/functional.toml
+++ /dev/null
@@ -1,29 +0,0 @@
-# Test Case: Constitution Reminding Functional
-
-name = "functional"
-description = "Verify constitution-reminding loads and displays core principles"
-timeout = 60 # seconds
-
-test_prompt = """
-Load the constitution skill to understand the core principles.
-"""
-
-[[checks]]
-name = "mcp_server_loaded"
-command = { command = "mcp-loaded", server = "google-patent-cli" }
-
-[[checks]]
-name = "skill_loaded"
-command = { command = "skill-loaded", skill = "constitution-reminding" }
-
-[[checks]]
-name = "constitution_reminding_invoked"
-command = { command = "skill-invoked", skill = "constitution-reminding" }
-
-[[checks]]
-name = "references_instructions_read"
-command = { command = "tool-use", tool = "Read", param = "file_path", value = "constitution-reminding.*references/instructions.md" }
-
-[[checks]]
-name = "constitution_text_displayed"
-command = { command = "message-contains", text = "I." }
diff --git a/tests/constitution-reminding/triggering.toml b/tests/constitution-reminding/triggering.toml
deleted file mode 100644
index fd2a607..0000000
--- a/tests/constitution-reminding/triggering.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-# Test Case: Constitution Reminding - Triggering
-
-name = "triggering"
-description = "Verify constitution-reminding skill is triggered when asked about core principles"
-timeout = 60
-
-test_prompt = """
-Load the constitution skill to understand the core principles.
-"""
-
-[[checks]]
-name = "mcp_server_loaded"
-command = { command = "mcp-loaded", server = "google-patent-cli" }
-
-[[checks]]
-name = "skill_loaded"
-command = { command = "skill-loaded", skill = "constitution-reminding" }
-
-[[checks]]
-name = "constitution_reminding_invoked"
-command = { command = "skill-invoked", skill = "constitution-reminding" }
diff --git a/tests/investigating-database/functional-get-patent-id.toml b/tests/investigating-database/functional-get-patent-id.toml
deleted file mode 100644
index 32a45c4..0000000
--- a/tests/investigating-database/functional-get-patent-id.toml
+++ /dev/null
@@ -1,55 +0,0 @@
-# Test Case: Investigating Database - Get Patent ID
-
-name = "functional-get-patent-id"
-description = "Verify investigating-database can retrieve patent ID by row number"
-timeout = 120 # seconds
-
-test_prompt = """
-Get the patent ID at row 2 from the database.
-"""
-
-[[setup]]
-path = "patents.db"
-content = """
-#!/usr/bin/env bash
-# Setup script to create test database
-sqlite3 patents.db <<EOF
-PRAGMA foreign_keys = ON;
-CREATE TABLE target_patents (
-    id TEXT PRIMARY KEY NOT NULL,
-    family_id TEXT,
-    title TEXT,
-    abstract_text TEXT,
-    publication_date TEXT,
-    country TEXT,
-    assignee TEXT,
-    filing_date TEXT,
-    grant_date TEXT,
-    citation_count INTEGER,
-    claim_count INTEGER,
-    extra_fields TEXT,
-    created_at TEXT DEFAULT (datetime('now')),
-    updated_at TEXT DEFAULT (datetime('now'))
-);
-INSERT INTO target_patents (id, family_id, title, abstract_text, publication_date, country, assignee) VALUES
-('US1234567A', 'US1234567', 'First Patent', 'Abstract 1', '2023-01-15', 'US', 'Assignee1'),
-('US7654321A', 'US7654321', 'Second Patent', 'Abstract 2', '2023-03-20', 'US', 'Assignee2'),
-('US9999999A', 'US9999999', 'Third Patent', 'Abstract 3', '2023-06-10', 'US', 'Assignee3');
-EOF
-"""
-
-[[checks]]
-name = "skill_loaded"
-command = { command = "skill-loaded", skill = "investigating-database" }
-
-[[checks]]
-name = "investigating_database_invoked"
-command = { command = "skill-invoked", skill = "investigating-database" }
-
-[[checks]]
-name = "get_patent_id_script_executed"
-command = { command = "tool-use", tool = "Bash", param = "command", value = "get-patent-id.sh" }
-
-[[checks]]
-name = "correct_patent_id_retrieved"
-command = { command = "message-contains", text = "US7654321A" }
diff --git a/tests/investigating-database/functional-get-statistics.toml b/tests/investigating-database/functional-get-statistics.toml
deleted file mode 100644
index 0ef370a..0000000
--- a/tests/investigating-database/functional-get-statistics.toml
+++ /dev/null
@@ -1,95 +0,0 @@
-# Test Case: Investigating Database - Get Statistics
-
-name = "functional-get-statistics"
-description = "Verify investigating-database can retrieve screening progress statistics"
-timeout = 120 # seconds
-
-test_prompt = """
-Get the screening progress statistics from the database.
-"""
-
-[[setup]]
-path = "patents.db"
-content = """
-#!/usr/bin/env bash
-# Setup script to create test database with sample data
-sqlite3 patents.db <<EOF
-PRAGMA foreign_keys = ON;
-CREATE TABLE target_patents (
-    id TEXT PRIMARY KEY NOT NULL,
-    family_id TEXT,
-    title TEXT,
-    abstract_text TEXT,
-    publication_date TEXT,
-    country TEXT,
-    assignee TEXT,
-    filing_date TEXT,
-    grant_date TEXT,
-    citation_count INTEGER,
-    claim_count INTEGER,
-    extra_fields TEXT,
-    created_at TEXT DEFAULT (datetime('now')),
-    updated_at TEXT DEFAULT (datetime('now'))
-);
-CREATE TABLE screened_patents (
-    id TEXT PRIMARY KEY NOT NULL,
-    title TEXT,
-    legal_status TEXT,
-    judgment TEXT NOT NULL,
-    reason TEXT,
-    abstract_text TEXT,
-    screened_at TEXT DEFAULT (datetime('now')),
-    updated_at TEXT DEFAULT (datetime('now'))
-);
-CREATE VIEW v_screening_progress AS
-SELECT
-    (SELECT COUNT(*) FROM target_patents) as total_targets,
-    (SELECT COUNT(*) FROM screened_patents) as total_screened,
-    (SELECT COUNT(*) FROM screened_patents WHERE judgment = 'relevant') as relevant,
-    (SELECT COUNT(*) FROM screened_patents WHERE judgment = 'irrelevant') as irrelevant,
-    (SELECT COUNT(*) FROM screened_patents WHERE judgment = 'expired') as expired;
-INSERT INTO target_patents (id, family_id, title, abstract_text, publication_date, country, assignee) VALUES
-('US1234567A', 'US1234567', 'Patent 1', 'Abstract 1', '2023-01-15', 'US', 'Assignee1'),
-('US7654321A', 'US7654321', 'Patent 2', 'Abstract 2', '2023-03-20', 'US', 'Assignee2'),
-('US9999999A', 'US9999999', 'Patent 3', 'Abstract 3', '2023-06-10', 'US', 'Assignee3'),
-('US1111111A', 'US1111111', 'Patent 4', 'Abstract 4', '2023-08-20', 'US', 'Assignee4'),
-('US2222222A', 'US2222222', 'Patent 5', 'Abstract 5', '2023-10-10', 'US', 'Assignee5');
-INSERT INTO screened_patents (id, title, judgment, reason) VALUES
-('US1234567A', 'Patent 1', 'relevant', 'Core technology'),
-('US7654321A', 'Patent 2', 'relevant', 'Related technology'),
-('US9999999A', 'Patent 3', 'irrelevant', 'Different domain'),
-('US1111111A', 'Patent 4', 'expired', 'Status expired');
-EOF
-"""
-
-[[checks]]
-name = "skill_loaded"
-command = { command = "skill-loaded", skill = "investigating-database" }
-
-[[checks]]
-name = "investigating_database_invoked"
-command = { command = "skill-invoked", skill = "investigating-database" }
-
-[[checks]]
-name = "get_statistics_script_executed"
-command = { command = "tool-use", tool = "Bash", param = "command", value = "get-statistics.sh" }
-
-[[checks]]
-name = "statistics_contain_total_targets"
-command = { command = "message-contains", text = "total_targets" }
-
-[[checks]]
-name = "statistics_contain_count_5"
-command = { command = "message-contains", text = "5" }
-
-[[checks]]
-name = "statistics_contain_count_4"
-command = { command = "message-contains", text = "4" }
-
-[[checks]]
-name = "statistics_contain_count_2"
-command = { command = "message-contains", text = "2" }
-
-[[checks]]
-name = "statistics_contain_count_1"
-command = { command = "message-contains", text = "1" }
diff --git a/tests/investigating-database/functional-import-csv.toml b/tests/investigating-database/functional-import-csv.toml
deleted file mode 100644
index de44358..0000000
--- a/tests/investigating-database/functional-import-csv.toml
+++ /dev/null
@@ -1,29 +0,0 @@
-# Test Case: Investigating Database - Import CSV
-
-name = "functional-import-csv"
-description = "Verify investigating-database can import CSV data into target_patents"
-timeout = 120 # seconds
-
-test_prompt = "Import test-patents.csv"
-
-[[setup]]
-path = "test-patents.csv"
-content = """
-search URL:,https://patents.google.com/?q=llm
-id,title,assignee,inventor/author,priority date,filing/creation date,publication date,grant date,result link,representative figure link
-KR-102637029-B1,Device for Generating Multi-turn Chat Bot Data Using LLM,주식회사 마인즈앤컴퍼니,"고석태, 백영상",2023-10-11,2023-10-11,2024-02-15,2024-02-15,https://patents.google.com/patent/KR102637029B1/en,
-US-2024292070-A1,Iterative ai prompt optimization,"Loop Now Technologies, Inc.","Wu-Hsi Li, Edwin Chiu",2023-02-24,2024-04-10,2024-08-29,,https://patents.google.com/patent/US20240292070A1/en,https://example.com/figure.png
-US-2025200489-A1,Automatic quality assurance,"Forethought Technologies, Inc.","Sami Ghoche, Deon Nicholas",2022-02-28,2024-10-31,,,https://patents.google.com/patent/US20250200489A1/en,
-"""
-
-[[checks]]
-name = "skill_loaded"
-command = { command = "skill-loaded", skill = "investigating-database" }
-
-[[checks]]
-name = "investigating_database_invoked"
-command = { command = "skill-invoked", skill = "investigating-database" }
-
-[[checks]]
-name = "csv_imported"
-command = { command = "db-query", db = "", expected = "", query = "SELECT COUNT(*) FROM target_patents;" }
diff --git a/tests/investigating-database/functional-import-multiple-csvs.toml b/tests/investigating-database/functional-import-multiple-csvs.toml
deleted file mode 100644
index c283a93..0000000
--- a/tests/investigating-database/functional-import-multiple-csvs.toml
+++ /dev/null
@@ -1,43 +0,0 @@
-# Test Case: Investigating Database - Import Multiple CSV Files
-
-name = "functional-import-multiple-csvs"
-description = "Verify investigating-database can import multiple CSV files with different formats"
-timeout = 180 # seconds
-
-test_prompt = """
-Import all CSV files from the current directory into the patent database.
-"""
-
-[[setup]]
-path = "patents-simple.csv"
-content = """
-id,family_id,title,abstract_text,publication_date,country
-US-1234567-A,US-1234567,Example Patent 1,Example abstract text for patent 1,2023-01-15,US
-US-7654321-A,US-7654321,Example Patent 2,Example abstract text for patent 2,2023-03-20,US
-US-9999999-A,US-9999999,Example Patent 3,Example abstract text for patent 3,2023-06-10,US
-"""
-
-[[setup]]
-path = "patents-google-format.csv"
-content = """
-search URL:,https://patents.google.com/?q=rag+systems
-id,title,assignee,inventor/author,priority date,filing/creation date,publication date,grant date,result link,representative figure link
-KR-102030405-B1,RAG System for Document Analysis,"삼성전자 주식회사","홍길동",2022-05-10,2022-05-10,2023-08-20,2023-08-20,https://patents.google.com/patent/KR102030405B1/en,
-US-20240101234-A1,Information Retrieval Using Neural Networks,"Tech Corp Inc.","Jane Doe, John Smith",2022-03-15,2023-08-01,2024-01-15,,https://patents.google.com/patent/US20240101234A1/en,
-"""
-
-[[checks]]
-name = "skill_loaded"
-command = { command = "skill-loaded", skill = "investigating-database" }
-
-[[checks]]
-name = "investigating_database_invoked"
-command = { command = "skill-invoked", skill = "investigating-database" }
-
-[[checks]]
-name = "database_created"
-command = { command = "workspace-file", path = "patents.db" }
-
-[[checks]]
-name = "all_csvs_imported"
-command = { command = "db-query", db = "", expected = "", query = "SELECT COUNT(*) FROM target_patents;" }
diff --git a/tests/investigating-database/functional-init-db.toml b/tests/investigating-database/functional-init-db.toml
deleted file mode 100644
index 12ccf47..0000000
--- a/tests/investigating-database/functional-init-db.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-# Test Case: Investigating Database - Initialize Database
-
-name = "functional-init-db"
-description = "Verify investigating-database can initialize the patent database"
-timeout = 60 # seconds
-
-test_prompt = """
-Initialize the patent investigation database.
-"""
-
-[[checks]]
-name = "skill_loaded"
-command = { command = "skill-loaded", skill = "investigating-database" }
-
-[[checks]]
-name = "investigating_database_invoked"
-command = { command = "skill-invoked", skill = "investigating-database" }
-
-[[checks]]
-name = "database_created"
-command = { command = "workspace-file", path = "patents.db" }
diff --git a/tests/investigating-database/functional-integration.toml b/tests/investigating-database/functional-integration.toml
deleted file mode 100644
index 65b6d08..0000000
--- a/tests/investigating-database/functional-integration.toml
+++ /dev/null
@@ -1,51 +0,0 @@
-# Test Case: Investigating Database - Integration Workflow
-
-name = "functional-integration"
-description = "Verify complete workflow: init, import, retrieve, record, and statistics"
-timeout = 300 # seconds
-
-test_prompt = """
-I have patent data in CSV format. Initialize the database, import the CSV, get a patent ID by row number, record a screening result, and show me the statistics.
-"""
-
-[[setup]]
-path = "1-targeting/csv/patents.csv"
-content = """
-id,family_id,title,abstract_text,publication_date,country,assignee,filing_date,grant_date,citation_count,claim_count
-US1234567A,US1234567,LLM-based Chatbot System,A multi-turn chatbot system using large language models.,2023-01-15,US,TechCorp Inc.,2022-06-01,2024-01-15,5,20
-US7654321A,US7654321,Vector Database Integration,Method for integrating vector databases with retrieval systems.,2023-03-20,US,DataFlow LLC,2022-09-15,,3,15
-US9999999A,US9999999,Conversation Context Management,System for managing context in multi-turn conversations.,2023-06-10,US,ChatAI Solutions,2022-12-01,,8,25
-US1111111A,US1111111,Machine Learning Model Training,Training method for neural network models.,2023-08-20,US,MLTech Corp.,2023-01-15,,12,18
-"""
-
-[[checks]]
-name = "skill_loaded"
-command = { command = "skill-loaded", skill = "investigating-database" }
-
-[[checks]]
-name = "investigating_database_invoked"
-command = { command = "skill-invoked", skill = "investigating-database" }
-
-[[checks]]
-name = "database_initialized"
-command = { command = "db-query", db = "patents.db", query = "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name IN ('target_patents', 'screened_patents');", expected = "2" }
-
-[[checks]]
-name = "csv_imported"
-command = { command = "db-query", db = "patents.db", query = "SELECT COUNT(*) FROM target_patents;", expected = "4" }
-
-[[checks]]
-name = "patent_id_retrieved"
-command = { command = "tool-use", tool = "Bash", param = "command", value = "get-patent-id.sh" }
-
-[[checks]]
-name = "screening_recorded"
-command = { command = "tool-use", tool = "Bash", param = "command", value = "record-screening.sh" }
-
-[[checks]]
-name = "statistics_retrieved"
-command = { command = "tool-use", tool = "Bash", param = "command", value = "get-statistics.sh" }
-
-[[checks]]
-name = "workflow_completed_successfully"
-command = { command = "db-query", db = "patents.db", query = "SELECT COUNT(*) FROM screened_patents;", expected = "1" }
diff --git a/tests/investigating-database/functional-record-screening.toml b/tests/investigating-database/functional-record-screening.toml
deleted file mode 100644
index 3a58cce..0000000
--- a/tests/investigating-database/functional-record-screening.toml
+++ /dev/null
@@ -1,67 +0,0 @@
-# Test Case: Investigating Database - Record Screening
-
-name = "functional-record-screening"
-description = "Verify investigating-database can record screening results"
-timeout = 120 # seconds
-
-test_prompt = """
-Record a screening result for patent US1234567A with judgment 'relevant' and reason 'Core technology for LLM systems'.
-"""
-
-[[setup]]
-path = "patents.db"
-content = """
-#!/usr/bin/env bash
-# Setup script to create test database
-sqlite3 patents.db <<EOF
-PRAGMA foreign_keys = ON;
-CREATE TABLE target_patents (
-    id TEXT PRIMARY KEY NOT NULL,
-    family_id TEXT,
-    title TEXT,
-    abstract_text TEXT,
-    publication_date TEXT,
-    country TEXT,
-    assignee TEXT,
-    filing_date TEXT,
-    grant_date TEXT,
-    citation_count INTEGER,
-    claim_count INTEGER,
-    extra_fields TEXT,
-    created_at TEXT DEFAULT (datetime('now')),
-    updated_at TEXT DEFAULT (datetime('now'))
-);
-CREATE TABLE screened_patents (
-    id TEXT PRIMARY KEY NOT NULL,
-    title TEXT,
-    legal_status TEXT,
-    judgment TEXT NOT NULL,
-    reason TEXT,
-    abstract_text TEXT,
-    screened_at TEXT DEFAULT (datetime('now')),
-    updated_at TEXT DEFAULT (datetime('now'))
-);
-INSERT INTO target_patents (id, family_id, title, abstract_text, publication_date, country, assignee) VALUES
-('US1234567A', 'US1234567', 'LLM-based Chatbot System', 'A multi-turn chatbot system using LLM.', '2023-01-15', 'US', 'TechCorp');
-EOF
-"""
-
-[[checks]]
-name = "skill_loaded"
-command = { command = "skill-loaded", skill = "investigating-database" }
-
-[[checks]]
-name = "investigating_database_invoked"
-command = { command = "skill-invoked", skill = "investigating-database" }
-
-[[checks]]
-name = "record_screening_script_executed"
-command = { command = "tool-use", tool = "Bash", param = "command", value = "record-screening.sh" }
-
-[[checks]]
-name = "screening_result_recorded"
-command = { command = "db-query", db = "patents.db", query = "SELECT judgment FROM screened_patents WHERE id = 'US1234567A';", expected = "relevant" }
-
-[[checks]]
-name = "reason_recorded"
-command = { command = "db-query", db = "patents.db", query = "SELECT reason FROM screened_patents WHERE id = 'US1234567A';", expected = "Core" }
diff --git a/tests/investigating-database/triggering.toml b/tests/investigating-database/triggering.toml
deleted file mode 100644
index 0cf2057..0000000
--- a/tests/investigating-database/triggering.toml
+++ /dev/null
@@ -1,21 +0,0 @@
-# Test Case: Investigating Database - Triggering
-
-name = "triggering"
-description = "Verify investigating-database skill is triggered when appropriate"
-timeout = 60 # seconds
-
-test_prompt = """
-I need to check the screening progress statistics.
-"""
-
-[[checks]]
-name = "skill_loaded"
-command = { command = "skill-loaded", skill = "investigating-database" }
-
-[[checks]]
-name = "investigating_database_invoked"
-command = { command = "skill-invoked", skill = "investigating-database" }
-
-[[checks]]
-name = "statistics_requested"
-command = { command = "message-contains", text = "statistics" }

From 9c4f45496424fc7bbda61f8c26ab3dd7bde3a2a3 Mon Sep 17 00:00:00 2001
From: Claude Code <noreply@github.com>
Date: Mon, 30 Mar 2026 02:50:36 +0000
Subject: [PATCH 2/2] fix: remove constitution-reminding checks from test cases

- Remove constitution_loaded checks from concept-interviewing and targeting tests
- Skill no longer exists in current codebase

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 tests/concept-interviewing/functional-no-spec.toml | 4 ----
 tests/targeting/functional-with-spec.toml          | 4 ----
 2 files changed, 8 deletions(-)

diff --git a/tests/concept-interviewing/functional-no-spec.toml b/tests/concept-interviewing/functional-no-spec.toml
index 1ce96ba..3990b7f 100644
--- a/tests/concept-interviewing/functional-no-spec.toml
+++ b/tests/concept-interviewing/functional-no-spec.toml
@@ -38,10 +38,6 @@ command = { command = "skill-invoked", skill = "concept-interviewing" }
 name = "patent_assignee_check_invoked"
 command = { command = "skill-invoked", skill = "patent-assignee-check" }
 
-[[checks]]
-name = "constitution_loaded"
-command = { command = "skill-invoked", skill = "constitution-reminding" }
-
 [[checks]]
 name = "references_instructions_read"
 command = { command = "tool-use", tool = "Read", param = "file_path", value = "concept-interview.*references/instructions.md" }
diff --git a/tests/targeting/functional-with-spec.toml b/tests/targeting/functional-with-spec.toml
index 0845749..0346e4e 100644
--- a/tests/targeting/functional-with-spec.toml
+++ b/tests/targeting/functional-with-spec.toml
@@ -53,10 +53,6 @@ command = { command = "skill-loaded", skill = "targeting" }
 name = "targeting_invoked"
 command = { command = "skill-invoked", skill = "targeting" }
 
-[[checks]]
-name = "constitution_loaded"
-command = { command = "skill-invoked", skill = "constitution-reminding" }
-
 [[checks]]
 name = "specification_read"
 command = { command = "tool-use", tool = "Read", param = "file_path", value = "0-specifications/specification.md" }