From e73fb5c6ae71bb8591c16c9c8f32a9da8a32b1c9 Mon Sep 17 00:00:00 2001 From: Jiale Cheng <92421404+chengjl19@users.noreply.github.com> Date: Sun, 8 Mar 2026 15:12:46 +0800 Subject: [PATCH] Update shared memory and optimize paper writing workflow --- .agents/skills/memory-manager/SKILL.md | 32 ++- .../skills/memory-manager/agents/openai.yaml | 2 +- .../references/memory-layout.md | 1 + .../scripts/shared_memory_retrieval.py | 253 ++++++++++++++++++ .agents/skills/paper-writing/SKILL.md | 22 +- .agents/skills/project-context/SKILL.md | 14 +- .../skills/project-context/agents/openai.yaml | 2 +- .../references/context-schema.md | 38 ++- .../scripts/project_context.py | 57 +++- .agents/skills/research-workflow/SKILL.md | 39 +-- .agents/skills/run-governor/SKILL.md | 2 + .../run-governor/references/run-layout.md | 1 + AGENTS.md | 20 +- 13 files changed, 448 insertions(+), 35 deletions(-) create mode 100644 .agents/skills/memory-manager/scripts/shared_memory_retrieval.py diff --git a/.agents/skills/memory-manager/SKILL.md b/.agents/skills/memory-manager/SKILL.md index b9a3d00..5656232 100644 --- a/.agents/skills/memory-manager/SKILL.md +++ b/.agents/skills/memory-manager/SKILL.md @@ -55,6 +55,22 @@ Retrieve early when useful, but do not block execution: 4. Flag stale entries with low confidence. 5. If retrieval is low-yield and task is time-sensitive, continue with search/deep research directly. +## Shared Retrieval Policy + +Treat shared memory as an optional read-only source, not as project-local memory. + +1. Query project-local memory first. +2. If local retrieval is low-yield, query the user-configured shared repo from `project-context`. +3. Resolve the local shared repo path from `memory.shared_repo.path`; if missing, ask the user where the repo should live and persist it through `project-context`. +4. Use read-only retrieval against the local shared repo checkout; do not mirror shared records into `.project_local` by default. +5. 
Avoid syncing the shared repo on every run or stage. +6. Sync only when: + - the shared repo checkout is missing and the user approved clone/bootstrap + - a retrieval gap remains and the local shared repo is suspected stale + - immediately before exporting shared memory +7. Treat hits as `external/shared` evidence until they are validated in the current project. +8. Do not rewrite shared records into local `episode/procedure/insight` as if they were observed locally unless the current run reproduced them. + ## Writeback Policy Write conservatively and continuously: @@ -173,16 +189,30 @@ Treat shared export as post-task work: 2. Export only verified/high-value records. 3. Never export noisy `working` state. 4. Require `human-checkpoint` before publishing. +5. Sync the shared repo before export so dedupe/conflict checks run against the latest branch tip. ## Shared Repository Contract When exporting: -1. Target `https://github.com/recursive-forge/open-research-memory`. +1. Target `https://github.com/TenureAI/open-research-memory`. 2. Use pull-based flow: local export -> `codex/*` branch -> PR -> review -> merge. 3. Never push directly to `main`. 4. Enforce schema and required sections. 
+## Shared Retrieval Helper + +Use the helper script for lightweight read-only search of a local shared repo checkout: + +```bash +python3 .agents/skills/memory-manager/scripts/shared_memory_retrieval.py \ + --repo-root /path/to/open-research-memory \ + --query "cuda out of memory" \ + --type procedure \ + --task-type debug \ + --limit 5 +``` + ## Required Operation Output For each memory operation, emit: diff --git a/.agents/skills/memory-manager/agents/openai.yaml b/.agents/skills/memory-manager/agents/openai.yaml index b9613a9..efad7ac 100644 --- a/.agents/skills/memory-manager/agents/openai.yaml +++ b/.agents/skills/memory-manager/agents/openai.yaml @@ -1,4 +1,4 @@ interface: display_name: "Memory Manager" short_description: "Maintain working todo memory and reusable research records." - default_prompt: "Use memory manager to keep working state fresh, track active/done/blocked todos, and write evidence-linked episode/procedure/insight records." + default_prompt: "Use memory manager to keep working state fresh, track active/done/blocked todos, write evidence-linked episode/procedure/insight records, and use the shared memory repo only as a read-only retrieval source unless an approved export is happening." diff --git a/.agents/skills/memory-manager/references/memory-layout.md b/.agents/skills/memory-manager/references/memory-layout.md index 63ff5ba..7d18c52 100644 --- a/.agents/skills/memory-manager/references/memory-layout.md +++ b/.agents/skills/memory-manager/references/memory-layout.md @@ -33,3 +33,4 @@ Notes: 1. Keep working state and reports run-scoped. 2. Keep long-term memory in `.project_local//memory/` plus index metadata in `index.db`. 3. Treat old `memory/` and `.agent/memory.db` layouts as legacy and migrate when touched. +4. Shared memory repos live outside `.project_local` and are treated as read-only retrieval sources, not as run-scoped state. 
#!/usr/bin/env python3
"""Read-only shared memory retrieval helper.

Search a local open-research-memory clone without importing it into project memory.

Records are markdown files with a small ``---``-delimited frontmatter block,
stored under ``episodes/``, ``procedures/`` and ``insights/`` in the shared
repo.  The frontmatter is parsed with a deliberately tiny, dependency-free
reader (flat ``key: value`` pairs, inline ``[a, b]`` lists and ``- item``
block lists); it is not a full YAML parser.
"""

from __future__ import annotations

import argparse
import ast
import json
import re
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Tuple

# Top-level repo folders mapped to the record type stored in each of them.
ROOT_TYPES = {
    "episodes": "episode",
    "procedures": "procedure",
    "insights": "insight",
}


@dataclass
class Record:
    """One markdown record loaded from the shared repo."""

    path: Path  # location of the markdown file (under the repo root)
    metadata: Dict[str, Any]  # parsed frontmatter key/value pairs
    body: str  # markdown text that follows the frontmatter


def parse_frontmatter(text: str) -> Tuple[Dict[str, Any], str]:
    """Split *text* into ``(metadata, body)``.

    Returns ``({}, text)`` when the text has no opening ``---`` line or the
    block is never closed.  Values go through :func:`parse_scalar`.  A key
    with an empty value that is followed by ``- item`` lines is collected
    into a list, so block-style ``tags:`` lists are not silently lost.
    """
    # A UTF-8 BOM would otherwise hide the opening delimiter.
    if text.startswith("\ufeff"):
        text = text[1:]

    lines = text.splitlines()
    if not lines or lines[0].rstrip() != "---":
        return {}, text

    end_idx = None
    for idx in range(1, len(lines)):
        if lines[idx].strip() == "---":
            end_idx = idx
            break
    if end_idx is None:
        return {}, text

    metadata: Dict[str, Any] = {}
    # Key whose value was empty and may therefore be followed by "- item" lines.
    open_list_key = None
    for line in lines[1:end_idx]:
        stripped = line.strip()
        if not stripped:
            continue
        if open_list_key is not None and stripped.startswith("- "):
            items = metadata.get(open_list_key)
            if not isinstance(items, list):
                items = []
                metadata[open_list_key] = items
            items.append(parse_scalar(stripped[2:].strip()))
            continue
        if ":" not in line:
            continue
        key, raw_value = line.split(":", 1)
        key = key.strip()
        value = parse_scalar(raw_value.strip())
        metadata[key] = value
        open_list_key = key if isinstance(value, str) and value == "" else None

    body = "\n".join(lines[end_idx + 1 :]).strip()
    return metadata, body


def parse_scalar(value: str) -> Any:
    """Convert one frontmatter value string into a Python value.

    Handles quoted strings, bracketed lists, ``true``/``false`` and numbers;
    anything else is returned unchanged as a string.
    """
    if not value:
        return ""
    # Require both quotes: a lone quote character is data, not an empty string.
    if len(value) >= 2 and value[0] in {'"', "'"} and value[-1] == value[0]:
        return value[1:-1]
    if value.startswith("[") and value.endswith("]"):
        try:
            parsed = ast.literal_eval(value)
        except (SyntaxError, ValueError, TypeError):
            parsed = None
        if isinstance(parsed, list):
            return parsed
        # YAML flow lists usually hold bare words ([debug, cuda]), which are
        # not Python literals; fall back to a plain comma split.
        pieces = (piece.strip().strip("\"'") for piece in value[1:-1].split(","))
        return [piece for piece in pieces if piece]
    lowered = value.lower()
    if lowered == "true":
        return True
    if lowered == "false":
        return False
    try:
        if "." in value:
            return float(value)
        return int(value)
    except ValueError:
        return value


def _normalize_tags(raw: Any) -> List[str]:
    """Coerce a frontmatter ``tags`` value into a list of non-empty strings."""
    if raw is None or (isinstance(raw, str) and not raw.strip()):
        return []
    if isinstance(raw, str):
        items: Iterable[Any] = raw.split(",")
    elif isinstance(raw, (list, tuple, set, frozenset)):
        items = raw
    else:
        items = [raw]
    cleaned = (str(item).strip() for item in items)
    return [tag for tag in cleaned if tag]


def load_records(repo_root: Path) -> List[Record]:
    """Load every well-formed record under the known type folders.

    Files without frontmatter, or whose declared ``type`` disagrees with the
    folder they live in, are skipped.  Unreadable or non-UTF-8 files are
    reported on stderr and skipped so one bad file cannot abort the search.
    """
    records: List[Record] = []
    for folder, expected_type in ROOT_TYPES.items():
        root = repo_root / folder
        if not root.is_dir():
            continue
        for path in sorted(root.rglob("*.md")):
            try:
                text = path.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError) as exc:
                print(f"Skipping unreadable record {path}: {exc}", file=sys.stderr)
                continue
            metadata, body = parse_frontmatter(text)
            if not metadata:
                continue
            declared_type = str(metadata.get("type", "")).strip()
            if declared_type and declared_type != expected_type:
                continue
            metadata.setdefault("type", expected_type)
            # Downstream code iterates and joins tags, so store a list of strings.
            metadata["tags"] = _normalize_tags(metadata.get("tags"))
            records.append(Record(path=path, metadata=metadata, body=body))
    return records


def normalize_terms(text: str) -> List[str]:
    """Lower-case *text* and split it into search terms of two or more characters."""
    return [term for term in re.split(r"[^a-z0-9_+-]+", text.lower()) if len(term) >= 2]


def matches_filters(record: Record, args: argparse.Namespace) -> bool:
    """Return True when *record* satisfies every filter set on *args*.

    ``type``, ``status`` and ``task_type`` are exact matches,
    ``error_signature`` is a case-insensitive substring match, and every
    requested ``tag`` must be present (case-insensitive).
    """
    metadata = record.metadata

    if args.type and metadata.get("type") != args.type:
        return False
    if args.status and str(metadata.get("status", "")).strip() != args.status:
        return False
    if args.task_type and str(metadata.get("task_type", "")).strip() != args.task_type:
        return False
    if args.error_signature:
        error_signature = str(metadata.get("error_signature", "")).lower()
        if args.error_signature.lower() not in error_signature:
            return False
    if args.tag:
        tags = {tag.lower() for tag in _normalize_tags(metadata.get("tags"))}
        requested = {tag.lower() for tag in args.tag}
        if not requested.issubset(tags):
            return False
    return True


def score_record(record: Record, query_terms: Iterable[str], args: argparse.Namespace) -> Tuple[int, List[str]]:
    """Score *record* against the query and return ``(score, matched_terms)``.

    Per term: title +4, tags +3, error_signature +3, project/task_type +2,
    body +1.  Filter bonuses are added on top.  With no query terms every
    record scores 1, so a filter-only search lists all filtered records.
    With query terms, a record matching none of them scores 0; otherwise the
    filter bonuses alone would keep irrelevant records in the results.
    """
    terms = list(query_terms)
    if not terms:
        return 1, []

    metadata = record.metadata
    tag_list = [tag.lower() for tag in _normalize_tags(metadata.get("tags"))]
    title = str(metadata.get("title", "")).lower()
    tags_text = " ".join(tag_list)
    error_signature = str(metadata.get("error_signature", "")).lower()
    project = str(metadata.get("project", "")).lower()
    task_type = str(metadata.get("task_type", "")).lower()
    body = record.body.lower()

    score = 0
    matched: List[str] = []
    for term in terms:
        term_score = 0
        if term in title:
            term_score += 4
        if term in tags_text:
            term_score += 3
        if term in error_signature:
            term_score += 3
        if term in project or term in task_type:
            term_score += 2
        if term in body:
            term_score += 1
        if term_score:
            matched.append(term)
            score += term_score

    if not matched:
        return 0, []

    if args.type and metadata.get("type") == args.type:
        score += 2
    if args.task_type and metadata.get("task_type") == args.task_type:
        score += 2
    if args.status and metadata.get("status") == args.status:
        score += 1
    if args.tag:
        tag_set = set(tag_list)
        score += sum(1 for tag in args.tag if tag.lower() in tag_set)

    return score, sorted(set(matched))


def format_result(record: Record, score: int, matched_terms: List[str], repo_root: Path) -> Dict[str, Any]:
    """Build the JSON-serialisable result dict for one ranked record."""
    metadata = record.metadata
    # Collapse all whitespace so the preview fits on one line.
    body_preview = " ".join(record.body.split())
    if len(body_preview) > 220:
        body_preview = body_preview[:217] + "..."
    try:
        rel_path = str(record.path.relative_to(repo_root))
    except ValueError:
        # Record does not live under repo_root; report the path as given.
        rel_path = str(record.path)
    return {
        "id": metadata.get("id", ""),
        "title": metadata.get("title", ""),
        "type": metadata.get("type", ""),
        "status": metadata.get("status", ""),
        "task_type": metadata.get("task_type", ""),
        "project": metadata.get("project", ""),
        "tags": _normalize_tags(metadata.get("tags")),
        "error_signature": metadata.get("error_signature", ""),
        "score": score,
        "matched_terms": matched_terms,
        "path": rel_path,
        "preview": body_preview,
        "source": "shared-repo-readonly",
    }


def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the retrieval helper."""
    parser = argparse.ArgumentParser(description="Search a local shared memory repo")
    parser.add_argument("--repo-root", required=True, help="Path to local open-research-memory clone")
    parser.add_argument("--query", default="", help="Free-text query")
    parser.add_argument("--type", choices=sorted(set(ROOT_TYPES.values())), help="Record type filter")
    parser.add_argument("--status", default="", help="Status filter")
    parser.add_argument("--task-type", default="", help="Task type filter")
    parser.add_argument("--tag", action="append", default=[], help="Required tag filter; repeatable")
    parser.add_argument("--error-signature", default="", help="Substring match on error_signature")
    parser.add_argument("--limit", type=int, default=5, help="Maximum number of matches to return")
    parser.add_argument("--json", action="store_true", help="Emit JSON output")
    return parser


def main() -> int:
    """Run the search and print results; return the process exit code.

    Returns 2 when the repo path is missing or is not a git checkout,
    otherwise 0 (including when there are no matches).
    """
    args = build_parser().parse_args()
    repo_root = Path(args.repo_root).resolve()
    if not repo_root.exists():
        print(f"Shared repo path does not exist: {repo_root}", file=sys.stderr)
        return 2
    if not (repo_root / ".git").exists():
        print(f"Shared repo path is not a git checkout: {repo_root}", file=sys.stderr)
        return 2

    records = [record for record in load_records(repo_root) if matches_filters(record, args)]
    query_terms = normalize_terms(args.query)

    ranked: List[Tuple[int, List[str], Record]] = []
    for record in records:
        score, matched_terms = score_record(record, query_terms, args)
        if score <= 0:
            continue
        ranked.append((score, matched_terms, record))

    # Stable multi-pass sort, least significant key first:
    # title (asc), then updated_at (desc), then score (desc).
    ranked.sort(key=lambda item: str(item[2].metadata.get("title", "")).lower())
    ranked.sort(key=lambda item: str(item[2].metadata.get("updated_at", "")), reverse=True)
    ranked.sort(key=lambda item: item[0], reverse=True)
    results = [
        format_result(record, score, matched_terms, repo_root)
        for score, matched_terms, record in ranked[: max(args.limit, 1)]
    ]

    if args.json:
        print(json.dumps({"repo_root": str(repo_root), "results": results}, ensure_ascii=True, indent=2))
        return 0

    print(f"SharedRepo: {repo_root}")
    print(f"Query: {args.query or ''}")
    print(f"Results: {len(results)}")
    for idx, result in enumerate(results, start=1):
        print(f"{idx}. [{result['type']}/{result['status']}] {result['title']} score={result['score']}")
        print(f"   path={result['path']}")
        if result["task_type"]:
            print(f"   task_type={result['task_type']}")
        if result["tags"]:
            print(f"   tags={','.join(result['tags'])}")
        if result["matched_terms"]:
            print(f"   matched_terms={','.join(result['matched_terms'])}")
        if result["preview"]:
            print(f"   preview={result['preview']}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
Use only when the user explicitly asks for a paper-writing deliverable, such as drafting or revising sections like abstract, introduction, related work, method, figures, experiments, or rebuttal text. Starts from a top-level workflow and loads only the section-specific reference needed. Includes arXiv source-fetch workflow for mining LaTeX organization from exemplar papers. --- # Paper Writing @@ -16,6 +16,26 @@ Provide a section-aware paper-writing workflow that is: Do not load all writing guidance at once. Start here, then open only the reference file needed for the current section. +## Activation Gate + +Activate this skill only when the user explicitly asks for paper-writing output. + +Use this skill for: + +1. drafting a paper or a named paper section +2. revising existing paper prose +3. writing rebuttal text +4. turning existing claims, evidence, and results into paper-ready prose + +Do not use this skill for: + +1. topic scoping +2. literature investigation without a writing deliverable +3. feasibility analysis +4. experiment design +5. experiment execution +6. deciding whether a project is paper-worthy + ## Default Workflow 1. identify paper type and draft stage diff --git a/.agents/skills/project-context/SKILL.md b/.agents/skills/project-context/SKILL.md index e4f6079..f96581b 100644 --- a/.agents/skills/project-context/SKILL.md +++ b/.agents/skills/project-context/SKILL.md @@ -13,6 +13,7 @@ Scope boundary: 1. manages runtime context and secrets only 2. does not store heavy experiment artifacts (checkpoints, dataset caches, large logs) +3. stores per-project shared-memory source configuration, but not shared-memory records ## Trigger @@ -22,6 +23,7 @@ Use this skill when any of these are needed: 2. preflight checks before experiment/report/eval execution 3. missing runtime fields during task execution 4. per-run context snapshot for reproducibility +5. 
first use of shared-memory retrieval or export for a project ## Private Directory Contract @@ -50,6 +52,7 @@ Do not ask for all fields at once. 5. ask only for missing required fields for the current task 6. during execution, allow blocker-only delta prompts (e.g. missing API URL/key) 7. persist immediately for reuse +8. when shared-memory retrieval/export is needed, ask the user where the local shared-memory repo should live if `memory.shared_repo.path` is missing If new missing fields appear later, run preflight again and collect only deltas. @@ -69,6 +72,7 @@ Recommended order in research execution: 3. `project-context` preflight resolves runtime context and consumes remote reuse decision 4. `experiment-execution` runs with resolved context 5. `project-context` snapshot writes run-scoped frozen context +6. shared-memory retrieval/export reuses `memory.shared_repo.*` from `context.json` ## Script @@ -86,6 +90,14 @@ python3 .agents/skills/project-context/scripts/project_context.py preflight \ --run-id 20260303_130000-my-sft-project ``` +```bash +python3 .agents/skills/project-context/scripts/project_context.py preflight \ + --project-root . \ + --project-slug my-sft-project \ + --task-type generic \ + --require memory.shared_repo.path +``` + ```bash python3 .agents/skills/project-context/scripts/project_context.py show \ --project-root . \ @@ -105,5 +117,5 @@ For each operation, emit: 1. `Project`: root and slug 2. `Action`: preflight/show/snapshot 3. `State`: loaded + newly collected fields -4. `Paths`: local/context/secrets/snapshot/runtime paths +4. `Paths`: local/context/secrets/snapshot/runtime/shared-memory paths 5. 
`Missing`: unresolved required fields (if any) diff --git a/.agents/skills/project-context/agents/openai.yaml b/.agents/skills/project-context/agents/openai.yaml index 6313277..cc36374 100644 --- a/.agents/skills/project-context/agents/openai.yaml +++ b/.agents/skills/project-context/agents/openai.yaml @@ -2,4 +2,4 @@ version: 1 agent: display_name: "Project Context" short_description: "Collect and persist per-project runtime context with private local storage and incremental preflight prompts." - default_prompt: "Use project-context to manage local/remote runtime context in .project_local/, reuse stored remote profiles when appropriate, and snapshot run configuration safely." + default_prompt: "Use project-context to manage local/remote runtime context in .project_local/, reuse stored remote profiles when appropriate, persist shared-memory repo configuration such as memory.shared_repo.path, and snapshot run configuration safely." diff --git a/.agents/skills/project-context/references/context-schema.md b/.agents/skills/project-context/references/context-schema.md index 5c1ef73..218477e 100644 --- a/.agents/skills/project-context/references/context-schema.md +++ b/.agents/skills/project-context/references/context-schema.md @@ -1,4 +1,4 @@ -# Project Context Schema (V2) +# Project Context Schema (V3) Context is split into two JSON files under: @@ -13,7 +13,7 @@ This schema is for runtime context only. Heavy runtime artifacts stay under: ```json { - "schema_version": 2, + "schema_version": 3, "project": { "slug": "vision-sft-v2", "name": "vision-sft-v2" @@ -38,6 +38,17 @@ This schema is for runtime context only. 
Heavy runtime artifacts stay under: "queue": "train", "gpu_type": "H100" }, + "memory": { + "shared_repo": { + "enabled": true, + "path": "/Users/alice/work/open-research-memory", + "url": "https://github.com/TenureAI/open-research-memory", + "branch": "main", + "mode": "readonly-source", + "sync_policy": "minimal", + "auto_clone_if_missing": false + } + }, "tracking": { "run_notes": "" }, @@ -49,7 +60,7 @@ This schema is for runtime context only. Heavy runtime artifacts stay under: ```json { - "schema_version": 2, + "schema_version": 3, "api": { "endpoint": "https://internal-gateway.example.com", "key": "" @@ -110,7 +121,7 @@ Snapshot payload: ```json { - "schema_version": 2, + "schema_version": 3, "run_id": "20260304_090000-vision-sft-v2", "task_type": "sft", "context": {"...": "..."}, @@ -119,6 +130,25 @@ Snapshot payload: } ``` +## Shared Memory Source Fields + +Store shared-memory source configuration in `context.json` so the user is not re-prompted every run: + +- `memory.shared_repo.enabled` +- `memory.shared_repo.path` +- `memory.shared_repo.url` +- `memory.shared_repo.branch` +- `memory.shared_repo.mode` +- `memory.shared_repo.sync_policy` +- `memory.shared_repo.auto_clone_if_missing` + +Recommended semantics: + +- `mode=readonly-source` +- `sync_policy=minimal` +- ask the user for `memory.shared_repo.path` when shared retrieval/export is first needed +- do not hardcode machine-specific paths into prompts or skills + ## Legacy Compatibility Legacy field: diff --git a/.agents/skills/project-context/scripts/project_context.py b/.agents/skills/project-context/scripts/project_context.py index 2e8bb34..5bc0da7 100755 --- a/.agents/skills/project-context/scripts/project_context.py +++ b/.agents/skills/project-context/scripts/project_context.py @@ -22,7 +22,12 @@ from pathlib import Path from typing import Any, Dict, Iterable, List, Tuple -SCHEMA_VERSION = 2 +SCHEMA_VERSION = 3 + +DEFAULT_SHARED_MEMORY_URL = "https://github.com/TenureAI/open-research-memory" 
+DEFAULT_SHARED_MEMORY_BRANCH = "main" +DEFAULT_SHARED_MEMORY_MODE = "readonly-source" +DEFAULT_SHARED_MEMORY_SYNC_POLICY = "minimal" TASK_REQUIRED_FIELDS = { "generic": ["execution.execution_target", "execution.local_project_root"], @@ -62,6 +67,9 @@ "execution.runtime_output_root": "Runtime output root path", "execution.runtime_host": "Runtime host", "execution.workspace_root": "Workspace root path", + "memory.shared_repo.path": "Local shared memory repo path", + "memory.shared_repo.url": "Shared memory repo URL", + "memory.shared_repo.branch": "Shared memory branch", "cluster.name": "Cluster name", "cluster.scheduler": "Scheduler (e.g. slurm/k8s/ray/local)", "cluster.queue": "Cluster queue/partition", @@ -245,6 +253,41 @@ def set_if_missing(key: str, value: str) -> None: nested_set(context, "execution.runtime_output_root", str(Path(str(runtime_root)) / "runs")) updated.append("execution.runtime_output_root") + def normalize_bool(key: str, default: bool) -> None: + value = nested_get(context, key) + if value in (None, ""): + nested_set(context, key, default) + updated.append(key) + return + if isinstance(value, bool): + return + if isinstance(value, str): + lowered = value.strip().lower() + if lowered in {"1", "true", "yes", "y", "on"}: + nested_set(context, key, True) + updated.append(key) + elif lowered in {"0", "false", "no", "n", "off"}: + nested_set(context, key, False) + updated.append(key) + + set_if_missing("memory.shared_repo.url", DEFAULT_SHARED_MEMORY_URL) + set_if_missing("memory.shared_repo.branch", DEFAULT_SHARED_MEMORY_BRANCH) + set_if_missing("memory.shared_repo.mode", DEFAULT_SHARED_MEMORY_MODE) + set_if_missing("memory.shared_repo.sync_policy", DEFAULT_SHARED_MEMORY_SYNC_POLICY) + normalize_bool("memory.shared_repo.enabled", False) + normalize_bool("memory.shared_repo.auto_clone_if_missing", False) + + shared_path = nested_get(context, "memory.shared_repo.path") + if isinstance(shared_path, str): + normalized_path = shared_path.strip() + if 
normalized_path != shared_path: + nested_set(context, "memory.shared_repo.path", normalized_path) + updated.append("memory.shared_repo.path") + shared_path = normalized_path + if shared_path not in (None, "") and nested_get(context, "memory.shared_repo.enabled") is False: + nested_set(context, "memory.shared_repo.enabled", True) + updated.append("memory.shared_repo.enabled") + return updated @@ -263,6 +306,16 @@ def detect_context(project_slug: str, project_root: Path) -> Dict[str, Any]: "python_path": sys.executable, "local_project_root": str(project_root.resolve()), }, + "memory": { + "shared_repo": { + "enabled": False, + "url": DEFAULT_SHARED_MEMORY_URL, + "branch": DEFAULT_SHARED_MEMORY_BRANCH, + "mode": DEFAULT_SHARED_MEMORY_MODE, + "sync_policy": DEFAULT_SHARED_MEMORY_SYNC_POLICY, + "auto_clone_if_missing": False, + } + }, } conda_env = os.environ.get("CONDA_DEFAULT_ENV", "") @@ -466,6 +519,8 @@ def preflight(args: argparse.Namespace) -> int: "local_project_root": str(nested_get(context, "execution.local_project_root") or ""), "runtime_project_root": str(nested_get(context, "execution.runtime_project_root") or ""), "runtime_output_root": str(nested_get(context, "execution.runtime_output_root") or ""), + "shared_memory_repo_path": str(nested_get(context, "memory.shared_repo.path") or ""), + "shared_memory_repo_url": str(nested_get(context, "memory.shared_repo.url") or ""), }, ensure_ascii=True, ) diff --git a/.agents/skills/research-workflow/SKILL.md b/.agents/skills/research-workflow/SKILL.md index d6943eb..6fff2f5 100644 --- a/.agents/skills/research-workflow/SKILL.md +++ b/.agents/skills/research-workflow/SKILL.md @@ -15,17 +15,18 @@ For non-trivial tasks, run this order: 1. Initialize run policy with `run-governor`. 2. Resolve runtime context with `project-context` before experiment/report/eval execution. -3. Understand user objective and current code/evidence state. -4. Clarify ambiguous requirements through `human-checkpoint`. -5. 
Complete intake checkpoint before planning or decomposition. -6. Run one `memory-manager` bootstrap (`retrieve/init-working`). -7. Run deep research when needed. -8. Build an execution plan (use `research-plan` for planning-heavy requests). -9. Confirm plan as required by mode. -10. Execute with trigger-based working-memory updates. -11. Replan on major issues when needed. -12. Emit stage reports and maintain report index. -13. Close task, write memory close-out, then optionally publish shared memory. +3. Resolve shared-memory source config from `project-context` when shared retrieval or export may be needed. +4. Understand user objective and current code/evidence state. +5. Clarify ambiguous requirements through `human-checkpoint`. +6. Complete intake checkpoint before planning or decomposition. +7. Run one `memory-manager` bootstrap (`retrieve/init-working`). +8. Run deep research when needed. +9. Build an execution plan (use `research-plan` for planning-heavy requests). +10. Confirm plan as required by mode. +11. Execute with trigger-based working-memory updates. +12. Replan on major issues when needed. +13. Emit stage reports and maintain report index. +14. Close task, write memory close-out, then optionally publish shared memory. ## Mode-Aware Interaction Policy @@ -97,16 +98,19 @@ Use these in combination: 6. Cooldown: no more than one non-forced memory operation per cycle. 7. Avoid per-command memory writes; batch observations into one delta update. 8. Use search/deep research directly when topic is time-sensitive, new, or currently blocked. -9. For open-ended research/scoping requests, run deep research before giving decomposition or roadmap recommendations. -9.1 For mid-run new research requests, run deep research re-entry before further execution. -10. For unknown errors, use this branch: +9. If project-local memory retrieval is low-yield, shared-memory retrieval may query the configured local shared repo as a read-only source. +10. 
Do not sync the shared repo on every cycle; prefer the current local checkout and sync only on explicit gap handling or before export. +11. For open-ended research/scoping requests, run deep research before giving decomposition or roadmap recommendations. +11.1 For mid-run new research requests, run deep research re-entry before further execution. +12. For unknown errors, use this branch: - local evidence triage (logs, stack trace, recent changes) + - shared-memory retrieval when reusable SOPs or prior debug cases are likely relevant - targeted search - deep research (debug-investigation) if still unresolved - minimal fix validation -11. If skipping memory due to cooldown or low-value delta, record reason in the stage report. -12. If intake information is missing, trigger `human-checkpoint` before deep research or planning. -13. If deep research was used for open-ended scoping, hand off to `research-plan` to convert findings into an execution-ready plan. Skip only if the user explicitly opts out. +13. If skipping memory due to cooldown or low-value delta, record reason in the stage report. +14. If intake information is missing, trigger `human-checkpoint` before deep research or planning. +15. If deep research was used for open-ended scoping, hand off to `research-plan` to convert findings into an execution-ready plan. Skip only if the user explicitly opts out. ## Replanning Policy @@ -135,6 +139,7 @@ Do not export shared memory during core task execution. 1. Complete the primary task first. 2. Treat shared export as a post-task phase. 3. Require `human-checkpoint` before publishing shared memory. +4. Sync the shared repo before opening the export PR. 
## Decision Policy diff --git a/.agents/skills/run-governor/SKILL.md b/.agents/skills/run-governor/SKILL.md index 78d5520..38f770e 100644 --- a/.agents/skills/run-governor/SKILL.md +++ b/.agents/skills/run-governor/SKILL.md @@ -49,6 +49,7 @@ During run initialization, decide execution target before planning launch steps: 3. ask user whether to reuse stored remote fields for this run 4. if not reused or incomplete, collect only missing remote-required fields 5. persist the final decision and resolved paths into run manifest and project-context +6. if shared-memory retrieval/export is in scope, persist resolved shared repo metadata from `project-context` into the run manifest Remote-required fields: @@ -124,6 +125,7 @@ Maintain these files in `/logs/runs//`: - runtime output root - execution target (`local|remote`) - runtime host (if remote) + - shared memory repo path/url/branch/sync policy when configured - optional additional project roots - output directory mapping 3. `working/state.yaml` diff --git a/.agents/skills/run-governor/references/run-layout.md b/.agents/skills/run-governor/references/run-layout.md index 9e11dc1..e67e311 100644 --- a/.agents/skills/run-governor/references/run-layout.md +++ b/.agents/skills/run-governor/references/run-layout.md @@ -33,3 +33,4 @@ Notes: 3. Keep run_id consistent across control, runtime output, and snapshot paths. 4. In local execution, `runtime_project_root` can equal `local_project_root`. 5. In remote execution, `runtime_project_root` should be remote and explicit. +6. `run_manifest.yaml` may also record resolved shared-memory source metadata such as repo path, URL, branch, and sync policy. diff --git a/AGENTS.md b/AGENTS.md index 4a73ecb..07dea60 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -9,13 +9,14 @@ This workspace is for AI research and development tasks (reproduction, debugging 4. Trigger `human-checkpoint` using mode-aware policy, always for major safety risks and shared-memory publication. 5. 
Use `experiment-execution` only for actual run execution. 6. Use `project-context` to collect and persist per-project private runtime context before experiments or report/eval execution. -7. Use `deep-research` for deep external investigation and evidence synthesis, including early-stage project scoping when a user wants to write a research study or paper on a topic. +7. Use `deep-research` for deep external investigation and evidence synthesis, including early-stage project scoping when a user wants to write a research study or paper on a topic, unless the user is explicitly asking for a paper-writing deliverable right now. 8. Use `research-plan` when the user asks for a proposal, roadmap, ablation/evaluation plan, study design, or pre-implementation research decomposition. 9. After open-ended scoping in `deep-research`, hand off findings into `research-plan` by default; skip only if the user explicitly opts out. -10. Base conclusions on evidence only (command outputs, metrics, logs, and file diffs). -11. Prefer small, reversible, verifiable steps over broad speculative changes. -12. Follow `REPO_CONVENTIONS.md` for artifact placement and commit hygiene. -13. If a run was initialized before confirmation, stop and run violation recovery: acknowledge, ask whether to keep/clean artifacts, and wait for explicit reconfirmation before continuing. +10. Use `paper-writing` only when the user explicitly asks for a paper-writing deliverable such as drafting or revising a paper, section, or rebuttal. Do not use it for topic scoping, literature investigation, feasibility analysis, experiment design, or experiment execution. +11. Base conclusions on evidence only (command outputs, metrics, logs, and file diffs). +12. Prefer small, reversible, verifiable steps over broad speculative changes. +13. Follow `REPO_CONVENTIONS.md` for artifact placement and commit hygiene. +14. 
If a run was initialized before confirmation, stop and run violation recovery: acknowledge, ask whether to keep/clean artifacts, and wait for explicit reconfirmation before continuing. ## Memory Invocation Guardrails (Balanced) 1. `memory-manager` is mandatory for non-trivial runs, but only as a control-plane step, not per command. @@ -44,6 +45,12 @@ This workspace is for AI research and development tasks (reproduction, debugging 5. Cooldown for non-forced deep-research calls: - at most once per stage unless objective changed or new contradiction/high-impact uncertainty appears. +## Paper-Writing Trigger Guardrails +1. Activate `paper-writing` only when the user explicitly asks for a paper-writing output. +2. Valid triggers include drafting or revising a paper, a named paper section, or rebuttal text. +3. Do not activate `paper-writing` just because the request mentions papers, literature, comparisons, or related work if the actual need is still research, planning, or experiments. +4. If the user has not explicitly asked for paper-writing output, prefer `deep-research`, `research-plan`, or `experiment-execution` according to the current stage. + ## Skill Paths - `.agents/skills/run-governor` - `.agents/skills/research-workflow` @@ -54,6 +61,3 @@ This workspace is for AI research and development tasks (reproduction, debugging - `.agents/skills/deep-research` - `.agents/skills/project-context` - `.agents/skills/paper-writing` - -## Additional Skills -- `paper-writing`: Progressive-disclosure paper writing skill for CS/AI papers. Use when drafting or revising sections such as abstract, introduction, related work, method, figures, experiments, or rebuttal text. Includes section-specific references plus an arXiv source-fetch workflow for mining LaTeX organization from exemplar papers.