From a90e3d751aa323e2346cad2b88405878f6478ecb Mon Sep 17 00:00:00 2001
From: Jiale Cheng <92421404+chengjl19@users.noreply.github.com>
Date: Fri, 27 Feb 2026 21:44:33 +0800
Subject: [PATCH] Bootstrap shared memory repository structure and governance
 docs

---
 .github/pull_request_template.md  |  25 +++++
 .github/workflows/validate.yml    |  24 +++++
 .gitignore                        |   3 +
 CONTRIBUTING.md                   |  48 +++++++++
 README.md                         | 113 +++++++++++++++++++++
 episodes/environment/.gitkeep     |   1 +
 episodes/evaluation/.gitkeep      |   1 +
 episodes/reproduction/.gitkeep    |   1 +
 episodes/training/.gitkeep        |   1 +
 insights/debugging/.gitkeep       |   1 +
 insights/evaluation/.gitkeep      |   1 +
 insights/planning/.gitkeep        |   1 +
 procedures/debug/.gitkeep         |   1 +
 procedures/experiment/.gitkeep    |   1 +
 procedures/reproduce/.gitkeep     |   1 +
 procedures/setup/.gitkeep         |   1 +
 schemas/memory-record.schema.json |  71 ++++++++++++++
 scripts/validate_records.py       | 158 ++++++++++++++++++++++++++++++
 templates/episode.md              |  29 ++++++
 templates/insight.md              |  28 ++++++
 templates/procedure.md            |  28 ++++++
 21 files changed, 538 insertions(+)
 create mode 100644 .github/pull_request_template.md
 create mode 100644 .github/workflows/validate.yml
 create mode 100644 .gitignore
 create mode 100644 CONTRIBUTING.md
 create mode 100644 README.md
 create mode 100644 episodes/environment/.gitkeep
 create mode 100644 episodes/evaluation/.gitkeep
 create mode 100644 episodes/reproduction/.gitkeep
 create mode 100644 episodes/training/.gitkeep
 create mode 100644 insights/debugging/.gitkeep
 create mode 100644 insights/evaluation/.gitkeep
 create mode 100644 insights/planning/.gitkeep
 create mode 100644 procedures/debug/.gitkeep
 create mode 100644 procedures/experiment/.gitkeep
 create mode 100644 procedures/reproduce/.gitkeep
 create mode 100644 procedures/setup/.gitkeep
 create mode 100644 schemas/memory-record.schema.json
 create mode 100644 scripts/validate_records.py
 create mode 100644 templates/episode.md
 create mode 100644 templates/insight.md
 create mode 100644 templates/procedure.md

diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 0000000..4325b83
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,25 @@
+## Summary
+
+Describe what memory records are added or updated.
+
+## Record Types
+
+- [ ] episode
+- [ ] procedure
+- [ ] insight
+
+## Checklist
+
+- [ ] Frontmatter required fields are complete.
+- [ ] `type` and folder location are consistent.
+- [ ] `schema_version` is present and valid.
+- [ ] Required sections exist (`Context`, `Reproduce`, `Evidence`, `Failure Boundary`).
+- [ ] Evidence is sufficient for another contributor to reproduce.
+- [ ] Failure boundaries and non-applicable conditions are documented.
+- [ ] No sensitive data or secrets are included.
+- [ ] `python3 scripts/validate_records.py` passes locally.
+
+## Notes for Reviewers
+
+Highlight risk, assumptions, and anything needing extra review.
+
diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml
new file mode 100644
index 0000000..e1ea2d5
--- /dev/null
+++ b/.github/workflows/validate.yml
@@ -0,0 +1,24 @@
+name: Validate Records
+
+on:
+  pull_request:  # no branch or path filters configured
+  push:
+    branches:
+      - main
+      - codex/**  # glob pattern: branches under the "codex/" prefix
+
+jobs:
+  validate:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"  # quoted, so YAML reads it as a string rather than a float
+
+      - name: Validate memory records
+        run: python3 scripts/validate_records.py
+
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..72d042a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.DS_Store
+__pycache__/
+.pytest_cache/
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..cf55c43
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,48 @@
+# Contributing
+
+## Branch and PR Policy
+
+1. Create a feature branch from `main`.
+2. Keep one logical topic per pull request.
+3. Do not push directly to `main`.
+
+## What to Contribute
+
+Contribute only reusable memory records:
+- `episode`
+- `procedure`
+- `insight`
+
+Use the correct folder and template:
+- `episodes/*` with `templates/episode.md`
+- `procedures/*` with `templates/procedure.md`
+- `insights/*` with `templates/insight.md`
+
+## Required Checks
+
+Before opening a PR, run:
+
+```bash
+python3 scripts/validate_records.py
+```
+
+Your PR should pass:
+- schema and frontmatter checks
+- required section checks
+- content quality review
+
+## Content Quality Rules
+
+Each record must:
+- provide reproducible steps
+- provide evidence references
+- define applicability and failure boundary
+- avoid sensitive data
+
+## Schema Changes
+
+If you modify record schema:
+1. Update `schemas/memory-record.schema.json`.
+2. Explain migration impact in the PR description.
+3. Keep backward compatibility when possible.
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e116583
--- /dev/null
+++ b/README.md
@@ -0,0 +1,113 @@
+# Open Research Memory
+
+Open Research Memory is a shared repository for reusable AI R&D experience records.
+
+The goal is to accumulate evidence-backed memory that other contributors can apply safely across projects.
+
+## Scope
+
+This repository accepts:
+- `verified` episodes
+- `active` procedures
+- `active` insights
+
+This repository does not accept:
+- run-local `working` state
+- unverified noise logs
+- secrets or organization-specific sensitive data
+
+## Repository Structure
+
+```text
+episodes/
+  environment/
+  training/
+  evaluation/
+  reproduction/
+procedures/
+  setup/
+  debug/
+  reproduce/
+  experiment/
+insights/
+  planning/
+  debugging/
+  evaluation/
+templates/
+schemas/
+scripts/
+```
+
+## Record Contract
+
+Each record must be a Markdown file with YAML frontmatter.
+
+Required frontmatter fields:
+- `id`
+- `type` (`episode|procedure|insight`)
+- `status`
+- `title`
+- `tags`
+- `created_at`
+- `updated_at`
+- `confidence`
+- `human_verified`
+- `source_run_id`
+- `schema_version`
+
+Allowed statuses (the schema and the validation script reject any other value):
+- `draft`
+- `reviewed`
+- `verified`
+- `trusted`
+- `deprecated`
+- `conflicted`
+- `active`
+
+## Minimum Content Requirements
+
+Every shared record must contain these sections:
+1. `Context`
+2. `Reproduce`
+3. `Evidence`
+4. `Failure Boundary`
+
+Use templates from [`templates/`](templates).
+
+## Contribution Workflow
+
+1. Export candidate records from your local workspace.
+2. Place records in the correct folder (`episodes/`, `procedures/`, `insights/`).
+3. Run validation:
+   - `python3 scripts/validate_records.py`
+4. Open a pull request.
+5. Merge only after checks and review pass.
+
+Do not push directly to `main`.
+
+## Validation
+
+Validation script:
+- checks frontmatter required fields
+- checks that `type` matches the record's folder and that `status` is an allowed value
+- checks minimum required sections
+- checks confidence and schema version format
+
+Run locally:
+
+```bash
+python3 scripts/validate_records.py
+```
+
+## Review Standard
+
+A record is mergeable only when it is:
+- reproducible by another contributor
+- evidence-backed (logs, metrics, commits, artifacts)
+- explicit about applicability and failure boundary
+- free of sensitive data
+
+## Schema
+
+See [`schemas/memory-record.schema.json`](schemas/memory-record.schema.json) for the machine-readable schema.
+
diff --git a/episodes/environment/.gitkeep b/episodes/environment/.gitkeep
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/episodes/environment/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/episodes/evaluation/.gitkeep b/episodes/evaluation/.gitkeep
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/episodes/evaluation/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/episodes/reproduction/.gitkeep b/episodes/reproduction/.gitkeep
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/episodes/reproduction/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/episodes/training/.gitkeep b/episodes/training/.gitkeep
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/episodes/training/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/insights/debugging/.gitkeep b/insights/debugging/.gitkeep
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/insights/debugging/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/insights/evaluation/.gitkeep b/insights/evaluation/.gitkeep
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/insights/evaluation/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/insights/planning/.gitkeep b/insights/planning/.gitkeep
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/insights/planning/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/procedures/debug/.gitkeep b/procedures/debug/.gitkeep
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/procedures/debug/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/procedures/experiment/.gitkeep b/procedures/experiment/.gitkeep
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/procedures/experiment/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/procedures/reproduce/.gitkeep b/procedures/reproduce/.gitkeep
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/procedures/reproduce/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/procedures/setup/.gitkeep b/procedures/setup/.gitkeep
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/procedures/setup/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/schemas/memory-record.schema.json b/schemas/memory-record.schema.json
new file mode 100644
index 0000000..9f87785
--- /dev/null
+++ b/schemas/memory-record.schema.json
@@ -0,0 +1,71 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://recursive-forge.github.io/open-research-memory/schemas/memory-record.schema.json",
+  "title": "Open Research Memory Record Metadata",
+  "type": "object",
+  "additionalProperties": true,
+  "properties": {
+    "id": {
+      "type": "string",
+      "minLength": 3
+    },
+    "type": {
+      "type": "string",
+      "enum": ["episode", "procedure", "insight"]
+    },
+    "status": {
+      "type": "string",
+      "enum": ["draft", "reviewed", "verified", "trusted", "deprecated", "conflicted", "active"]
+    },
+    "title": {
+      "type": "string",
+      "minLength": 3
+    },
+    "tags": {
+      "type": "array",
+      "items": {
+        "type": "string",
+        "minLength": 1
+      },
+      "minItems": 1
+    },
+    "created_at": {
+      "type": "string",
+      "format": "date-time"
+    },
+    "updated_at": {
+      "type": "string",
+      "format": "date-time"
+    },
+    "confidence": {
+      "type": "number",
+      "minimum": 0,
+      "maximum": 1
+    },
+    "human_verified": {
+      "type": "boolean"
+    },
+    "source_run_id": {
+      "type": "string",
+      "minLength": 1
+    },
+    "schema_version": {
+      "type": "string",
+      "pattern": "^[0-9]+\\.[0-9]+(\\.[0-9]+)?$"
+    }
+  },
+  "required": [
+    "id",
+    "type",
+    "status",
+    "title",
+    "tags",
+    "created_at",
+    "updated_at",
+    "confidence",
+    "human_verified",
+    "source_run_id",
+    "schema_version"
+  ]
+}
+
diff --git a/scripts/validate_records.py b/scripts/validate_records.py
new file mode 100644
index 0000000..e09c1a4
--- /dev/null
+++ b/scripts/validate_records.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python3
+import re
+import sys
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[1]  # directory one level above this script's folder
+RECORD_DIRS = {  # top-level folder name -> `type` value expected for records under it
+    "episodes": "episode",
+    "procedures": "procedure",
+    "insights": "insight",
+}
+ALLOWED_STATUS = {  # the only `status` values check_record accepts
+    "draft",
+    "reviewed",
+    "verified",
+    "trusted",
+    "deprecated",
+    "conflicted",
+    "active",
+}
+REQUIRED_SECTIONS = [  # names of the `## ` headings check_sections looks for in a record body
+    "Context",
+    "Reproduce",
+    "Evidence",
+    "Failure Boundary",
+]
+
+
+def parse_frontmatter(text: str):
+    """Split text into (frontmatter, body, error); error is None on success."""
+    fence, closer = "---\n", "\n---\n"
+    if text[: len(fence)] != fence:
+        return None, None, "missing frontmatter start delimiter"
+    head, found, rest = text[len(fence) :].partition(closer)
+    if not found:
+        return None, None, "missing frontmatter end delimiter"
+    return head, rest, None
+
+
+def get_value(frontmatter: str, key: str):
+    """Return the unquoted same-line value of `key`; None if absent or empty."""
+    # [ \t], not \s: \s also matches "\n", so an empty value captured the next line.
+    m = re.search(rf"(?m)^{re.escape(key)}[ \t]*:[ \t]*(\S.*)$", frontmatter)
+    return m.group(1).strip().strip('"').strip("'") if m else None
+
+
+def has_field(frontmatter: str, key: str):  # True when a line starts with "<key>" followed by ":"
+    return bool(re.compile(rf"(?m)^{re.escape(key)}\s*:").search(frontmatter))
+
+
+def check_sections(body: str):
+    """Return the REQUIRED_SECTIONS names that have no '## <name>' heading line in body."""
+    def present(name):
+        return re.search(rf"(?m)^##\s+{re.escape(name)}\s*$", body) is not None
+
+    return [name for name in REQUIRED_SECTIONS if not present(name)]
+
+
+def check_record(path: Path, expected_type: str):
+    """Validate one record file; return a list of error strings (empty if valid).
+
+    `expected_type` is the `type` value implied by the record's folder. A file
+    whose frontmatter cannot be delimited yields that single error only.
+    """
+    errors = []
+    text = path.read_text(encoding="utf-8")
+
+    frontmatter, body, err = parse_frontmatter(text)
+    if err:
+        return [f"{path}: {err}"]
+
+    required_fields = [
+        "id", "type", "status", "title", "tags", "created_at", "updated_at",
+        "confidence", "human_verified", "source_run_id", "schema_version",
+    ]
+    for field in required_fields:
+        # "key:" with nothing after it is empty unless a nested block (an
+        # indented line or a "- " list item) starts on the following line.
+        empty = rf"(?m)^{re.escape(field)}[ \t]*:[ \t]*$(?!\n(?:[ \t]+\S|-[ \t]))"
+        if not has_field(frontmatter, field):
+            errors.append(f"{path}: missing frontmatter field '{field}'")
+        elif re.search(empty, frontmatter):
+            errors.append(f"{path}: frontmatter field '{field}' has an empty value")
+
+    declared_type = get_value(frontmatter, "type")
+    if declared_type and declared_type != expected_type:
+        errors.append(
+            f"{path}: type '{declared_type}' does not match folder type '{expected_type}'"
+        )
+
+    status = get_value(frontmatter, "status")
+    if status and status not in ALLOWED_STATUS:
+        errors.append(f"{path}: invalid status '{status}'")
+
+    schema_version = get_value(frontmatter, "schema_version")
+    if schema_version and re.match(r"^[0-9]+\.[0-9]+(\.[0-9]+)?$", schema_version) is None:
+        errors.append(f"{path}: invalid schema_version '{schema_version}'")
+
+    confidence = get_value(frontmatter, "confidence")
+    if confidence:
+        try:
+            # Chained test so NaN is rejected: `nan < 0` and `nan > 1` are both False.
+            if not 0 <= float(confidence) <= 1:
+                errors.append(f"{path}: confidence must be in [0,1], got {confidence}")
+        except ValueError:
+            errors.append(f"{path}: confidence is not numeric: {confidence}")
+
+    human_verified = get_value(frontmatter, "human_verified")
+    if human_verified and human_verified not in {"true", "false"}:
+        errors.append(
+            f"{path}: human_verified must be true/false, got '{human_verified}'"
+        )
+
+    tags = get_value(frontmatter, "tags")
+    if tags is not None and tags.strip() in {"[]", ""}:
+        errors.append(f"{path}: tags must not be empty")
+
+    for section in check_sections(body):
+        errors.append(f"{path}: missing required section '## {section}'")
+
+    return errors
+
+
+def main():
+    """Validate every record under the RECORD_DIRS folders; return the exit code."""
+    record_paths = []
+    for folder in RECORD_DIRS:
+        folder_root = ROOT / folder
+        if folder_root.exists():
+            for candidate in folder_root.rglob("*.md"):
+                # Skip dot-prefixed markdown files.
+                if not candidate.name.startswith("."):
+                    record_paths.append(candidate)
+
+    # Nothing to validate counts as success.
+    if len(record_paths) == 0:
+        print("No record markdown files found. Validation passed.")
+        return 0
+
+    failures = []
+    for record in sorted(record_paths):
+        # The first path component below ROOT selects the expected record type.
+        folder_name = record.relative_to(ROOT).parts[0]
+        failures += check_record(record, RECORD_DIRS[folder_name])
+
+    if not failures:
+        print(f"Validation passed for {len(record_paths)} record file(s).")
+        return 0
+
+    print("Validation failed:")
+    for message in failures:
+        print(f"- {message}")
+    return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
+
diff --git a/templates/episode.md b/templates/episode.md
new file mode 100644
index 0000000..1e4b6a1
--- /dev/null
+++ b/templates/episode.md
@@ -0,0 +1,29 @@
+---
+id: ep_YYYYMMDD_slug
+type: episode
+status: verified
+title: "<short title>"
+project: "<optional>"
+task_type: "<reproduce|debug|eval|train|plan>"
+tags: ["tag1", "tag2"]
+error_signature: "<optional>"
+source_run_id: "run_<id>"
+created_at: "2026-01-01T00:00:00Z"
+updated_at: "2026-01-01T00:00:00Z"
+human_verified: true
+confidence: 0.80
+schema_version: "1.0"
+---
+
+## Context
+Describe scope, assumptions, and where this episode applies.
+
+## Reproduce
+List exact environment and commands to reproduce.
+
+## Evidence
+Attach logs, metrics, commit hashes, and artifacts.
+
+## Failure Boundary
+Describe when this episode should not be reused.
+
diff --git a/templates/insight.md b/templates/insight.md
new file mode 100644
index 0000000..9c661fa
--- /dev/null
+++ b/templates/insight.md
@@ -0,0 +1,28 @@
+---
+id: ins_YYYYMMDD_slug
+type: insight
+status: active
+title: "<short title>"
+project: "<optional or cross-project>"
+task_type: "<optional>"
+tags: ["tag1", "tag2"]
+source_run_id: "run_<id>"
+created_at: "2026-01-01T00:00:00Z"
+updated_at: "2026-01-01T00:00:00Z"
+human_verified: true
+confidence: 0.75
+schema_version: "1.0"
+---
+
+## Context
+Describe where this insight is expected to hold.
+
+## Reproduce
+Describe how to validate this insight in a new task.
+
+## Evidence
+Reference supporting episodes/procedures and metrics.
+
+## Failure Boundary
+List known counterexamples and uncertainty boundary.
+
diff --git a/templates/procedure.md b/templates/procedure.md
new file mode 100644
index 0000000..f313b6b
--- /dev/null
+++ b/templates/procedure.md
@@ -0,0 +1,28 @@
+---
+id: proc_YYYYMMDD_slug
+type: procedure
+status: active
+title: "<short title>"
+project: "<optional>"
+task_type: "<setup|debug|reproduce|experiment>"
+tags: ["tag1", "tag2"]
+source_run_id: "run_<id>"
+created_at: "2026-01-01T00:00:00Z"
+updated_at: "2026-01-01T00:00:00Z"
+human_verified: true
+confidence: 0.85
+schema_version: "1.0"
+---
+
+## Context
+Define scope and prerequisites.
+
+## Reproduce
+Provide ordered SOP steps and commands.
+
+## Evidence
+Link supporting episode IDs and measured outcomes.
+
+## Failure Boundary
+List invalid conditions and escalation triggers.
+