diff --git a/.agents/skills/custom-codereview-guide.md b/.agents/skills/custom-codereview-guide.md new file mode 100644 index 00000000..75b44ede --- /dev/null +++ b/.agents/skills/custom-codereview-guide.md @@ -0,0 +1,42 @@ +--- +name: custom-codereview-guide +description: Repository-specific guardrails for OpenHands PR review +triggers: + - /codereview +--- + +# Autonomous Agent Stack - Review Guardrails + +You are the first-pass reviewer. Keep this workflow comment-only and focus on actionable issues. + +## Review Decisions + +- APPROVE only low-risk changes (docs-only, tests-only, non-runtime config edits). +- COMMENT for risky logic changes, design ambiguity, or missing tests. +- REQUEST CHANGES for security boundary violations or regression risk. + +## Must-Check Areas + +1. Security boundaries +- No bypass of guardrails in `src/security/`, `src/gatekeeper/`, or execution adapters. +- No unsafe command execution paths (`shell=True`, untrusted command composition, hidden runtime writes). +- No weakening of access checks in panel, webhook, gateway, or auth paths. + +2. Review-loop integrity +- Diff review must remain comment-only; no autonomous merge, no hidden approval logic. +- Reviewer output should include concrete file/line pointers and minimal reproduction guidance. + +3. Reliability and test coverage +- Runtime-path changes should include tests in `tests/`. +- Changes touching workflow/runner code should include failure-path assertions. + +4. Repository conventions +- Keep edits scoped and avoid unrelated churn. +- Respect existing linting/formatting conventions. +- Prefer explicit contracts and typed structures where practical. + +## Output Style + +- Prioritize high-severity issues first. +- Keep comments concise, specific, and actionable. +- Avoid duplicate comments when the same issue appears repeatedly. diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 00000000..5fd8da13 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,10 @@ +# Keep merge authority human-owned. +# Enable "Require review from Code Owners" in branch protection/rulesets. + +* @srxly888-creator + +/.github/** @srxly888-creator +/.agents/skills/** @srxly888-creator +/src/gatekeeper/ @srxly888-creator +/src/autoresearch/agent_protocol/ @srxly888-creator +/src/autoresearch/executions/ @srxly888-creator diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 249dc551..3986122c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,6 +7,7 @@ on: - "codex/**" - "feature/**" pull_request: + merge_group: jobs: lint-test-audit: diff --git a/.github/workflows/pr-review-by-openhands.yml b/.github/workflows/pr-review-by-openhands.yml new file mode 100644 index 00000000..203ac641 --- /dev/null +++ b/.github/workflows/pr-review-by-openhands.yml @@ -0,0 +1,50 @@ +name: PR Review by OpenHands + +on: + pull_request: + types: [labeled, review_requested] + +permissions: + contents: read + pull-requests: write + issues: write + +concurrency: + group: openhands-pr-review-${{ github.event.pull_request.number }} + cancel-in-progress: true + +jobs: + pr-review: + # Security posture: + # 1) Trigger only via maintainer-controlled actions (label/reviewer request) + # 2) Skip fork PRs so repository secrets are never used on untrusted code + if: | + github.event.pull_request.head.repo.full_name == github.repository && + ( + github.event.label.name == 'review-this' || + ( + vars.OPENHANDS_REVIEWER_HANDLE && + github.event.requested_reviewer.login == vars.OPENHANDS_REVIEWER_HANDLE + ) + ) + runs-on: ubuntu-latest + timeout-minutes: 20 + env: + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + + steps: + - name: Check required secret + if: ${{ env.LLM_API_KEY == '' }} + run: | + echo "::error::Missing LLM_API_KEY secret. Add it in Settings -> Secrets and variables -> Actions." + exit 1 + + - name: Run PR Review (comment-only) + uses: OpenHands/software-agent-sdk/.github/actions/pr-review@3635cda1d62f961e8d6a4f5a8f05a3aaa72d8ad2 + with: + llm-model: ${{ vars.OPENHANDS_REVIEW_MODEL || 'anthropic/claude-sonnet-4-5-20250929' }} + llm-base-url: ${{ vars.OPENHANDS_LLM_BASE_URL || '' }} + review-style: ${{ vars.OPENHANDS_REVIEW_STYLE || 'standard' }} + extensions-version: fdd6c7373bdf1c20cfdedb7261ffdbf49d5a76e5 + llm-api-key: ${{ env.LLM_API_KEY }} + github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/quality-gates.yml b/.github/workflows/quality-gates.yml new file mode 100644 index 00000000..e2caa748 --- /dev/null +++ b/.github/workflows/quality-gates.yml @@ -0,0 +1,61 @@ +name: Quality Gates + +on: + push: + branches: + - main + - "codex/**" + - "feature/**" + pull_request: + merge_group: + +permissions: + contents: read + +jobs: + reviewer-gates: + runs-on: ubuntu-latest + timeout-minutes: 20 + + steps: + - name: Checkout + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 + + - name: Setup Python + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.lock ]; then + pip install -r requirements.lock + else + pip install -r requirements.txt + fi + pip install -r requirements-review.lock + + - name: Mypy (reviewer core modules) + run: | + mypy --config-file mypy.ini \ + src/gatekeeper/static_analyzer.py \ + src/gatekeeper/business_enforcer.py \ + src/gatekeeper/llm_reviewer.py \ + src/gatekeeper/board_summarizer.py + + - name: Bandit (reviewer core modules) + run: | + bandit -q \ + src/gatekeeper/static_analyzer.py \ + src/gatekeeper/business_enforcer.py \ + src/gatekeeper/llm_reviewer.py \ + src/gatekeeper/board_summarizer.py + + - name: Semgrep (reviewer core modules) + run: | + semgrep --error --config=p/python \ + src/gatekeeper/static_analyzer.py \ + src/gatekeeper/business_enforcer.py \ + src/gatekeeper/llm_reviewer.py \ + src/gatekeeper/board_summarizer.py diff --git a/Makefile b/Makefile index b89f6a4c..276344ba 100644 --- a/Makefile +++ b/Makefile @@ -27,6 +27,7 @@ AEP_FALLBACK_AGENT ?= .PHONY: help setup doctor start test-quick clean .PHONY: ai-lab ai-lab-setup ai-lab-check ai-lab-up ai-lab-down ai-lab-status ai-lab-shell ai-lab-run masfactory-flight hygiene-check openhands openhands-dry-run openhands-controlled openhands-controlled-dry-run openhands-demo agent-run +.PHONY: review-gates-local help: @echo "Autonomous Agent Stack - common commands" @@ -49,6 +50,7 @@ help: @echo " make openhands-demo OH_BACKEND=mock Run minimal closed-loop demo (contract + failure policy)" @echo " make agent-run AEP_AGENT=openhands AEP_TASK='...' Run AEP v0 runner entrypoint" @echo " make hygiene-check FAIL_ON_FINDINGS=1 Run prompt hygiene audit for src/" + @echo " make review-gates-local Run mypy/bandit/semgrep on reviewer core modules" @echo " make test-quick Run quick smoke tests" @echo " make clean Remove Python cache folders" @echo "" @@ -150,6 +152,18 @@ hygiene-check: $(PYTHON) scripts/check_prompt_hygiene.py --root "$(HYGIENE_ROOT)" --output-dir "$(HYGIENE_OUTPUT_DIR)" --min-repeat "$(HYGIENE_MIN_REPEAT)" $$STRICT_FLAG; \ fi +review-gates-local: + @set -euo pipefail; \ + MYPY_BIN="mypy"; \ + BANDIT_BIN="bandit"; \ + SEMGREP_BIN="semgrep"; \ + if [[ -x "$(VENV)/bin/mypy" ]]; then MYPY_BIN="$(VENV)/bin/mypy"; fi; \ + if [[ -x "$(VENV)/bin/bandit" ]]; then BANDIT_BIN="$(VENV)/bin/bandit"; fi; \ + if [[ -x "$(VENV)/bin/semgrep" ]]; then SEMGREP_BIN="$(VENV)/bin/semgrep"; fi; \ + $$MYPY_BIN --config-file mypy.ini src/gatekeeper/static_analyzer.py src/gatekeeper/business_enforcer.py src/gatekeeper/llm_reviewer.py src/gatekeeper/board_summarizer.py; \ + $$BANDIT_BIN -q src/gatekeeper/static_analyzer.py src/gatekeeper/business_enforcer.py src/gatekeeper/llm_reviewer.py src/gatekeeper/board_summarizer.py; \ + $$SEMGREP_BIN --error --config=p/python src/gatekeeper/static_analyzer.py src/gatekeeper/business_enforcer.py src/gatekeeper/llm_reviewer.py src/gatekeeper/board_summarizer.py + openhands: OPENHANDS_TASK='$(OH_TASK)' OPENHANDS_DRY_RUN='$(OH_DRY_RUN)' bash ./scripts/openhands_start.sh diff --git a/README.md b/README.md index 165e1302..66236be9 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,7 @@ make openhands-controlled OH_TASK="Create src/demo_math.py with add(a,b), then r make openhands-demo OH_BACKEND=mock OH_TASK="Create src/demo_math.py with add(a,b)." make agent-run AEP_AGENT=openhands AEP_TASK="Create src/demo_math.py with add(a,b)." make hygiene-check +make review-gates-local ``` `make hygiene-check` 会把结果写到 `logs/audit/prompt_hygiene/report.txt` 和 `logs/audit/prompt_hygiene/report.json`。 @@ -58,6 +59,21 @@ make hygiene-check `make agent-run` 走 AEP v0 统一执行内核:`JobSpec -> driver adapter -> patch gate -> decision`,OpenHands/Codex/本地脚本都可作为 driver 接入。 +`make review-gates-local` 会在本地运行 reviewer 核心模块的 `mypy + bandit + semgrep`,与 CI 的 `Quality Gates` 流程保持一致。 + +## PR 审查与门禁 + +- OpenHands 首轮审查(comment-only):`.github/workflows/pr-review-by-openhands.yml` + - 触发方式:默认 `review-this` label;可选通过 `OPENHANDS_REVIEWER_HANDLE` 启用 reviewer 触发 + - 安全策略:`pull_request` 事件、仅内部分支 PR(跳过 forks)、最小权限、action/extension 固定 SHA + - 合并策略:按需触发模式下不设为 required status check(仅作为 advisory reviewer) +- 质量门禁:`.github/workflows/quality-gates.yml` + - 检查项:`mypy + bandit + semgrep`(工具版本固定在 `requirements-review.lock`) + - 包含 `merge_group` 触发,兼容 merge queue +- 仓库 required checks 建议:`CI / lint-test-audit` + `Quality Gates / reviewer-gates` + +完整落地说明见:[PR Review Hardening](./docs/pr-review-hardening.md) + 如果端口冲突: ```bash diff --git a/docs/pr-review-hardening.md b/docs/pr-review-hardening.md new file mode 100644 index 00000000..89675eb9 --- /dev/null +++ b/docs/pr-review-hardening.md @@ -0,0 +1,57 @@ +# PR Review Hardening (OpenHands + Quality Gates) + +This repository now uses a two-layer review setup: + +1. AI first-pass reviewer (comment-only) +- Workflow: `.github/workflows/pr-review-by-openhands.yml` +- Trigger: + - Required: PR label `review-this` + - Optional: reviewer request for `OPENHANDS_REVIEWER_HANDLE` (repo variable) +- Safety defaults: + - `pull_request` event (not `pull_request_target`) + - Skip fork PRs (`head.repo.full_name == github.repository`) + - Minimal permissions (`contents: read`, `pull-requests: write`, `issues: write`) + - Human merge remains mandatory + - OpenHands action pinned to full commit SHA + - OpenHands extensions pinned to full commit SHA +- Required check policy: + - Do not mark this workflow as required while it is on-demand (`review-this` / optional reviewer request trigger). + - Keep it advisory, not merge-blocking. + +2. Hard quality gates for reviewer core +- Workflow: `.github/workflows/quality-gates.yml` +- Checks: + - `mypy` (typed consistency) + - `bandit` (security anti-patterns) + - `semgrep` (rule-based bug/security checks) + - Trigger includes `merge_group` for merge queue compatibility. + +## One-time repository setup + +1. Add Actions secret: +- `LLM_API_KEY` + +2. Create label: +- `review-this` + +3. Optional repository variables: +- `OPENHANDS_REVIEW_MODEL` +- `OPENHANDS_REVIEW_STYLE` (`standard` or `roasted`) +- `OPENHANDS_LLM_BASE_URL` +- `OPENHANDS_REVIEWER_HANDLE` (optional reviewer login; leave empty to run label-only) + +4. Configure merge protection on `main` +- Pick one as primary control plane: ruleset or branch protection (avoid duplicate management drift). +- Require pull request before merge +- Require status checks: + - Use job names/check contexts (not workflow filenames) + - `CI / lint-test-audit` (main CI line; matrix variants may appear in UI) + - `Quality Gates / reviewer-gates` +- Require review from Code Owners +- Restrict direct pushes to `main` + +## Notes + +- `CODEOWNERS` is in `.github/CODEOWNERS` and should be kept in sync with maintainers. +- Repository-specific review policy is in `.agents/skills/custom-codereview-guide.md`. +- If merge queue is enabled, keep both `CI` and `Quality Gates` workflows listening on `merge_group`. diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 00000000..26322927 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,7 @@ +[mypy] +python_version = 3.10 +explicit_package_bases = True +ignore_missing_imports = True +follow_imports = silent +warn_redundant_casts = True +warn_unused_ignores = True diff --git a/requirements-review.lock b/requirements-review.lock new file mode 100644 index 00000000..cbf02967 --- /dev/null +++ b/requirements-review.lock @@ -0,0 +1,4 @@ +# Pinned review gate tooling for CI reproducibility +mypy==1.19.1 +bandit==1.9.4 +semgrep==1.156.0 diff --git a/src/gatekeeper/board_summarizer.py b/src/gatekeeper/board_summarizer.py index 8f13f190..e4544ec8 100644 --- a/src/gatekeeper/board_summarizer.py +++ b/src/gatekeeper/board_summarizer.py @@ -7,7 +7,7 @@ 3. 极简卡片 UI """ -from typing import Dict, List +from typing import Dict, List, Optional from dataclasses import dataclass from src.gatekeeper.llm_reviewer import LLM_Diff_Reviewer, LLMReview @@ -23,7 +23,7 @@ class ReviewCard: class Board_Summarizer: """UI 汇报简化器""" - def __init__(self, llm_reviewer: LLM_Diff_Reviewer = None): + def __init__(self, llm_reviewer: Optional[LLM_Diff_Reviewer] = None): """初始化 Args: diff --git a/src/gatekeeper/static_analyzer.py b/src/gatekeeper/static_analyzer.py index 8c13c7fe..96b7ce22 100644 --- a/src/gatekeeper/static_analyzer.py +++ b/src/gatekeeper/static_analyzer.py @@ -12,7 +12,7 @@ import ast import re -from typing import Dict, List +from typing import Any, Dict, List from dataclasses import dataclass from enum import Enum @@ -56,7 +56,7 @@ def __init__(self): async def analyze_pr(self, pr_diff: str) -> Dict: """分析 PR 代码""" - result = { + result: Dict[str, Any] = { "safe": True, "violations": [], "ast_analysis": None, @@ -67,7 +67,7 @@ async def analyze_pr(self, pr_diff: str) -> Dict: for pattern, description in self.forbidden_patterns: matches = re.finditer(pattern, pr_diff, re.IGNORECASE | re.MULTILINE) for match in matches: - violation = { + violation: Dict[str, Any] = { "type": "forbidden_pattern", "description": f"[Security Reject] 检测到越权调用: {description}", "line_number": pr_diff[:match.start()].count('\n') + 1, @@ -95,13 +95,13 @@ async def analyze_pr(self, pr_diff: str) -> Dict: for node in ast.walk(tree): if isinstance(node, ast.Call): if self._is_dangerous_call(node): - violation = { + dangerous_violation: Dict[str, Any] = { "type": "dangerous_call", "description": f"[Security Reject] 检测到危险函数调用: {ast.dump(node)}", "line_number": node.lineno, "severity": SecurityLevel.LOW.value, } - result["violations"].append(violation) + result["violations"].append(dangerous_violation) result["safe"] = False except SyntaxError as e: # 语法错误不一定是安全问题,可能只是 diff 格式问题 @@ -118,9 +118,9 @@ async def analyze_pr(self, pr_diff: str) -> Dict: return result - def _analyze_ast(self, tree: ast.AST) -> Dict: + def _analyze_ast(self, tree: ast.AST) -> Dict[str, List[str]]: """AST 深度分析""" - analysis = { + analysis: Dict[str, List[str]] = { "imports": [], "function_calls": [], "modifications": [],