Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions .agents/skills/custom-codereview-guide.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
name: custom-codereview-guide
description: Repository-specific guardrails for OpenHands PR review
triggers:
- /codereview
---

# Autonomous Agent Stack - Review Guardrails

You are the first-pass reviewer. Keep this workflow comment-only and focus on actionable issues.

## Review Decisions

- APPROVE only low-risk changes (docs-only, tests-only, non-runtime config edits).
- COMMENT for risky logic changes, design ambiguity, or missing tests.
- REQUEST CHANGES for security boundary violations or regression risk.

## Must-Check Areas

1. Security boundaries
- No bypass of guardrails in `src/security/`, `src/gatekeeper/`, or execution adapters.
- No unsafe command execution paths (`shell=True`, untrusted command composition, hidden runtime writes).
- No weakening of access checks in panel, webhook, gateway, or auth paths.

2. Review-loop integrity
- Diff review must remain comment-only; no autonomous merge, no hidden approval logic.
- Reviewer output should include concrete file/line pointers and minimal reproduction guidance.

3. Reliability and test coverage
- Runtime-path changes should include tests in `tests/`.
- Changes touching workflow/runner code should include failure-path assertions.

4. Repository conventions
- Keep edits scoped and avoid unrelated churn.
- Respect existing linting/formatting conventions.
- Prefer explicit contracts and typed structures where practical.

## Output Style

- Prioritize high-severity issues first.
- Keep comments concise, specific, and actionable.
- Avoid duplicate comments when the same issue appears repeatedly.
10 changes: 10 additions & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Keep merge authority human-owned.
# Enable "Require review from Code Owners" in branch protection/rulesets.

* @srxly888-creator

/.github/** @srxly888-creator
/.agents/skills/** @srxly888-creator
/src/gatekeeper/ @srxly888-creator
/src/autoresearch/agent_protocol/ @srxly888-creator
/src/autoresearch/executions/ @srxly888-creator
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ on:
- "codex/**"
- "feature/**"
pull_request:
merge_group:

jobs:
lint-test-audit:
Expand Down
50 changes: 50 additions & 0 deletions .github/workflows/pr-review-by-openhands.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
name: PR Review by OpenHands

on:
pull_request:
types: [labeled, review_requested]

permissions:
contents: read
pull-requests: write
issues: write

concurrency:
group: openhands-pr-review-${{ github.event.pull_request.number }}
cancel-in-progress: true

jobs:
pr-review:
# Security posture:
# 1) Trigger only via maintainer-controlled actions (label/reviewer request)
# 2) Skip fork PRs so repository secrets are never used on untrusted code
if: |
github.event.pull_request.head.repo.full_name == github.repository &&
(
github.event.label.name == 'review-this' ||
(
vars.OPENHANDS_REVIEWER_HANDLE &&
github.event.requested_reviewer.login == vars.OPENHANDS_REVIEWER_HANDLE
)
)
runs-on: ubuntu-latest
timeout-minutes: 20
env:
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}

steps:
- name: Check required secret
if: ${{ env.LLM_API_KEY == '' }}
run: |
echo "::error::Missing LLM_API_KEY secret. Add it in Settings -> Secrets and variables -> Actions."
exit 1

- name: Run PR Review (comment-only)
uses: OpenHands/software-agent-sdk/.github/actions/pr-review@3635cda1d62f961e8d6a4f5a8f05a3aaa72d8ad2
with:
llm-model: ${{ vars.OPENHANDS_REVIEW_MODEL || 'anthropic/claude-sonnet-4-5-20250929' }}
llm-base-url: ${{ vars.OPENHANDS_LLM_BASE_URL || '' }}
review-style: ${{ vars.OPENHANDS_REVIEW_STYLE || 'standard' }}
extensions-version: fdd6c7373bdf1c20cfdedb7261ffdbf49d5a76e5
llm-api-key: ${{ env.LLM_API_KEY }}
github-token: ${{ secrets.GITHUB_TOKEN }}
61 changes: 61 additions & 0 deletions .github/workflows/quality-gates.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
name: Quality Gates

on:
push:
branches:
- main
- "codex/**"
- "feature/**"
pull_request:
merge_group:

permissions:
contents: read

jobs:
reviewer-gates:
runs-on: ubuntu-latest
timeout-minutes: 20

steps:
- name: Checkout
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5

- name: Setup Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065
with:
python-version: "3.11"

- name: Install dependencies
run: |
python -m pip install --upgrade pip
if [ -f requirements.lock ]; then
pip install -r requirements.lock
else
pip install -r requirements.txt
fi
pip install -r requirements-review.lock

- name: Mypy (reviewer core modules)
run: |
mypy --config-file mypy.ini \
src/gatekeeper/static_analyzer.py \
src/gatekeeper/business_enforcer.py \
src/gatekeeper/llm_reviewer.py \
src/gatekeeper/board_summarizer.py

- name: Bandit (reviewer core modules)
run: |
bandit -q \
src/gatekeeper/static_analyzer.py \
src/gatekeeper/business_enforcer.py \
src/gatekeeper/llm_reviewer.py \
src/gatekeeper/board_summarizer.py

- name: Semgrep (reviewer core modules)
run: |
semgrep --error --config=p/python \
src/gatekeeper/static_analyzer.py \
src/gatekeeper/business_enforcer.py \
src/gatekeeper/llm_reviewer.py \
src/gatekeeper/board_summarizer.py
14 changes: 14 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ AEP_FALLBACK_AGENT ?=

.PHONY: help setup doctor start test-quick clean
.PHONY: ai-lab ai-lab-setup ai-lab-check ai-lab-up ai-lab-down ai-lab-status ai-lab-shell ai-lab-run masfactory-flight hygiene-check openhands openhands-dry-run openhands-controlled openhands-controlled-dry-run openhands-demo agent-run
.PHONY: review-gates-local

help:
@echo "Autonomous Agent Stack - common commands"
Expand All @@ -49,6 +50,7 @@ help:
@echo " make openhands-demo OH_BACKEND=mock Run minimal closed-loop demo (contract + failure policy)"
@echo " make agent-run AEP_AGENT=openhands AEP_TASK='...' Run AEP v0 runner entrypoint"
@echo " make hygiene-check FAIL_ON_FINDINGS=1 Run prompt hygiene audit for src/"
@echo " make review-gates-local Run mypy/bandit/semgrep on reviewer core modules"
@echo " make test-quick Run quick smoke tests"
@echo " make clean Remove Python cache folders"
@echo ""
Expand Down Expand Up @@ -150,6 +152,18 @@ hygiene-check:
$(PYTHON) scripts/check_prompt_hygiene.py --root "$(HYGIENE_ROOT)" --output-dir "$(HYGIENE_OUTPUT_DIR)" --min-repeat "$(HYGIENE_MIN_REPEAT)" $$STRICT_FLAG; \
fi

review-gates-local:
@set -euo pipefail; \
MYPY_BIN="mypy"; \
BANDIT_BIN="bandit"; \
SEMGREP_BIN="semgrep"; \
if [[ -x "$(VENV)/bin/mypy" ]]; then MYPY_BIN="$(VENV)/bin/mypy"; fi; \
if [[ -x "$(VENV)/bin/bandit" ]]; then BANDIT_BIN="$(VENV)/bin/bandit"; fi; \
if [[ -x "$(VENV)/bin/semgrep" ]]; then SEMGREP_BIN="$(VENV)/bin/semgrep"; fi; \
$$MYPY_BIN --config-file mypy.ini src/gatekeeper/static_analyzer.py src/gatekeeper/business_enforcer.py src/gatekeeper/llm_reviewer.py src/gatekeeper/board_summarizer.py; \
$$BANDIT_BIN -q src/gatekeeper/static_analyzer.py src/gatekeeper/business_enforcer.py src/gatekeeper/llm_reviewer.py src/gatekeeper/board_summarizer.py; \
$$SEMGREP_BIN --error --config=p/python src/gatekeeper/static_analyzer.py src/gatekeeper/business_enforcer.py src/gatekeeper/llm_reviewer.py src/gatekeeper/board_summarizer.py

openhands:
OPENHANDS_TASK='$(OH_TASK)' OPENHANDS_DRY_RUN='$(OH_DRY_RUN)' bash ./scripts/openhands_start.sh

Expand Down
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ make openhands-controlled OH_TASK="Create src/demo_math.py with add(a,b), then r
make openhands-demo OH_BACKEND=mock OH_TASK="Create src/demo_math.py with add(a,b)."
make agent-run AEP_AGENT=openhands AEP_TASK="Create src/demo_math.py with add(a,b)."
make hygiene-check
make review-gates-local
```

`make hygiene-check` 会把结果写到 `logs/audit/prompt_hygiene/report.txt` 和 `logs/audit/prompt_hygiene/report.json`。
Expand All @@ -58,6 +59,21 @@ make hygiene-check

`make agent-run` 走 AEP v0 统一执行内核:`JobSpec -> driver adapter -> patch gate -> decision`,OpenHands/Codex/本地脚本都可作为 driver 接入。

`make review-gates-local` 会在本地运行 reviewer 核心模块的 `mypy + bandit + semgrep`,与 CI 的 `Quality Gates` 流程保持一致。

## PR 审查与门禁

- OpenHands 首轮审查(comment-only):`.github/workflows/pr-review-by-openhands.yml`
- 触发方式:默认 `review-this` label;可选通过 `OPENHANDS_REVIEWER_HANDLE` 启用 reviewer 触发
- 安全策略:`pull_request` 事件、仅内部分支 PR(跳过 forks)、最小权限、action/extension 固定 SHA
- 合并策略:按需触发模式下不设为 required status check(仅作为 advisory reviewer)
- 质量门禁:`.github/workflows/quality-gates.yml`
- 检查项:`mypy + bandit + semgrep`(工具版本固定在 `requirements-review.lock`)
- 包含 `merge_group` 触发,兼容 merge queue
- 仓库 required checks 建议:`CI / lint-test-audit` + `Quality Gates / reviewer-gates`

完整落地说明见:[PR Review Hardening](./docs/pr-review-hardening.md)

如果端口冲突:

```bash
Expand Down
57 changes: 57 additions & 0 deletions docs/pr-review-hardening.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# PR Review Hardening (OpenHands + Quality Gates)

This repository now uses a two-layer review setup:

1. AI first-pass reviewer (comment-only)
- Workflow: `.github/workflows/pr-review-by-openhands.yml`
- Trigger:
- Required: PR label `review-this`
- Optional: reviewer request for `OPENHANDS_REVIEWER_HANDLE` (repo variable)
- Safety defaults:
- `pull_request` event (not `pull_request_target`)
- Skip fork PRs (`head.repo.full_name == github.repository`)
- Minimal permissions (`contents: read`, `pull-requests: write`, `issues: write`)
- Human merge remains mandatory
- OpenHands action pinned to full commit SHA
- OpenHands extensions pinned to full commit SHA
- Required check policy:
- Do not mark this workflow as required while it is on-demand (`review-this` / optional reviewer request trigger).
- Keep it advisory, not merge-blocking.

2. Hard quality gates for reviewer core
- Workflow: `.github/workflows/quality-gates.yml`
- Checks:
- `mypy` (typed consistency)
- `bandit` (security anti-patterns)
- `semgrep` (rule-based bug/security checks)
- Trigger includes `merge_group` for merge queue compatibility.

## One-time repository setup

1. Add Actions secret:
- `LLM_API_KEY`

2. Create label:
- `review-this`

3. Optional repository variables:
- `OPENHANDS_REVIEW_MODEL`
- `OPENHANDS_REVIEW_STYLE` (`standard` or `roasted`)
- `OPENHANDS_LLM_BASE_URL`
- `OPENHANDS_REVIEWER_HANDLE` (optional reviewer login; leave empty to run label-only)

4. Configure merge protection on `main`
- Pick one as primary control plane: ruleset or branch protection (avoid duplicate management drift).
- Require pull request before merge
- Require status checks:
- Use job names/check contexts (not workflow filenames)
- `CI / lint-test-audit` (main CI line; matrix variants may appear in UI)
- `Quality Gates / reviewer-gates`
- Require review from Code Owners
- Restrict direct pushes to `main`

## Notes

- `CODEOWNERS` is in `.github/CODEOWNERS` and should be kept in sync with maintainers.
- Repository-specific review policy is in `.agents/skills/custom-codereview-guide.md`.
- If merge queue is enabled, keep both `CI` and `Quality Gates` workflows listening on `merge_group`.
7 changes: 7 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[mypy]
python_version = 3.10
explicit_package_bases = True
ignore_missing_imports = True
follow_imports = silent
warn_redundant_casts = True
warn_unused_ignores = True
4 changes: 4 additions & 0 deletions requirements-review.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Pinned review gate tooling for CI reproducibility
mypy==1.19.1
bandit==1.9.4
semgrep==1.156.0
4 changes: 2 additions & 2 deletions src/gatekeeper/board_summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
3. 极简卡片 UI
"""

from typing import Dict, List
from typing import Dict, List, Optional
from dataclasses import dataclass
from src.gatekeeper.llm_reviewer import LLM_Diff_Reviewer, LLMReview

Expand All @@ -23,7 +23,7 @@ class ReviewCard:
class Board_Summarizer:
"""UI 汇报简化器"""

def __init__(self, llm_reviewer: LLM_Diff_Reviewer = None):
def __init__(self, llm_reviewer: Optional[LLM_Diff_Reviewer] = None):
"""初始化

Args:
Expand Down
14 changes: 7 additions & 7 deletions src/gatekeeper/static_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

import ast
import re
from typing import Dict, List
from typing import Any, Dict, List
from dataclasses import dataclass
from enum import Enum

Expand Down Expand Up @@ -56,7 +56,7 @@ def __init__(self):

async def analyze_pr(self, pr_diff: str) -> Dict:
"""分析 PR 代码"""
result = {
result: Dict[str, Any] = {
"safe": True,
"violations": [],
"ast_analysis": None,
Expand All @@ -67,7 +67,7 @@ async def analyze_pr(self, pr_diff: str) -> Dict:
for pattern, description in self.forbidden_patterns:
matches = re.finditer(pattern, pr_diff, re.IGNORECASE | re.MULTILINE)
for match in matches:
violation = {
violation: Dict[str, Any] = {
"type": "forbidden_pattern",
"description": f"[Security Reject] 检测到越权调用: {description}",
"line_number": pr_diff[:match.start()].count('\n') + 1,
Expand Down Expand Up @@ -95,13 +95,13 @@ async def analyze_pr(self, pr_diff: str) -> Dict:
for node in ast.walk(tree):
if isinstance(node, ast.Call):
if self._is_dangerous_call(node):
violation = {
dangerous_violation: Dict[str, Any] = {
"type": "dangerous_call",
"description": f"[Security Reject] 检测到危险函数调用: {ast.dump(node)}",
"line_number": node.lineno,
"severity": SecurityLevel.LOW.value,
}
result["violations"].append(violation)
result["violations"].append(dangerous_violation)
result["safe"] = False
except SyntaxError as e:
# 语法错误不一定是安全问题,可能只是 diff 格式问题
Expand All @@ -118,9 +118,9 @@ async def analyze_pr(self, pr_diff: str) -> Dict:

return result

def _analyze_ast(self, tree: ast.AST) -> Dict:
def _analyze_ast(self, tree: ast.AST) -> Dict[str, List[str]]:
"""AST 深度分析"""
analysis = {
analysis: Dict[str, List[str]] = {
"imports": [],
"function_calls": [],
"modifications": [],
Expand Down
Loading