sourcegraph
diff --git a/‎benchmarks/ccb_mcp_compliance/ccx-compliance-052/environment/Dockerfile‎
Lines changed: 30 additions & 0 deletions b/‎benchmarks/ccb_mcp_compliance/ccx-compliance-052/environment/Dockerfile‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎benchmarks/ccb_mcp_compliance/ccx-compliance-052/environment/Dockerfile.artifact_only‎
Lines changed: 31 additions & 0 deletions b/‎benchmarks/ccb_mcp_compliance/ccx-compliance-052/environment/Dockerfile.artifact_only‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎benchmarks/ccb_mcp_compliance/ccx-compliance-052/environment/Dockerfile.sg_only‎
Lines changed: 33 additions & 0 deletions b/‎benchmarks/ccb_mcp_compliance/ccx-compliance-052/environment/Dockerfile.sg_only‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎benchmarks/ccb_mcp_compliance/ccx-compliance-052/instruction.md‎
Lines changed: 45 additions & 0 deletions b/‎benchmarks/ccb_mcp_compliance/ccx-compliance-052/instruction.md‎
Lines changed: 45 additions & 0 deletions
diff --git a/‎benchmarks/ccb_mcp_compliance/ccx-compliance-052/instruction_mcp.md‎
Lines changed: 120 additions & 0 deletions b/‎benchmarks/ccb_mcp_compliance/ccx-compliance-052/instruction_mcp.md‎
Lines changed: 120 additions & 0 deletions
diff --git a/‎benchmarks/ccb_mcp_compliance/ccx-compliance-052/task.toml‎
Lines changed: 28 additions & 0 deletions b/‎benchmarks/ccb_mcp_compliance/ccx-compliance-052/task.toml‎
Lines changed: 28 additions & 0 deletions
diff --git a/‎benchmarks/ccb_mcp_compliance/ccx-compliance-052/tests/eval.sh‎
Lines changed: 68 additions & 0 deletions b/‎benchmarks/ccb_mcp_compliance/ccx-compliance-052/tests/eval.sh‎
Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,30 @@
+FROM ubuntu:22.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Toolchain for the baseline image: git and curl (with CA certificates for
+# HTTPS), python3, and a C++ compiler plus make.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        ca-certificates \
+        curl \
+        g++ \
+        git \
+        make \
+        python3 \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /workspace
+
+# Baseline configuration: the agent works from local shallow checkouts of the
+# pinned mirror repositories, one directory per mirror under /workspace.
+# NOTE(review): `git clone --branch` resolves branch or tag names only, so each
+# mirror must publish a ref with the exact name used below — confirm this for
+# the three short-hash refs.
+RUN git clone --branch v1.31.2 --depth 1 \
+    https://github.com/sg-benchmarks/envoy--v1.31.2 \
+    /workspace/envoy--v1.31.2
+RUN git clone --branch 84e84367 --depth 1 \
+    https://github.com/sg-benchmarks/data-plane-api--84e84367 \
+    /workspace/data-plane-api--84e84367
+RUN git clone --branch 71637ad6 --depth 1 \
+    https://github.com/sg-benchmarks/go-control-plane--71637ad6 \
+    /workspace/go-control-plane--71637ad6
+RUN git clone --branch 957dba5e --depth 1 \
+    https://github.com/sg-benchmarks/grpc--957dba5e \
+    /workspace/grpc--957dba5e
+
+# Global git settings: an author identity for commits made by the agent, and
+# every directory marked as safe so ownership checks do not block git.
+RUN git config --global user.email "agent@example.com" \
+    && git config --global user.name "Agent" \
+    && git config --global safe.directory '*'
+
+# Directories for agent and verifier logs
+RUN mkdir -p /logs/agent /logs/verifier
+
+# Explicitly empty entrypoint; the container command is presumably supplied by
+# the harness that runs this image.
+ENTRYPOINT []
@@ -0,0 +1,31 @@
+# CCX-compliance-052 — artifact_only variant
+# The image ships no repository checkout: the agent reaches code exclusively
+# through Sourcegraph MCP, writes an answer.json artifact, and the verifier
+# scores that artifact.
+
+FROM ubuntu:22.04
+
+ENV DEBIAN_FRONTEND=noninteractive \
+    SOURCEGRAPH_REPOS="sg-benchmarks/envoy--v1.31.2,sg-benchmarks/data-plane-api--84e84367,sg-benchmarks/go-control-plane--71637ad6,sg-benchmarks/grpc--957dba5e"
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        ca-certificates \
+        curl \
+        git \
+        python3 \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /workspace
+
+# Start from an empty git repository in /workspace. The agent identity is
+# stored in that repository's own config; safe.directory is set globally.
+RUN git init \
+    && git config --local user.email "agent@example.com" \
+    && git config --local user.name "Agent" \
+    && git config --global safe.directory '*'
+
+# Log directories, plus the marker file that signals artifact-only mode to
+# verifiers and eval scripts.
+RUN mkdir -p /logs/agent /logs/verifier \
+    && touch /tmp/.artifact_only_mode
+
+ENTRYPOINT []
@@ -0,0 +1,33 @@
+# CCX-compliance-052 — sg_only variant
+# The image ships no repository checkout: the agent reaches code exclusively
+# through Sourcegraph MCP. Mirror repos are cloned by the verifier at
+# verification time (there is no /repo_full/ backup).
+
+FROM ubuntu:22.04
+
+ENV DEBIAN_FRONTEND=noninteractive \
+    SOURCEGRAPH_REPOS="sg-benchmarks/envoy--v1.31.2,sg-benchmarks/data-plane-api--84e84367,sg-benchmarks/go-control-plane--71637ad6,sg-benchmarks/grpc--957dba5e"
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        ca-certificates \
+        curl \
+        git \
+        python3 \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /workspace
+
+# Start from an empty git repository in /workspace. The agent identity is
+# stored in that repository's own config; safe.directory is set globally.
+RUN git init \
+    && git config --local user.email "agent@example.com" \
+    && git config --local user.name "Agent" \
+    && git config --global safe.directory '*'
+
+# Log directories, plus the marker file that eval.sh tests for to detect
+# sg_only mode.
+RUN mkdir -p /logs/agent /logs/verifier \
+    && touch /tmp/.sg_only_mode
+
+# Clone manifest: the workdir and, for each mirror, the directory name it
+# should be cloned into. Presumably read by the sg_only verifier wrapper —
+# confirm against that script.
+RUN printf '%s\n' '{"workdir":"/workspace","repos":[{"mirror":"sg-benchmarks/envoy--v1.31.2","target_dir":"envoy"},{"mirror":"sg-benchmarks/data-plane-api--84e84367","target_dir":"data-plane-api"},{"mirror":"sg-benchmarks/go-control-plane--71637ad6","target_dir":"go-control-plane"},{"mirror":"sg-benchmarks/grpc--957dba5e","target_dir":"grpc"}]}' > /tmp/.sg_only_clone_manifest.json
+
+ENTRYPOINT []
@@ -0,0 +1,45 @@
+# Compliance Audit: Envoy Access Logging Configuration Coverage
+
+## Your Task
+
+Audit the access logging infrastructure in `envoyproxy/envoy`. Find all C++ source files under `source/extensions/access_loggers/` and `source/common/access_log/` that define access log filter implementations or access log sink implementations. Also find the corresponding `.proto` definitions in `envoyproxy/data-plane-api` under `envoy/extensions/access_loggers/`. For each implementation, report whether it supports structured (JSON) logging or only unstructured text logging.
+
+## Context
+
+You are working on a codebase task involving repos from the compliance domain.
+
+## Available Resources
+
+The local `/workspace/` directory contains: sg-benchmarks/envoy--v1.31.2, sg-benchmarks/data-plane-api--84e84367, sg-benchmarks/go-control-plane--71637ad6, sg-benchmarks/grpc--957dba5e.
+
+**Note:** Additional repositories are accessible via Sourcegraph MCP tools:
+- `sg-benchmarks/envoy--v1.31.2` (envoyproxy/envoy)
+- `sg-benchmarks/data-plane-api--84e84367` (envoyproxy/data-plane-api)
+- `sg-benchmarks/go-control-plane--71637ad6` (envoyproxy/go-control-plane)
+- `sg-benchmarks/grpc--957dba5e` (grpc/grpc)
+
+## Output Format
+
+Create a file at `/workspace/answer.json` with your findings in the following structure:
+
+```json
+{
+  "files": [
+    {"repo": "org/repo-name", "path": "relative/path/to/file.cpp"}
+  ],
+  "symbols": [
+    {"repo": "org/repo-name", "path": "relative/path/to/file.cpp", "symbol": "SymbolName"}
+  ],
+  "chain": [
+    {"repo": "org/repo-name", "path": "relative/path/to/file.cpp", "symbol": "FunctionName"}
+  ],
+  "text": "Narrative explanation of your findings, citing repos and file paths."
+}
+```
+
+Include only the fields relevant to this task. Your answer is evaluated against a closed-world oracle — completeness matters.
+
+## Evaluation
+
+Your answer will be scored on:
+- **File recall and precision**: Did you find all relevant files?
@@ -0,0 +1,120 @@
+# IMPORTANT: Source Code Access
+
+**Local source files are not present.** Your workspace does not contain source code. You **MUST** use Sourcegraph MCP tools to discover, read, and understand code before making any changes.
+
+**Target Repositories (version-pinned mirrors):**
+
+- `github.com/sg-benchmarks/envoy--v1.31.2` — use `repo:^github.com/sg-benchmarks/envoy--v1.31.2$` filter
+- `github.com/sg-benchmarks/data-plane-api--84e84367` — use `repo:^github.com/sg-benchmarks/data-plane-api--84e84367$` filter
+- `github.com/sg-benchmarks/go-control-plane--71637ad6` — use `repo:^github.com/sg-benchmarks/go-control-plane--71637ad6$` filter
+- `github.com/sg-benchmarks/grpc--957dba5e` — use `repo:^github.com/sg-benchmarks/grpc--957dba5e$` filter
+
+Scope ALL `sg_keyword_search`/`sg_nls_search` queries to these repos.
+Use the repo name as the `repo` parameter for `sg_read_file`/`sg_go_to_definition`/`sg_find_references`.
+
+
+## Required Workflow
+
+1. **Search first** — Use MCP tools to find relevant files and understand existing patterns
+2. **Read remotely** — Use `sg_read_file` to read full file contents from Sourcegraph
+3. **Edit locally** — Use Edit, Write, and Bash to create or modify files in your working directory
+4. **Verify locally** — Run tests with Bash to check your changes
+
+## Tool Selection
+
+| Goal | Tool |
+|------|------|
+| Exact symbol/string | `sg_keyword_search` |
+| Concepts/semantic search | `sg_nls_search` |
+| Trace usage/callers | `sg_find_references` |
+| See implementation | `sg_go_to_definition` |
+| Read full file | `sg_read_file` |
+| Browse structure | `sg_list_files` |
+| Find repos | `sg_list_repos` |
+| Search commits | `sg_commit_search` |
+| Track changes | `sg_diff_search` |
+| Compare versions | `sg_compare_revisions` |
+
+**Decision logic:**
+1. Know the exact symbol? -> `sg_keyword_search`
+2. Know the concept, not the name? -> `sg_nls_search`
+3. Need definition of a symbol? -> `sg_go_to_definition`
+4. Need all callers/references? -> `sg_find_references`
+5. Need full file content? -> `sg_read_file`
+
+## Scoping (Always Do This)
+
+```
+repo:^github.com/ORG/REPO$           # Exact repo (preferred)
+repo:github.com/ORG/                 # All repos in org
+file:.*\.ts$                         # TypeScript only
+file:src/api/                        # Specific directory
+```
+
+Start narrow. Expand only if results are empty.
+
+## Efficiency Rules
+
+- Chain searches logically: search -> read -> references -> definition
+- Don't re-search for the same pattern; use results from prior calls
+- Prefer `sg_keyword_search` over `sg_nls_search` when you have exact terms
+- Read 2-3 related files before synthesising, rather than one at a time
+- Don't read 20+ remote files without writing code — once you understand the pattern, start implementing
+
+## If Stuck
+
+If MCP search returns no results:
+1. Broaden the search query (synonyms, partial identifiers)
+2. Try `sg_nls_search` for semantic matching
+3. Use `sg_list_files` to browse the directory structure
+4. Use `sg_list_repos` to verify the repository name
+
+---
+
+**Sourcegraph Repositories:** `github.com/sg-benchmarks/envoy--v1.31.2`, `github.com/sg-benchmarks/data-plane-api--84e84367`, `github.com/sg-benchmarks/go-control-plane--71637ad6`, `github.com/sg-benchmarks/grpc--957dba5e`
+
+# Compliance Audit: Envoy Access Logging Configuration Coverage
+
+## Your Task
+
+Audit the access logging infrastructure in `envoyproxy/envoy`. Find all C++ source files under `source/extensions/access_loggers/` and `source/common/access_log/` that define access log filter implementations or access log sink implementations. Also find the corresponding `.proto` definitions in `envoyproxy/data-plane-api` under `envoy/extensions/access_loggers/`. For each implementation, report whether it supports structured (JSON) logging or only unstructured text logging.
+
+## Context
+
+You are working on a codebase task involving repos from the compliance domain.
+
+## Available Resources
+
+The local `/workspace/` directory contains no repository checkouts; use it only as your working directory (for example, to write `answer.json`).
+
+**Note:** The target repositories are accessible via Sourcegraph MCP tools:
+- `sg-benchmarks/envoy--v1.31.2` (envoyproxy/envoy)
+- `sg-benchmarks/data-plane-api--84e84367` (envoyproxy/data-plane-api)
+- `sg-benchmarks/go-control-plane--71637ad6` (envoyproxy/go-control-plane)
+- `sg-benchmarks/grpc--957dba5e` (grpc/grpc)
+
+## Output Format
+
+Create a file at `/workspace/answer.json` with your findings in the following structure:
+
+```json
+{
+  "files": [
+    {"repo": "org/repo-name", "path": "relative/path/to/file.cpp"}
+  ],
+  "symbols": [
+    {"repo": "org/repo-name", "path": "relative/path/to/file.cpp", "symbol": "SymbolName"}
+  ],
+  "chain": [
+    {"repo": "org/repo-name", "path": "relative/path/to/file.cpp", "symbol": "FunctionName"}
+  ],
+  "text": "Narrative explanation of your findings, citing repos and file paths."
+}
+```
+
+Include only the fields relevant to this task. Your answer is evaluated against a closed-world oracle — completeness matters.
+
+## Evaluation
+
+Your answer will be scored on:
+- **File recall and precision**: Did you find all relevant files?
@@ -0,0 +1,28 @@
+# Task manifest for the CCX-compliance-052 benchmark task.
+version = "1.0"
+
+# Human-readable identification of the task.
+[metadata]
+name = "CCX-compliance-052"
+description = "Compliance Audit: Envoy Access Logging Configuration Coverage"
+license = "Apache-2.0"
+
+# Task identity and classification.
+[task]
+id = "CCX-compliance-052"
+# Primary mirror repository; the task instructions reference three further
+# mirrors (data-plane-api, go-control-plane, grpc).
+repo = "sg-benchmarks/envoy--v1.31.2"
+category = "compliance-audit"
+language = "c++"
+difficulty = "hard"
+# presumably the agent's time budget in seconds — confirm against the harness
+time_limit_sec = 900
+mcp_suite = "ccb_mcp_compliance"
+use_case_id = 52
+repo_set_id = "envoy-service-mesh"
+mcp_unique = true
+
+# Scoring: the command runs tests/eval.sh, which writes the composite score to
+# /logs/verifier/reward.txt.
+[verification]
+type = "test"
+command = "bash /tests/eval.sh"
+
+reward_type = "score"
+description = "Compliance Audit: Envoy Access Logging Configuration Coverage"
+
+[environment]
+# presumably the image build limit in seconds — confirm against the harness
+build_timeout_sec = 600.0
@@ -0,0 +1,68 @@
+#!/bin/bash
+# eval.sh — MCP-unique benchmark evaluator for CCX-compliance-052
+# Exit-code-first (SWE-Factory pattern):
+#   exit 0 — agent produced useful output (composite score > 0)
+#   exit 1 — total failure (composite score == 0 or missing answer)
+#
+# Writes /logs/verifier/reward.txt with the composite score [0.0, 1.0]
+#
+# Flow: check that answer.json exists and parses as JSON, run oracle_checks.py
+# on it, take the last line of the oracle output as the score, record it, and
+# exit according to whether the score is positive. A missing or invalid
+# answer, a missing oracle, or an unusable score records a reward of 0.0 and
+# exits 1.
+
+set -euo pipefail
+
+TASK_ID="CCX-compliance-052"
+ANSWER_PATH="/workspace/answer.json"
+TASK_SPEC_PATH="/tests/task_spec.json"
+ORACLE_CHECKS="/tests/oracle_checks.py"
+REWARD_PATH="/logs/verifier/reward.txt"
+
+mkdir -p /logs/verifier
+
+echo "=== $TASK_ID evaluator ==="
+echo "Task spec: $TASK_SPEC_PATH"
+echo "Answer:    $ANSWER_PATH"
+echo ""
+
+# sg_only mode guard: restore full repo if verifier wrapper exists
+if [ -f /tmp/.sg_only_mode ] && [ -f /tests/sgonly_verifier_wrapper.sh ]; then
+    echo "sg_only mode: sourcing verifier wrapper..."
+    source /tests/sgonly_verifier_wrapper.sh
+fi
+
+# Verify answer file exists
+if [ ! -f "$ANSWER_PATH" ]; then
+    echo "ERROR: answer.json not found at $ANSWER_PATH"
+    echo "0.0" > "$REWARD_PATH"
+    exit 1
+fi
+
+# Validate answer is valid JSON. The path is passed as an argument rather than
+# interpolated into the Python source, so quoting in the path cannot break it.
+if ! python3 -c "import json, sys; json.load(open(sys.argv[1]))" "$ANSWER_PATH" 2>/dev/null; then
+    echo "ERROR: answer.json is not valid JSON"
+    echo "0.0" > "$REWARD_PATH"
+    exit 1
+fi
+
+echo "answer.json found and valid JSON"
+
+# Run oracle checks
+if [ ! -f "$ORACLE_CHECKS" ]; then
+    echo "ERROR: oracle_checks.py not found at $ORACLE_CHECKS"
+    echo "0.0" > "$REWARD_PATH"
+    exit 1
+fi
+
+echo "Running oracle checks..."
+# The oracle output is echoed to stderr for the log; its last line is the score.
+# `|| true`: with `set -e` and pipefail, a non-zero oracle exit would abort the
+# script on this line before any reward is written. The numeric validation
+# below decides the outcome instead.
+SCORE=$(python3 "$ORACLE_CHECKS" --answer "$ANSWER_PATH" --spec "$TASK_SPEC_PATH" --verbose 2>&1 | tee /dev/stderr | tail -1) || true
+
+# Validate score is a number
+if ! echo "$SCORE" | python3 -c "import sys; float(sys.stdin.read().strip())" 2>/dev/null; then
+    echo "ERROR: oracle_checks.py did not return a valid score: $SCORE"
+    echo "0.0" > "$REWARD_PATH"
+    exit 1
+fi
+
+echo ""
+echo "Composite score: $SCORE"
+echo "$SCORE" > "$REWARD_PATH"
+
+# Exit based on score (SWE-Factory exit-code-first pattern)
+python3 -c "import sys; sys.exit(0 if float(sys.argv[1]) > 0 else 1)" "$SCORE"