# openenv.yaml
# Top-level manifest fields: spec version, environment name, type, runtime,
# application reference, port, and a human-readable description.
spec_version: 1
name: code_review_env
type: space
runtime: fastapi
app: 'server.app:app'
port: 8000
# Folded scalar: the two lines below are joined with a single space and the
# trailing newline is stripped, yielding one single-line string.
description: >-
  Interactive PR code review environment for security, correctness, and
  false-positive benchmarking.
# Review tasks offered by this environment. Every entry is a mapping with an
# `id`, a short `description`, and a `difficulty` label (easy, medium, hard).
tasks:
  - id: authz_admin_export
    description: "Broken access control on tenant audit export."
    difficulty: medium
  - id: sql_injection_report_filters
    description: "SQL injection in a revenue report helper."
    difficulty: medium
  - id: path_traversal_receipts
    description: "Filesystem path traversal in receipt download handling."
    difficulty: medium
  - id: ssrf_webhook_preview
    description: "Server-side request forgery in webhook previewing."
    difficulty: hard
  - id: jwt_exp_disabled
    description: "Subtle JWT validation regressions in token parsing."
    difficulty: hard
  - id: wallet_race_condition
    description: "Concurrent money movement bug in wallet transfers."
    difficulty: hard
  - id: frontend_xss_preview
    description: "Client-side XSS via unsanitized markdown preview."
    difficulty: medium
  - id: safe_logging_refactor
    description: "Clean refactor task designed to punish false positives."
    difficulty: easy
# Schema of an action sent to the environment. Only `action_type` is
# required; every other property is optional at the schema level.
action_space:
  type: object
  description: "Structured review interaction over changed files and final rubric submission."
  properties:
    # Selects which of the four supported actions is being performed.
    action_type:
      type: string
      enum: [list_files, inspect_file, search_code, submit_review]
    file_path:
      type: string
      description: "Changed file path to inspect."
    view_mode:
      type: string
      enum: [diff, full]
    start_line:
      type: integer
    end_line:
      type: integer
    query:
      type: string
    # List of review findings; each item is an object with the fields below.
    findings:
      type: array
      items:
        type: object
        properties:
          file_path:
            type: string
          line_start:
            type: integer
          line_end:
            type: integer
          severity:
            type: string
            enum: [low, medium, high, critical]
          category:
            type: string
          title:
            type: string
          explanation:
            type: string
          confidence:
            type: number
  # Applies to the top-level action object (action_type is defined there).
  required: [action_type]
# Schema of an observation returned by the environment. Each property
# declares only its type; no property is marked as required here.
observation_space:
  type: object
  description: "Episode state, rendered code context, and final scorecard."
  properties:
    reward:
      type: number
    done:
      type: boolean
    phase:
      type: string
    task_id:
      type: string
    difficulty:
      type: string
    displayed_content:
      type: string
    action_result:
      type: string
    attempts_remaining:
      type: integer
    # Declared as a free-form object; its inner fields are not specified here.
    scorecard:
      type: object