# openenv.yaml
# Source: Rohan5commit/openenv-code-review-arena (branch: main)
---
# Top-level manifest metadata for the code_review_env environment.
spec_version: 1
name: code_review_env
type: space
# NOTE(review): runtime/app/port presumably tell the host how to start and
# reach the server (app looks like a module:attribute path) - confirm against
# the consuming tool's manifest reference.
runtime: fastapi
app: 'server.app:app'
port: 8000
description: >-
  Interactive PR code review environment for security, correctness, and
  false-positive benchmarking.
# Review tasks declared by this manifest. Every entry carries an id, a
# one-line description, and a difficulty (easy, medium, or hard in this list).
tasks:
  - id: authz_admin_export
    description: 'Broken access control on tenant audit export.'
    difficulty: medium
  - id: sql_injection_report_filters
    description: 'SQL injection in a revenue report helper.'
    difficulty: medium
  - id: path_traversal_receipts
    description: 'Filesystem path traversal in receipt download handling.'
    difficulty: medium
  - id: ssrf_webhook_preview
    description: 'Server-side request forgery in webhook previewing.'
    difficulty: hard
  - id: jwt_exp_disabled
    description: 'Subtle JWT validation regressions in token parsing.'
    difficulty: hard
  - id: wallet_race_condition
    description: 'Concurrent money movement bug in wallet transfers.'
    difficulty: hard
  - id: frontend_xss_preview
    description: 'Client-side XSS via unsanitized markdown preview.'
    difficulty: medium
  # Per its description, this one is a clean refactor meant to penalize
  # false positives rather than to hide a defect.
  - id: safe_logging_refactor
    description: 'Clean refactor task designed to punish false positives.'
    difficulty: easy
# Schema of the action object. Only action_type is listed under `required`;
# every other property is optional as far as this schema is concerned.
action_space:
  type: object
  description: >-
    Structured review interaction over changed files and final rubric
    submission.
  properties:
    # Selects which of the four declared actions is being requested.
    action_type:
      type: string
      enum:
        - list_files
        - inspect_file
        - search_code
        - submit_review
    file_path:
      type: string
      description: 'Changed file path to inspect.'
    view_mode:
      type: string
      enum:
        - diff
        - full
    # NOTE(review): start_line/end_line presumably bound the inspected range
    # for inspect_file - confirm against the server implementation.
    start_line:
      type: integer
    end_line:
      type: integer
    # NOTE(review): presumably the search text for search_code - confirm.
    query:
      type: string
    # NOTE(review): presumably supplied together with submit_review - confirm.
    # The schema declares no required fields for a finding and no bounds on
    # confidence.
    findings:
      type: array
      items:
        type: object
        properties:
          file_path:
            type: string
          line_start:
            type: integer
          line_end:
            type: integer
          severity:
            type: string
            enum:
              - low
              - medium
              - high
              - critical
          category:
            type: string
          title:
            type: string
          explanation:
            type: string
          confidence:
            type: number
  required:
    - action_type
# Schema of the observation object. No `required` list is declared here, so
# the schema itself does not mark any of these properties as mandatory.
observation_space:
  type: object
  description: >-
    Episode state, rendered code context, and final scorecard.
  properties:
    reward:
      type: number
    done:
      type: boolean
    phase:
      type: string
    # NOTE(review): task_id and difficulty presumably mirror the matching
    # entry under `tasks` - confirm against the server implementation.
    task_id:
      type: string
    difficulty:
      type: string
    displayed_content:
      type: string
    action_result:
      type: string
    attempts_remaining:
      type: integer
    # Declared only as a generic object; its inner fields are not specified
    # in this manifest.
    scorecard:
      type: object