@@ -3,8 +3,8 @@ name: Run Benchmark
33on :
44 workflow_dispatch :
55 inputs :
6- run :
7- description : " Number of times to run the benchmark "
6+ agent :
7+ description : " Agent to use "
88 required : true
99 type : string
1010 model :
3434 strategy :
3535 matrix :
3636 task : ${{ fromJson(needs.prepare.outputs.tasks) }}
37+ run : [1, 2, 3]
3738 environment : production
3839 steps :
3940 - name : Checkout repository
@@ -52,16 +53,110 @@ jobs:
5253
5354 - name : Print benchmark config
5455 env :
55- RUN_COUNT : ${{ inputs.run }}
5656 MODEL : ${{ inputs.model }}
5757 TASK : ${{ matrix.task }}
58+ RUN : ${{ matrix.run }}
5859 run : |
59- echo "Run count: ${RUN_COUNT}"
6060 echo "Model: ${MODEL}"
6161 echo "Task: ${TASK}"
62+ echo "Run: ${RUN}"
6263
6364 - name : Run benchmark
6465 env :
6566 OPENCODE_API_KEY : ${{ secrets.OPENCODE_API_KEY }}
6667 DEBUG : true
67- run : bun dev opencode --task ${{ matrix.task }} --model ${{ inputs.model }}
68+ TASK : ${{ matrix.task }}
69+ MODEL : ${{ inputs.model }}
70+ AGENT : ${{ inputs.agent }}
71+ RESULT_PATH : result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run${{ matrix.run }}.json
72+ run : bun github/run.ts
73+
- name: Upload benchmark results
  uses: actions/upload-artifact@v4
  with:
    # Artifact name and file path share one stem: task, model, agent and
    # matrix run number. The path matches RESULT_PATH of the "Run benchmark" step.
    name: result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run${{ matrix.run }}
    path: result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run${{ matrix.run }}.json
    # The action's default is only a warning when the file is missing; fail
    # here so a benchmark that produced no result file is reported by this
    # job rather than by a later artifact download.
    if-no-files-found: error
79+
summarize-runs:
  # For each task, downloads the result artifacts of runs 1-3 and passes them
  # to github/summarize-runs.ts, then uploads the resulting runs-summary file.
  #
  # `prepare` must be listed in `needs` alongside `benchmark`: the matrix
  # below reads needs.prepare.outputs.tasks, and the `needs` context only
  # exposes jobs that are direct dependencies of this job.
  needs: [prepare, benchmark]
  runs-on: ubuntu-latest
  strategy:
    matrix:
      task: ${{ fromJson(needs.prepare.outputs.tasks) }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Setup Bun
      uses: oven-sh/setup-bun@v1
      with:
        bun-version: 1.2.21

    - name: Install dependencies
      run: bun install

    # One download per run number; these must stay in sync with the
    # `run: [1, 2, 3]` matrix axis of the benchmark job.
    - name: Download run 1 results
      uses: actions/download-artifact@v4
      with:
        name: result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run1
        path: results

    - name: Download run 2 results
      uses: actions/download-artifact@v4
      with:
        name: result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run2
        path: results

    - name: Download run 3 results
      uses: actions/download-artifact@v4
      with:
        name: result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run3
        path: results

    - name: Summarize runs
      env:
        # Comma-separated list of the three downloaded result files (no spaces).
        RESULT_PATHS: results/result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run1.json,results/result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run2.json,results/result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run3.json
        # Same file that the upload step below publishes; presumably written
        # by the script - confirm in github/summarize-runs.ts.
        RUNS_SUMMARY_PATH: runs-summary-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}.json
      run: bun github/summarize-runs.ts

    - name: Upload runs summary
      uses: actions/upload-artifact@v4
      with:
        name: runs-summary-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}
        path: runs-summary-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}.json
127+
summarize-tasks:
  # Runs after every summarize-runs matrix job: downloads all runs-summary-*
  # artifacts, passes their paths to github/summarize-tasks.ts, and uploads
  # tasks-summary.json.
  needs: summarize-runs
  runs-on: ubuntu-latest
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Setup Bun
      uses: oven-sh/setup-bun@v1
      with:
        bun-version: 1.2.21

    - name: Install dependencies
      run: bun install

    - name: Download all runs summaries
      uses: actions/download-artifact@v4
      with:
        pattern: runs-summary-*
        path: runs-summaries

    - name: Summarize tasks
      env:
        TASKS_SUMMARY_PATH: tasks-summary.json
      run: |
        # Build the comma-separated path list in the shell: env values are
        # not glob-expanded, so the list cannot be declared under `env`.
        # Sorting keeps the order stable between workflow runs.
        RUNS_SUMMARY_PATHS="$(find runs-summaries -name 'runs-summary-*.json' | LC_ALL=C sort | paste -sd, -)"
        export RUNS_SUMMARY_PATHS
        bun github/summarize-tasks.ts

    - name: Upload tasks summary
      uses: actions/upload-artifact@v4
      with:
        name: tasks-summary
        path: tasks-summary.json
0 commit comments