Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
1313 commits
Select commit Hold shift + click to select a range
62f0740
more
fzyzcjy Jan 17, 2026
c8bebbc
more
fzyzcjy Jan 17, 2026
6c80b1c
more
fzyzcjy Jan 17, 2026
881d5a3
more
fzyzcjy Jan 17, 2026
25819c8
more
fzyzcjy Jan 17, 2026
a34fd40
Revert "more"
fzyzcjy Jan 17, 2026
73a8ad4
Revert "more"
fzyzcjy Jan 17, 2026
887814b
more
fzyzcjy Jan 17, 2026
8d00a0c
more
fzyzcjy Jan 17, 2026
5832c22
more
fzyzcjy Jan 17, 2026
8bd8459
more
fzyzcjy Jan 17, 2026
90c934a
more
fzyzcjy Jan 17, 2026
5a15002
more
fzyzcjy Jan 17, 2026
80ba47d
more
fzyzcjy Jan 17, 2026
885aff8
more
fzyzcjy Jan 17, 2026
075aa93
more
fzyzcjy Jan 17, 2026
8d50974
more
fzyzcjy Jan 17, 2026
6651808
more
fzyzcjy Jan 17, 2026
546db91
more
fzyzcjy Jan 17, 2026
ef42bd4
more
fzyzcjy Jan 17, 2026
8d469d2
more
fzyzcjy Jan 17, 2026
b0bd948
more
fzyzcjy Jan 17, 2026
d1ab853
more
fzyzcjy Jan 17, 2026
d745520
more
fzyzcjy Jan 17, 2026
d3b9088
more
fzyzcjy Jan 17, 2026
5379bb9
more
fzyzcjy Jan 17, 2026
6aab421
revert
fzyzcjy Jan 17, 2026
9988ce0
more
fzyzcjy Jan 17, 2026
ec5334d
more
fzyzcjy Jan 17, 2026
0bd4900
more
fzyzcjy Jan 17, 2026
f7afe9e
more
fzyzcjy Jan 17, 2026
ccb96a1
more
fzyzcjy Jan 17, 2026
ec14934
more
fzyzcjy Jan 17, 2026
017e56b
more
fzyzcjy Jan 17, 2026
c7c6c80
more
fzyzcjy Jan 17, 2026
acbe421
more
fzyzcjy Jan 17, 2026
b3d4a7c
more
fzyzcjy Jan 17, 2026
bedf4c6
more
fzyzcjy Jan 17, 2026
d3009e1
re
fzyzcjy Jan 17, 2026
3a58c37
more
fzyzcjy Jan 17, 2026
53d517f
more
fzyzcjy Jan 17, 2026
c119bb1
more
fzyzcjy Jan 17, 2026
e16a2f4
more
fzyzcjy Jan 17, 2026
24f3a53
more
fzyzcjy Jan 17, 2026
66a8080
more
fzyzcjy Jan 17, 2026
8da2b26
more
fzyzcjy Jan 17, 2026
97938c6
more
fzyzcjy Jan 17, 2026
42c1909
rm
fzyzcjy Jan 17, 2026
9dd92c5
Revert "rm"
fzyzcjy Jan 17, 2026
2c3342d
Merge branch 'feat/ac8113ae' into feat/ac8113_temp
fzyzcjy Jan 17, 2026
48bc432
more
fzyzcjy Jan 17, 2026
0cf6887
more
fzyzcjy Jan 17, 2026
eaec817
more
fzyzcjy Jan 17, 2026
546057e
more
fzyzcjy Jan 17, 2026
975bf63
more
fzyzcjy Jan 17, 2026
8135d11
more
fzyzcjy Jan 17, 2026
a9832a2
more
fzyzcjy Jan 17, 2026
b02bf22
more
fzyzcjy Jan 17, 2026
7438e8d
more
fzyzcjy Jan 17, 2026
f762a16
more
fzyzcjy Jan 17, 2026
3c84384
more
fzyzcjy Jan 17, 2026
1656021
fmt
fzyzcjy Jan 17, 2026
b117f1c
more
fzyzcjy Jan 17, 2026
dbf3c9b
more
fzyzcjy Jan 17, 2026
97a4627
more
fzyzcjy Jan 17, 2026
ac83f23
cp
fzyzcjy Jan 17, 2026
81374b2
cp
fzyzcjy Jan 17, 2026
a424559
cp
fzyzcjy Jan 17, 2026
60c87ce
Merge branch 'feat/ac8113ah' into feat/ac8113_temp
fzyzcjy Jan 17, 2026
20b106d
more
fzyzcjy Jan 17, 2026
c784f1d
cp
fzyzcjy Jan 17, 2026
098b322
cp
fzyzcjy Jan 17, 2026
e92c4ab
cp
fzyzcjy Jan 17, 2026
d713a40
cp
fzyzcjy Jan 17, 2026
01dfaa5
cp
fzyzcjy Jan 17, 2026
2b3f376
Merge branch 'feat/ac8113al' into feat/ac8113_temp
fzyzcjy Jan 17, 2026
e05023a
more
fzyzcjy Jan 17, 2026
30d4fd4
more
fzyzcjy Jan 17, 2026
56a888c
more
fzyzcjy Jan 17, 2026
ba3e70f
more
fzyzcjy Jan 17, 2026
3a84f37
more
fzyzcjy Jan 17, 2026
6d2fb04
more
fzyzcjy Jan 17, 2026
cd60df5
fmt
fzyzcjy Jan 17, 2026
00ffc94
cp
fzyzcjy Jan 17, 2026
c2d133a
Merge branch 'feat/ac8113am' into feat/ac8113_temp
fzyzcjy Jan 17, 2026
13e3b3c
more
fzyzcjy Jan 17, 2026
046eafc
more
fzyzcjy Jan 17, 2026
0d78906
fmt
fzyzcjy Jan 17, 2026
eb1a584
more
fzyzcjy Jan 17, 2026
e8b0c56
more
fzyzcjy Jan 17, 2026
65b03df
more
fzyzcjy Jan 17, 2026
05f8d42
more
fzyzcjy Jan 17, 2026
9237a31
more
fzyzcjy Jan 17, 2026
c2c9a06
more
fzyzcjy Jan 17, 2026
5915d18
more
fzyzcjy Jan 17, 2026
6f15aa3
more
fzyzcjy Jan 17, 2026
72d1e9c
more
fzyzcjy Jan 17, 2026
6631df3
more
fzyzcjy Jan 17, 2026
9253b29
more
fzyzcjy Jan 17, 2026
db50646
more
fzyzcjy Jan 17, 2026
5721502
more
fzyzcjy Jan 17, 2026
656321c
more
fzyzcjy Jan 17, 2026
947ba76
more
fzyzcjy Jan 17, 2026
41c6302
more
fzyzcjy Jan 17, 2026
eca406d
more
fzyzcjy Jan 17, 2026
ff33cb2
more
fzyzcjy Jan 17, 2026
3dfe89d
more
fzyzcjy Jan 17, 2026
88c8ad7
fmt
fzyzcjy Jan 17, 2026
48a37aa
more
fzyzcjy Jan 17, 2026
3dbec46
more
fzyzcjy Jan 17, 2026
6d394fa
more
fzyzcjy Jan 17, 2026
372ffd5
more
fzyzcjy Jan 17, 2026
be93a54
more
fzyzcjy Jan 17, 2026
31335d5
more
fzyzcjy Jan 17, 2026
292668f
more
fzyzcjy Jan 17, 2026
c8a48be
more
fzyzcjy Jan 17, 2026
f1b7915
more
fzyzcjy Jan 17, 2026
d4c7f1e
more
fzyzcjy Jan 17, 2026
a809b79
more
fzyzcjy Jan 17, 2026
dac1fef
more
fzyzcjy Jan 17, 2026
a1e3c95
more
fzyzcjy Jan 17, 2026
5dcb99b
more
fzyzcjy Jan 17, 2026
dded67b
more
fzyzcjy Jan 17, 2026
e298997
more
fzyzcjy Jan 17, 2026
77f8a31
more
fzyzcjy Jan 17, 2026
2dd83e0
more
fzyzcjy Jan 17, 2026
cc61b8c
more
fzyzcjy Jan 17, 2026
11025b6
more
fzyzcjy Jan 17, 2026
f1ce4df
more
fzyzcjy Jan 17, 2026
c33a5b1
more
fzyzcjy Jan 17, 2026
6dfba51
more
fzyzcjy Jan 17, 2026
1186f99
more
fzyzcjy Jan 17, 2026
abb73f9
more
fzyzcjy Jan 17, 2026
1395976
more
fzyzcjy Jan 17, 2026
b9ed9ba
fmt
fzyzcjy Jan 17, 2026
0935e2a
more
fzyzcjy Jan 17, 2026
531b5c4
more
fzyzcjy Jan 17, 2026
70def71
more
fzyzcjy Jan 17, 2026
77d16b4
more
fzyzcjy Jan 17, 2026
fa02ab0
more
fzyzcjy Jan 17, 2026
55c070b
more
fzyzcjy Jan 17, 2026
b402944
more
fzyzcjy Jan 17, 2026
bd45c05
more
fzyzcjy Jan 17, 2026
fc5bc5c
more
fzyzcjy Jan 17, 2026
01fbcb2
more
fzyzcjy Jan 17, 2026
a73b1bd
more
fzyzcjy Jan 17, 2026
86bf5f1
more
fzyzcjy Jan 17, 2026
940d4af
more
fzyzcjy Jan 17, 2026
78bfeac
more
fzyzcjy Jan 17, 2026
655bac9
more
fzyzcjy Jan 17, 2026
3e5b2c1
more
fzyzcjy Jan 17, 2026
8a5f4f8
more
fzyzcjy Jan 17, 2026
cc86dc4
fmt
fzyzcjy Jan 17, 2026
a7c0f2c
more
fzyzcjy Jan 17, 2026
3f0352a
more
fzyzcjy Jan 17, 2026
b2126a6
more
fzyzcjy Jan 17, 2026
eddeb94
more
fzyzcjy Jan 17, 2026
53113c5
more
fzyzcjy Jan 17, 2026
c80a168
more
fzyzcjy Jan 17, 2026
09d2f01
more
fzyzcjy Jan 17, 2026
27291e5
more
fzyzcjy Jan 17, 2026
e200ac7
more
fzyzcjy Jan 17, 2026
4d7b67c
fmt
fzyzcjy Jan 17, 2026
05661fc
more
fzyzcjy Jan 17, 2026
bcdf944
more
fzyzcjy Jan 17, 2026
dd4712d
more
fzyzcjy Jan 17, 2026
5989c85
more
fzyzcjy Jan 17, 2026
fff1e45
more
fzyzcjy Jan 17, 2026
fed4e4d
more
fzyzcjy Jan 17, 2026
e6514d7
more
fzyzcjy Jan 17, 2026
fa3f955
more
fzyzcjy Jan 17, 2026
651c496
more
fzyzcjy Jan 17, 2026
98094b7
fmt
fzyzcjy Jan 17, 2026
62bfd76
more
fzyzcjy Jan 17, 2026
1152ad2
more
fzyzcjy Jan 17, 2026
71150f2
more
fzyzcjy Jan 17, 2026
82dea15
more
fzyzcjy Jan 17, 2026
8ce49d0
more
fzyzcjy Jan 17, 2026
d192ea6
more
fzyzcjy Jan 17, 2026
c34736a
more
fzyzcjy Jan 17, 2026
092b0de
Revert "more"
fzyzcjy Jan 17, 2026
07ade27
more
fzyzcjy Jan 17, 2026
83993c3
more
fzyzcjy Jan 17, 2026
610e1f3
more
fzyzcjy Jan 17, 2026
4acb639
more
fzyzcjy Jan 17, 2026
9a5d1e2
more
fzyzcjy Jan 17, 2026
2f3cd3a
more
fzyzcjy Jan 17, 2026
0a2c819
fmt
fzyzcjy Jan 17, 2026
9d5d2b7
cp
fzyzcjy Jan 17, 2026
86edb01
cp
fzyzcjy Jan 17, 2026
4072939
cp
fzyzcjy Jan 17, 2026
987f99b
cp
fzyzcjy Jan 17, 2026
1e0d1b4
Merge branch 'feat/ac8113at' into feat/ac8113_temp
fzyzcjy Jan 17, 2026
886caeb
more
fzyzcjy Jan 17, 2026
57cb338
fix
fzyzcjy Jan 18, 2026
78f5688
more
fzyzcjy Jan 18, 2026
1900614
mv
fzyzcjy Jan 18, 2026
ad050b8
more
fzyzcjy Jan 18, 2026
a7f64aa
more
fzyzcjy Jan 18, 2026
b043229
mv
fzyzcjy Jan 18, 2026
a0bf348
more
fzyzcjy Jan 18, 2026
08b2a61
more
fzyzcjy Jan 18, 2026
5f10020
mv
fzyzcjy Jan 18, 2026
e4260f9
mv
fzyzcjy Jan 18, 2026
c27d7de
mv
fzyzcjy Jan 18, 2026
9aeaaf1
more
fzyzcjy Jan 18, 2026
60834cb
more
fzyzcjy Jan 18, 2026
9db4afc
more
fzyzcjy Jan 18, 2026
0971e44
more
fzyzcjy Jan 18, 2026
0ba162e
more
fzyzcjy Jan 18, 2026
6718577
more
fzyzcjy Jan 18, 2026
08605c4
more
fzyzcjy Jan 18, 2026
87a3af3
more
fzyzcjy Jan 18, 2026
c2daacf
fmt
fzyzcjy Jan 18, 2026
6b1bc2e
more
fzyzcjy Jan 18, 2026
fc5ec2c
more
fzyzcjy Jan 18, 2026
b537395
fmt
fzyzcjy Jan 18, 2026
e99d3ae
more
fzyzcjy Jan 18, 2026
8f282c9
more
fzyzcjy Jan 18, 2026
02773ab
more
fzyzcjy Jan 18, 2026
9af19ae
more
fzyzcjy Jan 18, 2026
443ad39
more
fzyzcjy Jan 18, 2026
8ed78e5
more
fzyzcjy Jan 18, 2026
695df96
more
fzyzcjy Jan 18, 2026
45a0259
more
fzyzcjy Jan 18, 2026
219c4e1
merge
fzyzcjy Jan 18, 2026
4b9704f
more
fzyzcjy Jan 18, 2026
bb7deae
more
fzyzcjy Jan 18, 2026
3c4ec84
fix: use pip install instead of large docker image
fzyzcjy Jan 18, 2026
ad996b9
chore: use uv for faster dependency installation
fzyzcjy Jan 18, 2026
091577f
chore: separate pytest from main package installation
fzyzcjy Jan 18, 2026
b269de0
more
fzyzcjy Jan 18, 2026
3b31227
more
fzyzcjy Jan 18, 2026
51dd13f
rm
fzyzcjy Jan 18, 2026
9127f4f
more
fzyzcjy Jan 18, 2026
6ab64c7
more
fzyzcjy Jan 18, 2026
bf0a3b4
more
fzyzcjy Jan 18, 2026
0697448
more
fzyzcjy Jan 18, 2026
d6e522e
more
fzyzcjy Jan 18, 2026
6ab728b
more
fzyzcjy Jan 18, 2026
d964184
fmt
fzyzcjy Jan 18, 2026
775552f
fix: typo args.device -> args.devices
fzyzcjy Jan 18, 2026
083f676
fix: skip gated llama model and fix tool_index expectations
fzyzcjy Jan 18, 2026
3a64139
Merge branch 'feat/ac8113aw' into feat/ac8113ax
fzyzcjy Jan 18, 2026
3689e4f
Merge branch 'feat/ac8113ax' into feat/ac8113ay
fzyzcjy Jan 18, 2026
2711f50
move retrieve_from_text api to middleware
guapisolo Jan 16, 2026
37b4b5f
temporarily give up cross turn inherit
guapisolo Jan 19, 2026
70b963c
fix assistant think problem
guapisolo Jan 19, 2026
4ef014b
small fix
guapisolo Jan 19, 2026
ae13c47
give up because only assistant before last user was cut
guapisolo Jan 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 40 additions & 28 deletions .github/workflows/pr-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,46 @@ concurrency:

jobs:

fast:
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request)
runs-on: self-hosted
container:
image: radixark/miles:latest
options: >
--gpus all
--ipc=host
--shm-size=16g
--ulimit memlock=-1
--ulimit stack=67108864
--memory=0
--memory-swap=0
-v /mnt/nvme0n1/miles_ci:/data/miles_ci
-v /mnt/nvme0n1/miles_ci/models:/root/models
-v /mnt/nvme0n1/miles_ci/datasets:/root/datasets
strategy:
fail-fast: false
matrix:
info: [{"num_gpus": 0, "test_file": "fast"}]
defaults:
run:
working-directory: ${{ github.workspace }}
env:
GITHUB_COMMIT_NAME: ${{ github.sha }}_${{ github.event.pull_request.number || 'non-pr' }}
WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
MILES_TEST_ENABLE_INFINITE_RUN: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.infinite_run) || 'false' }}

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Install
shell: bash
run: cd $GITHUB_WORKSPACE && pip install -e . --no-deps --break-system-packages

- name: Execute
shell: bash
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- pytest tests/${{ matrix.info.test_file }}

e2e-test-short:
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-short'))
runs-on: self-hosted
Expand All @@ -38,10 +78,6 @@ jobs:
--ulimit stack=67108864
--memory=0
--memory-swap=0
-e http_proxy=$http_proxy
-e https_proxy=$https_proxy
-e HTTP_PROXY=$HTTP_PROXY
-e HTTPS_PROXY=$HTTPS_PROXY
-v /mnt/nvme0n1/miles_ci:/data/miles_ci
-v /mnt/nvme0n1/miles_ci/models:/root/models
-v /mnt/nvme0n1/miles_ci/datasets:/root/datasets
Expand Down Expand Up @@ -82,10 +118,6 @@ jobs:
--ulimit stack=67108864
--memory=0
--memory-swap=0
-e http_proxy=$http_proxy
-e https_proxy=$https_proxy
-e HTTP_PROXY=$HTTP_PROXY
-e HTTPS_PROXY=$HTTPS_PROXY
-v /mnt/nvme0n1/miles_ci:/data/miles_ci
-v /mnt/nvme0n1/miles_ci/models:/root/models
-v /mnt/nvme0n1/miles_ci/datasets:/root/datasets
Expand Down Expand Up @@ -126,10 +158,6 @@ jobs:
--ulimit stack=67108864
--memory=0
--memory-swap=0
-e http_proxy=$http_proxy
-e https_proxy=$https_proxy
-e HTTP_PROXY=$HTTP_PROXY
-e HTTPS_PROXY=$HTTPS_PROXY
-v /mnt/nvme0n1/miles_ci:/data/miles_ci
-v /mnt/nvme0n1/miles_ci/models:/root/models
-v /mnt/nvme0n1/miles_ci/datasets:/root/datasets
Expand Down Expand Up @@ -170,10 +198,6 @@ jobs:
--ulimit stack=67108864
--memory=0
--memory-swap=0
-e http_proxy=$http_proxy
-e https_proxy=$https_proxy
-e HTTP_PROXY=$HTTP_PROXY
-e HTTPS_PROXY=$HTTPS_PROXY
-v /mnt/nvme0n1/miles_ci:/data/miles_ci
-v /mnt/nvme0n1/miles_ci/models:/root/models
-v /mnt/nvme0n1/miles_ci/datasets:/root/datasets
Expand Down Expand Up @@ -214,10 +238,6 @@ jobs:
--ulimit stack=67108864
--memory=0
--memory-swap=0
-e http_proxy=$http_proxy
-e https_proxy=$https_proxy
-e HTTP_PROXY=$HTTP_PROXY
-e HTTPS_PROXY=$HTTPS_PROXY
-v /mnt/nvme0n1/miles_ci:/data/miles_ci
-v /mnt/nvme0n1/miles_ci/models:/root/models
-v /mnt/nvme0n1/miles_ci/datasets:/root/datasets
Expand Down Expand Up @@ -258,10 +278,6 @@ jobs:
--ulimit stack=67108864
--memory=0
--memory-swap=0
-e http_proxy=$http_proxy
-e https_proxy=$https_proxy
-e HTTP_PROXY=$HTTP_PROXY
-e HTTPS_PROXY=$HTTPS_PROXY
-v /mnt/nvme0n1/miles_ci:/data/miles_ci
-v /mnt/nvme0n1/miles_ci/models:/root/models
-v /mnt/nvme0n1/miles_ci/datasets:/root/datasets
Expand Down Expand Up @@ -302,10 +318,6 @@ jobs:
--ulimit stack=67108864
--memory=0
--memory-swap=0
-e http_proxy=$http_proxy
-e https_proxy=$https_proxy
-e HTTP_PROXY=$HTTP_PROXY
-e HTTPS_PROXY=$HTTPS_PROXY
-v /mnt/nvme0n1/miles_ci:/data/miles_ci
-v /mnt/nvme0n1/miles_ci/models:/root/models
-v /mnt/nvme0n1/miles_ci/datasets:/root/datasets
Expand Down
14 changes: 8 additions & 6 deletions .github/workflows/pr-test.yml.j2
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
<% set jobs = {
'fast': {
'test_executor': 'pytest',
'tests': [
{'test_file': 'fast', 'num_gpus': 0},
],
},
'e2e-test-short': {
'label': 'run-ci-short',
'tests': [
Expand Down Expand Up @@ -95,7 +101,7 @@ concurrency:
jobs:
<% for job_name, config in jobs.items() %>
<< job_name >>:
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, '<< config.label >>'))
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request<% if config.label %> && contains(github.event.pull_request.labels.*.name, '<< config.label >>')<% endif %>)
runs-on: self-hosted
container:
image: << config.image if config.image else 'radixark/miles:latest' >>
Expand All @@ -107,10 +113,6 @@ jobs:
--ulimit stack=67108864
--memory=0
--memory-swap=0
-e http_proxy=$http_proxy
-e https_proxy=$https_proxy
-e HTTP_PROXY=$HTTP_PROXY
-e HTTPS_PROXY=$HTTPS_PROXY
-v /mnt/nvme0n1/miles_ci:/data/miles_ci
-v /mnt/nvme0n1/miles_ci/models:/root/models
-v /mnt/nvme0n1/miles_ci/datasets:/root/datasets
Expand All @@ -136,5 +138,5 @@ jobs:

- name: Execute
shell: bash
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- << config.test_executor | default('python') >> tests/${{ matrix.info.test_file }}
<% endfor %>
1 change: 1 addition & 0 deletions examples/openai_format/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""OpenAI format examples."""
57 changes: 57 additions & 0 deletions examples/openai_format/dapo_math.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""
DAPO math OpenAI format example for token in/out verification.
"""

import argparse
from typing import Any

from openai import AsyncOpenAI

from miles.rollout.base_types import GenerateFnInput, GenerateFnOutput
from miles.rollout.generate_utils.openai_endpoint_utils import (
OpenAIEndpointTracer,
compute_samples_from_openai_records,
)
from miles.rollout.generate_utils.sample_utils import merge_samples

# System message used by build_dapo_math_messages: asks for a concise solution
# that ends with the final answer written as \boxed{integer}.
_DAPO_MATH_SYSTEM_PROMPT = (
"Solve the math problem and return the final answer as \\boxed{integer}. "
"Keep the reasoning concise and finish with the boxed answer."
)


async def generate(input: GenerateFnInput) -> GenerateFnOutput:
    """Send the sample's prompt through a traced OpenAI endpoint and build samples from the records.

    The prompt of ``input.sample`` is normalized to a message list and sent as a
    single chat completion against the tracer's base URL. The records collected
    by the tracer are then converted into samples. Unless
    ``input.args.generate_multi_samples`` is set, those samples are passed
    through ``merge_samples`` before being returned.
    """
    tracer = await OpenAIEndpointTracer.create(input.args)
    chat_messages = _normalize_prompt(input.sample.prompt)
    await _run_single_turn_openai(base_url=tracer.base_url, messages=chat_messages)

    recorded = await tracer.collect_records()
    tokenizer = input.state.tokenizer
    samples = compute_samples_from_openai_records(input.sample, recorded, tokenizer)

    # Multi-sample mode hands back the per-record samples untouched.
    if input.args.generate_multi_samples:
        return GenerateFnOutput(samples=samples)
    return GenerateFnOutput(samples=merge_samples(samples, tokenizer))


def _add_arguments(parser: argparse.ArgumentParser):
parser.add_argument("--generate-multi-samples", action="store_true")


# Attach the argument hook to `generate` as a function attribute; presumably the
# framework looks up `add_arguments` on the loaded generate function — TODO confirm.
generate.add_arguments = _add_arguments


def build_dapo_math_messages(question: str) -> list[dict[str, str]]:
    """Return a two-message chat: the DAPO math system prompt, then ``question`` as the user turn."""
    system_message = {"role": "system", "content": _DAPO_MATH_SYSTEM_PROMPT}
    user_message = {"role": "user", "content": question}
    return [system_message, user_message]


def _normalize_prompt(prompt: Any) -> list[dict[str, Any]]:
if isinstance(prompt, list):
return prompt
return build_dapo_math_messages(prompt)


async def _run_single_turn_openai(base_url: str, messages: list[dict[str, Any]]) -> None:
    """Send ``messages`` as one chat completion request to ``base_url``.

    The response is not returned; the caller obtains its data from the tracer
    records instead. The request uses the model name ``"default"`` and the
    placeholder API key ``"empty"``.

    Args:
        base_url: Base URL of the OpenAI-compatible endpoint.
        messages: Chat messages to send.
    """
    # Use the client as an async context manager so its underlying HTTP
    # resources are released even when the request raises.
    async with AsyncOpenAI(base_url=base_url, api_key="empty") as client:
        await client.chat.completions.create(model="default", messages=messages)
33 changes: 28 additions & 5 deletions miles/ray/rollout.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,15 @@
from sglang.srt.constants import GPU_MEMORY_TYPE_CUDA_GRAPH, GPU_MEMORY_TYPE_KV_CACHE, GPU_MEMORY_TYPE_WEIGHTS

from miles.backends.sglang_utils.sglang_engine import SGLangEngine
from miles.rollout.base_types import call_rollout_fn
from miles.rollout.base_types import (
RolloutFnConstructorInput,
RolloutFnEvalInput,
RolloutFnTrainInput,
call_rollout_fn,
)
from miles.rollout.inference_rollout.compatibility import call_rollout_function, load_rollout_function
from miles.utils import tracking_utils
from miles.utils.environ import enable_experimental_rollout_refactor
from miles.utils.health_monitor import RolloutHealthMonitor
from miles.utils.http_utils import _wrap_ipv6, find_available_port, get_host_info, init_http_client
from miles.utils.iter_utils import group_by
Expand Down Expand Up @@ -53,8 +60,14 @@ def __init__(self, args, pg):
data_source_cls = load_function(self.args.data_source_path)
self.data_source = data_source_cls(args)

self.generate_rollout = load_function(self.args.rollout_function_path)
self.eval_generate_rollout = load_function(self.args.eval_function_path)
self.use_experimental_refactor = enable_experimental_rollout_refactor()
if self.use_experimental_refactor:
input = RolloutFnConstructorInput(args=args, data_source=self.data_source)
self.generate_rollout = load_rollout_function(input, self.args.rollout_function_path)
self.eval_generate_rollout = load_rollout_function(input, self.args.eval_function_path)
else:
self.generate_rollout = load_function(self.args.rollout_function_path)
self.eval_generate_rollout = load_function(self.args.eval_function_path)
self.custom_reward_post_process_func = None
if self.args.custom_reward_post_process_path is not None:
self.custom_reward_post_process_func = load_function(self.args.custom_reward_post_process_path)
Expand Down Expand Up @@ -142,7 +155,12 @@ def eval(self, rollout_id):
return
self.health_monitoring_resume()

result = call_rollout_fn(self.eval_generate_rollout, self.args, rollout_id, self.data_source, evaluation=True)
if self.use_experimental_refactor:
result = call_rollout_function(self.eval_generate_rollout, RolloutFnEvalInput(rollout_id=rollout_id))
else:
result = call_rollout_fn(
self.eval_generate_rollout, self.args, rollout_id, self.data_source, evaluation=True
)
data = result.data
self._save_debug_rollout_data(data, rollout_id=rollout_id, evaluation=True)
metrics = _log_eval_rollout_data(rollout_id, self.args, data, result.metrics)
Expand Down Expand Up @@ -224,7 +242,12 @@ def _get_rollout_data(self, rollout_id):
)
metrics = None
else:
data = call_rollout_fn(self.generate_rollout, self.args, rollout_id, self.data_source, evaluation=False)
if self.use_experimental_refactor:
data = call_rollout_function(self.generate_rollout, RolloutFnTrainInput(rollout_id=rollout_id))
else:
data = call_rollout_fn(
self.generate_rollout, self.args, rollout_id, self.data_source, evaluation=False
)
metrics = data.metrics
data = data.samples
# flatten the data if it is a list of lists
Expand Down
66 changes: 65 additions & 1 deletion miles/rollout/base_types.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,86 @@
from __future__ import annotations

from argparse import Namespace
from dataclasses import dataclass
from typing import Any
from typing import TYPE_CHECKING, Any

from miles.rollout.data_source import DataSource
from miles.utils.types import Sample

if TYPE_CHECKING:
from miles.rollout.inference_rollout.inference_rollout_common import GenerateState


@dataclass(frozen=True)
class RolloutFnConstructorInput:
    """Immutable bundle of the objects supplied when a rollout function is loaded."""

    # Parsed argument namespace.
    args: Namespace
    # TODO may refactor DataSource API
    data_source: DataSource


@dataclass(frozen=True)
class RolloutFnBaseInput:
    """Immutable per-call input shared by rollout functions.

    Subclasses override ``evaluation``; on this base class it is not defined.
    """

    rollout_id: int

    @property
    def evaluation(self):
        """Whether the call is an evaluation call; raises ``NotImplementedError`` on the base class."""
        raise NotImplementedError


# Train and eval inputs are separate subclasses so that each can carry different data in the future.
@dataclass(frozen=True)
class RolloutFnTrainInput(RolloutFnBaseInput):
    """Rollout function input for a training call."""

    @property
    def evaluation(self):
        """Always ``False``: this input is for training."""
        return False


@dataclass(frozen=True)
class RolloutFnEvalInput(RolloutFnBaseInput):
    """Rollout function input for an evaluation call."""

    @property
    def evaluation(self):
        """Always ``True``: this input is for evaluation."""
        return True


# TODO make it frozen
@dataclass
class RolloutFnTrainOutput:
    """Result of a training rollout call.

    ``samples`` holds groups of samples (a list of lists); ``metrics`` is an
    optional mapping of metric values and defaults to ``None``.
    """

    samples: list[list[Sample]]
    # Annotated as optional because the default is None, not an empty dict.
    metrics: dict[str, Any] | None = None


# TODO make it frozen
@dataclass
class RolloutFnEvalOutput:
    """Result of an evaluation rollout call.

    ``data`` is a two-level mapping of evaluation results; ``metrics`` is an
    optional mapping of metric values and defaults to ``None``.
    """

    data: dict[str, dict[str, Any]]
    # Annotated as optional because the default is None, not an empty dict.
    metrics: dict[str, Any] | None = None


# Union aliases covering the train and eval variants of rollout function inputs and outputs.
RolloutFnInput = RolloutFnTrainInput | RolloutFnEvalInput
RolloutFnOutput = RolloutFnTrainOutput | RolloutFnEvalOutput


@dataclass(frozen=True)
class GenerateFnInput:
    """Immutable input handed to a generate function for one sample."""

    state: GenerateState
    sample: Sample
    sampling_params: dict[str, Any]
    evaluation: bool

    @property
    def args(self) -> Namespace:
        """Shortcut for ``self.state.args``."""
        generate_state = self.state
        return generate_state.args


@dataclass(frozen=True)
class GenerateFnOutput:
    """Immutable result of a generate function.

    ``samples`` is either a single sample or a list of samples: one generate
    call may lead to multiple samples, such as multi-agent, tree-like
    exploration, or multi-turn with removing thinking tokens.
    """

    samples: Sample | list[Sample]


def call_rollout_fn(fn, *args, evaluation: bool, **kwargs):
"""Legacy rollout function call interface. Used when MILES_EXPERIMENTAL_ROLLOUT_REFACTOR is disabled."""
output = fn(*args, **kwargs, evaluation=evaluation)

# compatibility for legacy version
Expand Down
Loading