Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
70 commits
Select commit Hold shift + click to select a range
0120399
test ci
Jan 5, 2026
4a6cab5
update
Jan 5, 2026
1ff2341
update
Jan 5, 2026
a825ce9
Merge branch 'radixark:main' into miles-ci
yushengsu-thu Jan 9, 2026
50f56c5
update
yushengsu-thu Jan 9, 2026
b7a89d6
Merge remote-tracking branch 'upstream/main' into miles-ci
yushengsu-thu Jan 17, 2026
ebd8df6
update
yushengsu-thu Jan 17, 2026
099b501
update
yushengsu-thu Jan 17, 2026
cf579d8
update
yushengsu-thu Jan 17, 2026
df4b2f5
update
yushengsu-thu Jan 17, 2026
7e549e2
update
yushengsu-thu Jan 17, 2026
11f2bd8
fix pre-test http proxy problem
yushengsu-thu Jan 17, 2026
c800b7f
update
yushengsu-thu Jan 17, 2026
1613989
update
yushengsu-thu Jan 17, 2026
b44926d
update
yushengsu-thu Jan 17, 2026
c0e729c
update
yushengsu-thu Jan 18, 2026
7823fc3
update
yushengsu-thu Jan 18, 2026
bd933da
update
yushengsu-thu Jan 18, 2026
46e584b
update
yushengsu-thu Jan 18, 2026
df42816
update
yushengsu-thu Jan 18, 2026
e56305e
update
yushengsu-thu Jan 18, 2026
3b19e59
update
yushengsu-thu Jan 18, 2026
f144f30
update
yushengsu-thu Jan 18, 2026
795c283
update
yushengsu-thu Jan 18, 2026
6febe6b
update
yushengsu-thu Jan 18, 2026
fa32531
update
yushengsu-thu Jan 18, 2026
1441193
update
yushengsu-thu Jan 18, 2026
73e9a12
update
yushengsu-thu Jan 18, 2026
7153c4c
update
yushengsu-thu Jan 18, 2026
d2cb61d
fix ci test
yushengsu-thu Jan 18, 2026
e2ce0fe
fix
yushengsu-thu Jan 18, 2026
5acfadd
update
yushengsu-thu Jan 18, 2026
c9dc559
update
yushengsu-thu Jan 18, 2026
5680741
update
yushengsu-thu Jan 18, 2026
3f72150
update
yushengsu-thu Jan 18, 2026
a099bb5
update
yushengsu-thu Jan 18, 2026
537e282
update
yushengsu-thu Jan 18, 2026
27f3a77
update
yushengsu-thu Jan 18, 2026
640ac7a
update
yushengsu-thu Jan 18, 2026
62bfa82
update
yushengsu-thu Jan 18, 2026
cb24e9c
update
yushengsu-thu Jan 18, 2026
52968a3
update
yushengsu-thu Jan 18, 2026
8bcb7f0
update
yushengsu-thu Jan 18, 2026
bb95d36
update
yushengsu-thu Jan 18, 2026
ea52de9
update
yushengsu-thu Jan 18, 2026
08ab3fa
update
yushengsu-thu Jan 18, 2026
28fd2a9
update
yushengsu-thu Jan 18, 2026
412086b
update
yushengsu-thu Jan 18, 2026
6c9a176
update
yushengsu-thu Jan 18, 2026
48cc27f
update
yushengsu-thu Jan 18, 2026
16545ed
update
yushengsu-thu Jan 18, 2026
c1bdc87
remove http becasue need not proxy - VPN
yushengsu-thu Jan 18, 2026
5166592
update
yushengsu-thu Jan 18, 2026
7c95bb3
update
yushengsu-thu Jan 18, 2026
2aabc4c
update
yushengsu-thu Jan 18, 2026
00dd900
update
yushengsu-thu Jan 18, 2026
0daaa5b
update
yushengsu-thu Jan 18, 2026
2716d3a
update
yushengsu-thu Jan 18, 2026
14ecd13
update
yushengsu-thu Jan 19, 2026
59b192a
update
yushengsu-thu Jan 19, 2026
09557fe
update
yushengsu-thu Jan 19, 2026
e4e7303
update
yushengsu-thu Jan 19, 2026
d2dbba6
update
yushengsu-thu Jan 19, 2026
40ebfcf
update
yushengsu-thu Jan 19, 2026
0bbdd8f
Merge remote-tracking branch 'upstream/main' into miles-ci
yushengsu-thu Jan 20, 2026
77d89fe
test
yushengsu-thu Jan 20, 2026
943e43f
update
yushengsu-thu Jan 20, 2026
006b25f
Merge remote-tracking branch 'upstream/main' into miles-ci
yushengsu-thu Jan 22, 2026
e277f0e
Merge remote-tracking branch 'upstream/main' into miles-ci
yushengsu-thu Jan 22, 2026
1475de6
fix run-ci-image problem
yushengsu-thu Jan 22, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 20 additions & 65 deletions .github/workflows/pr-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,17 @@ jobs:
options: >
--gpus all
--ipc=host
--shm-size=16g
--shm-size=32g
--ulimit memlock=-1
--ulimit stack=67108864
--memory=0
--memory-swap=0
-v /mnt/nvme0n1/miles_ci:/data/miles_ci
-v /mnt/nvme0n1/miles_ci/models:/root/models
-v /mnt/nvme0n1/miles_ci/datasets:/root/datasets
--privileged
--ulimit nofile=65535:65535
-v /tmp:/tmp
strategy:
fail-fast: false
matrix:
Expand All @@ -52,11 +55,26 @@ jobs:
GITHUB_COMMIT_NAME: ${{ github.sha }}_${{ github.event.pull_request.number || 'non-pr' }}
WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
MILES_TEST_ENABLE_INFINITE_RUN: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.infinite_run) || 'false' }}
MILES_TEST_USE_DEEPEP: ${{ matrix.info.use_deepep || '0' }}
MILES_TEST_USE_FP8_ROLLOUT: ${{ matrix.info.use_fp8_rollout || '0' }}
MILES_TEST_ENABLE_EVAL: ${{ matrix.info.enable_eval || '1' }}

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Cleanup Ray processes
shell: bash
run: |
pkill -9 -f 'ray::' 2>/dev/null || true
pkill -9 -f raylet 2>/dev/null || true
pkill -9 -f gcs_server 2>/dev/null || true
pkill -9 -f 'ray-dashboard' 2>/dev/null || true
pkill -9 sglang 2>/dev/null || true
ray stop --force 2>/dev/null || true
rm -rf /tmp/ray/* 2>/dev/null || true
sleep 3

- name: Install
shell: bash
run: cd $GITHUB_WORKSPACE && pip install -e . --no-deps --break-system-packages
Expand Down Expand Up @@ -123,15 +141,6 @@ jobs:
shell: bash
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}

- name: Post-test cleanup
if: always()
shell: bash
run: |
pkill -9 -f 'ray::' 2>/dev/null || true
pkill -9 -f raylet 2>/dev/null || true
ray stop --force 2>/dev/null || true
rm -rf /tmp/ray/* 2>/dev/null || true

e2e-test-fsdp:
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-fsdp'))
runs-on: self-hosted
Expand Down Expand Up @@ -190,15 +199,6 @@ jobs:
shell: bash
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}

- name: Post-test cleanup
if: always()
shell: bash
run: |
pkill -9 -f 'ray::' 2>/dev/null || true
pkill -9 -f raylet 2>/dev/null || true
ray stop --force 2>/dev/null || true
rm -rf /tmp/ray/* 2>/dev/null || true

e2e-test-megatron:
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-megatron'))
runs-on: self-hosted
Expand Down Expand Up @@ -257,15 +257,6 @@ jobs:
shell: bash
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}

- name: Post-test cleanup
if: always()
shell: bash
run: |
pkill -9 -f 'ray::' 2>/dev/null || true
pkill -9 -f raylet 2>/dev/null || true
ray stop --force 2>/dev/null || true
rm -rf /tmp/ray/* 2>/dev/null || true

e2e-test-precision:
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-precision'))
runs-on: self-hosted
Expand Down Expand Up @@ -324,15 +315,6 @@ jobs:
shell: bash
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}

- name: Post-test cleanup
if: always()
shell: bash
run: |
pkill -9 -f 'ray::' 2>/dev/null || true
pkill -9 -f raylet 2>/dev/null || true
ray stop --force 2>/dev/null || true
rm -rf /tmp/ray/* 2>/dev/null || true

e2e-test-ckpt:
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-ckpt'))
runs-on: self-hosted
Expand Down Expand Up @@ -391,15 +373,6 @@ jobs:
shell: bash
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}

- name: Post-test cleanup
if: always()
shell: bash
run: |
pkill -9 -f 'ray::' 2>/dev/null || true
pkill -9 -f raylet 2>/dev/null || true
ray stop --force 2>/dev/null || true
rm -rf /tmp/ray/* 2>/dev/null || true

e2e-test-long:
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-long'))
runs-on: self-hosted
Expand Down Expand Up @@ -458,20 +431,11 @@ jobs:
shell: bash
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}

- name: Post-test cleanup
if: always()
shell: bash
run: |
pkill -9 -f 'ray::' 2>/dev/null || true
pkill -9 -f raylet 2>/dev/null || true
ray stop --force 2>/dev/null || true
rm -rf /tmp/ray/* 2>/dev/null || true

e2e-test-image:
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-image'))
runs-on: self-hosted
container:
image: radixark/miles-test:latest
image: radixark/miles:latest
options: >
--gpus all
--ipc=host
Expand Down Expand Up @@ -524,12 +488,3 @@ jobs:
- name: Execute
shell: bash
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}

- name: Post-test cleanup
if: always()
shell: bash
run: |
pkill -9 -f 'ray::' 2>/dev/null || true
pkill -9 -f raylet 2>/dev/null || true
ray stop --force 2>/dev/null || true
rm -rf /tmp/ray/* 2>/dev/null || true
2 changes: 1 addition & 1 deletion .github/workflows/pr-test.yml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
},
'e2e-test-image': {
'label': 'run-ci-image',
'image': 'radixark/miles-test:latest',
'image': 'radixark/miles:latest',
'tests': [
{'test_file': 'test_qwen2.5_0.5B_gsm8k_async_short.py', 'num_gpus': 4},
{'test_file': 'test_qwen2.5_0.5B_gsm8k_short.py', 'num_gpus': 4},
Expand Down
Loading